In [None]:
html_content = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>CORAL ASR Evaluation Dashboard</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <style>
        body {
            background: linear-gradient(to bottom right, #0f172a, #581c87, #0f172a);
            min-height: 100vh;
        }
        .glass {
            background: rgba(255, 255, 255, 0.1);
            backdrop-filter: blur(10px);
            border: 1px solid rgba(255, 255, 255, 0.2);
        }
        .spinner {
            border: 3px solid rgba(255, 255, 255, 0.3);
            border-top: 3px solid white;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        .progress-bar {
            transition: width 0.5s ease;
        }
    </style>
</head>
<body class="p-8">
    <div class="max-w-7xl mx-auto">
        <div class="text-center mb-8">
            <h1 class="text-5xl font-bold text-white mb-2">CORAL ASR Evaluation</h1>
            <p class="text-purple-200">Urdu Speech Recognition Model Testing</p>
        </div>

        <div id="errorBox" class="hidden bg-red-500/20 border border-red-500 rounded-lg p-4 mb-6 flex items-center gap-3">
            <svg class="text-red-400" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                <circle cx="12" cy="12" r="10"></circle>
                <line x1="12" y1="8" x2="12" y2="12"></line>
                <line x1="12" y1="16" x2="12.01" y2="16"></line>
            </svg>
            <span id="errorText" class="text-red-100"></span>
        </div>

        <div id="uploadSection" class="glass rounded-2xl p-8 border border-white/20">
            <h2 class="text-2xl font-bold text-white mb-6 flex items-center gap-3">
                <svg width="28" height="28" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                    <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
                    <polyline points="17 8 12 3 7 8"></polyline>
                    <line x1="12" y1="3" x2="12" y2="15"></line>
                </svg>
                Upload Dataset
            </h2>
            <div class="space-y-4">
                <div class="border-2 border-dashed border-purple-400 rounded-lg p-8 text-center hover:border-purple-300 transition">
                    <input type="file" accept=".zip" id="fileInput" class="hidden">
                    <label for="fileInput" class="cursor-pointer">
                        <svg class="mx-auto mb-4 text-purple-300" width="48" height="48" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                            <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
                            <polyline points="17 8 12 3 7 8"></polyline>
                            <line x1="12" y1="3" x2="12" y2="15"></line>
                        </svg>
                        <p class="text-white text-lg mb-2" id="fileName">Click to upload dataset</p>
                        <p class="text-purple-200 text-sm">ZIP file containing ur/other.tsv and ur/clips/</p>
                    </label>
                </div>
                <button id="uploadBtn" disabled class="w-full bg-purple-600 hover:bg-purple-700 disabled:bg-gray-600 disabled:cursor-not-allowed text-white font-semibold py-3 rounded-lg transition">
                    Upload Dataset
                </button>
            </div>
        </div>

        <div id="uploadingSection" class="hidden glass rounded-2xl p-8 border border-white/20 text-center">
            <div class="spinner mx-auto mb-4"></div>
            <p class="text-white text-xl">Uploading dataset...</p>
        </div>

        <div id="configSection" class="hidden glass rounded-2xl p-8 border border-white/20">
            <h2 class="text-2xl font-bold text-white mb-6">Configure Evaluation</h2>
            
            <div class="mb-6">
                <label class="text-white font-semibold mb-3 block">Select Models:</label>
                <div id="modelsList" class="grid grid-cols-2 gap-3"></div>
            </div>

            <div class="grid grid-cols-2 gap-6 mb-6">
                <div>
                    <label class="text-white font-semibold mb-2 block">Max Samples:</label>
                    <input type="number" id="maxSamples" value="10" min="1" class="w-full bg-white/10 text-white border border-white/20 rounded-lg px-4 py-2">
                </div>
                <div class="flex items-center gap-3">
                    <input type="checkbox" id="useFP16" checked class="w-5 h-5">
                    <label for="useFP16" class="text-white font-semibold">Use FP16 (Faster)</label>
                </div>
            </div>

            <button id="startBtn" class="w-full bg-gradient-to-r from-purple-600 to-pink-600 hover:from-purple-700 hover:to-pink-700 text-white font-bold py-4 rounded-lg flex items-center justify-center gap-3 transition">
                <svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor">
                    <polygon points="5 3 19 12 5 21 5 3"></polygon>
                </svg>
                Start Evaluation
            </button>
        </div>

        <div id="runningSection" class="hidden glass rounded-2xl p-8 border border-white/20">
            <h2 class="text-2xl font-bold text-white mb-6 flex items-center gap-3">
                <div class="spinner"></div>
                Evaluation in Progress
            </h2>
            <div class="space-y-4">
                <div>
                    <div class="flex justify-between text-white mb-2">
                        <span>Progress</span>
                        <span id="progressPercent">0%</span>
                    </div>
                    <div class="w-full bg-white/20 rounded-full h-4 overflow-hidden">
                        <div id="progressBar" class="progress-bar bg-gradient-to-r from-purple-500 to-pink-500 h-full" style="width: 0%"></div>
                    </div>
                </div>
                <p id="currentModel" class="text-purple-200 text-center"></p>
            </div>
        </div>

        <div id="resultsSection" class="hidden space-y-6">
            <div class="glass rounded-2xl p-8 border border-white/20">
                <div class="flex items-center justify-between mb-6">
                    <h2 class="text-2xl font-bold text-white flex items-center gap-3">
                        <svg class="text-green-400" width="28" height="28" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                            <path d="M22 11.08V12a10 10 0 1 1-5.93-9.14"></path>
                            <polyline points="22 4 12 14.01 9 11.01"></polyline>
                        </svg>
                        Evaluation Complete
                    </h2>
                    <button id="downloadBtn" class="bg-green-600 hover:bg-green-700 text-white font-semibold px-6 py-2 rounded-lg flex items-center gap-2 transition">
                        <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                            <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
                            <polyline points="7 10 12 15 17 10"></polyline>
                            <line x1="12" y1="15" x2="12" y2="3"></line>
                        </svg>
                        Download CSV
                    </button>
                </div>

                <div class="grid grid-cols-2 gap-6 mb-6">
                    <div class="bg-gradient-to-br from-purple-600/30 to-pink-600/30 rounded-xl p-6 border border-purple-400/30">
                        <p class="text-purple-200 text-sm mb-2">Best Model</p>
                        <p id="bestModel" class="text-white text-3xl font-bold"></p>
                    </div>
                    <div class="bg-gradient-to-br from-green-600/30 to-blue-600/30 rounded-xl p-6 border border-green-400/30">
                        <p class="text-green-200 text-sm mb-2">Best WER</p>
                        <p id="bestWER" class="text-white text-3xl font-bold"></p>
                    </div>
                </div>

                <div class="grid grid-cols-3 gap-4 mb-6">
                    <img id="werComparison" class="rounded-lg border border-white/20 w-full">
                    <img id="werDistribution" class="rounded-lg border border-white/20 w-full">
                    <img id="calibration" class="rounded-lg border border-white/20 w-full">
                </div>
            </div>

            <div class="glass rounded-2xl p-8 border border-white/20">
                <div class="mb-6">
                    <label class="text-white font-semibold mb-3 block">View Results by Model:</label>
                    <select id="modelSelect" class="w-full bg-white/10 text-white border border-white/20 rounded-lg px-4 py-2">
                    </select>
                </div>

                <div id="modelMetrics" class="grid grid-cols-4 gap-4 mb-6"></div>

                <div class="overflow-x-auto">
                    <table class="w-full text-left">
                        <thead>
                            <tr class="border-b border-white/20">
                                <th class="text-purple-200 font-semibold p-3">Audio ID</th>
                                <th class="text-purple-200 font-semibold p-3">Reference</th>
                                <th class="text-purple-200 font-semibold p-3">Hypothesis</th>
                                <th class="text-purple-200 font-semibold p-3">WER</th>
                                <th class="text-purple-200 font-semibold p-3">CER</th>
                            </tr>
                        </thead>
                        <tbody id="resultsTable"></tbody>
                    </table>
                </div>
            </div>
        </div>
    </div>

    <script>
        // API requests go to the same origin that served this page.
        const API_URL = window.location.origin;
        // Model names returned by GET /api/models.
        let availableModels = [];
        // Values of the model checkboxes that are currently ticked.
        let selectedModels = [];
        // Job identifier returned by POST /api/upload; used in every later request URL/body.
        let jobId = null;
        // Parsed JSON body of GET /api/results/<jobId>; null until results are loaded.
        let results = null;

        /**
         * Show `message` in the red error banner and hide it again after 5 seconds.
         *
         * Any hide timer left over from an earlier call is cancelled first;
         * otherwise the older timer would hide a newer message before its own
         * 5 seconds have elapsed. A missing message (for example an error
         * response without an `error` field) falls back to a generic text
         * instead of rendering "undefined".
         */
        function showError(message) {
            const box = document.getElementById('errorBox');
            document.getElementById('errorText').textContent = message || 'An unknown error occurred';
            box.classList.remove('hidden');

            // The pending timer id is kept on the function object so no extra global is needed.
            clearTimeout(showError.hideTimer);
            showError.hideTimer = setTimeout(() => {
                box.classList.add('hidden');
            }, 5000);
        }

        /**
         * Make exactly one of the top-level dashboard panels visible.
         * Every panel is hidden first, then the one named by `sectionId` is revealed.
         */
        function showSection(sectionId) {
            const panelIds = ['uploadSection', 'uploadingSection', 'configSection', 'runningSection', 'resultsSection'];
            for (const panelId of panelIds) {
                document.getElementById(panelId).classList.add('hidden');
            }
            document.getElementById(sectionId).classList.remove('hidden');
        }

        /**
         * Fetch the list of selectable models from GET /api/models and render it.
         *
         * A non-2xx response is reported through the error banner instead of
         * being parsed as if it were a model list, and a payload without a
         * `models` array is treated as an empty list.
         */
        async function loadModels() {
            try {
                const response = await fetch(`${API_URL}/api/models`);
                if (!response.ok) {
                    throw new Error(`server responded with HTTP ${response.status}`);
                }
                const data = await response.json();
                availableModels = Array.isArray(data.models) ? data.models : [];
                renderModels();
            } catch (error) {
                showError('Failed to load models: ' + error.message);
            }
        }

        /**
         * Rebuild the model checkbox list from `availableModels`.
         *
         * Elements are created through the DOM API and model names are set via
         * `value` / `textContent`, so a name containing quotes or angle brackets
         * cannot break the markup. Each checkbox starts out reflecting the
         * current `selectedModels`, which keeps the UI and the selection in
         * sync if the list is rendered more than once, and toggling a box
         * never leaves duplicate entries in `selectedModels`.
         */
        function renderModels() {
            const container = document.getElementById('modelsList');
            container.innerHTML = '';

            availableModels.forEach(model => {
                const checkbox = document.createElement('input');
                checkbox.type = 'checkbox';
                checkbox.value = model;
                checkbox.className = 'w-5 h-5 model-checkbox';
                checkbox.checked = selectedModels.includes(checkbox.value);
                checkbox.addEventListener('change', () => {
                    // Drop any existing entry first so a model is listed at most once.
                    const others = selectedModels.filter(m => m !== checkbox.value);
                    selectedModels = checkbox.checked ? [...others, checkbox.value] : others;
                });

                const caption = document.createElement('span');
                caption.className = 'text-white';
                caption.textContent = model;

                const label = document.createElement('label');
                label.className = 'flex items-center gap-3 bg-white/5 p-3 rounded-lg cursor-pointer hover:bg-white/10 transition';
                label.append(checkbox, caption);
                container.appendChild(label);
            });
        }

        // Mirror the file picker state in the drop zone label and the upload button.
        // When the selection is cleared (e.g. the picker was cancelled) the prompt
        // text is restored and the button disabled again, instead of leaving the
        // previous file name with an enabled button.
        document.getElementById('fileInput').addEventListener('change', (e) => {
            const file = e.target.files[0];
            document.getElementById('fileName').textContent = file ? file.name : 'Click to upload dataset';
            document.getElementById('uploadBtn').disabled = !file;
        });

        // Send the chosen ZIP to POST /api/upload as multipart form data.
        // On success the returned job id is stored and the configuration panel
        // is shown; on any failure the error is displayed and the upload panel
        // comes back.
        document.getElementById('uploadBtn').addEventListener('click', async () => {
            const selectedFile = document.getElementById('fileInput').files[0];
            if (!selectedFile) {
                showError('Please select a file');
                return;
            }

            showSection('uploadingSection');

            const payload = new FormData();
            payload.append('file', selectedFile);

            try {
                const response = await fetch(`${API_URL}/api/upload`, { method: 'POST', body: payload });
                const data = await response.json();

                if (!response.ok) {
                    showError(data.error);
                    showSection('uploadSection');
                    return;
                }

                jobId = data.job_id;
                showSection('configSection');
            } catch (error) {
                showError('Upload failed: ' + error.message);
                showSection('uploadSection');
            }
        });

        // Validate the form, submit the evaluation request and start polling.
        //
        // The sample count is parsed in base 10 and rejected unless it is a whole
        // number >= 1 (an empty field would otherwise be sent as null). Polling
        // only starts when the server accepted the request; a non-2xx response is
        // surfaced as an error, using the response's `error` field when present.
        document.getElementById('startBtn').addEventListener('click', async () => {
            if (selectedModels.length === 0) {
                showError('Please select at least one model');
                return;
            }

            const maxSamples = parseInt(document.getElementById('maxSamples').value, 10);
            if (!Number.isInteger(maxSamples) || maxSamples < 1) {
                showError('Max Samples must be a whole number of at least 1');
                return;
            }
            const useFP16 = document.getElementById('useFP16').checked;

            showSection('runningSection');

            try {
                const response = await fetch(`${API_URL}/api/evaluate`, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
                        job_id: jobId,
                        models: selectedModels,
                        max_samples: maxSamples,
                        use_fp16: useFP16
                    })
                });

                if (!response.ok) {
                    let detail = `server responded with HTTP ${response.status}`;
                    try {
                        const data = await response.json();
                        if (data && data.error) {
                            detail = data.error;
                        }
                    } catch (parseError) {
                        // Error body was not JSON; keep the status-based message.
                    }
                    throw new Error(detail);
                }

                pollStatus();
            } catch (error) {
                showError('Evaluation failed: ' + error.message);
                showSection('configSection');
            }
        });

        /**
         * Poll GET /api/status/<jobId> every 2 seconds until the job finishes.
         *
         * Requests are issued strictly one after another (wait, fetch, handle),
         * so a slow response can never overlap the next poll and `loadResults`
         * runs at most once per job. Failed polls are logged and retried; after
         * several consecutive failures a single warning is shown while polling
         * continues. A missing or non-numeric `progress` is displayed as 0.
         *
         * On `completed` the results are loaded; on `failed` the server's error
         * is shown and the configuration panel returns.
         */
        async function pollStatus() {
            const POLL_INTERVAL_MS = 2000;
            const FAILURES_BEFORE_WARNING = 5;
            let consecutiveFailures = 0;

            while (true) {
                await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS));

                let data;
                try {
                    const response = await fetch(`${API_URL}/api/status/${jobId}`);
                    if (!response.ok) {
                        throw new Error(`server responded with HTTP ${response.status}`);
                    }
                    data = await response.json();
                    consecutiveFailures = 0;
                } catch (error) {
                    console.error('Status poll error:', error);
                    consecutiveFailures += 1;
                    // Warn once per failure streak rather than on every retry.
                    if (consecutiveFailures === FAILURES_BEFORE_WARNING) {
                        showError('Lost contact with the server; still retrying...');
                    }
                    continue;
                }

                const progress = Number(data.progress) || 0;
                document.getElementById('progressPercent').textContent = progress + '%';
                document.getElementById('progressBar').style.width = progress + '%';

                if (data.current_model) {
                    document.getElementById('currentModel').textContent =
                        `Currently evaluating: ${data.current_model}`;
                }

                if (data.status === 'completed') {
                    await loadResults();
                    return;
                }
                if (data.status === 'failed') {
                    showError(data.error);
                    showSection('configSection');
                    return;
                }
            }
        }

        /**
         * Fetch GET /api/results/<jobId> and populate the results panel.
         *
         * Fills the best-model summary, points the three plot images at their
         * /api/plot URLs, rebuilds the model dropdown from the distinct
         * `model_name` values in `detailed`, and shows the first model.
         *
         * The dropdown handler is assigned via `onchange`, so calling this
         * function again replaces the handler instead of stacking another one.
         * A non-2xx response or an empty `detailed` list is reported as an
         * error, and on any failure the configuration panel is restored so the
         * page is not left on the progress spinner.
         */
        async function loadResults() {
            try {
                const response = await fetch(`${API_URL}/api/results/${jobId}`);
                if (!response.ok) {
                    throw new Error(`server responded with HTTP ${response.status}`);
                }

                const payload = await response.json();
                if (!Array.isArray(payload.detailed) || payload.detailed.length === 0) {
                    throw new Error('no per-sample results were returned');
                }
                results = payload;

                document.getElementById('bestModel').textContent = results.best_model;
                document.getElementById('bestWER').textContent = (results.best_wer * 100).toFixed(2) + '%';

                document.getElementById('werComparison').src = `${API_URL}/api/plot/${jobId}/wer_comparison.png`;
                document.getElementById('werDistribution').src = `${API_URL}/api/plot/${jobId}/wer_distribution.png`;
                document.getElementById('calibration').src = `${API_URL}/api/plot/${jobId}/calibration.png`;

                const uniqueModels = [...new Set(results.detailed.map(r => r.model_name))];
                const select = document.getElementById('modelSelect');
                select.innerHTML = '';
                uniqueModels.forEach(model => {
                    const option = document.createElement('option');
                    option.value = model;
                    option.textContent = model;
                    select.appendChild(option);
                });

                select.onchange = () => updateModelView(select.value);
                updateModelView(uniqueModels[0]);

                showSection('resultsSection');
            } catch (error) {
                showError('Failed to load results: ' + error.message);
                showSection('configSection');
            }
        }

        /**
         * Render the summary cards and the per-sample table for one model.
         *
         * Rows of `results.detailed` whose `model_name` equals `modelName` are
         * averaged for the four cards (WER, CER, confidence, ECE) and listed
         * one per table row.
         *
         * All server-provided text is inserted through `textContent` and the
         * `title` property rather than an HTML template: transcripts can
         * contain double quotes, angle brackets or ampersands, which would
         * otherwise cut the `title` attribute short or be parsed as markup.
         * When no rows match, the cards show "N/A" instead of the "NaN%" that
         * dividing by zero would produce.
         */
        function updateModelView(modelName) {
            const modelData = results.detailed.filter(r => r.model_name === modelName);

            // Mean of one numeric field across the model's rows, as a percentage string.
            const meanPercent = (field) => {
                if (modelData.length === 0) {
                    return 'N/A';
                }
                const total = modelData.reduce((sum, r) => sum + r[field], 0);
                return (total / modelData.length * 100).toFixed(2) + '%';
            };

            const metrics = document.getElementById('modelMetrics');
            metrics.innerHTML = '';
            const cards = [
                ['WER', 'wer'],
                ['CER', 'cer'],
                ['CONFIDENCE', 'avg_confidence'],
                ['ECE', 'ece']
            ];
            cards.forEach(([caption, field]) => {
                const captionEl = document.createElement('p');
                captionEl.className = 'text-purple-200 text-xs mb-1';
                captionEl.textContent = caption;

                const valueEl = document.createElement('p');
                valueEl.className = 'text-white text-xl font-bold';
                valueEl.textContent = meanPercent(field);

                const card = document.createElement('div');
                card.className = 'bg-white/5 rounded-lg p-4 border border-white/10';
                card.append(captionEl, valueEl);
                metrics.appendChild(card);
            });

            // Append one text-only cell to `row`; `tooltip` (optional) becomes its title.
            const addCell = (row, className, text, tooltip) => {
                const cell = document.createElement('td');
                cell.className = className;
                cell.textContent = String(text);
                if (tooltip !== undefined) {
                    cell.title = String(tooltip);
                }
                row.appendChild(cell);
            };

            const tbody = document.getElementById('resultsTable');
            tbody.innerHTML = '';
            modelData.forEach(result => {
                const row = document.createElement('tr');
                row.className = 'border-b border-white/10 hover:bg-white/5';
                addCell(row, 'text-white p-3 text-sm', result.audio_id);
                addCell(row, 'text-white p-3 text-sm max-w-xs truncate', result.reference, result.reference);
                addCell(row, 'text-white p-3 text-sm max-w-xs truncate', result.hypothesis, result.hypothesis);
                addCell(row, 'text-white p-3 font-mono', (result.wer * 100).toFixed(2) + '%');
                addCell(row, 'text-white p-3 font-mono', (result.cer * 100).toFixed(2) + '%');
                tbody.appendChild(row);
            });
        }

        // Open the download endpoint for the current job in a new tab.
        document.getElementById('downloadBtn').addEventListener('click', () => {
            const downloadUrl = `${API_URL}/api/download/${jobId}`;
            window.open(downloadUrl, '_blank');
        });

        // Kick off the initial model list request as soon as the script runs.
        loadModels();
    </script>
</body>
</html>
'''

# Persist the dashboard markup built above as a UTF-8 file under /kaggle/working.
with open('/kaggle/working/index.html', mode='w', encoding='utf-8') as html_file:
    html_file.write(html_content)

print("Frontend saved to /kaggle/working/index.html")

In [None]:
import sys
print("Installing dependencies...")
# IPython shell escape: pip is invoked through the interpreter that is running
# this kernel (sys.executable), with -q to keep the installer output quiet.
!{sys.executable} -m pip install -q editdistance flask flask-cors pyngrok

# Progress message printed before the imports below.
print("Loading libraries...")
import torch
import gc
import librosa
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Tuple, Dict
import warnings
import json
import csv
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import editdistance
from collections import defaultdict
from datetime import datetime
import os
import threading
import uuid
import shutil
import zipfile

warnings.filterwarnings('ignore')
sns.set_style("whitegrid")

from transformers import (
    WhisperProcessor, WhisperForConditionalGeneration,
    Wav2Vec2Processor, Wav2Vec2ForCTC,
    SeamlessM4TForSpeechToText, SeamlessM4TProcessor,
    AutoProcessor, AutoModelForCTC
)

# Report which device is available, then refuse to continue without a GPU.
has_cuda = torch.cuda.is_available()
print(f"Device: {'CUDA' if has_cuda else 'CPU'}")

if not has_cuda:
    # One print call, one line per message, same output as three separate prints.
    print(
        "WARNING: CUDA not available. Please enable GPU in Kaggle:",
        "Settings -> Accelerator -> GPU T4 x2",
        "This will run VERY slow on CPU!",
        sep="\n",
    )
    sys.exit(1)

class UrduASRWrapper:
    """Transcribe audio with one of several ASR checkpoints and attach a confidence to each word.

    Only one model/processor pair is held at a time: requesting a different
    model unloads the current one first. Decoded audio is memoised per file
    path in ``audio_cache`` until ``clear_audio_cache`` is called.

    Every extractor returns ``[(word, confidence), ...]`` where all words of
    one utterance share the same utterance-level confidence value.
    """

    # Short model key -> Hugging Face Hub checkpoint id.
    SUPPORTED_MODELS = {
        "whisper-large": "openai/whisper-large-v3",
        "whisper-medium": "openai/whisper-medium",
        "whisper-small": "openai/whisper-small",
        "seamless-large": "facebook/seamless-m4t-v2-large",
        "seamless-medium": "facebook/seamless-m4t-medium",
        "mms-1b": "facebook/mms-1b-all",
        "mms-300m": "facebook/mms-300m",
        "wav2vec2-urdu": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"
    }

    def __init__(self, device: str = None, use_fp16: bool = True):
        """Pick the compute device and precision; no model is loaded yet.

        Args:
            device: ``"cuda"`` / ``"cpu"``; ``None`` selects CUDA when available.
            use_fp16: Request half precision. Only honoured on CUDA.
        """
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device
        self.use_fp16 = use_fp16 and self.device == "cuda"
        print(f"ASR Wrapper initialized on: {self.device} (FP16: {self.use_fp16})")
        self.current_model = None
        self.processor = None
        self.current_model_name = None
        self.audio_cache = {}

    def _preprocess_audio(self, file_path: str, target_sr: int = 16000) -> np.ndarray:
        """Load ``file_path`` as mono float32 at ``target_sr`` Hz, peak-normalised.

        Results are cached by path, so repeated calls for the same file skip
        decoding. Silent audio (peak of 0) is returned unscaled.

        Raises:
            ValueError: If the file cannot be loaded; the original error is chained.
        """
        if file_path in self.audio_cache:
            return self.audio_cache[file_path]

        try:
            audio, sr = librosa.load(file_path, sr=target_sr, mono=True)
            if audio.dtype != np.float32:
                audio = audio.astype(np.float32)
            max_val = np.abs(audio).max()
            if max_val > 0:
                audio = audio / max_val
            self.audio_cache[file_path] = audio
            return audio
        except Exception as e:
            raise ValueError(f"Error loading audio file {file_path}: {str(e)}") from e

    def _load_model(self, model_name: str):
        """Make ``model_name`` the active model, unloading any other one first.

        The name is validated *before* the current model is released, so a
        typo does not throw away a model that is already loaded.

        Raises:
            ValueError: If ``model_name`` is not a key of ``SUPPORTED_MODELS``.
            RuntimeError: If downloading/initialising the checkpoint fails.
        """
        if self.current_model_name == model_name:
            return

        if model_name not in self.SUPPORTED_MODELS:
            raise ValueError(f"Model {model_name} not supported. Choose from: {list(self.SUPPORTED_MODELS.keys())}")

        self._cleanup()

        model_id = self.SUPPORTED_MODELS[model_name]
        print(f"Loading {model_name} ({model_id})...")

        try:
            if "whisper" in model_name:
                self.processor = WhisperProcessor.from_pretrained(model_id)
                self.current_model = WhisperForConditionalGeneration.from_pretrained(model_id)
            elif "seamless" in model_name:
                self.processor = SeamlessM4TProcessor.from_pretrained(model_id)
                self.current_model = SeamlessM4TForSpeechToText.from_pretrained(model_id)
            elif "mms" in model_name:
                # NOTE(review): no target-language adapter is selected here;
                # verify the facebook/mms-* checkpoints decode Urdu by default.
                self.processor = AutoProcessor.from_pretrained(model_id)
                self.current_model = AutoModelForCTC.from_pretrained(model_id)
            elif "wav2vec2" in model_name:
                self.processor = Wav2Vec2Processor.from_pretrained(model_id)
                self.current_model = Wav2Vec2ForCTC.from_pretrained(model_id)

            self.current_model = self.current_model.to(self.device)
            if self.use_fp16:
                self.current_model = self.current_model.half()
            self.current_model.eval()
            self.current_model_name = model_name
            print(f"{model_name} loaded successfully")
        except Exception as e:
            raise RuntimeError(f"Failed to load model {model_name}: {str(e)}") from e

    @staticmethod
    def _generation_word_probs(transcription: str, scores, fallback: float) -> List[Tuple[str, float]]:
        """Pair each word of ``transcription`` with one utterance-level confidence.

        The confidence is the mean, over generation steps, of the highest
        softmax probability in that step's score tensor. When ``scores`` is
        missing or empty every word gets ``fallback`` instead.
        """
        words = transcription.strip().split()
        if not scores:
            return [(word, fallback) for word in words]
        # Softmax in float32 so half-precision scores do not lose resolution.
        step_confidences = [torch.softmax(step.float(), dim=-1).max().item() for step in scores]
        mean_confidence = float(np.mean(step_confidences))
        return [(word, mean_confidence) for word in words]

    def _extract_whisper_probabilities(self, audio_array: np.ndarray) -> List[Tuple[str, float]]:
        """Transcribe 16 kHz audio with the loaded Whisper model.

        Words fall back to a confidence of 0.8 when generation returns no scores.
        """
        input_features = self.processor(audio_array, sampling_rate=16000, return_tensors="pt").input_features.to(self.device)
        if self.use_fp16:
            input_features = input_features.half()

        # NOTE(review): no language/task is passed to generate(), so the output
        # language presumably relies on Whisper's own detection - confirm.
        with torch.inference_mode():
            predicted_ids = self.current_model.generate(input_features, return_dict_in_generate=True, output_scores=True)

        transcription = self.processor.batch_decode(predicted_ids.sequences, skip_special_tokens=True)[0]
        return self._generation_word_probs(transcription, getattr(predicted_ids, 'scores', None), 0.8)

    def _extract_ctc_probabilities(self, audio_array: np.ndarray) -> List[Tuple[str, float]]:
        """Transcribe 16 kHz audio with the loaded CTC model (MMS / wav2vec2).

        The shared confidence is the mean over all frames of the per-frame
        maximum softmax probability. Returns an empty list when nothing is decoded.
        """
        inputs = self.processor(audio_array, sampling_rate=16000, return_tensors="pt", padding=True)
        input_values = inputs.input_values.to(self.device)
        if self.use_fp16:
            input_values = input_values.half()

        with torch.inference_mode():
            logits = self.current_model(input_values).logits

        probs = torch.softmax(logits, dim=-1)
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = self.processor.batch_decode(predicted_ids)[0]
        words = transcription.strip().split()
        word_probs = []

        if len(words) > 0:
            max_probs = probs.max(dim=-1).values.squeeze()
            avg_confidence = max_probs.mean().item()
            word_probs = [(word, avg_confidence) for word in words]

        return word_probs

    def _extract_seamless_probabilities(self, audio_array: np.ndarray) -> List[Tuple[str, float]]:
        """Transcribe 16 kHz audio to Urdu text with the loaded SeamlessM4T model.

        Words fall back to a confidence of 0.7 when generation returns no scores.
        """
        audio_inputs = self.processor(audios=audio_array, sampling_rate=16000, return_tensors="pt").to(self.device)
        if self.use_fp16:
            # The model weights were converted with .half(); floating-point
            # inputs must match that dtype (as the Whisper and CTC paths
            # already do). Integer tensors such as masks are left untouched.
            for key in list(audio_inputs.keys()):
                value = audio_inputs[key]
                if torch.is_tensor(value) and value.is_floating_point():
                    audio_inputs[key] = value.half()

        with torch.inference_mode():
            output = self.current_model.generate(**audio_inputs, tgt_lang="urd", return_dict_in_generate=True, output_scores=True)

        transcription = self.processor.decode(output.sequences[0].tolist(), skip_special_tokens=True)
        return self._generation_word_probs(transcription, getattr(output, 'scores', None), 0.7)

    def _cleanup(self):
        """Drop the active model and processor and release their memory."""
        if self.current_model is not None:
            del self.current_model
            self.current_model = None
        if self.processor is not None:
            del self.processor
            self.processor = None
        self.current_model_name = None
        # Collect first so tensors kept alive only by reference cycles are
        # freed before the CUDA caching allocator is asked to release memory.
        gc.collect()
        if self.device == "cuda":
            torch.cuda.empty_cache()

    def clear_audio_cache(self):
        """Forget all cached decoded audio."""
        self.audio_cache.clear()
        gc.collect()

    def word_probabilities(self, audio_file_path: str, model_name: str) -> List[Tuple[str, float]]:
        """Transcribe ``audio_file_path`` with ``model_name``.

        Loads the audio (cached) and the model (if not already active), then
        dispatches on the model family found in ``model_name``.

        Returns:
            ``[(word, confidence), ...]`` in transcript order.

        Raises:
            RuntimeError: On any failure; the underlying exception is chained.
        """
        try:
            audio_array = self._preprocess_audio(audio_file_path)
            self._load_model(model_name)

            if "whisper" in model_name:
                results = self._extract_whisper_probabilities(audio_array)
            elif "mms" in model_name or "wav2vec2" in model_name:
                results = self._extract_ctc_probabilities(audio_array)
            elif "seamless" in model_name:
                results = self._extract_seamless_probabilities(audio_array)
            else:
                raise ValueError(f"Unknown model type: {model_name}")

            return results
        except Exception as e:
            raise RuntimeError(f"Error processing audio with {model_name}: {str(e)}") from e

def compute_wer(reference: str, hypothesis: str) -> float:
    """Return the word error rate of ``hypothesis`` against ``reference``.

    Both strings are split on whitespace; the word-level edit distance is
    divided by the number of reference words. With an empty reference the
    result is 0.0 when the hypothesis is empty too, otherwise 1.0.
    """
    ref_tokens, hyp_tokens = reference.split(), hypothesis.split()
    if ref_tokens:
        return editdistance.eval(ref_tokens, hyp_tokens) / len(ref_tokens)
    return 1.0 if hyp_tokens else 0.0

def compute_cer(reference: str, hypothesis: str) -> float:
    """Return the character error rate of ``hypothesis`` against ``reference``.

    The character-level edit distance is divided by the reference length.
    With an empty reference the result is 0.0 when the hypothesis is empty
    too, otherwise 1.0.
    """
    if reference:
        return editdistance.eval(reference, hypothesis) / len(reference)
    return 1.0 if hypothesis else 0.0

def compute_ece(confidences: np.ndarray, accuracies: np.ndarray, n_bins: int = 10) -> float:
    """Return the expected calibration error over ``n_bins`` equal-width bins on [0, 1].

    For every non-empty bin the absolute gap between mean confidence and mean
    accuracy is weighted by the fraction of samples falling into that bin.

    Bins are half-open ``(lower, upper]`` except the first, which also
    includes its lower edge, so a confidence of exactly 0.0 is counted rather
    than silently dropped.

    Args:
        confidences: Predicted confidences, expected in [0, 1]. Any array-like.
        accuracies: Per-sample correctness (1.0 / 0.0), same length.
        n_bins: Number of equal-width bins.

    Returns:
        The ECE as a plain ``float``; 0.0 for empty input.
    """
    confidences = np.asarray(confidences, dtype=float)
    accuracies = np.asarray(accuracies, dtype=float)
    if confidences.size == 0:
        return 0.0

    bin_edges = np.linspace(0, 1, n_bins + 1)
    ece = 0.0
    for index in range(n_bins):
        lower, upper = bin_edges[index], bin_edges[index + 1]
        above_lower = confidences >= lower if index == 0 else confidences > lower
        in_bin = above_lower & (confidences <= upper)
        weight = in_bin.mean()
        if weight > 0:
            gap = abs(confidences[in_bin].mean() - accuracies[in_bin].mean())
            ece += gap * weight
    return float(ece)

def load_test_samples(dataset_path, max_samples):
    """Read up to ``max_samples`` usable samples from ``<dataset_path>/other.tsv``.

    Each TSV row supplies a clip file name (``path`` column) and its
    transcript (``sentence`` column). Rows whose clip is not a file under
    ``<dataset_path>/clips`` are skipped and do NOT count towards the limit,
    so missing clips no longer shrink the returned list while usable rows
    remain further down the file.

    Args:
        dataset_path: Directory containing ``other.tsv`` and ``clips/``.
        max_samples: Maximum number of samples to return; ``None`` means no limit.

    Returns:
        A list of dicts with keys ``audio_id``, ``audio_path``, ``reference``
        and ``duration`` (always 0.0 here).

    Raises:
        FileNotFoundError: If ``other.tsv`` does not exist.
    """
    dataset_path = Path(dataset_path)
    tsv_file = dataset_path / "other.tsv"

    if not tsv_file.exists():
        raise FileNotFoundError(f"other.tsv not found at {tsv_file}")

    clips_dir = dataset_path / "clips"
    samples = []
    # newline='' lets the csv module handle line endings itself, as its docs require.
    with open(tsv_file, 'r', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f, delimiter='\t')
        for row in reader:
            if max_samples is not None and len(samples) >= max_samples:
                break
            audio_path = clips_dir / row['path']
            # is_file() also rejects an empty path, which would resolve to clips/ itself.
            if audio_path.is_file():
                samples.append({
                    'audio_id': row['path'],
                    'audio_path': str(audio_path),
                    'reference': row['sentence'],
                    'duration': 0.0
                })
    return samples

def evaluate_model(asr_wrapper, model_name, test_samples):
    """Transcribe every sample with ``model_name`` and score the output.

    For each sample the wrapper's ``word_probabilities`` result is treated as
    a sequence of ``(word, probability)`` pairs. The words are joined into
    the hypothesis, which is scored with WER and CER against the reference.
    Confidence is the mean word probability. ECE is computed from a
    position-by-position comparison of hypothesis words with reference words.

    A sample that raises is reported on stdout and skipped. The wrapper's
    ``_cleanup`` is always called once the loop ends.

    Returns:
        A list with one result dict per successfully processed sample.
    """
    collected = []
    try:
        for sample in tqdm(test_samples, desc=model_name):
            try:
                word_probs = asr_wrapper.word_probabilities(sample['audio_path'], model_name)

                hypothesis = ' '.join([word for word, _ in word_probs])
                reference = sample['reference']
                wer = compute_wer(reference, hypothesis)
                cer = compute_cer(reference, hypothesis)

                confidences = [prob for _, prob in word_probs]
                if word_probs:
                    avg_conf = np.mean(confidences)
                else:
                    avg_conf = 0.0

                # A hypothesis word is "correct" only when it equals the
                # reference word at the same position.
                ref_words = reference.split()
                accuracies = []
                for position, (word, _) in enumerate(word_probs):
                    matched = position < len(ref_words) and word == ref_words[position]
                    accuracies.append(1.0 if matched else 0.0)

                if confidences:
                    ece = compute_ece(np.array(confidences), np.array(accuracies))
                else:
                    ece = 0.0

                record = {
                    'audio_id': sample['audio_id'],
                    'model_name': model_name,
                    'reference': reference,
                    'hypothesis': hypothesis,
                    'wer': wer,
                    'cer': cer,
                    'avg_confidence': avg_conf,
                    'ece': ece,
                    'duration': sample['duration']
                }
                collected.append(record)
            except Exception as e:
                # Best effort: report the failing sample and move on.
                print(f"\nError on {sample['audio_id']}: {str(e)}")
    finally:
        asr_wrapper._cleanup()

    return collected

def _save_mean_barh(df, column, xlabel, title, color, out_path):
    """Save a horizontal bar chart of the per-model mean of ``column``."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        per_model = df.groupby('model_name')[column].mean().sort_values()
        ax.barh(per_model.index, per_model.values, color=color)
        ax.set_xlabel(xlabel)
        ax.set_title(title, fontweight='bold')
        ax.grid(axis='x', alpha=0.3)
        fig.tight_layout()
        fig.savefig(out_path, dpi=300, bbox_inches='tight')
    finally:
        # Close even when saving fails so figures do not pile up in a
        # long-running server process.
        plt.close(fig)


def generate_plots(df, output_dir):
    """Write the three comparison plots for an evaluation run.

    Creates ``wer_comparison.png``, ``wer_distribution.png`` and
    ``calibration.png`` inside ``output_dir``.

    Args:
        df: Per-sample results with at least the columns ``model_name``,
            ``wer`` and ``ece``.
        output_dir: Directory the PNG files are written to; created if it
            does not exist.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    _save_mean_barh(
        df, 'wer',
        xlabel='Word Error Rate (WER)',
        title='Model Comparison: Average WER',
        color='steelblue',
        out_path=output_dir / 'wer_comparison.png',
    )

    # DataFrame.boxplot(by=...) opens a figure of its own unless it is given
    # an Axes. Passing ``ax`` makes the requested figsize take effect and
    # avoids leaving an empty, never-closed figure behind on every call.
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        df.boxplot(column='wer', by='model_name', ax=ax)
        ax.set_ylabel('WER')
        ax.set_title('WER Distribution by Model', fontweight='bold')
        fig.suptitle('')  # drop the automatic "Boxplot grouped by ..." header
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        fig.savefig(output_dir / 'wer_distribution.png', dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)

    _save_mean_barh(
        df, 'ece',
        xlabel='Expected Calibration Error (ECE)',
        title='Confidence Calibration by Model',
        color='coral',
        out_path=output_dir / 'calibration.png',
    )

from flask import Flask, request, jsonify, send_file, send_from_directory
from flask_cors import CORS
from werkzeug.utils import secure_filename

# Flask application that serves the dashboard page and the JSON API below.
app = Flask(__name__)
# Apply flask_cors with its default settings to the whole app.
# NOTE(review): the defaults are presumably permissive (any origin) - confirm
# this is intended for the deployment.
CORS(app)

# Working directories: uploaded archives are stored and extracted under
# UPLOAD_FOLDER/<job_id>, evaluation outputs go under RESULTS_FOLDER/<job_id>.
UPLOAD_FOLDER = '/kaggle/working/uploads'
RESULTS_FOLDER = '/kaggle/working/results'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(RESULTS_FOLDER, exist_ok=True)

# In-memory job registry keyed by job_id. Entries are created by the upload
# route and updated by the evaluation worker thread; nothing is persisted, so
# all jobs are lost when the process exits.
jobs = {}

@app.route('/')
def index():
    """Serve the dashboard page stored at /kaggle/working/index.html."""
    dashboard_page = '/kaggle/working/index.html'
    return send_file(dashboard_page)

@app.route('/api/models', methods=['GET'])
def get_models():
    """Return the names of the models known to ``UrduASRWrapper`` as JSON."""
    model_names = [*UrduASRWrapper.SUPPORTED_MODELS.keys()]
    payload = {'models': model_names}
    return jsonify(payload)

def _find_ur_dataset(extract_dir):
    """Return the first ``ur`` directory under ``extract_dir`` that holds ``other.tsv``, else None."""
    for root, dirs, _files in os.walk(extract_dir):
        if 'ur' in dirs:
            candidate = os.path.join(root, 'ur')
            if os.path.exists(os.path.join(candidate, 'other.tsv')):
                return candidate
    return None


@app.route('/api/upload', methods=['POST'])
def upload_dataset():
    """Accept a ZIP dataset upload, extract it and register a new job.

    The archive must contain a ``ur`` directory holding ``other.tsv`` and a
    ``clips`` folder. On success a job entry with status ``uploaded`` is
    stored in ``jobs`` and its id is returned.

    Returns:
        JSON with ``job_id``, ``dataset_path`` and ``message`` on success.
        JSON ``{'error': ...}`` with status 400 for invalid uploads, or 500
        for unexpected failures.
    """
    import shutil  # only needed here, for cleaning up rejected uploads

    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    # Check the extension on the name the client sent, case-insensitively.
    # secure_filename() strips non-ASCII characters, so a name written in
    # Urdu would collapse to "zip" and fail an extension test made afterwards.
    if not file.filename.lower().endswith('.zip'):
        return jsonify({'error': 'Only ZIP files supported'}), 400

    job_id = str(uuid.uuid4())
    job_dir = os.path.join(UPLOAD_FOLDER, job_id)
    extract_dir = os.path.join(job_dir, 'dataset')
    os.makedirs(extract_dir, exist_ok=True)

    # Store under a fixed name: the client-supplied name is never needed
    # again and a fixed name cannot be empty or collide with 'dataset'.
    filepath = os.path.join(job_dir, 'upload.zip')

    def _reject(message, status):
        # Remove everything written for this upload so rejected or failed
        # uploads do not accumulate on disk.
        shutil.rmtree(job_dir, ignore_errors=True)
        return jsonify({'error': message}), status

    try:
        file.save(filepath)

        try:
            with zipfile.ZipFile(filepath, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)
        except zipfile.BadZipFile:
            # A corrupt archive is a client error, not a server fault.
            return _reject('Uploaded file is not a valid ZIP archive', 400)

        ur_path = _find_ur_dataset(extract_dir)
        if not ur_path:
            return _reject('Could not find ur/other.tsv in dataset', 400)

        clips_path = os.path.join(ur_path, 'clips')
        if not os.path.exists(clips_path):
            return _reject('clips/ folder not found in ur/', 400)

        jobs[job_id] = {
            'status': 'uploaded',
            'dataset_path': ur_path,
            'progress': 0,
            'current_model': None,
            'results': None,
            'error': None
        }

        return jsonify({
            'job_id': job_id,
            'dataset_path': ur_path,
            'message': 'Dataset uploaded successfully'
        })

    except Exception as e:
        return _reject(str(e), 500)

@app.route('/api/evaluate', methods=['POST'])
def start_evaluation():
    """Validate an evaluation request and start it in a background thread.

    Expects a JSON object with ``job_id`` (from the upload route), ``models``
    (non-empty list of model names), and optionally ``max_samples``
    (positive integer, default 10) and ``use_fp16`` (default True).

    Returns:
        JSON with ``job_id`` and ``message`` once the worker thread has been
        started, or JSON ``{'error': ...}`` with status 400 when the request
        is invalid or the job is not in the ``uploaded`` state.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body, so every bad request gets the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    job_id = data.get('job_id')
    models = data.get('models', [])
    max_samples = data.get('max_samples', 10)
    use_fp16 = data.get('use_fp16', True)

    # isinstance guard: an unhashable value (list/dict) would make the
    # membership test itself raise.
    if not job_id or not isinstance(job_id, str) or job_id not in jobs:
        return jsonify({'error': 'Invalid job_id'}), 400

    if not models:
        return jsonify({'error': 'No models selected'}), 400

    # A bare string would be iterated character by character by the worker.
    if not isinstance(models, list) or not all(isinstance(m, str) for m in models):
        return jsonify({'error': 'models must be a list of model names'}), 400

    # Reject values that would only blow up later inside the worker thread.
    if isinstance(max_samples, bool):
        return jsonify({'error': 'max_samples must be a positive integer'}), 400
    try:
        max_samples = int(max_samples)
    except (TypeError, ValueError):
        return jsonify({'error': 'max_samples must be a positive integer'}), 400
    if max_samples < 1:
        return jsonify({'error': 'max_samples must be a positive integer'}), 400

    use_fp16 = bool(use_fp16)

    if jobs[job_id]['status'] != 'uploaded':
        return jsonify({'error': 'Job already running or completed'}), 400

    jobs[job_id]['status'] = 'running'
    jobs[job_id]['models'] = models
    jobs[job_id]['max_samples'] = max_samples

    # Daemon thread: the evaluation must not keep the process alive on exit.
    thread = threading.Thread(
        target=run_evaluation,
        args=(job_id, models, max_samples, use_fp16)
    )
    thread.daemon = True
    thread.start()

    return jsonify({
        'job_id': job_id,
        'message': 'Evaluation started'
    })

def _json_safe_float(value):
    """Convert a numeric scalar to a float that is valid in strict JSON (NaN becomes 0.0)."""
    return 0.0 if pd.isna(value) else float(value)


def run_evaluation(job_id, models, max_samples, use_fp16):
    """Run the evaluation for ``job_id`` and record the outcome in ``jobs``.

    Intended to run in a background thread. Loads the test samples,
    evaluates every requested model, then writes ``detailed_results.csv``,
    ``aggregate_metrics.csv``, the plots and ``results.json`` into the job's
    result directory. Progress and the current model are published through
    ``jobs[job_id]`` along the way.

    Any exception marks the job as ``failed`` and stores the message in
    ``jobs[job_id]['error']``; nothing is raised to the caller.

    Args:
        job_id: Key of the job in ``jobs``.
        models: Model names to evaluate, in order.
        max_samples: Maximum number of test samples to load.
        use_fp16: Request half precision; only honoured when CUDA is available.
    """
    import traceback  # only needed for the failure path below

    try:
        job = jobs[job_id]
        dataset_path = job['dataset_path']
        result_dir = os.path.join(RESULTS_FOLDER, job_id)
        os.makedirs(result_dir, exist_ok=True)

        job['progress'] = 5
        test_samples = load_test_samples(dataset_path, max_samples)
        if not test_samples:
            # Fail early with an accurate message instead of loading models
            # and then reporting that "all samples failed".
            raise ValueError(
                "No test samples found - none of the audio files listed in "
                "other.tsv exist under clips/"
            )

        job['progress'] = 10

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        asr_wrapper = UrduASRWrapper(device=device, use_fp16=use_fp16 and device == 'cuda')

        all_results = []
        total_models = len(models)

        try:
            for idx, model in enumerate(models):
                job['current_model'] = model
                # Model evaluation occupies the 10%..90% progress range,
                # split evenly across the requested models.
                progress_base = 10 + (idx * 80 // total_models)
                job['progress'] = progress_base

                model_results = evaluate_model(asr_wrapper, model, test_samples)
                all_results.extend(model_results)

                job['progress'] = progress_base + (80 // total_models)
        finally:
            # Release cached audio even when a model run raises.
            asr_wrapper.clear_audio_cache()

        if len(all_results) == 0:
            raise ValueError("No results generated - all samples failed")

        df = pd.DataFrame(all_results)

        df.to_csv(os.path.join(result_dir, 'detailed_results.csv'), index=False, encoding='utf-8')

        aggregate = df.groupby('model_name').agg({
            'wer': ['mean', 'std', 'min', 'max'],
            'cer': ['mean', 'std'],
            'avg_confidence': ['mean', 'std'],
            'ece': ['mean', 'std']
        }).round(4)

        # Flatten the (metric, statistic) column index into "metric_statistic".
        aggregate.columns = ['_'.join(col).strip() for col in aggregate.columns.values]
        aggregate = aggregate.reset_index()

        aggregate.to_csv(os.path.join(result_dir, 'aggregate_metrics.csv'), index=False)

        job['progress'] = 95
        generate_plots(df, result_dir)

        # The standard deviation of a single sample is NaN. NaN is not valid
        # JSON and makes the browser's JSON parser reject the whole response,
        # so such values are reported as 0.0 in the API payload.
        metric_columns = [col for col in aggregate.columns if col != 'model_name']
        aggregate_dict = {
            row['model_name']: {col: _json_safe_float(row[col]) for col in metric_columns}
            for _, row in aggregate.iterrows()
        }

        mean_wer = df.groupby('model_name')['wer'].mean()
        results_json = {
            'detailed': df.to_dict('records'),
            'aggregate': aggregate_dict,
            'best_model': mean_wer.idxmin(),
            'best_wer': float(mean_wer.min())
        }

        with open(os.path.join(result_dir, 'results.json'), 'w') as f:
            json.dump(results_json, f)

        # Publish the results before flipping the status: request handlers
        # read 'results' / 'result_dir' as soon as they see 'completed'.
        job['results'] = results_json
        job['result_dir'] = result_dir
        job['progress'] = 100
        job['status'] = 'completed'

    except Exception as e:
        jobs[job_id]['status'] = 'failed'
        jobs[job_id]['error'] = str(e)
        print(f"Error in evaluation: {str(e)}")
        traceback.print_exc()

@app.route('/api/status/<job_id>', methods=['GET'])
def get_status(job_id):
    """Report status, progress, current model and error for a job.

    Responds with 404 and a JSON error when the job id is unknown.
    """
    job = jobs.get(job_id)
    if job is None:
        return jsonify({'error': 'Job not found'}), 404

    snapshot = {
        'status': job['status'],
        'progress': job['progress'],
        'current_model': job.get('current_model'),
        'error': job.get('error')
    }
    return jsonify(snapshot)

@app.route('/api/results/<job_id>', methods=['GET'])
def get_results(job_id):
    """Return the stored results of a completed job as JSON.

    Responds with 404 for an unknown job and 400 while the job is not in
    the ``completed`` state.
    """
    job = jobs.get(job_id)
    if job is None:
        return jsonify({'error': 'Job not found'}), 404

    finished = job['status'] == 'completed'
    if not finished:
        return jsonify({'error': 'Job not completed'}), 400

    return jsonify(job['results'])

@app.route('/api/plot/<job_id>/<plot_name>', methods=['GET'])
def get_plot(job_id, plot_name):
    """Serve one of the PNG plots produced for a completed job.

    Responds with 404 for an unknown job or plot and 400 while the job is
    not in the ``completed`` state.
    """
    if job_id not in jobs:
        return jsonify({'error': 'Job not found'}), 404

    job = jobs[job_id]
    if job['status'] != 'completed':
        return jsonify({'error': 'Job not completed'}), 400

    # plot_name comes straight from the URL. Only accept a plain ``*.png``
    # file name: anything secure_filename() would alter (such as "..") or
    # any other file kept in the result directory (CSV/JSON) is treated as
    # not found instead of being served with an image/png content type.
    safe_name = secure_filename(plot_name)
    if safe_name != plot_name or not safe_name.lower().endswith('.png'):
        return jsonify({'error': 'Plot not found'}), 404

    plot_path = os.path.join(job['result_dir'], safe_name)
    # isfile() rather than exists(): a directory must not reach send_file.
    if not os.path.isfile(plot_path):
        return jsonify({'error': 'Plot not found'}), 404

    return send_file(plot_path, mimetype='image/png')

@app.route('/api/download/<job_id>', methods=['GET'])
def download_results(job_id):
    """Send the detailed per-sample CSV of a completed job as ``results.csv``.

    Responds with 404 for an unknown job and 400 while the job is not in
    the ``completed`` state.
    """
    job = jobs.get(job_id)
    if job is None:
        return jsonify({'error': 'Job not found'}), 404

    finished = job['status'] == 'completed'
    if not finished:
        return jsonify({'error': 'Job not completed'}), 400

    detailed_csv = os.path.join(job['result_dir'], 'detailed_results.csv')
    return send_file(detailed_csv, as_attachment=True, download_name='results.csv')

if __name__ == '__main__':
    # Script entry point: open an ngrok tunnel to port 5000 and start Flask.
    from pyngrok import ngrok, conf

    # The ngrok auth token is a credential and must not be committed with the
    # source. It is read from the NGROK_AUTHTOKEN environment variable.
    # NOTE(review): the token that used to be hard-coded here should be
    # considered leaked and revoked in the ngrok dashboard.
    NGROK_TOKEN = os.environ.get('NGROK_AUTHTOKEN')
    if not NGROK_TOKEN:
        raise SystemExit(
            "NGROK_AUTHTOKEN environment variable is not set; "
            "export your ngrok auth token before starting the server."
        )

    conf.get_default().auth_token = NGROK_TOKEN

    public_url = ngrok.connect(5000)
    print(f"\n{'='*80}")
    print(f"🚀 Flask API Running")
    print(f"{'='*80}")
    print(f"Public URL: {public_url}")
    print(f"{'='*80}\n")

    app.run(port=5000, debug=False)