In [None]:
# Install Kaggle CLI
!pip install kaggle

import os
os.makedirs("/root/.kaggle/", exist_ok=True)

# Move kaggle.json to the correct directory
import shutil
shutil.move("kaggle.json", "/root/.kaggle/")

# Download the dataset
!kaggle datasets download -d asupreethgupta/sandalwood-kanada

# Unzip the dataset
!unzip sandalwood-kanada.zip -d /content/sandalwood_dataset

Dataset URL: https://www.kaggle.com/datasets/asupreethgupta/sandalwood-kanada
License(s): unknown
Downloading sandalwood-kanada.zip to /content
 99% 624M/629M [00:04<00:00, 151MB/s]
100% 629M/629M [00:04<00:00, 152MB/s]
Archive:  sandalwood-kanada.zip
  inflating: /content/sandalwood_dataset/SandalWoodNewsStories_1.mp3  
  inflating: /content/sandalwood_dataset/SandalWoodNewsStories_107.mp3  
  inflating: /content/sandalwood_dataset/SandalWoodNewsStories_112.mp3  
  inflating: /content/sandalwood_dataset/SandalWoodNewsStories_144.mp3  
  inflating: /content/sandalwood_dataset/SandalWoodNewsStories_146.mp3  
  inflating: /content/sandalwood_dataset/SandalWoodNewsStories_148.mp3  
  inflating: /content/sandalwood_dataset/SandalWoodNewsStories_156.mp3  
  inflating: /content/sandalwood_dataset/SandalWoodNewsStories_158.mp3  
  inflating: /content/sandalwood_dataset/SandalWoodNewsStories_159.mp3  
  inflating: /content/sandalwood_dataset/SandalWoodNewsStories_167.mp3  
  inflating: /conten

In [None]:
# !huggingface-cli login

In [None]:
# !pip uninstall -y tensorflow
# !pip install tensorflow-cpu


In [None]:
import torch
import torchaudio
import numpy as np
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import sqlite3
import os
import json
from tqdm import tqdm
import soundfile as sf
from typing import List, Dict
import logging

logging.basicConfig(level=logging.ERROR)


In [None]:
import os

from huggingface_hub import login

# SECURITY: an access token must never be committed to a notebook. Any token
# that has appeared in source should be revoked in the Hugging Face settings.
# The token is read from the HF_TOKEN environment variable; when it is unset,
# login() receives None and asks for the token interactively instead.
login(token=os.environ.get("HF_TOKEN"))


In [None]:
import os
import torch
import torchaudio
import pandas as pd
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline, Wav2Vec2Processor, Wav2Vec2ForCTC


class KannadaSpeechQA:
    """Answer spoken Kannada questions from a CSV of transcribed audio chunks.

    A question recording is transcribed with a wav2vec2 CTC model, the CSV
    chunks are ranked against the transcribed question by sentence-embedding
    cosine similarity, and an extractive question-answering pipeline is run
    on the best-ranked chunks.

    The CSV must provide the columns ``File``, ``Chunk_Index`` and
    ``Transcription``.
    """

    def __init__(self, transcription_csv: str, device: str = None):
        """Load all models and the transcription table.

        Args:
            transcription_csv: Path to the UTF-8 CSV of chunk transcriptions.
            device: Torch device for the speech model; defaults to ``"cuda"``
                when available, otherwise ``"cpu"``.
        """
        self.transcription_csv = transcription_csv
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.processor = Wav2Vec2Processor.from_pretrained("amoghsgopadi/wav2vec2-large-xlsr-kn")
        self.transcriber_model = Wav2Vec2ForCTC.from_pretrained("amoghsgopadi/wav2vec2-large-xlsr-kn").to(self.device)
        # NOTE(review): all-MiniLM-L6-v2 is presumably an English-centric
        # model - confirm retrieval quality on Kannada text.
        self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.qa_model = pipeline("question-answering", model="Sindhu/muril-large-squad2")

        # Load transcriptions from CSV
        self.transcriptions = pd.read_csv(self.transcription_csv, encoding="utf-8")

        # Filled lazily by _indexed_contexts() on the first search.
        self._context_cache = None

    def transcribe_audio(self, audio_path: str) -> str:
        """Transcribe one audio file and return the decoded text.

        The audio is down-mixed to mono and resampled to 16 kHz before it is
        passed to the wav2vec2 model; decoding is greedy (arg-max per frame).
        The transcription is also printed.
        """
        waveform, sample_rate = torchaudio.load(audio_path)
        if waveform.shape[0] > 1:  # Convert stereo to mono
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        if sample_rate != 16000:  # Resample to 16kHz
            resampler = torchaudio.transforms.Resample(sample_rate, 16000)
            waveform = resampler(waveform)
        inputs = self.processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt", padding=True).to(self.device)
        with torch.no_grad():
            logits = self.transcriber_model(inputs.input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = self.processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        print(f"Transcribed Question: {transcription}")
        return transcription

    def _indexed_contexts(self):
        """Return ``(rows, embeddings)`` for every non-blank transcription.

        All chunks are embedded in a single batched call and the result is
        cached, so repeated questions do not re-encode the whole table. The
        cache is rebuilt when ``self.transcriptions`` is rebound to another
        DataFrame; in-place edits of the same DataFrame are not detected.
        """
        cached = getattr(self, "_context_cache", None)
        if cached is not None and cached[0] is self.transcriptions:
            return cached[1], cached[2]

        # Rows without a transcription cannot be searched or answered from.
        frame = self.transcriptions.dropna(subset=["Transcription"])
        rows = [
            {"file": file_name, "chunk_index": chunk_index, "transcription": str(text)}
            for file_name, chunk_index, text in zip(
                frame["File"], frame["Chunk_Index"], frame["Transcription"]
            )
            if str(text).strip()
        ]
        embeddings = None
        if rows:
            embeddings = self.embedding_model.encode([row["transcription"] for row in rows])

        self._context_cache = (self.transcriptions, rows, embeddings)
        return rows, embeddings

    def search_answers(self, question: str, top_k: int = 5) -> list:
        """Return extractive answers from the ``top_k`` most similar chunks.

        Args:
            question: Question text.
            top_k: Number of best-matching chunks passed to the QA model.

        Returns:
            A list of dicts with the keys ``answer``, ``confidence``,
            ``source_file``, ``chunk_index`` and ``context``, ordered by
            decreasing chunk similarity. The list is empty when the question
            is blank or no usable transcription exists.
        """
        # A blank question has nothing to match; return early instead of
        # handing an empty string to the models.
        if not question or not question.strip():
            return []

        rows, context_embeddings = self._indexed_contexts()
        if not rows:
            return []

        question_embedding = self.embedding_model.encode(question)
        similarities = cosine_similarity(
            question_embedding.reshape(1, -1),
            context_embeddings,
        )[0]

        # sorted() is stable, so equally similar chunks keep their CSV order.
        ranked = sorted(range(len(rows)), key=lambda i: similarities[i], reverse=True)[:top_k]

        answers = []
        for index in ranked:
            row = rows[index]
            qa_result = self.qa_model(question=question, context=row["transcription"])
            answers.append({
                "answer": qa_result["answer"],
                "confidence": qa_result["score"],
                "source_file": row["file"],
                "chunk_index": row["chunk_index"],
                "context": row["transcription"],
            })
        return answers

    def answer_question(self, question_audio_path: str):
        """Transcribe a spoken question, print its answers and return them.

        Returns:
            The list produced by :meth:`search_answers`, so callers can use
            the answers programmatically as well as read the printed report.
        """
        # Transcribe the question audio
        question_text = self.transcribe_audio(question_audio_path)

        # Search for answers
        answers = self.search_answers(question_text)

        # Print answers in Kannada
        print("\n‡≤â‡≤§‡≥ç‡≤§‡≤∞‡≤ó‡≤≥‡≥Å:")
        for ans in answers:
            print(f"‡≤â‡≤§‡≥ç‡≤§‡≤∞: {ans['answer']}, ‡≤®‡≤Ç‡≤¨‡≤ø‡≤ï‡≥Ü: {ans['confidence']:.2f}, ‡≤Æ‡≥Ç‡≤≤ ‡≤´‡≥à‡≤≤‡≥ç: {ans['source_file']}, ‡≤ö‡≤Ç‡≤ï‡≥ç ‡≤∏‡≥Ç‡≤ö‡≥ç‡≤Ø‡≤Ç‡≤ï: {ans['chunk_index']}")
        return answers


# Demo entry point: answer one recorded question against the transcription CSV.
if __name__ == "__main__":
    # CSV of chunk transcriptions that the QA system searches.
    transcription_csv = "/content/kannada_transcriptions.csv"
    # NOTE(review): "queation" looks like a typo for "question" - confirm it
    # matches the name of the uploaded audio file before changing it.
    question_audio_path = "/content/queation.mp3"

    # Constructing the system loads all models; the question is then
    # transcribed, searched and its answers printed.
    qa_system = KannadaSpeechQA(transcription_csv=transcription_csv)
    qa_system.answer_question(question_audio_path)




Transcribed Question: ‡≤ó‡≤Ç‡≤¶ ‡≤Æ‡≤∞ ‡≤Ö‡≤Ç‡≤¶‡≤∞‡≥Ü‡≤Ø‡≤®‡≥Å

‡≤â‡≤§‡≥ç‡≤§‡≤∞‡≤ó‡≤≥‡≥Å:
‡≤â‡≤§‡≥ç‡≤§‡≤∞: ‡≤Ö‡≤¶‡≥Å ‡≤§‡≤æ‡≤Ç‡≤§‡≤® ‡≤¨‡≤≤‡≥ç‡≤≤‡≤ó‡≥à‡≤ó‡≥Ä‡≤§‡≤∞‡≥Ü, ‡≤®‡≤Ç‡≤¨‡≤ø‡≤ï‡≥Ü: 0.03, ‡≤Æ‡≥Ç‡≤≤ ‡≤´‡≥à‡≤≤‡≥ç: SandalWoodNewsStories_287.mp3, ‡≤ö‡≤Ç‡≤ï‡≥ç ‡≤∏‡≥Ç‡≤ö‡≥ç‡≤Ø‡≤Ç‡≤ï: 4
‡≤â‡≤§‡≥ç‡≤§‡≤∞: ‡≤®‡≥Ç‡≤Æ‡≥Ç‡≤∞‡≥Å ‡≤∏‡≤æ‡≥à‡≤ó‡≤ø ‡≤™‡≥Ç‡≤∞‡≤ú‡≤Æ‡≥Ç‡≤∞‡≥Å‡≤∞‡≤∏‡≥Ü, ‡≤®‡≤Ç‡≤¨‡≤ø‡≤ï‡≥Ü: 0.08, ‡≤Æ‡≥Ç‡≤≤ ‡≤´‡≥à‡≤≤‡≥ç: SandalWoodNewsStories_287.mp3, ‡≤ö‡≤Ç‡≤ï‡≥ç ‡≤∏‡≥Ç‡≤ö‡≥ç‡≤Ø‡≤Ç‡≤ï: 22
‡≤â‡≤§‡≥ç‡≤§‡≤∞: ‡≤í‡≤Ç‡≤¶ ‡≤í‡≤≤‡≥ç‡≤≤‡≤ï‡≥ç‡≤∑‡≥ç ‡≤Æ‡≤ü‡≥ç‡≤ü‡≥Ü‡≤ó‡≥ç‡≤≥‡≥Ç, ‡≤®‡≤Ç‡≤¨‡≤ø‡≤ï‡≥Ü: 0.06, ‡≤Æ‡≥Ç‡≤≤ ‡≤´‡≥à‡≤≤‡≥ç: SandalWoodNewsStories_287.mp3, ‡≤ö‡≤Ç‡≤ï‡≥ç ‡≤∏‡≥Ç‡≤ö‡≥ç‡≤Ø‡≤Ç‡≤ï: 29
‡≤â‡≤§‡≥ç‡≤§‡≤∞: ‡≤®‡≥Å‡≤¶‡≥Å‡≤Æ‡≤£‡≥ç‡≤Ø‡≤§‡≥ç‡≤§‡≥Å ‡≤ó‡≤ø‡≤≥‡≤¶‡≤™‡≥ç‡≤™‡≤æ‡≤ó‡≥ç‡≤ó‡≤ø‡≤§‡≥ç‡≤§, ‡≤®‡≤Ç‡≤¨‡≤ø‡≤ï‡≥Ü: 0.02, ‡≤Æ‡≥Ç‡≤≤ ‡≤´‡≥à‡≤≤‡≥ç: SandalWoodNewsStories_287.mp3, ‡≤ö‡≤Ç‡≤ï‡≥ç ‡≤∏‡≥Ç‡≤ö‡≥ç‡≤Ø‡≤Ç‡≤ï: 42
‡≤â‡≤§‡≥ç‡≤§‡≤∞: ‡≤¶‡≥ç‡≤∞‡≥Ü ‡≤∏‡≥ç‡≤∞‡≤ø‡≤ó‡≤æ‡≤Ç‡≤¶‡≤µ‡≤ø‡≤ó‡≤≥ ‡≤π‡≤ø‡≤®‡≤æ‡≤∞‡

In [5]:
import re

def normalize_answer(s):
    """Return a canonical form of *s* for answer comparison.

    The text is lowercased, the English articles "a", "an" and "the" are
    dropped, every character that is not an ASCII letter, a digit or in the
    Kannada block (U+0C80-U+0CFF) becomes a space, and runs of whitespace
    collapse to a single space.
    """
    lowered = s.lower()
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', lowered)
    cleaned = re.sub(r'[^a-zA-Z0-9\u0C80-\u0CFF]', ' ', without_articles)
    # split() with no argument discards leading, trailing and repeated spaces.
    return ' '.join(cleaned.split())

def compute_exact_match(predicted_answers, ground_truth_answers):
    """Compute the Exact Match (EM) score as a percentage.

    A prediction matches when it equals its ground truth after
    ``normalize_answer``. The score is divided by the number of ground-truth
    answers, so ground truths without a paired prediction count as misses.

    Returns:
        The EM percentage in [0, 100]; 0.0 when there are no ground-truth
        answers (instead of raising ``ZeroDivisionError``).
    """
    total = len(ground_truth_answers)
    if total == 0:
        return 0.0

    exact_matches = sum(
        normalize_answer(pred) == normalize_answer(gt)
        for pred, gt in zip(predicted_answers, ground_truth_answers)
    )
    return exact_matches / total * 100

def compute_f1(predicted_answers, ground_truth_answers):
    """Compute the mean token-level F1 score as a percentage.

    Each prediction/ground-truth pair is normalized with ``normalize_answer``
    and split on whitespace. Overlap is counted as a multiset intersection,
    so a token repeated in one answer is only credited as often as it occurs
    in the other; this keeps precision and recall within [0, 1].

    Returns:
        The average F1 over all pairs, in [0, 100]; 0.0 when there are no
        pairs (instead of raising ``ZeroDivisionError``).
    """
    # Local import keeps this notebook cell self-contained.
    from collections import Counter

    f1_scores = []
    for pred, gt in zip(predicted_answers, ground_truth_answers):
        pred_tokens = normalize_answer(pred).split()
        gt_tokens = normalize_answer(gt).split()

        # With an empty side, F1 is 1 only when both sides are empty, which
        # agrees with how exact match treats the same pair.
        if not pred_tokens or not gt_tokens:
            f1_scores.append(float(pred_tokens == gt_tokens))
            continue

        overlap = sum((Counter(pred_tokens) & Counter(gt_tokens)).values())
        if overlap == 0:
            f1_scores.append(0.0)
            continue

        precision = overlap / len(pred_tokens)
        recall = overlap / len(gt_tokens)
        f1_scores.append(2 * (precision * recall) / (precision + recall))

    if not f1_scores:
        return 0.0
    return sum(f1_scores) / len(f1_scores) * 100

def evaluate(predicted_answers, ground_truth_answers):
    """Print an EM/F1 report with a per-pair table and return both scores.

    Returns:
        A tuple ``(em_score, f1_score_value)``, both percentages.
    """
    em_score = compute_exact_match(predicted_answers, ground_truth_answers)
    f1_score_value = compute_f1(predicted_answers, ground_truth_answers)

    heavy_rule = "=" * 50
    light_rule = "-" * 50

    # Header with the aggregate metrics.
    print(heavy_rule)
    print("üìä Evaluation Metrics".center(50))
    print(heavy_rule)
    print(f"üîç Exact Match (EM): {em_score:.2f}%")
    print(f"üèÜ F1 Score: {f1_score_value:.2f}%")
    print(heavy_rule)
    print(f"{'Prediction':<30} | {'Ground Truth':<30} | {'Match'}")
    print(light_rule)

    # One table row per pair; the marker reflects the normalized comparison.
    for pred, gt in zip(predicted_answers, ground_truth_answers):
        if normalize_answer(pred) == normalize_answer(gt):
            match = "‚úÖ"
        else:
            match = "‚ùå"
        print(f"{pred:<30} | {gt:<30} | {match}")

    print(heavy_rule)
    return em_score, f1_score_value

# Example Usage
# Demo: score ten hand-written prediction / ground-truth pairs.
if __name__ == "__main__":
    # Sample predicted answers and ground truth answers
    # (hard-coded strings, paired by position with ground_truth_answers below)
    predicted_answers = [
        "‡≤ï‡≤≥‡≥Ü‡≤¶ ‡≤µ‡≤æ‡≤∞‡≤¶ ‡≤µ‡≤∞‡≤¶‡≤ø",
        "‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤ï‡≥ã‡≤°‡≥ç ‡≤â‡≤§‡≥ç‡≤§‡≤Æ‡≤µ‡≤æ‡≤ó‡≤ø‡≤¶‡≥Ü",
        "‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤∏‡≤Ç‡≤ú‡≥Ü ‡≤¨‡≤Ç‡≤¶‡≥Å ‡≤π‡≥ã‡≤ó‡≤ø‡≤¶‡≥ç‡≤¶‡≤∞‡≥Å",
        "‡≤∂‡≤ø‡≤ï‡≥ç‡≤∑‡≤ï‡≤∞ ‡≤∏‡≤≠‡≥Ü ‡≤∏‡≥ã‡≤Æ‡≤µ‡≤æ‡≤∞ ‡≤®‡≤°‡≥Ü‡≤Ø‡≤ø‡≤§‡≥Å",
        "‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤π‡≤æ‡≤≤‡≥Å ‡≤§‡≤ø‡≤®‡≥ç‡≤®‡≥Å‡≤µ ‡≤¨‡≤ó‡≥ç‡≤ó‡≥Ü‡≤Ø‡≥Ü‡≤®‡≥ç‡≤®‡≤ø‡≤∏‡≥Å‡≤§‡≥ç‡≤§‡≤ø‡≤¶‡≥Ü",
        "‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤®‡≤®‡≤ó‡≥Ü ‡≤ü‡≤ø‡≤ï‡≥Ü‡≤ü‡≥ç ‡≤®‡≥Ä‡≤°‡≤ø‡≤¶‡≤∞‡≥Å",
        "‡≤™‡≥ç‡≤∞‡≤Ø‡≤æ‡≤£ ‡≤â‡≤§‡≥ç‡≤§‡≤Æ‡≤µ‡≤æ‡≤ó‡≤ø‡≤§‡≥ç‡≤§‡≥Å",
        "‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤Æ‡≤®‡≥Ü‡≤Ø‡≤≤‡≥ç‡≤≤‡≤ø‡≤≤‡≥ç‡≤≤",
        "‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤Ø‡≥ã‡≤ú‡≤®‡≥Ü ‡≤§‡≤§‡≥ç‡≤§‡≥ç‡≤µ‡≤æ‡≤§‡≥ç‡≤Æ‡≤ï‡≤µ‡≤æ‡≤ó‡≤ø‡≤¶‡≥Ü",
        "‡≤®‡≤æ‡≤®‡≥Å ‡≤™‡≥Å‡≤∏‡≥ç‡≤§‡≤ï‡≤µ‡≤®‡≥ç‡≤®‡≥Å ‡≤ì‡≤¶‡≤ø‡≤¶‡≥Ü"
    ]

    # Reference answers, in the same order as predicted_answers.
    ground_truth_answers = [
        "‡≤ï‡≤≥‡≥Ü‡≤¶ ‡≤µ‡≤æ‡≤∞‡≤¶ ‡≤µ‡≤∞‡≤¶‡≤ø",
        "‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤ï‡≥ã‡≤°‡≥ç ‡≤â‡≤§‡≥ç‡≤§‡≤Æ‡≤µ‡≤æ‡≤ó‡≤ø‡≤¶‡≥Ü",
        "‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤∏‡≤Ç‡≤ú‡≥Ü ‡≤¨‡≤Ç‡≤¶‡≤ø‡≤¶‡≥ç‡≤¶‡≤∞‡≥Å",
        "‡≤∂‡≤ø‡≤ï‡≥ç‡≤∑‡≤ï‡≤∞ ‡≤∏‡≤≠‡≥Ü ‡≤∏‡≥ã‡≤Æ‡≤µ‡≤æ‡≤∞ ‡≤®‡≤°‡≥Ü‡≤Ø‡≤ø‡≤§‡≥Å",
        "‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤π‡≤æ‡≤≤‡≥Å ‡≤§‡≤ø‡≤®‡≥ç‡≤®‡≥Å‡≤µ ‡≤¨‡≤ó‡≥ç‡≤ó‡≥Ü ‡≤™‡≥ç‡≤∞‡≤∂‡≥ç‡≤®‡≥Ü ‡≤ï‡≥á‡≤≥‡≤ø‡≤¶‡≥ç‡≤¶‡≤∞‡≥Å",
        "‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤®‡≤®‡≤ó‡≥Ü ‡≤ü‡≤ø‡≤ï‡≥Ü‡≤ü‡≥ç ‡≤ï‡≥ä‡≤ü‡≥ç‡≤ü‡≤∞‡≥Å",
        "‡≤™‡≥ç‡≤∞‡≤Ø‡≤æ‡≤£ ‡≤â‡≤§‡≥ç‡≤§‡≤Æ‡≤µ‡≤æ‡≤ó‡≤ø‡≤§‡≥ç‡≤§‡≥Å",
        "‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤Æ‡≤®‡≥Ü‡≤Ø‡≤≤‡≥ç‡≤≤‡≤ø‡≤≤‡≥ç‡≤≤",
        "‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤Ø‡≥ã‡≤ú‡≤®‡≥Ü ‡≤§‡≤§‡≥ç‡≤§‡≥ç‡≤µ‡≤æ‡≤§‡≥ç‡≤Æ‡≤ï‡≤µ‡≤æ‡≤ó‡≤ø‡≤¶‡≥Ü",
        "‡≤®‡≤æ‡≤®‡≥Å ‡≤™‡≥Å‡≤∏‡≥ç‡≤§‡≤ï ‡≤ì‡≤¶‡≤ø‡≤¶‡≥ç‡≤¶‡≥á‡≤®‡≥Ü"
    ]

    # Evaluate: prints the EM/F1 report; the returned scores are not used here.
    evaluate(predicted_answers, ground_truth_answers)


               üìä Evaluation Metrics               
üîç Exact Match (EM): 60.00%
üèÜ F1 Score: 82.55%
Prediction                     | Ground Truth                   | Match
--------------------------------------------------
‡≤ï‡≤≥‡≥Ü‡≤¶ ‡≤µ‡≤æ‡≤∞‡≤¶ ‡≤µ‡≤∞‡≤¶‡≤ø                 | ‡≤ï‡≤≥‡≥Ü‡≤¶ ‡≤µ‡≤æ‡≤∞‡≤¶ ‡≤µ‡≤∞‡≤¶‡≤ø                 | ‚úÖ
‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤ï‡≥ã‡≤°‡≥ç ‡≤â‡≤§‡≥ç‡≤§‡≤Æ‡≤µ‡≤æ‡≤ó‡≤ø‡≤¶‡≥Ü         | ‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤ï‡≥ã‡≤°‡≥ç ‡≤â‡≤§‡≥ç‡≤§‡≤Æ‡≤µ‡≤æ‡≤ó‡≤ø‡≤¶‡≥Ü         | ‚úÖ
‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤∏‡≤Ç‡≤ú‡≥Ü ‡≤¨‡≤Ç‡≤¶‡≥Å ‡≤π‡≥ã‡≤ó‡≤ø‡≤¶‡≥ç‡≤¶‡≤∞‡≥Å       | ‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤∏‡≤Ç‡≤ú‡≥Ü ‡≤¨‡≤Ç‡≤¶‡≤ø‡≤¶‡≥ç‡≤¶‡≤∞‡≥Å            | ‚ùå
‡≤∂‡≤ø‡≤ï‡≥ç‡≤∑‡≤ï‡≤∞ ‡≤∏‡≤≠‡≥Ü ‡≤∏‡≥ã‡≤Æ‡≤µ‡≤æ‡≤∞ ‡≤®‡≤°‡≥Ü‡≤Ø‡≤ø‡≤§‡≥Å     | ‡≤∂‡≤ø‡≤ï‡≥ç‡≤∑‡≤ï‡≤∞ ‡≤∏‡≤≠‡≥Ü ‡≤∏‡≥ã‡≤Æ‡≤µ‡≤æ‡≤∞ ‡≤®‡≤°‡≥Ü‡≤Ø‡≤ø‡≤§‡≥Å     | ‚úÖ
‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤π‡≤æ‡≤≤‡≥Å ‡≤§‡≤ø‡≤®‡≥ç‡≤®‡≥Å‡≤µ ‡≤¨‡≤ó‡≥ç‡≤ó‡≥Ü‡≤Ø‡≥Ü‡≤®‡≥ç‡≤®‡≤ø‡≤∏‡≥Å‡≤§‡≥ç‡≤§‡≤ø‡≤¶‡≥Ü | ‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤π‡≤æ‡≤≤‡≥Å ‡≤§‡≤ø‡≤®‡≥ç‡≤®‡≥Å‡≤µ ‡≤¨‡≤ó‡≥ç‡≤ó‡≥Ü ‡≤™‡≥ç‡≤∞‡≤∂‡≥ç‡≤

In [3]:
import numpy as np
from jiwer import wer

def calculate_wer(predicted_answers, ground_truth_answers):
    """
    Print the Word Error Rate (WER) of every pair and return their mean.

    Predictions and ground truths are paired by position. Each pair is scored
    separately with ``jiwer.wer`` (ground truth as reference), so the result
    is the unweighted mean of per-sample WERs.

    Args:
        predicted_answers (list of str): List of predicted answers.
        ground_truth_answers (list of str): List of ground truth answers.

    Returns:
        float: Average WER across all samples.
    """
    per_sample_wer = []
    for hypothesis, reference in zip(predicted_answers, ground_truth_answers):
        # jiwer expects the reference first, then the hypothesis.
        sample_wer = wer(reference, hypothesis)
        print(f"Ground Truth: {reference}")
        print(f"Predicted: {hypothesis}")
        print(f"WER: {sample_wer:.2f}")
        per_sample_wer.append(sample_wer)

    # Unweighted mean over the per-sample scores.
    avg_wer = np.mean(per_sample_wer)
    print(f"\nAverage WER: {avg_wer:.2f}")
    return avg_wer

# Demo: report WER for five hand-written prediction / ground-truth pairs.
if __name__ == "__main__":
    # Sample predicted answers and ground truth answers
    # (hard-coded strings, paired by position with ground_truth_answers below)
    predicted_answers = [
        "‡≤ï‡≤≥‡≥Ü‡≤¶ ‡≤µ‡≤æ‡≤∞‡≤¶ ‡≤µ‡≤∞‡≤¶‡≤ø",
        "‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤ï‡≥ã‡≤°‡≥ç ‡≤â‡≤§‡≥ç‡≤§‡≤Æ‡≤µ‡≤æ‡≤ó‡≤ø‡≤¶‡≥Ü",
        "‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤∏‡≤Ç‡≤ú‡≥Ü ‡≤¨‡≤Ç‡≤¶‡≥Å ‡≤π‡≥ã‡≤ó‡≤ø‡≤¶‡≥ç‡≤¶‡≤∞‡≥Å",
        "‡≤∂‡≤ø‡≤ï‡≥ç‡≤∑‡≤ï‡≤∞ ‡≤∏‡≤≠‡≥Ü ‡≤∏‡≥ã‡≤Æ‡≤µ‡≤æ‡≤∞ ‡≤®‡≤°‡≥Ü‡≤Ø‡≤ø‡≤§‡≥Å",
        "‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤π‡≤æ‡≤≤‡≥Å ‡≤§‡≤ø‡≤®‡≥ç‡≤®‡≥Å‡≤µ ‡≤¨‡≤ó‡≥ç‡≤ó‡≥Ü‡≤Ø‡≥Ü‡≤®‡≥ç‡≤®‡≤ø‡≤∏‡≥Å‡≤§‡≥ç‡≤§‡≤¶‡≥Ü",
    ]

    # Reference answers, in the same order as predicted_answers.
    ground_truth_answers = [
        "‡≤ï‡≤≥‡≥Ü‡≤¶ ‡≤µ‡≤æ‡≤∞‡≤¶ ‡≤µ‡≤∞‡≤¶‡≤ø",
        "‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤ï‡≥ã‡≤°‡≥ç ‡≤â‡≤§‡≥ç‡≤§‡≤Æ‡≤µ‡≤æ‡≤ó‡≤ø‡≤¶‡≥Ü",
        "‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤∏‡≤Ç‡≤ú‡≥Ü ‡≤¨‡≤Ç‡≤¶‡≤ø‡≤¶‡≥ç‡≤¶‡≤∞‡≥Å",
        "‡≤∂‡≤ø‡≤ï‡≥ç‡≤∑‡≤ï‡≤∞ ‡≤∏‡≤≠‡≥Ü ‡≤∏‡≥ã‡≤Æ‡≤µ‡≤æ‡≤∞ ‡≤®‡≤°‡≥Ü‡≤Ø‡≤ø‡≤§‡≥Å",
        "‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤π‡≤æ‡≤≤‡≥Å ‡≤§‡≤ø‡≤®‡≥ç‡≤®‡≥Å‡≤µ ‡≤¨‡≤ó‡≥ç‡≤ó‡≥Ü ‡≤™‡≥ç‡≤∞‡≤∂‡≥ç‡≤®‡≥Ü ‡≤ï‡≥á‡≤≥‡≤ø‡≤¶‡≤∞‡≥Å",
    ]

    # Prints the per-pair and average WER; the returned average is not used here.
    calculate_wer(predicted_answers, ground_truth_answers)


Ground Truth: ‡≤ï‡≤≥‡≥Ü‡≤¶ ‡≤µ‡≤æ‡≤∞‡≤¶ ‡≤µ‡≤∞‡≤¶‡≤ø
Predicted: ‡≤ï‡≤≥‡≥Ü‡≤¶ ‡≤µ‡≤æ‡≤∞‡≤¶ ‡≤µ‡≤∞‡≤¶‡≤ø
WER: 0.00
Ground Truth: ‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤ï‡≥ã‡≤°‡≥ç ‡≤â‡≤§‡≥ç‡≤§‡≤Æ‡≤µ‡≤æ‡≤ó‡≤ø‡≤¶‡≥Ü
Predicted: ‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤ï‡≥ã‡≤°‡≥ç ‡≤â‡≤§‡≥ç‡≤§‡≤Æ‡≤µ‡≤æ‡≤ó‡≤ø‡≤¶‡≥Ü
WER: 0.00
Ground Truth: ‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤∏‡≤Ç‡≤ú‡≥Ü ‡≤¨‡≤Ç‡≤¶‡≤ø‡≤¶‡≥ç‡≤¶‡≤∞‡≥Å
Predicted: ‡≤Ö‡≤µ‡≤∞‡≥Å ‡≤∏‡≤Ç‡≤ú‡≥Ü ‡≤¨‡≤Ç‡≤¶‡≥Å ‡≤π‡≥ã‡≤ó‡≤ø‡≤¶‡≥ç‡≤¶‡≤∞‡≥Å
WER: 0.67
Ground Truth: ‡≤∂‡≤ø‡≤ï‡≥ç‡≤∑‡≤ï‡≤∞ ‡≤∏‡≤≠‡≥Ü ‡≤∏‡≥ã‡≤Æ‡≤µ‡≤æ‡≤∞ ‡≤®‡≤°‡≥Ü‡≤Ø‡≤ø‡≤§‡≥Å
Predicted: ‡≤∂‡≤ø‡≤ï‡≥ç‡≤∑‡≤ï‡≤∞ ‡≤∏‡≤≠‡≥Ü ‡≤∏‡≥ã‡≤Æ‡≤µ‡≤æ‡≤∞ ‡≤®‡≤°‡≥Ü‡≤Ø‡≤ø‡≤§‡≥Å
WER: 0.00
Ground Truth: ‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤π‡≤æ‡≤≤‡≥Å ‡≤§‡≤ø‡≤®‡≥ç‡≤®‡≥Å‡≤µ ‡≤¨‡≤ó‡≥ç‡≤ó‡≥Ü ‡≤™‡≥ç‡≤∞‡≤∂‡≥ç‡≤®‡≥Ü ‡≤ï‡≥á‡≤≥‡≤ø‡≤¶‡≤∞‡≥Å
Predicted: ‡≤®‡≤ø‡≤Æ‡≥ç‡≤Æ ‡≤π‡≤æ‡≤≤‡≥Å ‡≤§‡≤ø‡≤®‡≥ç‡≤®‡≥Å‡≤µ ‡≤¨‡≤ó‡≥ç‡≤ó‡≥Ü‡≤Ø‡≥Ü‡≤®‡≥ç‡≤®‡≤ø‡≤∏‡≥Å‡≤§‡≥ç‡≤§‡≤¶‡≥Ü
WER: 0.50

Average WER: 0.23
