# Import Required Libraries

In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torchaudio
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import classification_report
from transformers import Wav2Vec2Model, Wav2Vec2Processor, HubertModel
from speechbrain.pretrained import SpeakerRecognition

  from speechbrain.pretrained import SpeakerRecognition


# Path Variables

In [2]:
# Dataset root and the language-pair subset this notebook evaluates.
root_path = "../../data/Speech-to-Text/"
mix_selection = "Hindi-English_test" # "Bengali-English_test", "Hindi-English_test"

# Audio directory of the selected subset, and the cached ground-truth table.
file_directory = os.path.join(root_path, mix_selection, "test")
ground_truth_csv = os.path.join(root_path, f"{mix_selection}_test.csv")

# Transcript-side inputs: segment time stamps and the reference text.
segment_file = os.path.join(root_path, mix_selection, "test/transcripts/segments")
transcript_file = os.path.join(root_path, mix_selection, "test/transcripts/text")

# Ground Truth Preparation

In [3]:
# Build the ground-truth table once and cache it as CSV; later runs reuse the cache.
if not os.path.exists(ground_truth_csv):

    # Segment file: only lines with exactly four whitespace-separated fields are used,
    # read as "<utt_id> <file_id> <start> <end>". The speaker id is the part of the
    # utterance id before the first underscore.
    # Opened read-only with an explicit encoding so read-only datasets and
    # non-UTF-8 default locales do not break the load.
    segment_rows = []
    with open(segment_file, "r", encoding="utf-8") as read_file:
        for line in read_file:
            words = line.split()
            if len(words) == 4:
                segment_rows.append(
                    {
                        "speaker_id": words[0].split("_")[0],
                        "utt_id": words[0],
                        "file_id": words[1],
                        "start": words[2],
                        "end": words[3],
                    }
                )

    df1 = pd.DataFrame(
        segment_rows,
        columns=["speaker_id", "utt_id", "file_id", "start", "end"],
    )

    # Transcript file: "<utt_id> <word> <word> ..." per line; words are re-joined
    # with single spaces. Lines without any transcript text are skipped.
    transcript_rows = []
    with open(transcript_file, "r", encoding="utf-8") as read_file:
        for line in read_file:
            words = line.split()
            if len(words) >= 2:
                transcript_rows.append(
                    {
                        "utt_id": words[0],
                        # NOTE: the column name "transript" (sic) is kept because
                        # already-cached CSV files use this spelling.
                        "transript": " ".join(words[1:]),
                    }
                )

    df2 = pd.DataFrame(transcript_rows, columns=["utt_id", "transript"])

    # Inner join: utterances missing from either file are dropped.
    ground_truth = df1.merge(df2, on=["utt_id"])
    ground_truth["file_path"] = ground_truth.file_id.apply(
        lambda x: os.path.join(file_directory, x + ".wav")
    )

    ground_truth.to_csv(ground_truth_csv, index=False)

# Always load from the CSV so a fresh build and a cached run yield the same dtypes
# (the freshly parsed columns are all strings; read_csv infers numeric types).
df = pd.read_csv(ground_truth_csv)

In [4]:
# Display the ground-truth table built or loaded in the previous cell.
df

Unnamed: 0,speaker_id,utt_id,file_id,start,end,transript,file_path,chunk_path,Whisper-Base,Whisper-Medium,Whisper-Large-v2,Wav2Vec2-Large
0,103085,103085_w5Jyq3XMbb3WwiKQ_0000,w5Jyq3XMbb3WwiKQ,0.0,8.00,लिबर ऑफिस impress में एक प्रस्तुति document बन...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"Libber Office Imprasme, 1 Prasthuti Document ...",لیبر آفس ایمپریس میں ایک پرستوٹی ڈاکیومنٹ بنا...,لبر آفس ایمپریس میں ایک پرستوطی ڈاکیومنٹ بنان...,LIBERA OFFICE IMPRESS ME EPUSTODY DOCUMENT BAN...
1,103085,103085_w5Jyq3XMbb3WwiKQ_0001,w5Jyq3XMbb3WwiKQ,8.0,21.00,इस tutorial में हम impress window के भागों के ...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,आपान्ता भान्ता भान्ता भान्ता भान्ता भान्ता भा...,हम इंप्रैस विंडो के भागो के बारे में सीखेंगे ...,इस टीटूरल में हम इंप्रेस विंडो के भागो के बार...,OTTE ISTIDEL ME HUM IMPRESS VINDUGIB HAGOGIBAR...
2,103085,103085_w5Jyq3XMbb3WwiKQ_0002,w5Jyq3XMbb3WwiKQ,21.0,29.00,यहाँ हम अपने ऑपरेटिंग सिस्टम के रूप में gnu/li...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,یہاں ہم اپنے اور پڑڑھڑھڑھڑھڑھڑھڑھڑھڑھڑھڑھڑھڑھ...,यहां हम अपने अपरेटिंग सिस्टम के रूप में जेनू ...,यहाँ हम अपने Operating System के रूप में JNU ...,YAHAM OBNE OR BREDDING SYSTEM GEROPE ME GENU L...
3,103085,103085_w5Jyq3XMbb3WwiKQ_0003,w5Jyq3XMbb3WwiKQ,29.0,35.00,चलिए अपनी प्रस्तुति प्रेजैटेशन sample impress ...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,तो दो लो लो लो लो लो लो लो लो लो लो लो लो लो ...,चलिये अपनी प्रस्थुती सामपल इम्प्रस्थ अपन करते...,चल्ये अपनी प्रस्तुति सैंपल इम्प्लस ओपन करते ह...,JELLY ABNIPOSTUDI SAMPLE IMPLAS POPEN GARTINER...
4,103085,103085_w5Jyq3XMbb3WwiKQ_0004,w5Jyq3XMbb3WwiKQ,35.0,39.00,चलिए देखते हैं कि screen पर क्या क्या है,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,", we will see what is on the screen.",बनाया था।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।...,चलये देखते हैं कि स्क्रीन पर क्या-क्या है,EBONAIATA JELLY DE TANKEES GREENBERD KARKYAHER
...,...,...,...,...,...,...,...,...,...,...,...,...
3131,957491,957491_4xyIm2P6Xzlin341_0166,4xyIm2P6Xzlin341,866.0,872.00,अधिक जानकारी के लिए कृपया contact @spokentutor...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,دھی جنگہری کے لئے کرپیا contact at spoken hyp...,"अधिक जानकारी के लिए क्रिप्या, contact at spok...",आथिक जानकारी के लिए क्रिप्प्या Contact at spo...,TIJANGARIGILIAGRIPIA CONTACT AT SPOKEN HIGH FW...
3132,957491,957491_4xyIm2P6Xzlin341_0167,4xyIm2P6Xzlin341,872.0,876.00,spoken tutorial project talktoa teacher projec...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"Spoken to total project, Talk to a teacher po...",spoken tutorial project talk to a teacher pro...,سپوکن ٹیٹول پروجیکٹ ڈاکٹ ور ٹیچر پویٹ کا حصہ ...,SPOKEN TO THE OLD PROJECT DOCTOR DJHEDBARD GOT...
3133,957491,957491_4xyIm2P6Xzlin341_0168,4xyIm2P6Xzlin341,876.0,884.00,यह भारत सरकार के एमएचआरडी के आईसीटी के माध्यम ...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,بھرسرگار کے MHRD کے ICT کے مادم سے راستجیہ سا...,भारा सर्गार के MHRD के ICT के माध्यम से राश्च...,भारत सरकार के MHRD के ICT के माध्यम से राष्ट्...,PARATHARGARKI EMMAJORDIKI ITITIKIM ADEMTI RASH...
3134,957491,957491_4xyIm2P6Xzlin341_0169,4xyIm2P6Xzlin341,884.0,895.00,इस mission पर अधिक जानकारी दिए गए लिंक पर उपलब...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"In this video, I will show you how to change ...",दिये गई लिंग पर उप्लबद है। spoken-tutorial.or...,दिये गए लिंग पर उपलब्द है spoken-tutorial.org...,GADDI DIAGALING PARO PLABDHE SPOKEN HYPON TORE...


In [5]:
# Compute both speaker-level summaries first, then report them.
# num_speakers: count of distinct speaker ids.
# files_per_speaker: number of non-null file_path entries per speaker.
num_speakers = df["speaker_id"].nunique()
files_per_speaker = df.groupby("speaker_id")["file_path"].count()

print(f" Total number of unique speakers: {num_speakers}")
print("\n Number of audio segments per speaker:")
print(files_per_speaker)

 Total number of unique speakers: 30

 Number of audio segments per speaker:
speaker_id
103085     77
103725     53
118638     65
124478     81
133511     81
137494    164
146881     68
161768     86
270589     71
318923    123
323507    117
347099     69
360746     66
388577    114
408467    112
453832    114
478254    161
508088    103
521245    182
598753    114
610773     86
628028    159
656144    142
656852     83
791308    128
834022     91
847066    102
918821     48
921151    107
957491    169
Name: file_path, dtype: int64


# Speaker Identification

In [6]:
# Directory that receives the per-utterance audio chunks; created if it does not exist.
split_folder = os.path.join(file_directory, "splits")
os.makedirs(name=split_folder, exist_ok=True)

### Create Chunks from Timestamps

In [7]:
def process_chunk(row):
    """Cut one utterance out of its source recording and save it as a WAV file.

    The previous implementation relied on ``AudioSegment``, which this notebook
    never imports, so every call failed with a ``NameError`` that the broad
    ``except`` below turned into ``None``. The cut is now done with
    ``torchaudio``, which the notebook already imports.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``file_path`` (source audio), ``start`` and ``end``
        (segment boundaries in seconds) and ``utt_id`` (used as file name).

    Returns
    -------
    str or None
        Path of the written chunk inside ``split_folder``, or ``None`` when
        the chunk could not be created (the error is printed, not raised).
    """
    try:
        waveform, sample_rate = torchaudio.load(row["file_path"])

        # Convert the boundaries from seconds to sample indices.
        start_frame = int(round(float(row["start"]) * sample_rate))
        end_frame = int(round(float(row["end"]) * sample_rate))

        # waveform is indexed as (channels, frames); slice along the time axis.
        chunk = waveform[:, start_frame:end_frame]
        if chunk.shape[1] == 0:
            # Reversed boundaries or a segment beyond the end of the recording
            # would otherwise produce an empty file.
            raise ValueError(
                f"empty segment [{row['start']}, {row['end']}] in {row['file_path']}"
            )

        # Define output file path
        chunk_filename = f"{row['utt_id']}.wav"
        chunk_path = os.path.join(split_folder, chunk_filename)

        # Export chunk as 16-bit PCM WAV.
        torchaudio.save(
            chunk_path,
            chunk,
            sample_rate,
            encoding="PCM_S",
            bits_per_sample=16,
        )

        return chunk_path

    except Exception as e:
        # Best effort: report the failure and let the caller see a null path.
        print(f"Error processing {row['utt_id']}: {e}")
        return None

In [8]:
# Create the chunks when the column is missing, and also when an earlier run left
# null entries behind (process_chunk returns None on failure); checking only for the
# column would cache those failures forever.
chunks_incomplete = "chunk_path" not in df.columns or df["chunk_path"].isna().any()
if chunks_incomplete:
    df["chunk_path"] = df.apply(process_chunk, axis=1)

    n_failed = int(df["chunk_path"].isna().sum())
    if n_failed:
        print(f"Warning: {n_failed} of {len(df)} chunks could not be created")

    df.to_csv(ground_truth_csv, index=False)

### Speaker Identification

In [9]:
# Pick N chunks per speaker
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

df = pd.read_csv(ground_truth_csv)
N_Chunks = 10
df_sampled = df.groupby("speaker_id").apply(lambda x: x.sample(n=min(N_Chunks, len(x)), random_state=42)).reset_index(drop=True)

  df_sampled = df.groupby("speaker_id").apply(lambda x: x.sample(n=min(N_Chunks, len(x)), random_state=42)).reset_index(drop=True)


In [10]:
# Encode speaker labels
le = LabelEncoder()
df_sampled["label_id"] = le.fit_transform(df_sampled["speaker_id"])

In [11]:
# Display the sampled rows together with the new "label_id" column.
df_sampled

Unnamed: 0,speaker_id,utt_id,file_id,start,end,transript,file_path,chunk_path,Whisper-Base,Whisper-Medium,Whisper-Large-v2,Wav2Vec2-Large,label_id
0,103085,103085_w5Jyq3XMbb3WwiKQ_0004,w5Jyq3XMbb3WwiKQ,35.0,39.0,चलिए देखते हैं कि screen पर क्या क्या है,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,", we will see what is on the screen.",बनाया था।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।...,चलये देखते हैं कि स्क्रीन पर क्या-क्या है,EBONAIATA JELLY DE TANKEES GREENBERD KARKYAHER,0
1,103085,103085_w5Jyq3XMbb3WwiKQ_0035,w5Jyq3XMbb3WwiKQ,195.0,200.0,long term goal नामक slide पर double क्लिक करके...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"LONG TUM GOAL NAMAKS LIDE PLE WELCLEKKAR, UTH...",लोंग टम गोल नामक स्लाइड पर डबर कॉलिक करके उसे...,Long term goal नमक स्लाइड पर डबर क्लिक करके उ...,HE LONG TOM GOLL NAMMOCKS LIGHT BY DEBLGLE KER...,0
2,103085,103085_w5Jyq3XMbb3WwiKQ_0010,w5Jyq3XMbb3WwiKQ,63.0,70.0,notes view प्रत्येक slide में notes जोड़ने की ...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,nodes view putt x slide میں node jordnay ke s...,notes view प्रत्यक स्लाइड में notes जोडने की ...,Notes View प्रत्यक स्लाइड में नोट जोड़ने की स...,NORT VIEW BUT THE EXLIDE MAR NOR JOR NEGITO WI...,0
3,103085,103085_w5Jyq3XMbb3WwiKQ_0000,w5Jyq3XMbb3WwiKQ,0.0,8.0,लिबर ऑफिस impress में एक प्रस्तुति document बन...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"Libber Office Imprasme, 1 Prasthuti Document ...",لیبر آفس ایمپریس میں ایک پرستوٹی ڈاکیومنٹ بنا...,لبر آفس ایمپریس میں ایک پرستوطی ڈاکیومنٹ بنان...,LIBERA OFFICE IMPRESS ME EPUSTODY DOCUMENT BAN...,0
4,103085,103085_w5Jyq3XMbb3WwiKQ_0045,w5Jyq3XMbb3WwiKQ,254.0,259.0,इस डायलॉग बॉक्स को बंद करते हैं,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,phoant style or size set kursak te hai. iis d...,साइज सेट कर सकते हैं। इस द्यालोक बौक्स को बंद...,"फउण्ट, स्टाइल और साइज सेट कर सकते हैं। इस ड्य...",ARFORMED STYLE OR THIGH TAT GASSUCTE IF THE LO...,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,957491,957491_4xyIm2P6Xzlin341_0164,4xyIm2P6Xzlin341,858.0,862.0,spoken tutorials का उपयोग करके कार्यशालाएँ भी ...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,Team. Spoken to door ka upyokar ke kare shala...,कारेशालायें भी चलाते हैं।,Spoken Tutorial का उप्योग करके कारेशालाएं भी ...,TEM SPOKEN TO THE RUCOO PUKARKIK AR TE CHALAIN...,29
296,957491,957491_4xyIm2P6Xzlin341_0167,4xyIm2P6Xzlin341,872.0,876.0,spoken tutorial project talktoa teacher projec...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"Spoken to total project, Talk to a teacher po...",spoken tutorial project talk to a teacher pro...,سپوکن ٹیٹول پروجیکٹ ڈاکٹ ور ٹیچر پویٹ کا حصہ ...,SPOKEN TO THE OLD PROJECT DOCTOR DJHEDBARD GOT...,29
297,957491,957491_4xyIm2P6Xzlin341_0051,4xyIm2P6Xzlin341,261.0,270.0,अब युज़र नेम stuserone@ gmailcom और फिर पासवर्...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,Now user name STUZER1 at gmail.com and then s...,अब यूजर नेम स्टी यूजर वान एट जीमेल डौट कोम और...,اب username stuser1 at gmail.com اور پھر pass...,UB YOUTHE NAME S TOUTHERVON G MILORDCOMB OLFI...,29
298,957491,957491_4xyIm2P6Xzlin341_0107,4xyIm2P6Xzlin341,560.0,569.0,मेल में टेक्स्ट hi i now have an email account...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"Mail me text Hi, I now have an email account ...","In the mail, type the text, Hi, I now have an...",मेल में टेक्स्ट हाई आई नौ हैव एमेल अकाउंट इन ...,Y MEELME TEXT HAI I NOW HAVING E MILL ACCOUNT ...,29


In [12]:
# Registry of embedding models: (display name, model id, Hugging Face model class).
# The class is None for the SpeechBrain model, which is loaded through
# SpeakerRecognition instead of transformers.
available_models = [
    ("Wav2Vec2", "facebook/wav2vec2-large-960h", Wav2Vec2Model),
    ("HuBERT", "facebook/hubert-large-ls960-ft", HubertModel),
    ("Ecapa-tdnn", "speechbrain/spkrec-ecapa-voxceleb", None)
]

# SECURITY: never hard-code access tokens in a notebook. The token is read from the
# HF_TOKEN environment variable; it is None when the variable is unset or empty.
# The token that was previously committed here must be treated as leaked and revoked.
hf_token = os.environ.get("HF_TOKEN") or None

In [13]:
def extract_embedding(model_name, model, processor, audio_path, target_sr=16000):
    """Compute one fixed-size embedding for an audio file.

    Parameters
    ----------
    model_name : str
        ``"Ecapa-tdnn"`` selects the SpeechBrain path (``model.encode_batch``);
        any other name selects the transformers path (processor + model,
        mean-pooled ``last_hidden_state``).
    model : object
        Loaded model matching ``model_name``.
    processor : callable or None
        Feature processor for the transformers path; unused for ECAPA.
    audio_path : str
        Audio file to embed.
    target_sr : int, default 16000
        Sampling rate the audio is resampled to before it is fed to the model.

    Returns
    -------
    numpy.ndarray
        The squeezed embedding produced by the selected path.
    """
    waveform, sr = torchaudio.load(audio_path)
    waveform = waveform.float()

    # Normalise the input to a single channel of shape (1, num_samples).
    # Without the down-mix, a multi-channel file is treated as a batch by both
    # model paths and yields one embedding per channel.
    if waveform.dim() == 1:
        waveform = waveform.unsqueeze(0)
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Resample so the sampling rate passed to the processor matches the audio.
    if sr != target_sr:
        waveform = torchaudio.functional.resample(waveform, sr, target_sr)
        sr = target_sr

    if model_name == "Ecapa-tdnn":
        # encode_batch expects lengths *relative* to the longest item in the batch
        # (1.0 = full length), not durations in seconds. Passing seconds truncated
        # any chunk shorter than one second.
        lengths = torch.ones(waveform.shape[0])
        with torch.no_grad():
            emb = model.encode_batch(waveform, lengths).squeeze().cpu().numpy()

    else:
        # Wav2Vec2 and HuBERT path
        inputs = processor(
            waveform.squeeze(0).cpu().numpy(),
            sampling_rate=sr,
            return_tensors="pt",
            padding=True
        )
        with torch.no_grad():
            outputs = model(**{k: v.to(DEVICE) for k, v in inputs.items()})
        # Average over the time axis to get one vector per utterance.
        emb = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()

    return emb

In [14]:
# For every registered model: embed each sampled chunk, then label each chunk with
# the speaker of its most similar *other* chunk (leave-one-out nearest neighbour on
# cosine similarity). Predictions are stored in "<model_name>_pred".
for model_name, model_id, model_class in available_models:

    # Load model and processor
    if model_name == "Ecapa-tdnn":
        model = SpeakerRecognition.from_hparams(source=model_id, savedir=f"pretrained/{model_name}")
        processor = None

    elif model_class is not None:
        # Wav2Vec2 and HuBERT are loaded the same way; only the model class
        # (taken from the registry) differs.
        processor = Wav2Vec2Processor.from_pretrained(model_id, token=hf_token)
        model = model_class.from_pretrained(model_id, token=hf_token).to(DEVICE)
        model.eval()

    else:
        raise ValueError(f"Unknown model: {model_name}")

    # Extract embeddings. The row index of every successful extraction is kept so
    # predictions can be aligned with df_sampled even if some chunks fail.
    embeddings, speaker_ids, row_index = [], [], []
    for idx, row in tqdm(df_sampled.iterrows(), total=len(df_sampled), desc=model_name):
        try:
            emb = extract_embedding(model_name, model, processor, row["chunk_path"])
        except Exception as e:
            print(f"Error on {row['chunk_path']}: {e}")
            continue
        embeddings.append(emb)
        speaker_ids.append(row["label_id"])
        row_index.append(idx)

    if len(embeddings) < 2:
        # A nearest neighbour other than the chunk itself needs at least two chunks.
        raise RuntimeError(
            f"{model_name}: only {len(embeddings)} embedding(s) extracted; "
            "nearest-neighbour prediction needs at least 2"
        )

    embeddings = np.stack(embeddings)
    speaker_ids = np.array(speaker_ids)

    # Cosine similarity
    sim_matrix = cosine_similarity(embeddings)
    np.fill_diagonal(sim_matrix, -np.inf)  # Ignore self

    # Predict speaker by nearest neighbor (row-wise argmax, vectorised).
    nearest_idx = sim_matrix.argmax(axis=1)
    pred_labels = le.inverse_transform(speaker_ids[nearest_idx])

    # Assign through an indexed Series: rows whose extraction failed get NaN instead
    # of the whole assignment failing on a length mismatch.
    df_sampled[f"{model_name}_pred"] = pd.Series(pred_labels, index=row_index)

    n_failed = len(df_sampled) - len(row_index)
    if n_failed:
        print(f"Warning: {model_name}: no prediction for {n_failed} chunk(s)")

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:04<00:00, 69.60it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:04<00:00, 67.63it/s]
  wrapped_fwd = torch.cuda.amp.custom_fwd(fwd, cast_inputs=cast_inputs)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:58<00:00,  5.15it/s]


In [15]:
# Display the sampled rows, now including one "<model_name>_pred" column per model.
df_sampled

Unnamed: 0,speaker_id,utt_id,file_id,start,end,transript,file_path,chunk_path,Whisper-Base,Whisper-Medium,Whisper-Large-v2,Wav2Vec2-Large,label_id,Wav2Vec2_pred,HuBERT_pred,Ecapa-tdnn_pred
0,103085,103085_w5Jyq3XMbb3WwiKQ_0004,w5Jyq3XMbb3WwiKQ,35.0,39.0,चलिए देखते हैं कि screen पर क्या क्या है,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,", we will see what is on the screen.",बनाया था।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।।...,चलये देखते हैं कि स्क्रीन पर क्या-क्या है,EBONAIATA JELLY DE TANKEES GREENBERD KARKYAHER,0,347099,918821,146881
1,103085,103085_w5Jyq3XMbb3WwiKQ_0035,w5Jyq3XMbb3WwiKQ,195.0,200.0,long term goal नामक slide पर double क्लिक करके...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"LONG TUM GOAL NAMAKS LIDE PLE WELCLEKKAR, UTH...",लोंग टम गोल नामक स्लाइड पर डबर कॉलिक करके उसे...,Long term goal नमक स्लाइड पर डबर क्लिक करके उ...,HE LONG TOM GOLL NAMMOCKS LIGHT BY DEBLGLE KER...,0,347099,118638,347099
2,103085,103085_w5Jyq3XMbb3WwiKQ_0010,w5Jyq3XMbb3WwiKQ,63.0,70.0,notes view प्रत्येक slide में notes जोड़ने की ...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,nodes view putt x slide میں node jordnay ke s...,notes view प्रत्यक स्लाइड में notes जोडने की ...,Notes View प्रत्यक स्लाइड में नोट जोड़ने की स...,NORT VIEW BUT THE EXLIDE MAR NOR JOR NEGITO WI...,0,453832,146881,103085
3,103085,103085_w5Jyq3XMbb3WwiKQ_0000,w5Jyq3XMbb3WwiKQ,0.0,8.0,लिबर ऑफिस impress में एक प्रस्तुति document बन...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"Libber Office Imprasme, 1 Prasthuti Document ...",لیبر آفس ایمپریس میں ایک پرستوٹی ڈاکیومنٹ بنا...,لبر آفس ایمپریس میں ایک پرستوطی ڈاکیومنٹ بنان...,LIBERA OFFICE IMPRESS ME EPUSTODY DOCUMENT BAN...,0,270589,508088,161768
4,103085,103085_w5Jyq3XMbb3WwiKQ_0045,w5Jyq3XMbb3WwiKQ,254.0,259.0,इस डायलॉग बॉक्स को बंद करते हैं,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,phoant style or size set kursak te hai. iis d...,साइज सेट कर सकते हैं। इस द्यालोक बौक्स को बंद...,"फउण्ट, स्टाइल और साइज सेट कर सकते हैं। इस ड्य...",ARFORMED STYLE OR THIGH TAT GASSUCTE IF THE LO...,0,137494,103085,103085
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,957491,957491_4xyIm2P6Xzlin341_0164,4xyIm2P6Xzlin341,858.0,862.0,spoken tutorials का उपयोग करके कार्यशालाएँ भी ...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,Team. Spoken to door ka upyokar ke kare shala...,कारेशालायें भी चलाते हैं।,Spoken Tutorial का उप्योग करके कारेशालाएं भी ...,TEM SPOKEN TO THE RUCOO PUKARKIK AR TE CHALAIN...,29,453832,103725,137494
296,957491,957491_4xyIm2P6Xzlin341_0167,4xyIm2P6Xzlin341,872.0,876.0,spoken tutorial project talktoa teacher projec...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"Spoken to total project, Talk to a teacher po...",spoken tutorial project talk to a teacher pro...,سپوکن ٹیٹول پروجیکٹ ڈاکٹ ور ٹیچر پویٹ کا حصہ ...,SPOKEN TO THE OLD PROJECT DOCTOR DJHEDBARD GOT...,29,137494,103725,161768
297,957491,957491_4xyIm2P6Xzlin341_0051,4xyIm2P6Xzlin341,261.0,270.0,अब युज़र नेम stuserone@ gmailcom और फिर पासवर्...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,Now user name STUZER1 at gmail.com and then s...,अब यूजर नेम स्टी यूजर वान एट जीमेल डौट कोम और...,اب username stuser1 at gmail.com اور پھر pass...,UB YOUTHE NAME S TOUTHERVON G MILORDCOMB OLFI...,29,118638,847066,137494
298,957491,957491_4xyIm2P6Xzlin341_0107,4xyIm2P6Xzlin341,560.0,569.0,मेल में टेक्स्ट hi i now have an email account...,../../data/Speech-to-Text/Hindi-English_test/t...,../../data/Speech-to-Text/Hindi-English_test/t...,"Mail me text Hi, I now have an email account ...","In the mail, type the text, Hi, I now have an...",मेल में टेक्स्ट हाई आई नौ हैव एमेल अकाउंट इन ...,Y MEELME TEXT HAI I NOW HAVING E MILL ACCOUNT ...,29,318923,270589,521245


# Evaluation

In [16]:
# Per-speaker precision/recall/F1 of the nearest-neighbour predictions of each model.
print(f"Language pair: {mix_selection}")
for model_name, model_id, model_class in available_models:
    pred_col = f"{model_name}_pred"

    # Score only rows that have a prediction, and compare labels in the dtype of
    # the reference column (a column containing NaN is no longer integer-typed).
    scored = df_sampled.dropna(subset=[pred_col])
    y_true = scored["speaker_id"]
    y_pred = scored[pred_col].astype(y_true.dtype)

    print(f"\nClassification Report for {model_name}:")
    # zero_division=0 reports 0.00 for speakers that are never predicted without
    # emitting an UndefinedMetricWarning for each of them.
    print(classification_report(y_true, y_pred, zero_division=0))

Language pair: Hindi-English_test

Classification Report for Wav2Vec2:
              precision    recall  f1-score   support

      103085       0.07      0.10      0.08        10
      103725       0.00      0.00      0.00        10
      118638       0.09      0.30      0.14        10
      124478       0.00      0.00      0.00        10
      133511       0.27      0.30      0.29        10
      137494       0.00      0.00      0.00        10
      146881       0.23      0.30      0.26        10
      161768       0.00      0.00      0.00        10
      270589       0.00      0.00      0.00        10
      318923       0.00      0.00      0.00        10
      323507       0.00      0.00      0.00        10
      347099       0.07      0.10      0.08        10
      360746       0.00      0.00      0.00        10
      388577       0.00      0.00      0.00        10
      408467       0.00      0.00      0.00        10
      453832       0.11      0.20      0.14        10
      4782