In [1]:
import os
import re
import unicodedata

import librosa
import pandas as pd
import torch
from datasets import load_dataset, load_metric, load_from_disk
from evaluate import load
from transformers import WhisperProcessor, WhisperForConditionalGeneration

In [8]:
# Model identifiers to evaluate, in the order they appear in the results table:
# the stock "openai/whisper-small" checkpoint followed by the other named models.
models = [
    "openai/whisper-small",
    "afr_FLEURS_model",
    "afr_de_FLEURS_model",
    "afr_nl_FLEURS_model",
    "afr_NCHLT_FLEURS_model",
    "afr_de_FLEURS_model_5h",
    "afr_south_hollandic_model",
    "afr_flemish_model",
    "afr_south_hollandic_v2_model",
    "afr_flemish_v2_model",
]

# Folder that receives the per-model prediction CSVs and the metrics summary.
dest_folder = 'evaluation_output'

# Test split of the "af_za" configuration of google/fleurs; every model is scored on it.
dataset = load_dataset(
    "google/fleurs",
    "af_za",
    split="test",
    trust_remote_code=True,
)



def get_metrics(model_name):
    """Score one Whisper checkpoint on the module-level ``dataset``.

    Each example is transcribed with the model's default ``generate`` settings.
    Reference and hypothesis are both passed through the tokenizer's normaliser
    before the corpus-level word and character error rates are computed. The
    normalised reference/prediction pairs are also written to
    ``{dest_folder}/{model_name with '/' replaced by '_'}.csv`` for qualitative
    inspection; ``dest_folder`` is created if it does not exist yet.

    Args:
        model_name: Identifier handed to
            ``WhisperForConditionalGeneration.from_pretrained``.

    Returns:
        Tuple ``(wer_res, cer_res)``: WER and CER multiplied by 100.
    """
    # Fall back to the CPU so the notebook still runs on a host without a GPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # The processor is always taken from the base checkpoint, whichever model is scored.
    processor = WhisperProcessor.from_pretrained("openai/whisper-small")
    model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device)

    def map_to_pred(batch):
        """Add normalised ``reference`` and ``prediction`` fields to one example."""
        audio = batch["audio"]
        input_features = processor(
            audio["array"],
            sampling_rate=audio["sampling_rate"],
            return_tensors="pt",
        ).input_features

        # NOTE(review): `_normalize` is a private tokenizer method; confirm it is the
        # intended normaliser for Afrikaans text before switching to a public one.
        # It is kept as-is so the scores stay comparable with earlier runs.
        batch["reference"] = processor.tokenizer._normalize(batch['transcription'])

        # Inference only: no gradients are needed.
        with torch.no_grad():
            predicted_ids = model.generate(input_features.to(device))[0]

        transcription = processor.decode(predicted_ids)
        batch["prediction"] = processor.tokenizer._normalize(transcription)

        return batch

    result = dataset.map(map_to_pred)

    # Read each column once and reuse it for both metrics and the CSV dump.
    references = result["reference"]
    predictions = result["prediction"]

    wer = load("wer")
    cer = load("cer")

    wer_res = 100 * wer.compute(references=references, predictions=predictions)
    cer_res = 100 * cer.compute(references=references, predictions=predictions)

    # Save the references and predictions to file for qualitative evaluation.
    # The folder is created first so a fresh run does not fail in to_csv.
    os.makedirs(dest_folder, exist_ok=True)
    df = pd.DataFrame({'reference': references, 'prediction': predictions})
    df.to_csv(f"{dest_folder}/{model_name.replace('/','_')}.csv", sep=',', index=False)

    return wer_res, cer_res

In [9]:
# Score every model once; each entry is the (WER, CER) pair from get_metrics.
scores = [get_metrics(checkpoint) for checkpoint in models]

wers = [wer_score for wer_score, _ in scores]
cers = [cer_score for _, cer_score in scores]

# One row per model, persisted next to the per-model prediction files.
df = pd.DataFrame({'model': models, 'wer': wers, 'cer': cers})
df.to_csv(f'{dest_folder}/model_metrics.csv', sep=',', index=False)
df

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/264 [00:00<?, ? examples/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/264 [00:00<?, ? examples/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/264 [00:00<?, ? examples/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/264 [00:00<?, ? examples/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/264 [00:00<?, ? examples/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/264 [00:00<?, ? examples/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/264 [00:00<?, ? examples/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/264 [00:00<?, ? examples/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/264 [00:00<?, ? examples/s]



Unnamed: 0,model,wer,cer
0,openai/whisper-small,69.881991,31.408633
1,afr_FLEURS_model,28.818197,10.653083
2,afr_de_FLEURS_model,25.209509,9.690242
3,afr_nl_FLEURS_model,29.26287,10.868029
4,afr_NCHLT_FLEURS_model,26.081751,11.247865
5,afr_de_FLEURS_model_5h,28.972122,10.656027
6,afr_south_hollandic_model,37.848469,17.016077
7,afr_flemish_model,47.306311,21.580001
8,afr_south_hollandic_v2_model,33.55567,15.655733
9,afr_flemish_v2_model,34.308192,15.611566


In [10]:
# Rank the models from lowest to highest word error rate.
df.sort_values('wer', ascending=True)

Unnamed: 0,model,wer,cer
2,afr_de_FLEURS_model,25.209509,9.690242
4,afr_NCHLT_FLEURS_model,26.081751,11.247865
1,afr_FLEURS_model,28.818197,10.653083
5,afr_de_FLEURS_model_5h,28.972122,10.656027
3,afr_nl_FLEURS_model,29.26287,10.868029
8,afr_south_hollandic_v2_model,33.55567,15.655733
9,afr_flemish_v2_model,34.308192,15.611566
6,afr_south_hollandic_model,37.848469,17.016077
7,afr_flemish_model,47.306311,21.580001
0,openai/whisper-small,69.881991,31.408633


In [11]:
# Rank the models from lowest to highest character error rate.
df.sort_values('cer', ascending=True)

Unnamed: 0,model,wer,cer
2,afr_de_FLEURS_model,25.209509,9.690242
1,afr_FLEURS_model,28.818197,10.653083
5,afr_de_FLEURS_model_5h,28.972122,10.656027
3,afr_nl_FLEURS_model,29.26287,10.868029
4,afr_NCHLT_FLEURS_model,26.081751,11.247865
9,afr_flemish_v2_model,34.308192,15.611566
8,afr_south_hollandic_v2_model,33.55567,15.655733
6,afr_south_hollandic_model,37.848469,17.016077
7,afr_flemish_model,47.306311,21.580001
0,openai/whisper-small,69.881991,31.408633
