In [1]:
!pip install -U torchaudio librosa jiwer datasets transformers huggingface_hub evaluate python-dotenv wandb

[0m

In [2]:
from transformers import pipeline
import evaluate
from datasets import load_dataset, Audio
import re
from pandas import DataFrame

# Load the "wer" metric once; its `compute(predictions=..., references=...)` is
# called for every model/split pair in the evaluation loop further down.
wer_metric = evaluate.load("wer")

# Character class of punctuation/symbols stripped from reference transcripts:
#   , ? . ! - ; : " “ % ’ ʻ ” � '
# A raw string is used so that `\-` reaches the regex engine as a regex escape
# instead of being parsed as an (invalid) Python string escape, which newer
# Python versions report as a SyntaxWarning.
CHARS_TO_REMOVE_RE = r"""[,?.!\-;:"“%’ʻ”�']"""


def remove_special_characters(batch):
    """Normalize the transcript stored under ``batch["sentence"]``.

    Every character matched by ``CHARS_TO_REMOVE_RE`` is deleted and the
    remaining text is lower-cased. Whitespace is left untouched, so removing
    a standalone symbol can leave two consecutive spaces.

    Args:
        batch: Mapping with a string value under the ``"sentence"`` key.

    Returns:
        The same ``batch`` object, with ``"sentence"`` overwritten in place.
    """
    batch["sentence"] = re.sub(CHARS_TO_REMOVE_RE, "", batch["sentence"]).lower()
    return batch

In [3]:
dataset_card = "mozilla-foundation/common_voice_11_0"
SAMPLING_RATE = 16_000

# For each requested split of the "ha" configuration: load it, cast the "audio"
# column to SAMPLING_RATE, then normalize the reference transcripts.
# The first split ("train+validation") is bound to common_voice_train and the
# second ("test") to common_voice_test.
common_voice_train, common_voice_test = (
    (
        load_dataset(dataset_card, "ha", split=split_name)
        .cast_column("audio", Audio(sampling_rate=SAMPLING_RATE))
        .map(remove_special_characters)
    )
    for split_name in ("train+validation", "test")
)

Found cached dataset common_voice_11_0 (/root/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ha/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f)
Loading cached processed dataset at /root/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ha/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-43a35fb51e895e44.arrow
Found cached dataset common_voice_11_0 (/root/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ha/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f)
Loading cached processed dataset at /root/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ha/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-48c0dd26597b88cb.arrow


In [4]:
# Hub identifiers of the models to compare.
model_cards = [
    "Akashpb13/Hausa_xlsr",
    "Mofe/xls-r-hausa-40",
    "infinitejoy/wav2vec2-large-xls-r-300m-hausa",
    "Tiamz/hausa-4-ha-wa2vec-data-aug-xls-r-300m",
]

# Evaluation sets and the labels used as keys in each metrics dict.
# Note: the "train" label refers to the combined "train+validation" split.
split_names = ["train", "test"]
eval_sets = [common_voice_train, common_voice_test]

# One dict per model: {"train": <wer>, "test": <wer>, "model": <model card>}.
all_metrics = []
for model_card in model_cards:
    print(model_card)
    # Build a pipeline for this model; the task is resolved from the model card.
    pipe = pipeline(model=model_card, device=0)

    # Transcribe every split in batches of 128 examples. Each dataset drops its
    # *own* columns so only the new "text" column remains, independent of
    # whether the two splits happen to share the same schema.
    preds = [
        ds.map(
            lambda x: {'text': [r['text'] for r in pipe(x['audio'])]},
            batched=True,
            batch_size=128,
            remove_columns=ds.column_names,
        )
        for ds in eval_sets
    ]

    # Score the predictions against the normalized reference sentences.
    metrics = {
        name: wer_metric.compute(predictions=ps['text'], references=ds['sentence'])
        for name, ps, ds in zip(split_names, preds, eval_sets)
    }
    print(metrics)
    metrics['model'] = model_card
    all_metrics.append(metrics)

Akashpb13/Hausa_xlsr


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/20 [00:00<?, ?ba/s]



  0%|          | 0/5 [00:00<?, ?ba/s]

{'train': 0.07000400881940269, 'test': 0.1796435272045028}
Mofe/xls-r-hausa-40


  0%|          | 0/20 [00:00<?, ?ba/s]

Loading cached processed dataset at /root/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ha/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-4e0e4c8f6e770937.arrow


{'train': 0.19452796151533375, 'test': 0.5773921200750469}
infinitejoy/wav2vec2-large-xls-r-300m-hausa


Downloading:   0%|          | 0.00/2.03k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.26G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/260 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/315 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/36.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/407 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/212 [00:00<?, ?B/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

{'train': 1.0258568851473242, 'test': 1.025093808630394}
Tiamz/hausa-4-ha-wa2vec-data-aug-xls-r-300m


Downloading:   0%|          | 0.00/1.66k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.26G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/181 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/318 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Downloading:   0%|          | 0.00/214 [00:00<?, ?B/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

{'train': 0.16285828823411505, 'test': 0.49577861163227016}


In [6]:
# Tabulate the per-model metrics: one row per model, with the "train", "test"
# and "model" keys of each collected dict as columns.
DataFrame(all_metrics)

Unnamed: 0,train,test,model
0,0.070004,0.179644,Akashpb13/Hausa_xlsr
1,0.194528,0.577392,Mofe/xls-r-hausa-40
2,1.025857,1.025094,infinitejoy/wav2vec2-large-xls-r-300m-hausa
3,0.162858,0.495779,Tiamz/hausa-4-ha-wa2vec-data-aug-xls-r-300m
