In [27]:
from jiwer import wer
from datasets import load_dataset, DatasetDict, Audio
from whisper_normalizer.basic import BasicTextNormalizer
from transformers import pipeline

In [17]:
import gc

import torch
# from transformers.models.whisper.english_normalizer import BasicTextNormalizer
from numerize import numerize


In [18]:

# %% ../nbs/00_utils.ipynb 4
whisper_norm = BasicTextNormalizer()

# %% ../nbs/00_utils.ipynb 5
def is_target_text_in_range(ref):
    if ref.strip() == "ignore time segment in scoring":
        return False
    else:
        return ref.strip() != ""


def get_text(sample):
    if "text" in sample:
        return sample["text"]
    elif "sentence" in sample:
        return sample["sentence"]
    elif "normalized_text" in sample:
        return sample["normalized_text"]
    elif "transcript" in sample:
        return sample["transcript"]
    elif "transcription" in sample:
        return sample["transcription"]
    else:
        raise ValueError(
            f"Expected transcript column of either 'text', 'sentence', 'normalized_text' or 'transcript'. Got sample of "
            ".join{sample.keys()}. Ensure a text column name is present in the dataset."
        )


def normalise(batch):
    batch["norm_text"] = whisper_norm(get_text(batch))
    return batch


def data(dataset):
    for i, item in enumerate(dataset):
        yield {**item["audio"], "reference": item["norm_text"]}

# %% ../nbs/00_utils.ipynb 6
def get_model_size(model):
    total_params = sum(param.numel() for param in model.parameters())
    return numerize.numerize(total_params)

# %% ../nbs/00_utils.ipynb 7
def clear_gpu_memory():
    torch.cuda.empty_cache()
    gc.collect()

In [19]:
poly = DatasetDict()

poly["train"] = load_dataset(
    "PolyAI/minds14", "en-US", split="train[0%:80%]"
)
poly["test"] = load_dataset(
    "PolyAI/minds14", "en-US", split="train[80%:100%]"
)

print(poly)

Using the latest cached version of the module from /home/kurianbenoy/.cache/huggingface/modules/datasets_modules/datasets/PolyAI--minds14/65c7e0f3be79e18a6ffaf879a083daf706312d421ac90d25718459cbf3c42696 (last modified on Sat Jul  1 13:29:08 2023) since it couldn't be found locally at PolyAI/minds14., or remotely on the Hugging Face Hub.
Found cached dataset minds14 (/home/kurianbenoy/.cache/huggingface/datasets/PolyAI___minds14/en-US/1.0.0/65c7e0f3be79e18a6ffaf879a083daf706312d421ac90d25718459cbf3c42696)
Found cached dataset minds14 (/home/kurianbenoy/.cache/huggingface/datasets/PolyAI___minds14/en-US/1.0.0/65c7e0f3be79e18a6ffaf879a083daf706312d421ac90d25718459cbf3c42696)


DatasetDict({
    train: Dataset({
        features: ['path', 'audio', 'transcription', 'english_transcription', 'intent_class', 'lang_id'],
        num_rows: 450
    })
    test: Dataset({
        features: ['path', 'audio', 'transcription', 'english_transcription', 'intent_class', 'lang_id'],
        num_rows: 113
    })
})


In [8]:
whisper_asr = pipeline("automatic-speech-recognition", model="kurianbenoy/hfa-whisper-tiny-dv")

Downloading (…)lve/main/config.json: 100%|██████████| 2.24k/2.24k [00:00<00:00, 7.63MB/s]
Downloading pytorch_model.bin: 100%|██████████| 151M/151M [00:26<00:00, 5.76MB/s] 
Downloading (…)neration_config.json: 100%|██████████| 3.59k/3.59k [00:00<00:00, 12.8MB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 805/805 [00:00<00:00, 3.54MB/s]
Downloading (…)olve/main/vocab.json: 100%|██████████| 1.04M/1.04M [00:00<00:00, 1.12MB/s]
Downloading (…)olve/main/merges.txt: 100%|██████████| 494k/494k [00:00<00:00, 9.98MB/s]
Downloading (…)main/normalizer.json: 100%|██████████| 52.7k/52.7k [00:00<00:00, 222kB/s]
Downloading (…)in/added_tokens.json: 100%|██████████| 2.08k/2.08k [00:00<00:00, 5.28MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 2.08k/2.08k [00:00<00:00, 9.13MB/s]
Downloading (…)rocessor_config.json: 100%|██████████| 339/339 [00:00<00:00, 2.09MB/s]


In [25]:
def ld():
    dataset = poly["test"]
    dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))
    dataset = dataset.map(normalise)
    dataset = dataset.filter(is_target_text_in_range, input_columns=["norm_text"])
    return dataset

In [39]:
%%time
ds = ld()
predictions = []
predictions_raw = []
references = []
references_raw = []
normalizer = BasicTextNormalizer()
for out in whisper_asr(data(ds), batch_size=4):
    predictions_raw.append(out["text"])
    references_raw.append(out["reference"][0])
    predictions.append(normalizer(out["text"]))
    references.append(normalizer(out["reference"][0]))

Loading cached processed dataset at /home/kurianbenoy/.cache/huggingface/datasets/PolyAI___minds14/en-US/1.0.0/65c7e0f3be79e18a6ffaf879a083daf706312d421ac90d25718459cbf3c42696/cache-c4b3ccbc6595b7a9.arrow
Loading cached processed dataset at /home/kurianbenoy/.cache/huggingface/datasets/PolyAI___minds14/en-US/1.0.0/65c7e0f3be79e18a6ffaf879a083daf706312d421ac90d25718459cbf3c42696/cache-17bcfd0e03432841.arrow




CPU times: user 12min 58s, sys: 2min 28s, total: 15min 26s
Wall time: 2min 30s


In [40]:
wer(predictions_raw, references_raw)

0.7135306553911205

In [41]:
wer(predictions, references)

0.49076385421867197

In [42]:
predictions


[' i have a 100hz check and i was wondering how i can deposit that into my account ',
 ' hi i m onit today the project will be released soon ',
 ' hi i am likthul for the front money company please check the account please ',
 ' can you help me figure out how to hold it ',
 ' hello i was wondering how i could deposit money into my account ',
 ' oh yes get after me i m an easy man i m trying to deposit if you want money it s my account and how can i do that where can i make it deposit or how can i transfer money to my account ',
 ' hello i m going to tell you why i was wondering how can i deposit money in your bank okay okay so how can i do it okay alright so we re gonna deposit this money okay yeah i understand thank you ',
 ' now i m under your phone how cute it is it s just my camera ',
 ' i would like to give a pause in mani ',
 ' have some more time ',
 ' i ll be back soon ',
 ' i am orangea japan it s a money but i want to use cash how would i go about doing this process thank you

In [43]:
references

['i have a 100 check and i was wondering how i can deposit that into my account',
 'hi wanted to deposit a check that i received from work today or can i deposit it',
 'i d like to put some money into my checking account please',
 'can you help me figuring out how to deposit money into my account',
 'how i can deposit some money into my account',
 'oh yes good afternoon i need some help i m trash deposit some money into my account and how can i do that where can i make a deposit or how can i change the money to my account',
 'hello i m going to say i was wondering how can i deposit money in your bank',
 'how to deposit money',
 'i would like to deposit money what s the best way to accomplish that',
 'how can i deposit money into my account',
 'help me make a deposit into my account please',
 'i want to deposit some money but i want to use cash how would i go about doing it',
 'i m going to speak volumes about how i actually go about putting money into my account is transferred to someh