## Imports

In [1]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
%cd ..
import os, sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(os.getcwd()))))

/Users/Tony/Other Docs/distilling-and-forgetting-in-large-pre-trained-models


In [3]:
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
from datasets import load_dataset
import evaluate
from evaluation.string_edit_metrics import get_string_edit_metrics

# Load the "wer" metric through the `evaluate` library; it is applied to the
# normalized predictions/references in the metrics section of this notebook.
metric = evaluate.load("wer")

In [4]:
from dataloader.dataloader import gen_from_dataset

## User input

## Load model

In [20]:
# Single source of truth for the checkpoint and decoding setup, so the model,
# the processor and the forced decoder prompt cannot drift apart.
MODEL_NAME = "openai/whisper-tiny"
LANGUAGE = "english"
TASK = "transcribe"

model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
processor = WhisperProcessor.from_pretrained(MODEL_NAME, language=LANGUAGE, task=TASK)

# Pin the decoder prompt to the configured language/task and clear the
# token-suppression list on the model config.
model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language=LANGUAGE, task=TASK)  # type: ignore
model.config.suppress_tokens = []

# Text normalizer applied to both predictions and references before scoring.
# Prefer the tokenizer's public `normalize` method; fall back to the private
# `_normalize` for older `transformers` releases that only expose that name.
_whisper_tokenizer = processor.tokenizer  # type: ignore
whisper_norm = getattr(_whisper_tokenizer, "normalize", None) or _whisper_tokenizer._normalize

## Load dataset

In [21]:
# Validation split of the "clean" configuration of the dummy LibriSpeech ASR
# dataset used for this quick evaluation.
ds = load_dataset(
    path="hf-internal-testing/librispeech_asr_dummy",
    name="clean",
    split="validation",
)

Found cached dataset librispeech_asr_dummy (/Users/Tony/.cache/huggingface/datasets/hf-internal-testing___librispeech_asr_dummy/clean/2.1.0/d3bc4c2bc2078fcde3ad0f0f635862e4c0fef78ba94c4a34c4c250a097af240b)


In [22]:
# Combine the model with the processor's tokenizer and feature extractor into
# one automatic-speech-recognition pipeline object.
whisper_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    feature_extractor=processor.feature_extractor,  # type: ignore
    tokenizer=processor.tokenizer,  # type: ignore
)

## Run pipeline

In [23]:
# Run the ASR pipeline over the dataset and collect normalized
# (prediction, reference) pairs for scoring. Both lists are re-created here so
# that re-running the cell never appends to stale results.
predictions = []
references = []

# NOTE(review): `gen_from_dataset` comes from dataloader.dataloader and is not
# shown here; presumably it yields pipeline inputs carrying a "reference"
# entry that is passed through to each output -- confirm against the helper.
for out in whisper_asr(gen_from_dataset(ds),
                       batch_size=4,
                       generate_kwargs={"num_beams": 1}):  # type: ignore
    # Normalize first, then test for emptiness: a reference can be non-empty
    # in raw form yet normalize to an empty string, and an empty reference
    # causes an error in the WER computation.
    reference = whisper_norm(out["reference"][0])  # type: ignore
    if not reference.strip():
        continue  # skip empty references to avoid error in WER computation
    predictions.append(whisper_norm(out["text"]))  # type: ignore
    references.append(reference)

In [24]:
# Preview the first five normalized model transcriptions.
predictions[:5]

['mister quilter is the apostle of the middle classes and we are glad to welcome his gospel',
 'nor is mister quilters manner less interesting than his matter',
 'he tells us that at this festive season of the year with christmas and roast beef looming before us similarly drawn from eating and its results occur most readily to the mind',
 'he has grave doubts whether sir frederick layton is work is really greek after all and can discover in it but little of rocky ithaca',
 'lennils pictures are a sort of upguards and atom paintings and mason is exquisite idols are as national as a jingo poem mister birkut foster is landscapes smile at one much in the same way that mister karker used to flash his teeth and mister john colier gives his sitter a cheerful slap on the back before he says like a shampoo or a turkish bath next man']

In [25]:
# Preview the first five normalized reference transcripts for comparison.
references[:5]

['mister quilter is the apostle of the middle classes and we are glad to welcome his gospel',
 'nor is mister quilter is manner less interesting than his matter',
 'he tells us that at this festive season of the year with christmas and roast beef looming before us similes drawn from eating and its results occur most readily to the mind',
 'he has grave doubts whether sir frederick leighton is work is really greek after all and can discover in it but little of rocky ithaca',
 'linnell is pictures are a sort of up guards and at em paintings and mason is exquisite idylls are as national as a jingo poem mister birket foster is landscapes smile at one much in the same way that mister carker used to flash his teeth and mister john collier gives his sitter a cheerful slap on the back before he says like a shampooer in a turkish bath next man']

## Compute string edit metrics

In [27]:
# Score the normalized predictions against the normalized references with the
# "wer" metric loaded via `evaluate`.
metric.compute(predictions=predictions, references=references)

0.11804961505560307

In [28]:
# Project helper from evaluation.string_edit_metrics. In the recorded output it
# returns a dict with keys 'wer', 'sub', 'del' and 'ins', where the last three
# sum to 'wer' (presumably substitution / deletion / insertion rates -- confirm
# against the helper's implementation).
get_string_edit_metrics(predictions=predictions, references=references)

{'wer': 0.11804961505560307,
 'sub': 0.08297690333618478,
 'del': 0.013686911890504704,
 'ins': 0.0213857998289136}