In [2]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.19.2-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.1/542.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.32.1 (from datasets)
  Downloading requests-2.32.3-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K

In [40]:
from datasets import load_dataset, Audio, Dataset
import pandas as pd
import librosa
import soundfile
import random

In [36]:
dataset = load_dataset(
    "mayabedge/NNCES", split="train"
)
dataset

Dataset({
    features: ['audio', 'transcription'],
    num_rows: 500
})

In [37]:
sample = dataset[2]
print(sample)
print(sample["transcription"])

{'audio': {'path': 'M1_01_03.wav', 'array': array([ 0.00167847,  0.0015564 ,  0.00140381, ..., -0.00192261,
       -0.0027771 , -0.00332642]), 'sampling_rate': 44100}, 'transcription': 'Mom and Dad arrived at the bus station early but waited until the bus came.'}
Mom and Dad arrived at the bus station early but waited until the bus came.


In [38]:
import torch
from transformers import pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    "automatic-speech-recognition", model="openai/whisper-tiny.en", device=device
)

In [41]:
preds = []
evals = random.sample(range(0, 500), 100)
for i in evals:
  sample = dataset[i]
  txt = pipe(sample["audio"], max_new_tokens=256)
  txt = txt['text']
  preds.append(txt)
print(preds)


[' mother and dad arrived at the bus station early but waited until the bus came', " I don't have time to play with you. I am playing quicken.", ' Mom and Dad arrived at the bus station early but waited until the bus came.', ' The oldest member of the family is my grandfather and the youngest member of my family is my baby brother.', ' which is as caring as a mother. We talk with sweet like', ' Ancient views are through the best habits and dreams to eat.', ' I love my family and that in that future I want to make my family proud of.', ' Mom and Dad and I were at the bus station and he went way down until the bus came.', ' We should respect and know all members of the family.', " I don't have time to whoa, it's you, I am playing cricket.", ' The nature is very attractive and full of my favorite ginkalurt.', ' The nature is very attractive and full of my few favorite green colors.', " I don't have time to go with you. I'm playing cricket.", " I don't have time to go with you. I'm playing

In [43]:
refs = []
for i in evals:
  sample = dataset[i]
  refs.append(sample["transcription"])

In [32]:
!pip install jiwer
!pip install evaluate

Collecting jiwer
  Downloading jiwer-3.0.4-py3-none-any.whl (21 kB)
Collecting rapidfuzz<4,>=3 (from jiwer)
  Downloading rapidfuzz-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, jiwer
Successfully installed jiwer-3.0.4 rapidfuzz-3.9.3
Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: evaluate
Successfully installed evaluate-0.4.2


In [44]:
from evaluate import load
wer = load("wer")
wer_score = wer.compute(predictions=preds, references=refs)
print(f"WER score: {wer_score}")

WER score: 0.43286573146292584


In [35]:
print(refs)
print(preds)

['I wish to know all about my grandfather. Well he is nearly 93 years old with a long beard.', 'In the wenter when it snows, he slowly takes a short walk in the open air each day.', '100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80.', 'Dog eats bones. Mike likes bikes. Elsa wants a book. Adam plays basketball.', 'The students wear the uniform and go to school daily.', "I don't have time to go with you. I am playing cricket.", 'The nature is very attractive and full of my favorite green color.', 'Nature gives us fruits, vegetables, and grains to eat.', "I don't have time to go with you. I am playing cricket.", 'The nature is very attractive and full of my favorite green color.', 'I do not have time to go with you. I am playing cricket.', 'The nature is very attractive and full of my favorite green color.', "Nature is as caring as our mother. It's always sweet like her.", 'You wish to know all about my grandfather? Well he is nearly 93 years old with a 