In [None]:
# Install packages
!pip install jiwer
!pip install pandas

In [1]:
# Import libraries
import jiwer
import pandas as pd

In [5]:
def asr_evaluation(truth, performed):
    '''
    Print and return the ASR error metrics computed with jiwer.

    Input:

    truth: sample's transcripts (list of strings)
    performed: transcripts performed by the asr model (list of strings)


    Output:
    dict with the keys 'wer', 'cer', 'mer' and 'wip' holding the
    WER, CER, MER, and WIP scores. Each score is also printed.
    '''

    print("Model: \n")

    # (printed label, result key, jiwer function), in reporting order.
    metrics = (
        ("Word Error Rate (WER)", "wer", jiwer.wer),
        ("Character Error Rate (CER)", "cer", jiwer.cer),
        ("Match Error Rate (MER)", "mer", jiwer.mer),
        ("Word Information Preserved (WIP)", "wip", jiwer.wip),
    )

    scores = {}
    for label, key, metric in metrics:
        # Arguments are passed positionally on purpose: the first parameter
        # is called `truth` in older jiwer releases and `reference` in newer
        # ones, so a keyword would tie this function to one jiwer version.
        scores[key] = metric(truth, performed)
        print(f"{label} = {scores[key]}")

    return scores

In [2]:
# Load the CSV holding the original transcripts and the ASR transcripts,
# then preview the first rows to check the columns.
input_csv = 'cv_translated_with_noise_snr5.csv'
csv_data = pd.read_csv(input_csv, encoding='utf-8')
csv_data.head(5)

Unnamed: 0.1,Unnamed: 0,path,original_transcript,noise_type,asr_transcript,translation
0,0,samples_with_noise_snr5/common_voice_uk_328578...,я чув і знав лиш одне,bus,я чув і знов лиш одне дома,I only heard one at home.
1,1,samples_with_noise_snr5/common_voice_uk_328578...,я чув і знав лиш одне,cafeteria,я чи пізнав лиш не дає,I don't know if it's worth it.
2,2,samples_with_noise_snr5/common_voice_uk_328578...,я чув і знав лиш одне,car,я чув і знав лиш одне,I heard and knew only one thing.
3,3,samples_with_noise_snr5/common_voice_uk_328578...,я чув і знав лиш одне,field,я чув і знав лиш одне,I heard and knew only one thing.
4,4,samples_with_noise_snr5/common_voice_uk_328578...,я чув і знав лиш одне,hallway,я чув і знов лише не,I heard it and not again.


In [3]:
# Convert input transcripts to list
truth = csv_data['original_transcript'].tolist()
# read_csv parses an empty CSV field as NaN (a float, not a string). If the
# ASR model produced no text for a sample, that NaN would reach jiwer instead
# of a transcript, so missing hypotheses are mapped to the empty string.
performed = csv_data['asr_transcript'].fillna('').tolist()

In [6]:
# Model evaluation for the entire dataset: every reference/ASR transcript pair
# is passed in a single call, and the resulting scores are printed.
# (Per-sentence scores are computed separately in a later cell.)
asr_evaluation(truth, performed)

Model: 

Word Error Rate (WER) = 0.3305626598465473
Character Error Rate (CER) = 0.15671306690615178
Match Error Rate (MER) = 0.3075550267697799
Word Information Preserved (WIP) = 0.5324534666756792


In [7]:
# Model evaluation for each sentence
# One list per metric; entry k is the score of the k-th transcript pair.
wer, cer, mer, wip = [], [], [], []

for reference, hypothesis in zip(truth, performed):
    # Positional arguments keep the calls independent of the jiwer version:
    # the first parameter is named `truth` in older releases and `reference`
    # in newer ones.
    wer.append(jiwer.wer(reference, hypothesis))
    cer.append(jiwer.cer(reference, hypothesis))
    mer.append(jiwer.mer(reference, hypothesis))
    wip.append(jiwer.wip(reference, hypothesis))

In [8]:
# Attach the per-sentence scores to the dataframe, one column per metric,
# in the order wer, cer, mer, wip, then preview the result.
score_columns = (('wer', wer), ('cer', cer), ('mer', mer), ('wip', wip))
for column_name, column_scores in score_columns:
    csv_data[column_name] = column_scores
csv_data.head()

Unnamed: 0.1,Unnamed: 0,path,original_transcript,noise_type,asr_transcript,translation,wer,cer,mer,wip
0,0,samples_with_noise_snr5/common_voice_uk_328578...,я чув і знав лиш одне,bus,я чув і знов лиш одне дома,I only heard one at home.,0.333333,0.285714,0.285714,0.595238
1,1,samples_with_noise_snr5/common_voice_uk_328578...,я чув і знав лиш одне,cafeteria,я чи пізнав лиш не дає,I don't know if it's worth it.,0.833333,0.428571,0.833333,0.027778
2,2,samples_with_noise_snr5/common_voice_uk_328578...,я чув і знав лиш одне,car,я чув і знав лиш одне,I heard and knew only one thing.,0.0,0.0,0.0,1.0
3,3,samples_with_noise_snr5/common_voice_uk_328578...,я чув і знав лиш одне,field,я чув і знав лиш одне,I heard and knew only one thing.,0.0,0.0,0.0,1.0
4,4,samples_with_noise_snr5/common_voice_uk_328578...,я чув і знав лиш одне,hallway,я чув і знов лише не,I heard it and not again.,0.5,0.190476,0.5,0.25


In [None]:
# Save scores to csv file
# index=False: the row index is only a running number, and writing it would
# add one more unnamed column each time the file is saved and re-read (the
# input already carries 'Unnamed: 0.1' and 'Unnamed: 0').
csv_data.to_csv('cv_translated_with_noise_snr5_classification.csv', encoding='utf-8', index=False)