In [1]:
import speech_recognition as sr
import pandas as pd
import os
import re
import jiwer 
# Show the version of the imported speech_recognition package (recorded in the cell output).
sr.__version__

'3.8.1'

In [2]:
# Load the tab-separated validated-clips table and keep only the columns used
# below: the audio file name ('path'), the transcript ('sentence') and 'up_votes'.
wanted_columns = ['path', 'sentence', 'up_votes']
dataset = pd.read_csv("validated.tsv", sep='\t')
dataset_ = dataset.loc[:, wanted_columns]

In [36]:
# Rank the clips by up-vote count (highest first) and keep the first 200 rows,
# i.e. the number of audio files used in this experiment; then preview the top 10.
ranked_by_votes = dataset_.sort_values(by='up_votes', ascending=False)
dataset__sorted = ranked_by_votes.head(200)
dataset__sorted.head(10)

Unnamed: 0,path,sentence,up_votes
410679,common_voice_en_1.mp3,I'm interested only in the present.,1614
410680,common_voice_en_2.mp3,The boy was surprised.,1399
27509,common_voice_en_427.mp3,It was dropping off in flakes and raining down...,1218
410681,common_voice_en_3.mp3,"""I'm a woman of the desert,"" she said, avertin...",1185
21257,common_voice_en_1036.mp3,A large portion of the cylinder had been uncov...,1162
27511,common_voice_en_430.mp3,The turf and gravel around it seemed charred a...,1114
410682,common_voice_en_4.mp3,But he had to move on.,1048
196192,common_voice_en_2911.mp3,There were lights in the upper windows of the ...,1044
410683,common_voice_en_5.mp3,"""Thank you,"" said the boy.",974
196191,common_voice_en_2910.mp3,It was dropping off in flakes and raining down...,856


In [37]:
# Cleaning: the vote count was only needed for ranking, so keep just the
# audio file name and its transcript from here on.
kept_columns = ['path', 'sentence']
dataset__sorted = dataset__sorted.loc[:, kept_columns]

In [38]:
# Preview the first ten selected rows.
dataset__sorted.head(10)

Unnamed: 0,path,sentence
410679,common_voice_en_1.mp3,I'm interested only in the present.
410680,common_voice_en_2.mp3,The boy was surprised.
27509,common_voice_en_427.mp3,It was dropping off in flakes and raining down...
410681,common_voice_en_3.mp3,"""I'm a woman of the desert,"" she said, avertin..."
21257,common_voice_en_1036.mp3,A large portion of the cylinder had been uncov...
27511,common_voice_en_430.mp3,The turf and gravel around it seemed charred a...
410682,common_voice_en_4.mp3,But he had to move on.
196192,common_voice_en_2911.mp3,There were lights in the upper windows of the ...
410683,common_voice_en_5.mp3,"""Thank you,"" said the boy."
196191,common_voice_en_2910.mp3,It was dropping off in flakes and raining down...


In [39]:
# Point the file names at the .wav versions of the clips (the audio files on
# disk are WAV, while the table lists the original .mp3 names).
# The pattern is escaped and anchored so that only a literal ".mp3" extension
# at the end of the name is rewritten; an unescaped "." would match any
# character when the pattern is interpreted as a regular expression, which is
# the default on older pandas versions. `regex=True` is passed explicitly so
# the behaviour does not depend on the installed pandas version.
dataset__sorted['path'] = dataset__sorted['path'].str.replace(r"\.mp3$", ".wav", regex=True)

In [40]:
# Renumber the rows 0..n-1 so they can be addressed by position below;
# the previous row labels are kept as an 'index' column.
dataset__sorted = dataset__sorted.reset_index(drop=False)

In [41]:
# Export the selected transcripts to "transcripts.csv" so they can be checked manually.
dataset__sorted.to_csv("transcripts.csv")

In [42]:
# Preview the first ten rows after the rename and index reset.
dataset__sorted.head(10)

Unnamed: 0,index,path,sentence
0,410679,common_voice_en_1.wav,I'm interested only in the present.
1,410680,common_voice_en_2.wav,The boy was surprised.
2,27509,common_voice_en_427.wav,It was dropping off in flakes and raining down...
3,410681,common_voice_en_3.wav,"""I'm a woman of the desert,"" she said, avertin..."
4,21257,common_voice_en_1036.wav,A large portion of the cylinder had been uncov...
5,27511,common_voice_en_430.wav,The turf and gravel around it seemed charred a...
6,410682,common_voice_en_4.wav,But he had to move on.
7,196192,common_voice_en_2911.wav,There were lights in the upper windows of the ...
8,410683,common_voice_en_5.wav,"""Thank you,"" said the boy."
9,196191,common_voice_en_2910.wav,It was dropping off in flakes and raining down...


In [43]:
# Preprocessing: build `df` with one row per clip, holding the audio file name
# ('name') and the normalised reference transcript ('sentence').
#
# Normalisation strips surrounding whitespace and then removes every character
# that is not whitespace, an apostrophe, an ASCII letter/digit or a caret.
# Full stops are already removed by that pattern, so no separate '.' removal
# is required.
# NOTE(review): the trailing '^' inside the character class keeps literal
# carets in the text; this looks unintentional - confirm before removing it.
#
# The whole column is processed at once, so the cell works for any number of
# selected rows instead of assuming exactly 200, and avoids growing the frame
# one row at a time.
DISALLOWED_CHARS = r"[^\s'a-zA-Z0-9^]"
df = pd.DataFrame({
    'name': dataset__sorted['path'],
    'sentence': (
        dataset__sorted['sentence']
        .str.strip()
        .str.replace(DISALLOWED_CHARS, '', regex=True)
    ),
})


In [44]:
# Preview the first ten normalised transcripts.
df.head(10)

Unnamed: 0,name,sentence
0,common_voice_en_1.wav,I'm interested only in the present
1,common_voice_en_2.wav,The boy was surprised
2,common_voice_en_427.wav,It was dropping off in flakes and raining down...
3,common_voice_en_3.wav,I'm a woman of the desert she said averting he...
4,common_voice_en_1036.wav,A large portion of the cylinder had been uncov...
5,common_voice_en_430.wav,The turf and gravel around it seemed charred a...
6,common_voice_en_4.wav,But he had to move on
7,common_voice_en_2911.wav,There were lights in the upper windows of the ...
8,common_voice_en_5.wav,Thank you said the boy
9,common_voice_en_2910.wav,It was dropping off in flakes and raining down...


In [45]:
# Create the speech_recognition Recognizer instance that is reused for every
# audio file in the evaluation loop below.
r = sr.Recognizer()

In [47]:
# Compare the recognised text with the reference transcript for every clip and
# report the average word error rate (WER) over the clips that could be scored.
#
# Outputs left in the notebook namespace:
#   count       - number of clips attempted
#   error_count - number of clips skipped because of an error
#   WER_sum     - sum of the per-clip WER values of the scored clips
input_path = './audio_files'
count = len(df)  # evaluate every prepared transcript rather than a hard-coded 200
WER_sum = 0
error_count = 0
for i in range(count):
    try:
        reference = df.loc[i, 'sentence'].lower()
        audio_path = os.path.join(input_path, df.loc[i, 'name'])
        with sr.AudioFile(audio_path) as source:
            audio = r.record(source)
        # The recording is fully read at this point, so the file is closed
        # before the (slow) recognition request is made.
        result = r.recognize_google(audio)
        hypothesis = result.lower()
        temp_measures = jiwer.compute_measures(reference, hypothesis)
    except Exception as e:
        # Deliberate best-effort: a clip that cannot be scored (e.g. the audio
        # file is missing on disk, or recognition fails) is reported, counted
        # and skipped so that one bad clip does not abort the whole run.
        print(e)
        error_count += 1
        continue
    print("recognized sentence: ", hypothesis)
    print("orginial sentence: ", reference)
    print(temp_measures)
    WER_sum += temp_measures['wer']

scored_count = count - error_count
if scored_count > 0:
    print("Average WER: ",WER_sum/scored_count*100,'%')
else:
    # Avoid a ZeroDivisionError when no clip could be scored at all.
    print("Average WER: not available (no audio file could be evaluated)")


recognized sentence:  i'm interested only in the present
orginial sentence:  i'm interested only in the present
{'wer': 0.0, 'mer': 0.0, 'wil': 0.0, 'wip': 1.0}
recognized sentence:  the boy was surprised
orginial sentence:  the boy was surprised
{'wer': 0.0, 'mer': 0.0, 'wil': 0.0, 'wip': 1.0}
recognized sentence:  it was dropping off in flakes and rain down on the sand
orginial sentence:  it was dropping off in flakes and raining down on the sand
{'wer': 0.08333333333333333, 'mer': 0.08333333333333333, 'wil': 0.15972222222222232, 'wip': 0.8402777777777777}
recognized sentence:  i'm a woman of the desert she said hurting her face
orginial sentence:  i'm a woman of the desert she said averting her face
{'wer': 0.09090909090909091, 'mer': 0.09090909090909091, 'wil': 0.17355371900826455, 'wip': 0.8264462809917354}
recognized sentence:  a large portion of the cylinder head being uncovered
orginial sentence:  a large portion of the cylinder had been uncovered
{'wer': 0.2222222222222222, 'm

recognized sentence:  about 11 i walked back to my home and mayberry because nothing seems to be happening
orginial sentence:  about eleven i walked back to my home in maybury because nothing seemed to be happening
{'wer': 0.25, 'mer': 0.25, 'wil': 0.4375, 'wip': 0.5625}
recognized sentence:  do interchange by the wind of the desert never changes
orginial sentence:  the dunes are changed by the wind but the desert never changes
{'wer': 0.4166666666666667, 'mer': 0.4166666666666667, 'wil': 0.5916666666666667, 'wip': 0.4083333333333333}
recognized sentence:  a large piece of me came off fell with a loud and sharp noise
orginial sentence:  a large piece suddenly came off and fell with a loud and sharp noise
{'wer': 0.21428571428571427, 'mer': 0.2, 'wil': 0.26530612244897966, 'wip': 0.7346938775510203}
recognized sentence:  the newspaper articles have prepared everyone for the reception of the idea
orginial sentence:  the newspaper articles had prepared everyone for the reception of the id

recognized sentence:  you watch them as they grow demonstrating how the world is always changing
orginial sentence:  you'll watch them as they grow demonstrating how the world is always changing
{'wer': 0.07692307692307693, 'mer': 0.07692307692307693, 'wil': 0.1479289940828401, 'wip': 0.8520710059171599}
recognized sentence:  it turned out to be a bitter tea
orginial sentence:  it turned out to be a bitter tea
{'wer': 0.0, 'mer': 0.0, 'wil': 0.0, 'wip': 1.0}
recognized sentence:  he didn't want any conversation at this point
orginial sentence:  he didn't want any conversation at this point
{'wer': 0.0, 'mer': 0.0, 'wil': 0.0, 'wip': 1.0}
recognized sentence:  turn heater on to the bar this time has expression
orginial sentence:  then he turned to the boy this time his expression was cold and distant
{'wer': 0.6428571428571429, 'mer': 0.6428571428571429, 'wil': 0.8214285714285714, 'wip': 0.17857142857142858}
recognized sentence:  strange images pass through my mind
orginial sentence:  s

recognized sentence:  at that time it had not occurred to him that it might be hollow
orginial sentence:  for at that time it had not occurred to him that it might be hollow
{'wer': 0.06666666666666667, 'mer': 0.06666666666666667, 'wil': 0.06666666666666665, 'wip': 0.9333333333333333}
recognized sentence:  revenge
orginial sentence:  revenge is not my style but obviously accidents will happen
{'wer': 0.9, 'mer': 0.9, 'wil': 0.9, 'wip': 0.1}
recognized sentence:  but he wanted to travel
orginial sentence:  that he wanted to travel
{'wer': 0.2, 'mer': 0.2, 'wil': 0.3599999999999999, 'wip': 0.6400000000000001}
recognized sentence:  but he certainly didn't desire that an army invade the oasis
orginial sentence:  but he certainly didn't desire that an army invade the oasis
{'wer': 0.0, 'mer': 0.0, 'wil': 0.0, 'wip': 1.0}
recognized sentence:  everyone seemed very excited
orginial sentence:  everyone seemed very excited
{'wer': 0.0, 'mer': 0.0, 'wil': 0.0, 'wip': 1.0}
recognized sentence:  i

recognized sentence:  ice showmakers samurai skins
orginial sentence:  i shall make us some ice cubes
{'wer': 1.0, 'mer': 1.0, 'wil': 1.0, 'wip': 0.0}
recognized sentence:  somebody pushed again
orginial sentence:  somebody pushed against me and i almost fell into the ditch
{'wer': 0.8181818181818182, 'mer': 0.8181818181818182, 'wil': 0.8787878787878788, 'wip': 0.12121212121212122}
[Errno 2] No such file or directory: './audio_files\\common_voice_en_16891.wav'
[Errno 2] No such file or directory: './audio_files\\common_voice_en_16899.wav'
[Errno 2] No such file or directory: './audio_files\\common_voice_en_16895.wav'
[Errno 2] No such file or directory: './audio_files\\common_voice_en_16900.wav'
recognized sentence:  the lights in the upper windows of the houses when the people went to bed
orginial sentence:  there were lights in the upper windows of the houses when the people went to bed
{'wer': 0.125, 'mer': 0.125, 'wil': 0.18333333333333335, 'wip': 0.8166666666666667}
[Errno 2] No s

In [48]:
# Number of clips that were actually scored (attempted minus skipped).
used_file_total = count - error_count
print("Used Audio Files: ", used_file_total)

Used Audio Files:  167
