#Speech Recognition Experiment

## Install speech recognition library

In [None]:
!pip install SpeechRecognition

Collecting SpeechRecognition
[?25l  Downloading https://files.pythonhosted.org/packages/26/e1/7f5678cd94ec1234269d23756dbdaa4c8cfaed973412f88ae8adf7893a50/SpeechRecognition-3.8.1-py2.py3-none-any.whl (32.8MB)
[K     |████████████████████████████████| 32.8MB 123kB/s 
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.8.1


##Imports

In [None]:
import speech_recognition as sr
import pandas as pd
import numpy as np

##Create a recognizer

In [None]:
recognizer = sr.Recognizer()

Note: only Google is freely available but SR works with other speech recognitions tools

##Open Audio file

In [None]:
harvard = sr.AudioFile('/content/harvard.wav')

with harvard as source:
  audio = recognizer.record(source)

In [None]:
transcript = recognizer.recognize_google(audio)

In [None]:
print(transcript)

the stale smell of old beer lingers it takes heat to bring out the odor a cold dip restores health and zest a salt pickle taste fine with ham tacos al Pastore are my favorite a zestful food is be hot cross bun


In [None]:
!apt install unzip

Reading package lists... Done
Building dependency tree       
Reading state information... Done
unzip is already the newest version (6.0-21ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 21 not upgraded.


#Audio to text pipeline

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.utils import class_weight, shuffle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.utils import np_utils

##Untar data

In [None]:
!tar -zxvf /content/dev-clean.tar.gz

LibriSpeech/LICENSE.TXT
LibriSpeech/README.TXT
LibriSpeech/CHAPTERS.TXT
LibriSpeech/SPEAKERS.TXT
LibriSpeech/BOOKS.TXT
LibriSpeech/dev-clean/
LibriSpeech/dev-clean/2277/
LibriSpeech/dev-clean/2277/149896/
LibriSpeech/dev-clean/2277/149896/2277-149896-0026.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0005.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0033.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0006.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0018.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0034.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0021.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0015.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0012.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0027.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0007.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0030.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0011.flac
LibriSpeech/dev-clean/2277/149896/2277-149896-0009.flac
LibriSpeech

##Speech recognition

###Trying out a snippet

In [None]:
test_speech = sr.AudioFile('/content/LibriSpeech/dev-clean/2035/147960/2035-147960-0000.flac')

with test_speech as source:
  libri_audio = recognizer.record(source)

In [None]:
libri_transcript = recognizer.recognize_google(libre_audio)

In [None]:
libri_transcript

'she was four years older than I to be sure and had seen more of the world but I was a boy and she was a girl and I resented her protecting Manor'

###Transcribing some more 

In [None]:
import os

audio = []
dir = '/content/LibriSpeech/dev-clean/2035/147960'

for filename in os.listdir('/content/LibriSpeech/dev-clean/2035/147960'):
  if not filename.endswith('.txt'):
    speech = sr.AudioFile(dir + '/' + filename)
    with speech as source:
      audio.append(recognizer.record(source))

In [None]:
transcription = []

for clip in audio:
  transcription.append(recognizer.recognize_google(clip))

##Try out fuzzy string matching

In [10]:
#look at data
print(transcription)

['she was four years older than I to be sure and had seen more of the world but I was a boy and she was a girl and I resented her protecting Manor', 'snake of his size in fighting trim would be more than any boy could handle', 'you can stand right up and talk to you he could did he Fight Hard', "look Tony that's his poison I said", "I know I'm awful Jim I was so scared", "iworld round and they're on one of those dry gravel beds was the biggest snake I had ever seen", "I don't think that mean", "one day when I rode over to the shimerdas I found engineer starting off on foot for Russian Peter's house to borrow a spade Ambrose needed", 'Otto Fuchs was the first one we met', 'we decided Antonia should ride dude home and I would walk', 'this change came about from an adventure we had together', 'it was on one of these gravel beds that I met my adventure', 'I never know you were so brave Jim she went on comfortingly', "there's been another black Frost the night before and the air was clear a

In [11]:
!pip install fuzzywuzzy

Collecting fuzzywuzzy
  Downloading https://files.pythonhosted.org/packages/43/ff/74f23998ad2f93b945c0309f825be92e04e0348e062026998b5eefef4c33/fuzzywuzzy-0.18.0-py2.py3-none-any.whl
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [12]:
from fuzzywuzzy import fuzz



In [29]:
#read actual transcript
fs = open('/content/LibriSpeech/dev-clean/2035/147960/2035-147960.trans.txt', 'r')
text = fs.read()
fs.close()

In [14]:
text

"2035-147960-0000 SHE WAS FOUR YEARS OLDER THAN I TO BE SURE AND HAD SEEN MORE OF THE WORLD BUT I WAS A BOY AND SHE WAS A GIRL AND I RESENTED HER PROTECTING MANNER\n2035-147960-0001 THIS CHANGE CAME ABOUT FROM AN ADVENTURE WE HAD TOGETHER\n2035-147960-0002 ONE DAY WHEN I RODE OVER TO THE SHIMERDAS I FOUND ANTONIA STARTING OFF ON FOOT FOR RUSSIAN PETER'S HOUSE TO BORROW A SPADE AMBROSCH NEEDED\n2035-147960-0003 THERE HAD BEEN ANOTHER BLACK FROST THE NIGHT BEFORE AND THE AIR WAS CLEAR AND HEADY AS WINE\n2035-147960-0004 IT WAS ON ONE OF THESE GRAVEL BEDS THAT I MET MY ADVENTURE\n2035-147960-0005 I WHIRLED ROUND AND THERE ON ONE OF THOSE DRY GRAVEL BEDS WAS THE BIGGEST SNAKE I HAD EVER SEEN\n2035-147960-0006 I KNOW I AM JUST AWFUL JIM I WAS SO SCARED\n2035-147960-0007 I NEVER KNOW YOU WAS SO BRAVE JIM SHE WENT ON COMFORTINGLY\n2035-147960-0008 A FAINT FETID SMELL CAME FROM HIM AND A THREAD OF GREEN LIQUID OOZED FROM HIS CRUSHED HEAD\n2035-147960-0009 LOOK TONY THAT'S HIS POISON I SAID\n20

In [30]:
text = text.splitlines()

In [31]:
clean_text = []
for sentence in text:
  res = "".join(filter(lambda x: not x.isdigit(), sentence))
  final = "".join(filter(lambda x: not x == '-', res))
  clean_text.append(final.lower()) 

In [32]:
clean_text

[' she was four years older than i to be sure and had seen more of the world but i was a boy and she was a girl and i resented her protecting manner',
 ' this change came about from an adventure we had together',
 " one day when i rode over to the shimerdas i found antonia starting off on foot for russian peter's house to borrow a spade ambrosch needed",
 ' there had been another black frost the night before and the air was clear and heady as wine',
 ' it was on one of these gravel beds that i met my adventure',
 ' i whirled round and there on one of those dry gravel beds was the biggest snake i had ever seen',
 ' i know i am just awful jim i was so scared',
 ' i never know you was so brave jim she went on comfortingly',
 ' a faint fetid smell came from him and a thread of green liquid oozed from his crushed head',
 " look tony that's his poison i said",
 ' i explained to antonia how this meant that he was twenty four years old that he must have been there when white men first came lef

In [33]:
audio_transcript = []
real_text = []
similarities = []

for statement in transcription:
  for text in clean_text:
    num = fuzz.token_set_ratio(statement, text)
    if num > 70:
      similarities.append(num)
      real_text.append(text)
      audio_transcript.append(statement)
      break


In [34]:
sentences = pd.DataFrame(data=list(zip(audio_transcript, 
                                       real_text, similarities)), 
                         columns=['audio', 'actual text', 'similarity'])

In [35]:
sentences.head(10)

Unnamed: 0,audio,actual text,similarity
0,she was four years older than I to be sure and...,she was four years older than i to be sure an...,99
1,snake of his size in fighting trim would be mo...,a snake of his size in fighting trim would be...,100
2,you can stand right up and talk to you he coul...,he could stand right up and talk to you he co...,100
3,look Tony that's his poison I said,look tony that's his poison i said,100
4,I know I'm awful Jim I was so scared,i know i am just awful jim i was so scared,97
5,iworld round and they're on one of those dry g...,i whirled round and there on one of those dry...,92
6,one day when I rode over to the shimerdas I fo...,one day when i rode over to the shimerdas i f...,95
7,Otto Fuchs was the first one we met,otto fuchs was the first one we met,100
8,we decided Antonia should ride dude home and I...,we decided that antonia should ride dude home...,100
9,this change came about from an adventure we ha...,this change came about from an adventure we h...,100


#Prep audio for classification

In [36]:
!pip install librosa



In [38]:
import librosa

In [39]:
data, sampling_rate = librosa.load('/content/LibriSpeech/dev-clean/1988/147956/1988-147956-0000.flac')