# Git Repository Bringup

In [9]:
!git version

git version 2.34.1


In [10]:
!git clone https://github.com/ramwtz/data-science-project.git

Cloning into 'data-science-project'...
remote: Enumerating objects: 128, done.[K
remote: Counting objects: 100% (128/128), done.[K
remote: Compressing objects: 100% (94/94), done.[K
remote: Total 128 (delta 52), reused 73 (delta 26), pack-reused 0[K
Receiving objects: 100% (128/128), 17.47 MiB | 17.97 MiB/s, done.
Resolving deltas: 100% (52/52), done.


In [11]:
!ls

data-science-project  sample_data


In [12]:
%cd data-science-project/

/content/data-science-project


In [13]:
!pwd

/content/data-science-project


In [14]:
!git switch dev/ram

Branch 'dev/ram' set up to track remote branch 'dev/ram' from 'origin'.
Switched to a new branch 'dev/ram'


In [15]:
!git pull

Already up to date.


In [16]:
%cd ..

/content


# Basic Bringup for Remote Environment

In [17]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [18]:
# Necessary packages for this nb
!pip install --quiet --upgrade -r '/content/data-science-project/dependencies.txt'
!pip install pocketsphinx

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m82.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.4/36.4 MB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m71.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m489.8/489.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m101.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m794.3/794.3 kB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml

# Initialize Env

In [1]:
# Imports
import speech_recognition as sr
from vosk import Model, KaldiRecognizer
import os
import librosa as lbrs
from tqdm import tqdm
import pandas as pd
import json
from difflib import SequenceMatcher
import timeit

# Configs
%load_ext autotime
tqdm.pandas()

time: 936 µs (started: 2023-09-29 17:39:35 +00:00)


In [2]:
gen_data_dir = os.path.join('drive', 'MyDrive', 'project', 'data')
calls_dir = os.path.join(gen_data_dir, 'calls')
new_data_dir = os.path.join(gen_data_dir, 'new')
models_path = os.path.join('drive', 'MyDrive', 'model')
metadata_path = os.path.join(calls_dir, '911_first6sec', '911_metadata_6sec.csv')

time: 735 µs (started: 2023-09-29 17:39:35 +00:00)


In [3]:
def get_path_from_filename(audio_file_name):
    type_dir, file_name = audio_file_name.split('/')
    return os.path.join(calls_dir, type_dir, file_name)

time: 500 µs (started: 2023-09-29 17:39:35 +00:00)


In [4]:
def create_audio_series(audio_file_name):
    audio_file_path = get_path_from_filename(audio_file_name)
    data, sample_rate = lbrs.load(audio_file_path)
    return data, sample_rate

time: 450 µs (started: 2023-09-29 17:39:35 +00:00)


# Fetching the Transcription
We're going to you the SpeechRecognition library to transform the audio to test to use as features for our model training

## Defs

In [5]:
r = sr.Recognizer()

time: 406 µs (started: 2023-09-29 17:39:36 +00:00)


In [6]:
def get_audio(audio_file_path, filter_noise=False):
  with sr.AudioFile(audio_file_path) as src:
    if filter_noise:
      r.adjust_for_ambient_noise(src, duration=0.5)
    return r.record(src)

time: 639 µs (started: 2023-09-29 17:39:36 +00:00)


In [7]:
def get_path_from_filename(audio_file_name):
    type_dir, file_name = audio_file_name.split('/')
    return os.path.join(calls_dir, type_dir, file_name)

time: 451 µs (started: 2023-09-29 17:39:36 +00:00)


## Raw Audio
We will first try to decipher the audio as is using four different speach to text engines: Google, Sphinx, Vosk & Whisper (engines that have public APIs).

### Aggregate Transcriptions

In [8]:
def recognize_vosk(audio, model=None):
  if model is None:
    model_path = os.path.join(models_path, 'vosk', 'eng_small')
  else:
    model_path = os.path.join(models_path, 'vosk', model)

  vosk_audio = audio.get_raw_data(convert_rate=16000, convert_width=2)

  #
  model = Model(model_path)
  recognizer = KaldiRecognizer(model, 16000)
  recognizer.AcceptWaveform(vosk_audio)
  return recognizer.FinalResult()

time: 812 µs (started: 2023-09-29 17:39:38 +00:00)


In [9]:
def transcribe_file(filename, filter_noise=False):
  path = get_path_from_filename(filename)
  audio = get_audio(path, filter_noise=filter_noise)

  try:
    g = r.recognize_google(audio)
  except:
    g = "UNABLE TO RECOGNIZE"

  s = r.recognize_sphinx(audio)

  w = r.recognize_whisper(audio)

  vt = recognize_vosk(audio)
  v = json.loads(vt)['text']

  return g, s, w, v

time: 1.3 ms (started: 2023-09-29 17:39:39 +00:00)


In [10]:
metadata_df = pd.read_csv(metadata_path)

time: 37.4 ms (started: 2023-09-29 17:39:41 +00:00)


In [11]:
manual_transcriptions = {
    '911_first6sec/call_8_0.wav': "um hello hello is everything okay",
    '911_first6sec/call_9_0.wav': 'yes um i need a police officer over here at whats going on',
    '911_first6sec/call_10_0.wav': "ma'am my pickup was stolen I had to go find it uhh",
    '911_first6sec/call_11_0.wav': "no ma'am i don't i don't have an emergency two police officers just at my house just now",
    '911_first6sec/call_15_0.wav': "I'm Dylon Peterson and i just killed two people",
    '911_first6sec/call_17_0.wav': "yeah we got a fire down here at on route seven right here at abington",
    '911_first6sec/call_20_0.wav': "are you out of the house yet no alright get out of the house we can't theres a",
    '911_first6sec/call_21_0.wav': "yes is this a police station yes it is may i help you yeah okay i put this is ridiculous",
    '911_first6sec/call_28_0.wav': "can you please send rescue to i think i'm",
    '911_first6sec/call_31_0.wav': "oh my god somebody is shooting at the sheriff's department at the sheriff department yes building?"
}

manual_transcriptions = pd.Series(manual_transcriptions)
manual_transcriptions = manual_transcriptions.rename('manual_transcriptions')
manual_transcriptions = pd.DataFrame(manual_transcriptions).reset_index()
manual_transcriptions = manual_transcriptions.rename({'index': 'filename'}, axis=1)

time: 14.1 ms (started: 2023-09-29 17:39:42 +00:00)


In [12]:
filenames = pd.Series(manual_transcriptions['filename'])

time: 625 µs (started: 2023-09-29 17:39:43 +00:00)


In [46]:
indices = ['google', 'sphinx', 'whisper', 'vosk']

transcriptions_series = filenames.progress_apply(transcribe_file)
column_names = {
    k: v for k, v in enumerate(indices)
  }
transcriptions_list = transcriptions_series.values.tolist()
transcriptions = pd.DataFrame(transcriptions_list).rename(column_names, axis=1)

100%|██████████| 10/10 [01:59<00:00, 11.93s/it]

time: 1min 59s (started: 2023-09-29 14:09:34 +00:00)





In [47]:
assessment = pd.DataFrame({"filename": filenames})
assessment = pd.concat([assessment, transcriptions], axis=1)

time: 1.98 ms (started: 2023-09-29 14:11:36 +00:00)


In [48]:
assessment = assessment.merge(manual_transcriptions, left_on='filename', right_on='filename')

time: 3.48 ms (started: 2023-09-29 14:11:38 +00:00)


In [49]:
assessment

Unnamed: 0,filename,google,sphinx,whisper,vosk,manual_transcriptions
0,911_first6sec/call_8_0.wav,hello hello is everything okay,and while the al and then again okay go ahead,Hello. Is everything okay? On.,ah now i now it's everything okay,um hello hello is everything okay
1,911_first6sec/call_9_0.wav,UNABLE TO RECOGNIZE,he had an idea of a pound and a coil of oil an...,"Yes, I need a police officer over here. What'...",that i'm anita police officer over here the co...,yes um i need a police officer over here at wh...
2,911_first6sec/call_10_0.wav,damn my pickup was born I had to go find it,no wonder the political had invited laugh,"Ma'am, my cat is a person. I had it offended.",now my pick up on had to go find that ah,ma'am my pickup was stolen I had to go find it...
3,911_first6sec/call_11_0.wav,UNABLE TO RECOGNIZE,i'd really a man am i to have an latent he cou...,I'm gonna go to the bathroom and close the le...,remember my mom i got my house now out,no ma'am i don't i don't have an emergency two...
4,911_first6sec/call_15_0.wav,I'm done Peterson and I just killed two people,glendon beard and that is killed two people that,I'm Joan Pearson and that has killed two people.,i'm i'm down there and that it killed few people,I'm Dylon Peterson and i just killed two people
5,911_first6sec/call_17_0.wav,yeah we got a fire down here it's on Route 7 r...,the little hard and long little doubt and let ...,"We've got a far down here, it's around 7, rig...",there we go too far down here at our or out th...,yeah we got a fire down here at on route seven...
6,911_first6sec/call_20_0.wav,are you out of the house yet no or I get out o...,we have our dead and now or i didn't have reac...,Are you out of the house yet? No. Or I get ou...,i get out of our now i get out of as we can't,are you out of the house yet no alright get ou...
7,911_first6sec/call_21_0.wav,UNABLE TO RECOGNIZE,whether you're completely hemmed and looking f...,"Yes, it is. Please return. Yes, it is. May he...",i can't help you okay i put is ridiculous,yes is this a police station yes it is may i h...
8,911_first6sec/call_28_0.wav,can you please send rescue to I think I'm,and i knew her out and adam had,Can you please send rescue to... I think I'm ...,you prefer back to to i think i'm having,can you please send rescue to i think i'm
9,911_first6sec/call_31_0.wav,oh my God somebody shooting at the sheriff's d...,so why when you hear more than that but i'd ha...,"Oh my God, so many students at this year's de...",oh look at how many shooting here for partner ...,oh my god somebody is shooting at the sheriff'...


time: 14.9 ms (started: 2023-09-29 14:11:39 +00:00)


### Assess Transcriptions

In [21]:
def similar(row, indices: list, compare_to='manual_transcriptions'):
    t = [
        SequenceMatcher(a=row[col], b=row[compare_to]).ratio()
        for col in indices
    ]
    return tuple(t)

time: 624 µs (started: 2023-09-29 16:44:07 +00:00)


In [52]:
scores_series = assessment.progress_apply(similar, axis=1, indices=indices)
scores_list = scores_series.values.tolist()
scores = pd.DataFrame(scores_list).rename(column_names, axis=1)

100%|██████████| 10/10 [00:00<00:00, 710.95it/s]

time: 23.3 ms (started: 2023-09-29 14:12:00 +00:00)





In [56]:
print(scores.mean().sort_values(ascending=False))

whisper    0.682378
google     0.639828
vosk       0.593321
sphinx     0.334570
dtype: float64
time: 4.41 ms (started: 2023-09-29 14:12:59 +00:00)


We can see that whisper has the best score, followed by google and vosk, while sphinx lags behind.

## Clean Audio

### Aggregate Transcriptions

In [57]:
transcriptions_series = filenames.progress_apply(transcribe_file, filter_noise=True)
transcriptions_list = transcriptions_series.values.tolist()
transcriptions = pd.DataFrame(transcriptions_list).rename(column_names, axis=1)

100%|██████████| 10/10 [01:51<00:00, 11.16s/it]

time: 1min 51s (started: 2023-09-29 14:13:21 +00:00)





In [58]:
assessment_clean = pd.DataFrame({"filename": filenames})
assessment_clean = pd.concat([assessment_clean, transcriptions], axis=1)

time: 1.73 ms (started: 2023-09-29 14:15:23 +00:00)


In [59]:
assessment_clean = assessment_clean.merge(manual_transcriptions, left_on='filename', right_on='filename')

time: 4.2 ms (started: 2023-09-29 14:15:25 +00:00)


In [60]:
assessment_clean

Unnamed: 0,filename,google,sphinx,whisper,vosk,manual_transcriptions
0,911_first6sec/call_8_0.wav,put on hello hello is everything okay,and while canal in the making of the coup,Hello. Hello. Is everything okay? I'm.,now i'm now it's everything okay,um hello hello is everything okay
1,911_first6sec/call_9_0.wav,what's going on,and i had had a war what owen adam had,I need a police officer over here at 7. What'...,anita a police officer of the here the corner ...,yes um i need a police officer over here at wh...
2,911_first6sec/call_10_0.wav,damn my pickup was born I had to go find it,my own mind at football has invited up,"Ma'am, my kekep was born. I had to go find it.",well my pick up on had to go find that ah,ma'am my pickup was stolen I had to go find it...
3,911_first6sec/call_11_0.wav,UNABLE TO RECOGNIZE,i'd really a man am i to have an latent he cou...,I'm gonna go to the bathroom and close the le...,remember my mom i got my house now,no ma'am i don't i don't have an emergency two...
4,911_first6sec/call_15_0.wav,I'm downstairs and I just killed two people,one delegate in the uphill to people that,I'm Don Pearson and I just killed two people.,i'm down here and that it killed people,I'm Dylon Peterson and i just killed two people
5,911_first6sec/call_17_0.wav,on Route 7 right here in Abington,god that the loaded on little doubt and let it,"We've got a far down here, it's on Route 7, r...",the far down here or out thousand right here t...,yeah we got a fire down here at on route seven...
6,911_first6sec/call_20_0.wav,get out of the house yet no I get out of the h...,that i'd have now or i didn't have you read th...,You're out of the house yet? No. Or I get out...,the out of as you know i get out of as we can't,are you out of the house yet no alright get ou...
7,911_first6sec/call_21_0.wav,UNABLE TO RECOGNIZE,i have heard of klingon camp but little more k...,"Yes, it is a police station. Yes, it is. May ...",i can't say help you okay i put the ridiculous,yes is this a police station yes it is may i h...
8,911_first6sec/call_28_0.wav,please send rescue to I think I'm,and i do her out and adam had,Please send a rescue to I think I'm having a,that do too i think i'm heading,can you please send rescue to i think i'm
9,911_first6sec/call_31_0.wav,oh my God somebody shooting at the sheriff's d...,i'm alive than he wanted identify have been he...,"Oh my God, some of these children that they s...",oh my god how many hidden a partner at the sha...,oh my god somebody is shooting at the sheriff'...


time: 20.9 ms (started: 2023-09-29 14:15:27 +00:00)


### Assess Transcriptions

In [62]:
scores_clean_series = assessment_clean.progress_apply(similar, axis=1, indices=indices)
scores_clean_list = scores_clean_series.values.tolist()
scores_clean = pd.DataFrame(scores_clean_list).rename(column_names, axis=1)

100%|██████████| 10/10 [00:00<00:00, 582.35it/s]

time: 24 ms (started: 2023-09-29 14:15:48 +00:00)





In [63]:
print(scores_clean.mean().sort_values(ascending=False))

whisper    0.741748
google     0.617476
vosk       0.594663
sphinx     0.319726
dtype: float64
time: 3.63 ms (started: 2023-09-29 14:15:52 +00:00)


We essentially got the same ranking, with google and sphinx actually worsening as a result of the cleaning, vosk staying roughly the same and whisper improving substantially.

Because Google is unreliable in producing a transcription, we will procced with whisper and vosk and check some of the different models available.

## Checking Vosk & Whisper Models

### VOSK

#### Aggregate Transcriptions

In [16]:
def transcribe_vosk(filename, filter_noise=False, models=[]):
  path = get_path_from_filename(filename)
  audio = get_audio(path, filter_noise=filter_noise)

  t = []
  for model in models:
    vt = recognize_vosk(audio, model)
    t.append(json.loads(vt)['text'])

  return tuple(t)

time: 735 µs (started: 2023-09-29 17:39:58 +00:00)


In [16]:
vosk_models = ['eng_large', 'eng_small', 'vosk-model-en-us-0.22-lgraph', 'vosk-model-en-us-0.42-gigaspeech']
vosk_series = filenames.progress_apply(transcribe_vosk, filter_noise=True, models=vosk_models)
vosk_column_names = {
    k: v for k, v in enumerate(vosk_models)
  }
vosk_list = vosk_series.values.tolist()
vosk_df = pd.DataFrame(vosk_list).rename(vosk_column_names, axis=1)

100%|██████████| 10/10 [16:56<00:00, 101.63s/it]

time: 16min 56s (started: 2023-09-29 16:26:32 +00:00)





In [17]:
assessment_vosk = pd.DataFrame({"filename": filenames})
assessment_vosk = pd.concat([assessment_vosk, vosk_df], axis=1)

time: 2.17 ms (started: 2023-09-29 16:43:44 +00:00)


In [18]:
assessment_vosk = assessment_vosk.merge(manual_transcriptions, left_on='filename', right_on='filename')

time: 74 ms (started: 2023-09-29 16:43:46 +00:00)


In [19]:
assessment_vosk

Unnamed: 0,filename,eng_large,eng_small,vosk-model-en-us-0.22-lgraph,vosk-model-en-us-0.42-gigaspeech,manual_transcriptions
0,911_first6sec/call_8_0.wav,i know i know is everything ok,now i'm now it's everything okay,i hello i know is everything okay,um hello hello is everything okay huh,um hello hello is everything okay
1,911_first6sec/call_9_0.wav,i need a police officer over here except what'...,anita a police officer of the hear the corner ...,i need a police officer over here it's of coin...,i need a police officer over here at some reco...,yes um i need a police officer over here at wh...
2,911_first6sec/call_10_0.wav,well my pickup was gone i had to go find it,well my pick up on a go find that,well my a post gone i to go find that,well my pickup was gone i had to go find it ah,ma'am my pickup was stolen I had to go find it...
3,911_first6sec/call_11_0.wav,i remember the man again i who am i to please ...,remember my mom i got my house personality,i remember a snowman again i who am i going to...,unfortunately no ma'am i don't want emergency ...,no ma'am i don't i don't have an emergency two...
4,911_first6sec/call_15_0.wav,i am down there and i just killed two people,i'm down here and that it killed people,i i'm down there in and that as killed two people,um i'm down here in ny this killed two people,I'm Dylon Peterson and i just killed two people
5,911_first6sec/call_17_0.wav,as far down here at all on route southern righ...,the far down here at or out thousand right her...,at the far down here at all on route southern ...,so far down here it's on route seven right her...,yeah we got a fire down here at on route seven...
6,911_first6sec/call_20_0.wav,out of the house yet no or i get out of the ho...,the out of as you know i get out of as we can't,it out of the house yet no but i get out of th...,you're out of the house yes no i get out of th...,are you out of the house yet no alright get ou...
7,911_first6sec/call_21_0.wav,i think i think may help you here okay i put r...,i can't say help you okay i put the ridiculous,in current events may help you here okay i put...,guess it is simply thinking yes it is may help...,yes is this a police station yes it is may i h...
8,911_first6sec/call_28_0.wav,we sent back due to epic i'm having,that do too i think i'm heading,we been back due to i think i'm having,please send rescue to i think i'm heading,can you please send rescue to i think i'm
9,911_first6sec/call_31_0.wav,oh my god somebody shooting at the sheriff's d...,oh my god how many hidden a partner at the sha...,oh my god somebody shooting at the sheriff's d...,oh my god somebody shooting the sheriff's depa...,oh my god somebody is shooting at the sheriff'...


time: 22.7 ms (started: 2023-09-29 16:43:47 +00:00)


#### Assess Transcriptions

In [27]:
scores_vosk_series = assessment_vosk.progress_apply(similar, axis=1, indices=vosk_models)
scores_vosk_list = scores_vosk_series.values.tolist()
scores_vosk = pd.DataFrame(scores_vosk_list).rename(vosk_column_names, axis=1)

100%|██████████| 10/10 [00:00<00:00, 539.26it/s]

time: 25.8 ms (started: 2023-09-29 16:45:08 +00:00)





In [28]:
print(scores_vosk.mean().sort_values(ascending=False))

vosk-model-en-us-0.42-gigaspeech    0.806011
eng_large                           0.728483
vosk-model-en-us-0.22-lgraph        0.706237
eng_small                           0.582618
dtype: float64
time: 2.31 ms (started: 2023-09-29 16:45:11 +00:00)


We've got some pretty good performances here from the heavy models, with the gigaspeech performing very well.

### Whisper

#### Aggregate Transcriptions

In [15]:
def transcribe_whisper(filename, filter_noise=False, models=[]):
  path = get_path_from_filename(filename)
  audio = get_audio(path, filter_noise=filter_noise)

  t = []
  for model in models:
      t.append(r.recognize_whisper(audio, model))

  return tuple(t)

time: 659 µs (started: 2023-09-29 17:39:54 +00:00)


In [None]:
whisper_models = [
    'base', 'medium', 'large'
    ]
whisper_series = filenames.progress_apply(transcribe_whisper, filter_noise=True, models=whisper_models)
whisper_column_names = {
    k: v for k, v in enumerate(whisper_models)
  }
whisper_list = whisper_series.values.tolist()
whisper_df = pd.DataFrame(whisper_list).rename(whisper_column_names, axis=1)

In [33]:
whisper_assessment = pd.DataFrame({"filename": filenames})
whisper_assessment = pd.concat([whisper_assessment, whisper_df], axis=1)

time: 2.36 ms (started: 2023-09-29 16:52:05 +00:00)


In [34]:
whisper_assessment = whisper_assessment.merge(manual_transcriptions, left_on='filename', right_on='filename')

time: 73 ms (started: 2023-09-29 16:52:06 +00:00)


In [35]:
whisper_assessment

Unnamed: 0,filename,base,medium,large,manual_transcriptions
0,911_first6sec/call_8_0.wav,Hello. Hello. Is everything okay? I'm.,Hello. Hello. Is everything OK?,"Um, hello. Hello. Is everything okay? Um... ...",um hello hello is everything okay
1,911_first6sec/call_9_0.wav,I need a police officer over here at 7. What'...,I need a police officer over here at 7th. Wha...,I need a police officer over here at 7th and ...,yes um i need a police officer over here at wh...
2,911_first6sec/call_10_0.wav,"Ma'am, my kekep was born. I had to go find it.","Ma'am, my pickup was stolen. I had to go find...","Ma'am, my pickup was stolen. I had to go find...",ma'am my pickup was stolen I had to go find it...
3,911_first6sec/call_11_0.wav,I'm gonna go to the bathroom and close the le...,No ma'am I don't. I don't have an emergency. ...,No ma'am I don't. I don't have an emergency t...,no ma'am i don't i don't have an emergency two...
4,911_first6sec/call_15_0.wav,I'm Don Pearson and I just killed two people.,I'm Dylan Beards and that has killed two people.,I'm Dylan Pierce and I just killed two people.,I'm Dylon Peterson and i just killed two people
5,911_first6sec/call_17_0.wav,"We've got a far down here, it's on Route 7, r...","We've got a fire down here on Route 7, right ...","We've got a fire down here on Route 7, right ...",yeah we got a fire down here at on route seven...
6,911_first6sec/call_20_0.wav,You're out of the house yet? No. Or I get out...,"Are you out of the house yet? No. All right, ...",you out of the house yet?,are you out of the house yet no alright get ou...
7,911_first6sec/call_21_0.wav,"Yes, it is a police station. Yes, it is. May ...","Yes, is this the police station? Yes it is, m...","Yes, is this the police station? Yes, it is. ...",yes is this a police station yes it is may i h...
8,911_first6sec/call_28_0.wav,Please send a rescue to I think I'm having a,police and rescue to. I think I'm having.,Please send rescue to... I think I'm having...,can you please send rescue to i think i'm
9,911_first6sec/call_31_0.wav,"Oh my God, some of these children that they s...","Oh my god, so many students at the sheriff's ...","Oh my god, somebody's shooting at the Sheriff...",oh my god somebody is shooting at the sheriff'...


time: 12.8 ms (started: 2023-09-29 16:52:08 +00:00)


### Assess Transcriptions

In [36]:
whisper_scores_series = whisper_assessment.progress_apply(similar, axis=1, indices=whisper_models)
whisper_scores_list = whisper_scores_series.values.tolist()
whisper_scores = pd.DataFrame(whisper_scores_list).rename(whisper_column_names, axis=1)

100%|██████████| 10/10 [00:00<00:00, 443.49it/s]

time: 31.2 ms (started: 2023-09-29 16:52:10 +00:00)





In [37]:
print(whisper_scores.mean().sort_values(ascending=False))

medium    0.808133
large     0.755207
base      0.741748
dtype: float64
time: 8.7 ms (started: 2023-09-29 16:52:13 +00:00)


Interestingly the medium model performed better than the large model, with performance roughlt equivalent to the vosk gigaspeech model, lets comoare their time benhcmarks next.

### Vosk Gigaspeech vs Whisper Medium - Time Benchmark

In [1]:
def time_transcribe(filename, trans_mod):
  t = []
  for transcriber, models in trans_mod.items():
    print(transcriber, models)
    timer = timeit.Timer(
      lambda: transcriber(filename, filter_noise=True, models=models))
    t.append(timer.timeit(1))

  return tuple(t)

In [None]:
trans_mod = {
    transcribe_whisper: ['medium'],
    transcribe_vosk: ['vosk-model-en-us-0.42-gigaspeech']
}
whisper_time = filenames.progress_apply(
    time_transcribe, trans_mod=trans_mod)

  0%|          | 0/10 [00:00<?, ?it/s]

<function transcribe_whisper at 0x783a8ce58940> ['medium']
<function transcribe_vosk at 0x783a8ce589d0> ['vosk-model-en-us-0.42-gigaspeech']


In [45]:
print(whisper_time)

0    5.710433
1    1.225427
2    1.281095
3    1.461308
4    1.222870
5    1.445041
6    2.793575
7    2.784608
8    1.466484
9    1.616682
Name: filename, dtype: float64
time: 2.86 ms (started: 2023-09-29 17:10:06 +00:00)


In [31]:
vosk_models =
vosk_time = filenames.progress_apply(time_transcribe, transcriber=transcribe_vosk, models=vosk_models)

100%|██████████| 10/10 [08:32<00:00, 51.23s/it]

time: 8min 32s (started: 2023-09-29 17:21:05 +00:00)





In [32]:
print(vosk_time)

0    62.904462
1    46.275364
2    52.470438
3    48.673863
4    44.459761
5    48.757273
6    52.969663
7    51.430900
8    54.428047
9    49.846103
Name: filename, dtype: float64
time: 4.46 ms (started: 2023-09-29 17:29:51 +00:00)


## Lets try to reduce the noise

In [None]:
import noisereduce as nr
from scipy.io import wavfile

In [None]:
audio_file_32_path = _os.path.join('data', 'calls', '911_first6sec', 'call_32_0.wav')

In [None]:
# load data
rate, data = wavfile.read(audio_file_32_path)
print(rate, data)
# perform noise reduction
reduced_noise = nr.reduce_noise(y=data, sr=rate)
wavfile.write("mywav_reduced_noise.wav", rate, reduced_noise)

48000 [[    0     0]
 [    0     0]
 [    0     0]
 ...
 [10792 10799]
 [10601 10608]
 [10238 10243]]


MemoryError: Unable to allocate 129. GiB for an array with shape (288000, 60002) and data type float64

# Sentiment Analysis

### UTILS

In [None]:
from nltk.corpus import stopwords
import pandas as pd
import string
from nltk.tokenize import RegexpTokenizer

def tokenize(raw):
    tokenizer = RegexpTokenizer(r"\w+\'?\w*")
    return tokenizer.tokenize(raw)

def preprocess_words_fast(tokens):
    t = pd.Series(tokens).str.lower()
    t = t[~(t.isin(stopwords.words('english')) | t.isin(list(string.punctuation)))]
    return t.tolist()

In [None]:
raw = " Ma'am, my pickup was stolen. I had to go find it. Ahh!"
tokens = tokenize(raw)
words = preprocess_words_fast(tokens)
print(words)

["ma'am", 'pickup', 'stolen', 'go', 'find', 'ahh']


### vader

In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

In [None]:
# dirty
sid.polarity_scores(raw)

{'neg': 0.259, 'neu': 0.741, 'pos': 0.0, 'compound': -0.5411}

In [None]:
# clean
sid.polarity_scores(' '.join(words))

{'neg': 0.39, 'neu': 0.61, 'pos': 0.0, 'compound': -0.4939}

### textblob

In [None]:
from textblob import TextBlob

In [None]:
# dirty
testimonial = TextBlob(raw)
print(testimonial.sentiment)

Sentiment(polarity=0.0, subjectivity=0.0)


In [None]:
# clean
testimonial = TextBlob(' '.join(words))
print(testimonial.sentiment)

Sentiment(polarity=0.0, subjectivity=0.0)


### Flair

In [None]:
from flair.models import TextClassifier
from flair.data import Sentence

In [None]:
classifier = TextClassifier.load('en-sentiment')

In [None]:
sentence = Sentence(raw)
classifier.predict(sentence)

# print sentence with predicted labels
print('Sentence above is: ', sentence.labels)

Sentence above is:  ['Sentence[16]: " Ma'am, my pickup was stolen. I had to go find it. Ahh!"'/'NEGATIVE' (0.9804)]


In [None]:
sentence = Sentence(' '.join(words))
classifier.predict(sentence)

# print sentence with predicted labels
print('Sentence above is: ', sentence.labels)

Sentence above is:  ['Sentence[6]: "ma'am pickup stolen go find ahh"'/'NEGATIVE' (0.9505)]


### Transformers

In [None]:
from transformers import pipeline

In [None]:
classifier = pipeline("sentiment-analysis")

In [None]:
# dirty
classifier([raw])

[{'label': 'NEGATIVE', 'score': 0.9987456798553467}]

In [None]:
# clean
classifier([''.join(words)])

[{'label': 'NEGATIVE', 'score': 0.9751687049865723}]

# get the full sentiment table of all the audio

In [None]:
audio_file_csv = _os.path.join(audio_file_dir, '911_metadata.csv')
df = pd.read_csv(audio_file_csv)

In [None]:
import speech_recognition as sr

In [None]:
r = sr.Recognizer()
def get_text_from_audio(audio_file_name):
    dirr, name = audio_file_name.split('/')
    audio_file_path = _os.path.join('data', 'calls', dirr, name)
    with sr.AudioFile(audio_file_path) as src:
        r.adjust_for_ambient_noise(src, duration=0.5)
        audio = r.record(src)
        return r.recognize_whisper(audio, model='medium')

In [None]:
text_series = df['filename'][:100].apply(get_text_from_audio)
text_series

0                                My mom had a bad time.
1                     Hello. Hello. Is everything okay?
2      I need a police officer over here at 7th. Wha...
3      Ma'am, my pickup was stolen. I had to go find...
4      No ma'am I don't. I don't have an emergency. ...
                            ...                        
95     This is Gifford. Hello? Hello? Oh, number one...
96     across, forty seven oh nine fourty隔s avenue f...
97     I have, um, I need somebody escorted out of m...
98               Hello? Hello? Hello? Hello 911? Hello?
99     I got three zip-holes attacking people. Attac...
Name: filename, Length: 100, dtype: object

# lets use transformers pipeline to analyze sentiment

In [None]:
from transformers import pipeline

def get_sentiment_transformers(text):
    classifier = pipeline("sentiment-analysis")
    sent = classifier([text])
    return sent[0]['label']

In [None]:
sentiment_df = text_series.apply(get_sentiment_transformers)
sentiment_df

0     NEGATIVE
1     POSITIVE
2     NEGATIVE
3     NEGATIVE
4     NEGATIVE
        ...   
95    NEGATIVE
96    POSITIVE
97    NEGATIVE
98    NEGATIVE
99    NEGATIVE
Name: filename, Length: 100, dtype: object

In [None]:
full_df = pd.DataFrame([df['filename'][:100], text_series, sentiment_df]).T
full_df.columns=['filename', 'text', 'sentiment']
full_df

Unnamed: 0,filename,text,sentiment
0,911_first6sec/call_2_0.wav,My mom had a bad time.,NEGATIVE
1,911_first6sec/call_8_0.wav,Hello. Hello. Is everything okay?,POSITIVE
2,911_first6sec/call_9_0.wav,I need a police officer over here at 7th. Wha...,NEGATIVE
3,911_first6sec/call_10_0.wav,"Ma'am, my pickup was stolen. I had to go find...",NEGATIVE
4,911_first6sec/call_11_0.wav,No ma'am I don't. I don't have an emergency. ...,NEGATIVE
...,...,...,...
95,911_first6sec/call_110_0.wav,"This is Gifford. Hello? Hello? Oh, number one...",NEGATIVE
96,911_first6sec/call_111_0.wav,"across, forty seven oh nine fourty隔s avenue f...",POSITIVE
97,911_first6sec/call_112_0.wav,"I have, um, I need somebody escorted out of m...",NEGATIVE
98,911_first6sec/call_114_0.wav,Hello? Hello? Hello? Hello 911? Hello?,NEGATIVE


In [None]:
full_df[full_df['sentiment'] == 'NEGATIVE'].count()

filename     78
text         78
sentiment    78
dtype: int64

# lets use Vader pipeline to analyze sentiment

In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

def get_sentiment_vader(text):
    sid = SentimentIntensityAnalyzer()
    sent = sid.polarity_scores(text)
    compund = sent['compound']
    if compund > 1/3:
        return 'POSITIVE'
    elif compund < - 1/3:
        return 'NEGATIVE'
    else:
        return 'UNKOWN'

In [None]:
sentiment_df_vader = text_series.apply(get_sentiment_vader)
sentiment_df_vader

0     NEGATIVE
1       UNKOWN
2       UNKOWN
3     NEGATIVE
4       UNKOWN
        ...   
95      UNKOWN
96    NEGATIVE
97    NEGATIVE
98      UNKOWN
99    NEGATIVE
Name: filename, Length: 100, dtype: object

In [None]:
full_df_vader = pd.DataFrame([df['filename'][:100], text_series, sentiment_df_vader]).T
full_df_vader.columns=['filename', 'text', 'sentiment']
full_df_vader

Unnamed: 0,filename,text,sentiment
0,911_first6sec/call_2_0.wav,My mom had a bad time.,NEGATIVE
1,911_first6sec/call_8_0.wav,Hello. Hello. Is everything okay?,UNKOWN
2,911_first6sec/call_9_0.wav,I need a police officer over here at 7th. Wha...,UNKOWN
3,911_first6sec/call_10_0.wav,"Ma'am, my pickup was stolen. I had to go find...",NEGATIVE
4,911_first6sec/call_11_0.wav,No ma'am I don't. I don't have an emergency. ...,UNKOWN
...,...,...,...
95,911_first6sec/call_110_0.wav,"This is Gifford. Hello? Hello? Oh, number one...",UNKOWN
96,911_first6sec/call_111_0.wav,"across, forty seven oh nine fourty隔s avenue f...",NEGATIVE
97,911_first6sec/call_112_0.wav,"I have, um, I need somebody escorted out of m...",NEGATIVE
98,911_first6sec/call_114_0.wav,Hello? Hello? Hello? Hello 911? Hello?,UNKOWN


In [None]:
sentiment_df_vader[sentiment_df_vader == 'NEGATIVE'].count()

21

In [None]:
sentiment_df_vader[sentiment_df_vader == 'POSITIVE'].count()

20

In [None]:
import torch

In [None]:
print(torch.cuda.is_available())

False


In [None]:
%conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0

^C

Note: you may need to restart the kernel to use updated packages.
