In [1]:
import os

# 1) Manipulating Audio Files using PyDub

In [2]:
# !pip install pydub

from pydub import AudioSegment
from pydub.effects import normalize

In [3]:
# Create function to convert audio file to wav
def convert_to_wav(filename, duration_ms=60000, output_dir='./input/wav/'):
    """Convert an audio file to .wav, keeping only its leading portion.

    Args:
        filename: Path of the source audio file (read with
            AudioSegment.from_file).
        duration_ms: Number of milliseconds to keep from the start of the
            audio. Defaults to 60000 (the first 60 s). Pass None to keep
            the whole file.
        output_dir: Directory the .wav file is written to. It is created
            if it does not exist yet. Defaults to './input/wav/'.

    The output file keeps the base name of `filename` with its extension
    replaced by ".wav". Nothing is returned.
    """
    # Import audio file
    audio = AudioSegment.from_file(filename)

    # Keep only the leading part of the audio (AudioSegment slices are in ms)
    if duration_ms is not None:
        audio = audio[:duration_ms]

    # Build the output path: same base name, .wav extension
    new_filename = os.path.splitext(os.path.basename(filename))[0] + ".wav"
    output_path = os.path.join(output_dir, new_filename)

    # export() does not create missing directories, so make sure the target exists
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Announce the conversion before doing it, so the message is truthful
    # even when the export takes a while or fails
    print(f"Converting {filename} to {output_path}...")

    # Export file as .wav
    audio.export(output_path, format='wav')

In [4]:
def show_pydub_stats(filename):
    """Print basic attributes of an audio file and return the loaded segment.

    Loads `filename` with AudioSegment.from_file, prints its channel count,
    sample width, frame rate, frame width and length, then returns the
    AudioSegment instance.
    """
    segment = AudioSegment.from_file(filename)

    # Collect the report lines first, then print them one per line
    report = (
        f"Channels: {segment.channels}",
        f"Sample width: {segment.sample_width}",
        f"Frame rate (sample rate): {segment.frame_rate}",
        f"Frame width: {segment.frame_width}",
        f"Length (ms): {len(segment)}",
    )
    for line in report:
        print(line)

    return segment

# 2) Speech Recognition using the free Google API

In [5]:
# !pip install SpeechRecognition

import speech_recognition as sr

In [6]:
def transcribe_audio(filename):
    """Transcribe the .wav audio file at `filename` and return the text.

    The file is opened through sr.AudioFile, read in full by a fresh
    sr.Recognizer, and the recording is passed to recognize_google.
    """
    rec = sr.Recognizer()

    # Read the whole file into an audio-data object
    with sr.AudioFile(filename) as source:
        recording = rec.record(source)

    # Hand the recording to the Google recognizer and return its text
    return rec.recognize_google(recording)

In [7]:
# Convert the source mp3 to wav, then print the stats of the converted file
convert_to_wav('./input/mp3/speech1.mp3')
speech1 = show_pydub_stats("./input/wav/speech1.wav")

print('----------------------------------------------------------------------------------')

# Split speech1 into one mono segment per channel (in order to have smaller size)
speech1_split = speech1.split_to_mono()

# Write channel 2 to its own wav file and transcribe that single channel
channel_2_wav = "./input/wav/speech1_channel_2.wav"
speech1_split[1].export(channel_2_wav, format="wav")
print(transcribe_audio(channel_2_wav))

Converting ./input/mp3/speech1.mp3 to ./input/wav/speech1.wav...
Channels: 2
Sample width: 2
Frame rate (sample rate): 44100
Frame width: 4
Length (ms): 60000
----------------------------------------------------------------------------------
Dr Brooks said about the virus now being extraordinarily widespread in this country in rural areas in urban areas with the president lashed out at her but he was pressed about this late today and her White House course planner breach of Scott tonight calling her pathetic after she wore in the pandemic has entered a new phase and the virus is extraordinarily widespread the virus is receding in hotspots across the South and West we've seen slow improvement this is the first time the president has probably gone after coronavirus taskforce coordinator where's The West Wing office in Breaston regularly


## A) NLTK

In [8]:
# !pip install nltk

# import nltk
# nltk.download('vader_lexicon')
# nltk.download('punkt')

from nltk import sent_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer

### 1) Sentiment analysis

In [9]:
# Build the VADER sentiment analyzer (reused by the following cells)
sid = SentimentIntensityAnalyzer()

# Transcribe channel 2 and keep the text for the cells below
speech1_text = transcribe_audio("./input/wav/speech1_channel_2.wav")

# Show the transcript, then its polarity scores
print(speech1_text)
speech1_polarity = sid.polarity_scores(speech1_text)
print(speech1_polarity)

Dr Brooks said about the virus now being extraordinarily widespread in this country in rural areas in urban areas with the president lashed out at her but he was pressed about this late today and her White House course planner breach of Scott tonight calling her pathetic after she wore in the pandemic has entered a new phase and the virus is extraordinarily widespread the virus is receding in hotspots across the South and West we've seen slow improvement this is the first time the president has probably gone after coronavirus taskforce coordinator where's The West Wing office in Breaston regularly
{'neg': 0.047, 'neu': 0.915, 'pos': 0.037, 'compound': -0.2617}


### 2) Splitting the text into sentences

In [10]:
# Author's note: if the paid Google speech recognition API were used, it could
# provide text with full stops between sentences.

# Tokenize speech1_text into sentences, then print each one with its polarity scores
speech1_sentences = sent_tokenize(speech1_text)
for sentence in speech1_sentences:
    print(sentence, sid.polarity_scores(sentence), sep="\n")

Dr Brooks said about the virus now being extraordinarily widespread in this country in rural areas in urban areas with the president lashed out at her but he was pressed about this late today and her White House course planner breach of Scott tonight calling her pathetic after she wore in the pandemic has entered a new phase and the virus is extraordinarily widespread the virus is receding in hotspots across the South and West we've seen slow improvement this is the first time the president has probably gone after coronavirus taskforce coordinator where's The West Wing office in Breaston regularly
{'neg': 0.047, 'neu': 0.915, 'pos': 0.037, 'compound': -0.2617}


## B) spaCy

In [11]:
# !python -m spacy download en_core_web_sm

In [12]:
import spacy

# speech1_text (the channel 2 transcript) is reused from the sentiment-analysis
# cell above, so the audio is not transcribed again here.

# Create a spaCy language model instance.
# Load the pipeline by its package name: the "en" shortcut link is not
# available in spaCy 3, and the download cell above installs en_core_web_sm.
nlp = spacy.load("en_core_web_sm")

# Create a spaCy doc from the speech1 channel 2 text
doc = nlp(speech1_text)

# Check the type of doc
print(type(doc))

<class 'spacy.tokens.doc.Doc'>


### 1) Tokenization

In [13]:
# Print every token in doc together with its idx value
for token in doc:
    print(f"{token.text} {token.idx}")

Dr 0
Brooks 3
said 10
about 15
the 21
virus 25
now 31
being 35
extraordinarily 41
widespread 57
in 68
this 71
country 76
in 84
rural 87
areas 93
in 99
urban 102
areas 108
with 114
the 119
president 123
lashed 133
out 140
at 144
her 147
but 151
he 155
was 158
pressed 162
about 170
this 176
late 181
today 186
and 192
her 196
White 200
House 206
course 212
planner 219
breach 227
of 234
Scott 237
tonight 243
calling 251
her 259
pathetic 263
after 272
she 278
wore 282
in 287
the 290
pandemic 294
has 303
entered 307
a 315
new 317
phase 321
and 327
the 331
virus 335
is 341
extraordinarily 344
widespread 360
the 371
virus 375
is 381
receding 384
in 393
hotspots 396
across 405
the 412
South 416
and 422
West 426
we 431
've 433
seen 437
slow 442
improvement 447
this 459
is 464
the 467
first 471
time 477
the 482
president 486
has 496
probably 500
gone 509
after 514
coronavirus 520
taskforce 532
coordinator 542
where 554
's 559
The 562
West 566
Wing 571
office 576
in 583
Breaston 586
regularly 595


### 2) Splitting the doc into sentences

In [14]:
# Print each sentence of doc, followed by a blank line
for sentence in doc.sents:
    print(sentence, end="\n\n")

Dr Brooks said about the virus now being extraordinarily widespread in this country in rural areas in urban areas with the president lashed out at her but he was pressed about this late today and her White House course planner breach of Scott tonight calling her pathetic after she wore in the pandemic has entered a new phase and the virus is extraordinarily widespread the virus is receding in hotspots across the South and West we've seen slow improvement

this is the first time the president has probably gone after coronavirus taskforce coordinator where's The West Wing office in Breaston regularly



### 3) Named entities

In [15]:
# Print each entity found in doc next to its label
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

Brooks ORG
about this late today DATE
White House ORG
Scott GPE
tonight TIME
South LOC
West LOC
first ORDINAL
West Wing GPE
Breaston PERSON
