## Spectral Feature Extraction from Speech (MFCC, Mel Spectrogram, RMS)

In [1]:
# Mount Google Drive inside the Colab VM (the recorded run mounted it at /content/drive).
# Mounting is interactive: it asks for an authorization code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# !unzip drive/My\ Drive/emotion

In [0]:
import os

In [0]:
import librosa
import librosa.display
import matplotlib.pyplot as plt

In [0]:
import numpy as np
import pandas as pd

In [0]:
# Emotion class names, taken from the entries of meld/train/.
# NOTE(review): os.listdir returns entries in arbitrary order, yet later cells
# index this list positionally (classes[3]); confirm the index still points at
# the intended class before re-running on another machine.
classes = os.listdir('meld/train/')

In [9]:
classes

['neutral', 'happy', 'disgust', 'sad', 'fear']

In [0]:
# !rm -rf train_mel_spectrogram_neutral/
# !rm -rf train_mfcc_neutral/
# Create the per-class image folders. exist_ok=True keeps this cell re-runnable:
# plain os.mkdir raises FileExistsError as soon as the folders already exist.
# NOTE(review): classes[3] depends on os.listdir order (it was 'sad' in the
# recorded run); confirm it matches the class being processed.
os.makedirs('train_mel_spectrogram_'+str(classes[3]), exist_ok=True)
os.makedirs('train_mfcc_'+str(classes[3]), exist_ok=True)

In [0]:
def _image_name(file, suffix):
  """Return the image file name for audio path `file`: base name without extension + `suffix`."""
  stem = os.path.splitext(os.path.basename(file))[0]
  return stem + suffix


def _save_feature_image(matrix, out_dir, img, **specshow_kwargs):
  """Render `matrix` with librosa's specshow and save it as `out_dir/img`.

  Each call opens its own figure and closes it afterwards. Drawing every clip
  onto the implicit current figure stacks one mesh per call on the same axes,
  so memory use and render time grow with the number of files processed.
  """
  os.makedirs(out_dir, exist_ok=True)
  fig = plt.figure()
  try:
    librosa.display.specshow(matrix, x_axis=None, y_axis=None, **specshow_kwargs)
    plt.box(on=False)
    # NOTE(review): matplotlib documents pad_inches as applying only together
    # with bbox_inches='tight'; kept as-is so the saved image size is unchanged.
    plt.savefig(os.path.join(out_dir, img), pad_inches=0)
  finally:
    # Always release the figure, even when rendering or saving fails.
    plt.close(fig)


def get_mfcc(file, feature='mfcc', out_dir='train_mfcc_sad'):
  """Save an MFCC image for one audio file and return the image file name.

  Loads at most the first 6 seconds of `file`, computes 40 MFCCs and writes
  the rendered image to `out_dir/<stem>_mfcc.jpg`.

  Args:
    file: path to the audio file.
    feature: unused; kept so existing callers keep working.
    out_dir: destination folder. The default is now the MFCC folder; the
      previous code wrote MFCC images into 'train_mel_spectrogram_sad/'.

  Returns:
    The image file name (no directory part), e.g. 'clip_mfcc.jpg'.
  """
  img = _image_name(file, '_mfcc.jpg')

  # mfcc features
  aud, sr = librosa.load(path=file, duration=6)
  mfcc = librosa.feature.mfcc(y=aud, sr=sr, n_mfcc=40)
  _save_feature_image(mfcc, out_dir, img)
  return img


def get_mel(file, feature='mel_spec', out_dir='train_mel_spectrogram_sad'):
  """Save a mel-spectrogram image for one audio file and return the image file name.

  Loads at most the first 6 seconds of `file`, computes a 128-band mel
  spectrogram (fmax=8000), converts it to dB relative to its maximum and
  writes the rendered image to `out_dir/<stem>_mel.jpg`.

  Args:
    file: path to the audio file.
    feature: unused; kept so existing callers keep working.
    out_dir: destination folder. The default is now the mel-spectrogram
      folder; the previous code wrote mel images into 'train_mfcc_sad/'.

  Returns:
    The image file name (no directory part), e.g. 'clip_mel.jpg'.
  """
  img = _image_name(file, '_mel.jpg')

  # get the mel spectrogram
  aud, sr = librosa.load(path=file, duration=6)
  S = librosa.feature.melspectrogram(y=aud, sr=sr, n_mels=128, fmax=8000)
  S_dB = librosa.power_to_db(S, ref=np.max)
  _save_feature_image(S_dB, out_dir, img, sr=sr, fmax=8000)

  return img

def get_rms(file, feature='rms', n_frames=128, duration=6):
  """Return the per-frame RMS energy of an audio file as a fixed-length vector.

  Args:
    file: path to the audio file.
    feature: unused; kept so existing callers keep working.
    n_frames: length of the returned vector (default 128).
    duration: maximum number of seconds of audio to load (default 6).

  Returns:
    1-D numpy array of exactly `n_frames` values. Longer sequences are
    truncated; shorter ones are extended at the end with the mean RMS value.
  """
  aud, sr = librosa.load(path=file, duration=duration)
  S, _ = librosa.magphase(librosa.stft(aud))
  rms = librosa.feature.rms(S=S).flatten()

  deficit = n_frames - rms.shape[0]
  if deficit > 0:
    # pad_width=(0, deficit) appends at the end only. A bare integer pads BOTH
    # ends by that amount, which produced 2*n_frames - len(rms) values rather
    # than n_frames for short clips.
    rms = np.pad(rms, pad_width=(0, deficit), mode='mean')
  return rms[:n_frames]

In [0]:
# Accumulator filled in place by load_data: maps each audio file name to its
# feature record (keys 'emotion', 'mfcc', 'mel', 'rms').
features_train_sad = {}


In [0]:
# NOTE(review): nothing in the visible cells draws from NumPy's global RNG, so
# this seed has no observable effect here; kept in case later cells rely on it.
np.random.seed(42)

#DataFlair - Load the data and extract features for each sound file

def load_data(path, cls=classes[3], max_files=700):
    """Extract MFCC / mel / RMS features for the audio files in `path`.

    Args:
        path: directory holding the audio clips of one emotion class.
        cls: emotion label stored with every record.
        max_files: cap on the number of files processed (down-sampling of
            large classes); pass None to process everything.

    Returns:
        The module-level `features_train_sad` dict, updated in place with one
        record per file name: {'emotion', 'mfcc', 'mel', 'rms'}. 'mfcc' and
        'mel' hold image file names, 'rms' holds a numpy vector.
    """
    # os.listdir order is arbitrary; sorting makes the capped subset the same
    # on every run and every machine.
    files = sorted(os.listdir(path))[:max_files]

    for i, file in enumerate(files):
        # os.path.join avoids the doubled slash when `path` already ends in '/'.
        file_path = os.path.join(path, file)

        features_train_sad[file] = {
            'emotion': cls,
            'mfcc': get_mfcc(file_path),
            'mel': get_mel(file_path),
            'rms': get_rms(file_path),
        }

        if i%50==0:
            print('%d of class '%i+cls+" have been processed")

    return features_train_sad

In [0]:
# Extract features for the clips under meld/train/sad/ (load_data caps how many
# files it processes). `features` is the same dict object as
# features_train_sad, not a copy.
features = load_data('meld/train/sad/')

0 of class sad have been processed
50 of class sad have been processed
100 of class sad have been processed
150 of class sad have been processed
200 of class sad have been processed
250 of class sad have been processed
300 of class sad have been processed
350 of class sad have been processed
400 of class sad have been processed
450 of class sad have been processed
500 of class sad have been processed
550 of class sad have been processed
600 of class sad have been processed
650 of class sad have been processed


In [18]:
# NOTE(review): features_train_disgust is not assigned in any cell visible here
# (only features_train_sad is); presumably it is left over from an earlier run
# for the 'disgust' class. If so, this raises NameError on a fresh kernel.
len(features_train_disgust)

232

In [0]:
# NOTE(review): features_train_neutral is not assigned in any cell visible here;
# presumably it is left over from an earlier run for the 'neutral' class.
# Displaying the whole dict prints every RMS vector; a small slice would keep
# the output readable.
features_train_neutral

{'MEL_dia132_utt7_neutral_NEU.wav': {'emotion': 'neutral',
  'mel': 'MEL_dia132_utt7_neutral_NEU_mel.jpg',
  'mfcc': 'MEL_dia132_utt7_neutral_NEU_mfcc.jpg',
  'rms': array([2.4965808 , 2.4152136 , 2.034279  , 1.6537654 , 1.1213706 ,
         0.7525025 , 0.5344433 , 0.3550132 , 0.20237523, 0.16385382,
         0.14949648, 0.1346566 , 0.37447646, 0.67847323, 0.3595522 ,
         0.17867224, 0.32725608, 0.3884981 , 0.38512182, 0.43700805,
         0.5430578 , 0.8714112 , 1.2028465 , 1.3756019 , 1.701273  ,
         1.9711865 , 1.9502308 , 2.0977159 , 2.4824376 , 2.4581842 ,
         2.5343332 , 2.937641  , 3.0486612 , 3.1345818 , 3.191331  ,
         3.245764  , 3.4322326 , 3.1342478 , 2.9231567 , 3.085519  ,
         2.9615529 , 2.712703  , 2.6623683 , 2.6814356 , 2.405747  ,
         2.6670952 , 2.8521438 , 2.825118  , 2.876135  , 2.5454223 ,
         2.1073196 , 2.2775664 , 2.5959055 , 2.6897476 , 2.7100751 ,
         2.8524923 , 2.9586327 , 2.933026  , 2.743803  , 2.6478732 ,
        

In [20]:
# Sanity check: number of entries in train_mel_spectrogram_disgust/.
len(os.listdir('train_mel_spectrogram_disgust/'))

232

In [21]:
# Sanity check: number of entries in train_mfcc_disgust/.
len(os.listdir('train_mfcc_disgust/'))

232

In [23]:
!zip -r train_mel_spectrogram_disgust.zip train_mel_spectrogram_disgust/

  adding: train_mel_spectrogram_disgust/ (stored 0%)
  adding: train_mel_spectrogram_disgust/MEL_dia237_utt5_negative_DIS_mfcc.jpg (deflated 4%)
  adding: train_mel_spectrogram_disgust/MEL_dia275_utt12_negative_DIS_mfcc.jpg (deflated 4%)
  adding: train_mel_spectrogram_disgust/MEL_dia1010_utt4_negative_DIS_mfcc.jpg (deflated 12%)
  adding: train_mel_spectrogram_disgust/MEL_dia615_utt2_negative_DIS_mfcc.jpg (deflated 7%)
  adding: train_mel_spectrogram_disgust/MEL_dia554_utt9_negative_DIS_mfcc.jpg (deflated 5%)
  adding: train_mel_spectrogram_disgust/MEL_dia861_utt14_negative_DIS_mfcc.jpg (deflated 8%)
  adding: train_mel_spectrogram_disgust/MEL_dia482_utt6_negative_DIS_mfcc.jpg (deflated 6%)
  adding: train_mel_spectrogram_disgust/MEL_dia431_utt19_negative_DIS_mfcc.jpg (deflated 7%)
  adding: train_mel_spectrogram_disgust/MEL_dia231_utt11_negative_DIS_mfcc.jpg (deflated 6%)
  adding: train_mel_spectrogram_disgust/MEL_dia396_utt0_negative_DIS_mfcc.jpg (deflated 6%)
  adding: train_mel_s

In [24]:
!zip -r train_mfcc_disgust.zip train_mfcc_disgust/

  adding: train_mfcc_disgust/ (stored 0%)
  adding: train_mfcc_disgust/MEL_dia945_utt0_negative_DIS_mel.jpg (deflated 2%)
  adding: train_mfcc_disgust/MEL_dia281_utt8_negative_DIS_mel.jpg (deflated 2%)
  adding: train_mfcc_disgust/MEL_dia754_utt1_negative_DIS_mel.jpg (deflated 2%)
  adding: train_mfcc_disgust/MEL_dia151_utt0_negative_DIS_mel.jpg (deflated 3%)
  adding: train_mfcc_disgust/MEL_dia515_utt3_negative_DIS_mel.jpg (deflated 2%)
  adding: train_mfcc_disgust/MEL_dia322_utt0_negative_DIS_mel.jpg (deflated 2%)
  adding: train_mfcc_disgust/MEL_dia1038_utt7_negative_DIS_mel.jpg (deflated 2%)
  adding: train_mfcc_disgust/MEL_dia1038_utt5_negative_DIS_mel.jpg (deflated 2%)
  adding: train_mfcc_disgust/MEL_dia992_utt14_negative_DIS_mel.jpg (deflated 2%)
  adding: train_mfcc_disgust/MEL_dia236_utt11_negative_DIS_mel.jpg (deflated 2%)
  adding: train_mfcc_disgust/MEL_dia1027_utt6_negative_DIS_mel.jpg (deflated 2%)
  adding: train_mfcc_disgust/MEL_dia170_utt4_negative_DIS_mel.jpg (deflat

In [0]:
import pickle

In [0]:
# Persist the feature dict to spectral_features_train_disgust.pkl with pickle.
# NOTE(review): features_train_disgust is not assigned in any cell visible here
# (only features_train_sad is); confirm which class this cell is meant to save
# before re-running on a fresh kernel.
with open('spectral_features_train_disgust.pkl', 'wb') as fp:
  pickle.dump(features_train_disgust, fp)