In [1]:
import os

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

# librosa is a Python library for analyzing audio and music.
# It can be used to extract the data from the audio files
import librosa 
import librosa.display
import audioread
# to play the audio files
from IPython.display import Audio
plt.style.use('seaborn-white')
import soundfile

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

import sys

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report

import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [2]:
SAVEE = "/Users/kokilareddy/Downloads/download/AudioData/"

In [3]:
# Get the data location for SAVEE
dir_list = os.listdir(SAVEE)

# parse the filename to get the emotions
emotion=[]
path = []
for i in dir_list:
    if i[-8:-6]=='_a':
        emotion.append('angry')
    elif i[-8:-6]=='_d':
        emotion.append('disgust')
    elif i[-8:-6]=='_f':
        emotion.append('fear')
    elif i[-8:-6]=='_h':
        emotion.append('happy')
    elif i[-8:-6]=='_n':
        emotion.append('neutral')
    elif i[-8:-6]=='sa':
        emotion.append('sad')
    elif i[-8:-6]=='su':
        emotion.append('surprise')
    else:
        emotion.append('unknown') 
    path.append(SAVEE + i)

# Now check out the label count distribution 
SAVEE_df = pd.DataFrame(emotion, columns = ['Emotion'])
SAVEE_df = pd.concat([SAVEE_df, pd.DataFrame(path, columns = ['Path'])], axis = 1)
print('SAVEE dataset')
SAVEE_df.head()

SAVEE dataset


Unnamed: 0,Emotion,Path
0,sad,/Users/kokilareddy/Downloads/download/AudioDat...
1,sad,/Users/kokilareddy/Downloads/download/AudioDat...
2,neutral,/Users/kokilareddy/Downloads/download/AudioDat...
3,surprise,/Users/kokilareddy/Downloads/download/AudioDat...
4,neutral,/Users/kokilareddy/Downloads/download/AudioDat...


In [4]:
SAVEE_df['Gender']='Male'

In [5]:
Audiodata=SAVEE_df

In [6]:
Audiodata

Unnamed: 0,Emotion,Path,Gender
0,sad,/Users/kokilareddy/Downloads/download/AudioDat...,Male
1,sad,/Users/kokilareddy/Downloads/download/AudioDat...,Male
2,neutral,/Users/kokilareddy/Downloads/download/AudioDat...,Male
3,surprise,/Users/kokilareddy/Downloads/download/AudioDat...,Male
4,neutral,/Users/kokilareddy/Downloads/download/AudioDat...,Male
...,...,...,...
475,angry,/Users/kokilareddy/Downloads/download/AudioDat...,Male
476,angry,/Users/kokilareddy/Downloads/download/AudioDat...,Male
477,neutral,/Users/kokilareddy/Downloads/download/AudioDat...,Male
478,surprise,/Users/kokilareddy/Downloads/download/AudioDat...,Male


In [7]:
Audiodata.Emotion.unique() 

array(['sad', 'neutral', 'surprise', 'fear', 'disgust', 'happy', 'angry'],
      dtype=object)

In [9]:
Audiodata['Paths']=Audiodata['Path']+"@"+Audiodata['Emotion']
Paths=Audiodata['Paths']

In [10]:
# LSTM
import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import *
from keras.optimizers import RMSprop

In [11]:
def extract_mfcc(wav_file_name):
    #This function extracts mfcc features and obtain the mean of each dimension
    #Input : path_to_wav_file
    #Output: mfcc_features'''
    data,sr=librosa.load(wav_file_name , res_type='kaiser_fast')
    result = np.array([])    
    mfccs = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40)
    mfccs_processed = np.mean(mfccs.T,axis=0)
    return mfccs_processed

Audio_labels = []
Audio_data = []
for file in Paths:
    print(file)
    File=file.split("@")[0]
    emotion=file.split("@")[1]
    Audio_data.append(extract_mfcc(File)) # extract MFCC features/file
    if(emotion == "angry"):
        Audio_labels.append([0])
    elif(emotion == "neutral"):
        Audio_labels.append([1])
    elif(emotion == "disgust"):
        Audio_labels.append([2])
    elif(emotion == "sad"):
        Audio_labels.append([3])
    elif(emotion == "fear"):
        Audio_labels.append([4])
    elif(emotion == "happy"):
        Audio_labels.append([5])
    elif(emotion == "surprise"):
        Audio_labels.append([6])

/Users/kokilareddy/Downloads/download/AudioData/JK_sa01.wav@sad
/Users/kokilareddy/Downloads/download/AudioData/JK_sa15.wav@sad
/Users/kokilareddy/Downloads/download/AudioData/DC_n13.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/DC_su09.wav@surprise
/Users/kokilareddy/Downloads/download/AudioData/DC_n07.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/JK_n20.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/JK_n08.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/JE_sa08.wav@sad
/Users/kokilareddy/Downloads/download/AudioData/JK_f15.wav@fear
/Users/kokilareddy/Downloads/download/AudioData/JK_f01.wav@fear
/Users/kokilareddy/Downloads/download/AudioData/KL_sa13.wav@sad
/Users/kokilareddy/Downloads/download/AudioData/KL_sa07.wav@sad
/Users/kokilareddy/Downloads/download/AudioData/JK_d03.wav@disgust
/Users/kokilareddy/Downloads/download/AudioData/DC_h01.wav@happy
/Users/kokilareddy/Downloads/download/AudioData/DC_h15.wav@happy
/Users/kokilareddy

/Users/kokilareddy/Downloads/download/AudioData/KL_h04.wav@happy
/Users/kokilareddy/Downloads/download/AudioData/DC_sa04.wav@sad
/Users/kokilareddy/Downloads/download/AudioData/DC_sa10.wav@sad
/Users/kokilareddy/Downloads/download/AudioData/KL_d09.wav@disgust
/Users/kokilareddy/Downloads/download/AudioData/JE_n29.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/JK_a05.wav@angry
/Users/kokilareddy/Downloads/download/AudioData/JK_a11.wav@angry
/Users/kokilareddy/Downloads/download/AudioData/JE_n15.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/JE_n01.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/JE_f08.wav@fear
/Users/kokilareddy/Downloads/download/AudioData/JE_h07.wav@happy
/Users/kokilareddy/Downloads/download/AudioData/JE_h13.wav@happy
/Users/kokilareddy/Downloads/download/AudioData/JE_su11.wav@surprise
/Users/kokilareddy/Downloads/download/AudioData/JE_su05.wav@surprise
/Users/kokilareddy/Downloads/download/AudioData/KL_n02.wav@neutral
/Users/kok

/Users/kokilareddy/Downloads/download/AudioData/KL_su01.wav@surprise
/Users/kokilareddy/Downloads/download/AudioData/KL_su15.wav@surprise
/Users/kokilareddy/Downloads/download/AudioData/JE_f03.wav@fear
/Users/kokilareddy/Downloads/download/AudioData/JE_n22.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/JK_su13.wav@surprise
/Users/kokilareddy/Downloads/download/AudioData/JK_su07.wav@surprise
/Users/kokilareddy/Downloads/download/AudioData/KL_d02.wav@disgust
/Users/kokilareddy/Downloads/download/AudioData/DC_a15.wav@angry
/Users/kokilareddy/Downloads/download/AudioData/DC_a01.wav@angry
/Users/kokilareddy/Downloads/download/AudioData/KL_f14.wav@fear
/Users/kokilareddy/Downloads/download/AudioData/KL_f01.wav@fear
/Users/kokilareddy/Downloads/download/AudioData/KL_f15.wav@fear
/Users/kokilareddy/Downloads/download/AudioData/DC_a14.wav@angry
/Users/kokilareddy/Downloads/download/AudioData/KL_d03.wav@disgust
/Users/kokilareddy/Downloads/download/AudioData/JK_su06.wav@surprise
/Us

/Users/kokilareddy/Downloads/download/AudioData/JE_sa14.wav@sad
/Users/kokilareddy/Downloads/download/AudioData/DC_d10.wav@disgust
/Users/kokilareddy/Downloads/download/AudioData/DC_d04.wav@disgust
/Users/kokilareddy/Downloads/download/AudioData/JK_f09.wav@fear
/Users/kokilareddy/Downloads/download/AudioData/JK_n28.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/JE_a04.wav@angry
/Users/kokilareddy/Downloads/download/AudioData/JE_a10.wav@angry
/Users/kokilareddy/Downloads/download/AudioData/JK_n14.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/DC_su01.wav@surprise
/Users/kokilareddy/Downloads/download/AudioData/DC_su15.wav@surprise
/Users/kokilareddy/Downloads/download/AudioData/DC_n27.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/JK_sa09.wav@sad
/Users/kokilareddy/Downloads/download/AudioData/DC_n25.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/DC_n19.wav@neutral
/Users/kokilareddy/Downloads/download/AudioData/DC_su03.wav@surprise
/U

In [12]:
Audio_labels_array = np.array(Audio_labels)
Audio_data_array = np.array(Audio_data)

In [13]:
Audio_data_array.shape,Audio_labels_array.shape

((480, 40), (480, 1))

In [14]:
labels_categorical = to_categorical(Audio_labels_array)
labels_categorical

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [15]:
x_train,x_test,y_train,y_test= train_test_split(np.array(Audio_data_array),
                                                labels_categorical,
                                                test_size=0.20,
                                                random_state=9)

In [16]:
# Split the training, validating, and testing sets
number_of_samples = Audio_data_array.shape[0]
training_samples = int(number_of_samples * 0.8)
validation_samples = int(number_of_samples * 0.1)
test_samples = int(number_of_samples * 0.1)

In [17]:
# making our data compatible to model.
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((384, 40, 1), (384, 7), (96, 40, 1), (96, 7))

In [18]:
def create_model_LSTM():
    model = Sequential()
    model.add(LSTM(units = 128, return_sequences = True, input_shape = (x_train.shape[1], 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(units = 128, return_sequences = True))
    model.add(Dropout(0.2))
    model.add(LSTM(units = 64, return_sequences = True))
    model.add(Dropout(0.2))
    model.add(LSTM(units = 64))
    model.add(Dropout(0.2))
    model.add(Dense(units = 32))
    model.add(Dense(64))
    model.add(Dropout(0.4))
    model.add(Activation('relu'))
    model.add(Dense(32))
    model.add(Dropout(0.4))
    model.add(Activation('relu'))
    model.add(Dense(7))
    model.add(Activation('softmax'))

    model.compile(optimizer = 'adam', loss = 'binary_crossentropy',metrics=['accuracy'])
    return model

In [19]:
model_A = create_model_LSTM()

In [25]:
history=model_A.fit(x_train,y_train,
    epochs=150, shuffle=True , batch_size = 128)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


In [21]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((384, 40, 1), (384, 7), (96, 40, 1), (96, 7))

In [45]:
path_ = "/Users/kokilareddy/Downloads/download/AudioData/KL_sa07.wav"

import IPython.display as ipd
ipd.Audio(path_)
a = extract_mfcc(path_)
a1 = np.asarray(a)
q = np.expand_dims(a1,1)
qq = np.expand_dims(q,0)
pred = model_A.predict(qq)
preds=pred.argmax(axis=1)
print(preds[0])
if preds[0]==0:
    print("Angry")
elif preds[0]==1:
    print("Neutral")
elif preds[0]==2:
    print("Disgust")
elif preds[0]==3:
    print("Sad")
elif preds[0]==4:
    print("Fear")
elif preds[0]==5:
    print("Happy")
elif preds[0]==6:
    print("Surprise")
elif preds[0]==7:
    print("Calm")

3
Sad


In [43]:
# predicting on test data.
pred_test = model_A.predict(x_test)
y_pred = pred_test.argmax(axis=1)
# y_test = y_test.argmax(axis=1)

In [44]:
# - Calculate the accuracy of our model
accuracy=accuracy_score(y_test, y_pred )

# - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 53.12%


In [95]:
import sounddevice as sd
import numpy as np
from scipy.io.wavfile import write 
# import wavio as wv
import scipy.io.wavfile as wav

fs=44100
duration = 5  # seconds
myrecording = sd.rec(duration * fs, samplerate=fs, channels=2,dtype='float64')
print("Recording Audio")
sd.wait()
print("Audio recording complete ,checking emotion")
write("recording0.wav", fs, myrecording)
path_="recording0.wav"
import IPython.display as ipd
ipd.Audio(path_)
a = extract_mfcc(path_)
a1 = np.asarray(a)
q = np.expand_dims(a1,1)
qq = np.expand_dims(q,0)
pred = model_A.predict(qq)
preds=pred.argmax(axis=1)
print(preds[0])
if preds[0]==0:
    print("Angry")
elif preds[0]==1:
    print("Neutral")
elif preds[0]==2:
    print("Disgust")
elif preds[0]==3:
    print("Sad")
elif preds[0]==4:
    print("Fear")
elif preds[0]==5:
    print("Happy")
elif preds[0]==6:
    print("Surprise")
elif preds[0]==7:
    print("Calm")

Recording Audio
Audio recording complete ,checking emotion
0
Angry
