In [4]:
# Importing required libraries 
# Keras
import keras
from keras import regularizers
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Embedding, LSTM
from keras.layers import Input, Flatten, Dropout, Activation, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

# sklearn
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Import libraries 
import librosa
import librosa.display
import json
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
import pandas as pd
import seaborn as sns
import glob 
import wave
from sklearn.metrics import confusion_matrix
import IPython.display as ipd 
import os
import pickle
import sys
import warnings
from tensorflow.python.keras import optimizers
import pyaudio
import h5py

# Silence all warnings unless warning options were passed to the interpreter
# (sys.warnoptions is empty when none were given).
if len(sys.warnoptions) == 0:
    warnings.simplefilter(action="ignore")
# DeprecationWarning is silenced unconditionally.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Root folder of the RAVDESS dataset (relative path, trailing slash included
# because later cells build paths by plain string concatenation).
RAVDESS = "data/RAVDESS/"

In [7]:
# RAVDESS data handling
# 1. Collect the path and the coarse emotion label of every RAVDESS recording

# The third dash-separated field of a file name is a numeric emotion code;
# the eight codes are collapsed into three classes.
EMOTION_CODE_TO_LABEL = {
    1: 'neutral',
    2: 'positive', 3: 'positive', 8: 'positive',
    4: 'negative', 5: 'negative', 6: 'negative', 7: 'negative',
}

RAVDESS_list = os.listdir(RAVDESS)
RAVDESS_list.sort()

emotion = []
path = []

for folder in RAVDESS_list:
    folder_path = RAVDESS + folder
    # Skip stray files in the dataset root; only sub-folders hold recordings.
    if not os.path.isdir(folder_path):
        continue
    # Sorted so that the row order (and therefore the later seeded split)
    # does not depend on the file system's listing order.
    for f in sorted(os.listdir(folder_path)):
        # Ignore anything that is not a recording (hidden files, notes, ...),
        # which would otherwise break the file-name parsing below.
        if not f.lower().endswith('.wav'):
            continue
        part = f.split('.')[0].split('-')
        emotion.append(int(part[2]))
        path.append(folder_path + '/' + f)

# Build the table in one go: label, data source and file path per recording.
# Codes missing from the mapping are kept as-is, as DataFrame.replace would do.
RAVDESS_data_frame = pd.DataFrame({
    'labels': [EMOTION_CODE_TO_LABEL.get(code, code) for code in emotion],
    'source': ['RAVDESS'] * len(path),
    'path': path,
})
print(RAVDESS_data_frame.labels.value_counts())
RAVDESS_data_frame.to_csv("Data_path_3type.csv",index=False)

# Preview the table (last expression, so the notebook renders it)
RAVDESS_data_frame.head()

negative    768
positive    576
neutral      96
Name: labels, dtype: int64


In [11]:
ref = pd.read_csv("Data_path_3type.csv")

# Loop feature extraction over the entire dataset.
# The per-file vectors are collected in a plain list and turned into a
# DataFrame once at the end; appending rows to a DataFrame inside the loop
# re-allocates the frame on every iteration.
features = []
for path in ref.path:
    # Load 2.5 s of audio starting 0.5 s into the file, resampled to 88.2 kHz
    X, sample_rate = librosa.load(path,
                                  res_type='kaiser_fast',
                                  duration=2.5,
                                  sr=44100*2,
                                  offset=0.5)

    # Mean over axis 0 of the 13-coefficient MFCC matrix is the feature.
    # Could do min and max etc. as well.
    mfccs = np.mean(librosa.feature.mfcc(y=X,
                                         sr=sample_rate,
                                         n_mfcc=13),
                    axis=0)
    features.append(mfccs)

# One row per file, a single 'feature' column holding that file's vector
data_frame = pd.DataFrame({'feature': features})

# Check that every file was processed: should equal the number of rows in ref
print(len(data_frame))

1440


In [12]:
# Spread each per-file feature vector over its own numeric columns, place
# them next to the metadata columns from `ref`, and zero-fill missing values.
# NOTE: this rebinds `data_frame`, so the cell can only be run once after the
# extraction cell (a second run no longer finds the 'feature' column).
feature_columns = pd.DataFrame(data_frame['feature'].values.tolist())
data_frame = pd.concat([ref, feature_columns], axis=1).fillna(0)
print(data_frame.shape)
data_frame[:5]

(1440, 434)


Unnamed: 0,labels,source,path,0,1,2,3,4,5,6,...,421,422,423,424,425,426,427,428,429,430
0,neutral,RAVDESS,data/RAVDESS/Actor_01/03-01-01-01-01-01-01.wav,-66.685966,-66.685966,-66.685966,-66.685966,-66.685966,-66.685966,-66.685966,...,-63.782047,-65.159958,-66.379623,-66.370964,-65.050102,-62.615089,-63.867981,-66.252884,-65.178276,-64.132698
1,neutral,RAVDESS,data/RAVDESS/Actor_01/03-01-01-01-01-02-01.wav,-64.795509,-61.218655,-63.572693,-66.222389,-66.224869,-63.284908,-60.982262,...,-66.224869,-66.224869,-66.224869,-66.224869,-66.224869,-66.224869,-66.224869,-66.224869,-66.224869,-66.224869
2,neutral,RAVDESS,data/RAVDESS/Actor_01/03-01-01-01-02-01-01.wav,-66.986763,-66.986763,-66.986763,-66.986763,-66.986763,-66.986763,-66.986763,...,-65.829056,-66.891724,-66.986763,-66.986763,-66.986763,-66.986763,-66.986763,-66.986763,-66.986763,-66.986763
3,neutral,RAVDESS,data/RAVDESS/Actor_01/03-01-01-01-02-02-01.wav,-66.507835,-66.507835,-66.507835,-66.507835,-66.507835,-66.507835,-66.507835,...,-63.659512,-61.913033,-64.136932,-65.742035,-66.507835,-64.259415,-61.868252,-61.696602,-61.834988,-60.222519
4,positive,RAVDESS,data/RAVDESS/Actor_01/03-01-02-01-01-01-01.wav,-70.70639,-70.70639,-70.70639,-70.70639,-70.70639,-70.70639,-70.70639,...,-57.867104,-60.163128,-62.258419,-65.114487,-63.698933,-62.085384,-60.464443,-59.918438,-60.530823,-60.381058


In [13]:
# Hold out 25% of the rows as a test set (shuffled, fixed seed).
# Everything except the metadata columns is a model input; `labels` is the target.
feature_matrix = data_frame.drop(['path', 'labels', 'source'], axis=1)
target = data_frame.labels
X_train, X_test, y_train, y_test = train_test_split(
    feature_matrix,
    target,
    test_size=0.25,
    shuffle=True,
    random_state=42,
)
# Peek at the split; only the last expression (X_test) is rendered by the notebook
X_train.head()
X_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,421,422,423,424,425,426,427,428,429,430
168,-44.382664,-47.018936,-54.718075,-50.243736,-51.747402,-55.910515,-58.261299,-56.977737,-53.866367,-56.098293,...,-24.030037,-23.761675,-23.705111,-24.690458,-23.974962,-24.142874,-24.684908,-26.125843,-27.759472,-27.305826
605,-69.63311,-69.830307,-57.556004,-53.482651,-53.80809,-53.371056,-54.884232,-54.602116,-53.492924,-54.011311,...,-54.089188,-55.895615,-54.90139,-55.646564,-58.701538,-57.238113,-56.695328,-59.327534,-58.13623,-57.129715
548,-51.624596,-52.237976,-52.421047,-46.545433,-48.257938,-49.996357,-49.724094,-53.130497,-51.704124,-52.405331,...,-47.864788,-46.920864,-48.456944,-48.651222,-51.213448,-51.993599,-49.663742,-47.144775,-49.029716,-52.649437
65,-66.581444,-66.581444,-66.581444,-66.581444,-66.581444,-66.581444,-66.581444,-66.581444,-66.581444,-66.581444,...,-32.184643,-32.871849,-31.989889,-32.139725,-32.033432,-33.708683,-34.899204,-37.167942,-36.603119,-37.974979
628,-45.586433,-47.635994,-50.17952,-50.912094,-50.11676,-49.17947,-49.236221,-48.963261,-47.886662,-47.749001,...,-47.900581,-45.468338,-49.760113,-49.91394,-50.477028,-50.067268,-51.429726,-52.603394,-53.560162,-55.155289


In [14]:
# Let's do data normalization.
# The statistics come from the training split only and are then applied to
# both splits, so nothing about the test rows influences the scaling.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
# A column that is constant over the training rows has std == 0; dividing by
# it would fill that column with NaN/inf. Use 1 instead so the (already
# centred) column simply stays at 0.
std = std.replace(0, 1.0)

X_train = (X_train - mean)/std
X_test = (X_test - mean)/std

# Check the dataset now (only the last expression, X_test, is rendered)
X_train.head()
X_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,421,422,423,424,425,426,427,428,429,430
168,1.422069,1.095086,0.216448,0.739628,0.548048,0.057818,-0.224314,-0.074326,0.295085,0.031048,...,1.871969,1.916299,1.930007,1.866689,1.94229,1.938418,1.916531,1.819044,1.650641,1.434452
605,-1.473871,-1.582902,-0.124789,0.354127,0.305237,0.35745,0.176941,0.207883,0.339306,0.277622,...,-0.566869,-0.70291,-0.611386,-0.655114,-0.892077,-0.755273,-0.702835,-0.915805,-0.798045,-0.700013
548,0.591501,0.482386,0.492646,1.179807,0.959211,0.755632,0.790056,0.382702,0.551127,0.467366,...,-0.061854,0.028614,-0.086391,-0.085246,-0.280903,-0.328411,-0.127451,0.087698,-0.063964,-0.379364
65,-1.12388,-1.201495,-1.210022,-1.204914,-1.199846,-1.20125,-1.212892,-1.215185,-1.210573,-1.207521,...,1.210349,1.173737,1.25509,1.259843,1.284561,1.159836,1.08071,0.909498,0.93775,0.670872
628,1.28401,1.022645,0.762171,0.660079,0.740187,0.852017,0.848024,0.877742,1.003173,1.017504,...,-0.064758,0.147008,-0.192554,-0.188112,-0.220797,-0.171623,-0.271959,-0.361932,-0.429166,-0.558705


In [15]:
# A few preparation steps to get the data into the format Keras expects
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

# One-hot encode the target.
# The encoder is fitted on the training labels ONLY and then reused for the
# test labels: refitting on the test split could assign different integer
# codes to the same class (e.g. when a class is absent from the test rows).
# transform() raises ValueError if the test split contains an unseen label.
lb = LabelEncoder()
train_codes = lb.fit_transform(y_train)
num_classes = len(lb.classes_)
# num_classes is passed explicitly so both matrices have the same width even
# if the highest-coded class does not occur in one of the splits.
y_train = to_categorical(train_codes, num_classes=num_classes)
y_test = to_categorical(lb.transform(y_test), num_classes=num_classes)

print(X_train.shape)
print(lb.classes_)

# Pickle the fitted encoder for future use; the context manager closes the
# file even if pickling fails.
filename = 'labels'
with open(filename, 'wb') as outfile:
    pickle.dump(lb, outfile)

(1080, 431)
['negative' 'neutral' 'positive']


In [16]:
# Append a trailing axis of length 1 so each row becomes a one-channel
# sequence, as expected by the Conv1D input: (rows, cols) -> (rows, cols, 1)
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]
X_train.shape

(1080, 431, 1)

In [17]:
# 1-D CNN over the feature sequence: three groups of Conv1D+ReLU layers
# (256, 128 and 64 filters, kernel size 8, 'same' padding), the first two
# followed by dropout and max-pooling, then a softmax classifier.

# Width of the output layer follows the one-hot targets instead of being
# hard-coded, so the model stays correct if the label set changes.
num_classes = y_train.shape[1]

model = Sequential()

# Group 1: 2 x Conv1D(256)
model.add(Conv1D(256, 8, padding='same', input_shape=(X_train.shape[1], 1)))  # X_train.shape[1] = No. of columns
model.add(Activation('relu'))
model.add(Conv1D(256, 8, padding='same'))
model.add(Activation('relu'))
model.add(Dropout(0.25))
model.add(MaxPooling1D(pool_size=8))

# Group 2: 4 x Conv1D(128)
for _ in range(4):
    model.add(Conv1D(128, 8, padding='same'))
    model.add(Activation('relu'))
model.add(Dropout(0.25))
model.add(MaxPooling1D(pool_size=8))

# Group 3: 2 x Conv1D(64)
for _ in range(2):
    model.add(Conv1D(64, 8, padding='same'))
    model.add(Activation('relu'))

# Classifier head
model.add(Flatten())
model.add(Dense(num_classes))  # Target class number
model.add(Activation('softmax'))

# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
# `decay` is kept as in the original setup.
opt = keras.optimizers.RMSprop(learning_rate=0.00001, decay=1e-6)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 431, 256)          2304      
                                                                 
 activation (Activation)     (None, 431, 256)          0         
                                                                 
 conv1d_1 (Conv1D)           (None, 431, 256)          524544    
                                                                 
 activation_1 (Activation)   (None, 431, 256)          0         
                                                                 
 dropout (Dropout)           (None, 431, 256)          0         
                                                                 
 max_pooling1d (MaxPooling1D  (None, 53, 256)          0         
 )                                                               
                                                        

In [18]:
# Compile with the optimiser created in the model cell, then train.
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# NOTE: the held-out test split is also used as the validation data here.
model_history = model.fit(
    X_train,
    y_train,
    batch_size=14,
    epochs=100,
    validation_data=(X_test, y_test),
)

Epoch 1/100
13/78 [====>.........................] - ETA: 18s - loss: 1.0778 - accuracy: 0.5769

KeyboardInterrupt: 

In [None]:
# Training loss vs. validation loss per epoch, drawn in that order so the
# legend entries ('train', 'test') line up with the curves.
for curve in ('loss', 'val_loss'):
    plt.plot(model_history.history[curve])
plt.legend(['train', 'test'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('model loss')
plt.show()

In [None]:
# Save model and weights
model_name = 'Emotion_Model_3type.h5'
save_dir = os.path.join(os.getcwd(), 'saved_models')

# exist_ok=True creates the folder when missing and is a no-op when it is
# already there, without a separate (racy) existence check.
os.makedirs(save_dir, exist_ok=True)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Save model and weights at %s ' % model_path)

# Also save the architecture alone as JSON (written to the working directory,
# which is where the loading cell looks for it).
model_json = model.to_json()
with open('model_json.json', "w") as json_file:
    json_file.write(model_json)

In [None]:
# Load the model architecture from JSON; the context manager closes the file
# even if reading fails.
with open('model_json.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Load weights into the new model.
# The file name must match the one written by the save cell
# ('Emotion_Model_3type.h5'); the previous 'Emotion_Model.h5' pointed at a
# file this notebook never creates.
loaded_model.load_weights("saved_models/Emotion_Model_3type.h5")
print("Loaded model from disk")

# Keras optimiser (`learning_rate` is the supported keyword; `lr` is a
# deprecated alias). The model must be compiled before evaluate().
opt = keras.optimizers.RMSprop(learning_rate=0.00001, decay=1e-6)
loaded_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# score[0] is the loss, score[1] the accuracy metric requested above
score = loaded_model.evaluate(X_test, y_test, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))