In [109]:
# Import libraries 
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
import pandas as pd
import glob 
from sklearn.metrics import confusion_matrix
import IPython.display as ipd  # To play sound in the notebook
import os
import sys
import warnings
# Silence all warnings unless warning options were passed to the interpreter
if len(sys.warnoptions) == 0:
    warnings.simplefilter(action="ignore")
# DeprecationWarning is silenced unconditionally
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

In [110]:
# Uncomment to list every file available under the Kaggle input directory:
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# Root folders of the two datasets. Both end with a trailing slash because the
# cells below build file paths by plain string concatenation (e.g. RAV + i).
TESS = "/kaggle/input/toronto-emotional-speech-set-tess/tess toronto emotional speech set data/TESS Toronto emotional speech set data/"
RAV = "/kaggle/input/ravdess-emotional-speech-audio/audio_speech_actors_01-24/"




In [111]:
# Build the RAVDESS metadata table: one row per audio file holding its
# "<gender>_<emotion>" label, the dataset name and the file path.
#
# The file stem is split on '-'; index 2 is read as the emotion code and
# index 6 as the actor number (even -> female, odd -> male).
# Codes 1 and 2 are both mapped to 'neutral'.
ravdess_emotions = {1: 'neutral', 2: 'neutral', 3: 'happy', 4: 'sad',
                    5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprise'}

dir_list = sorted(os.listdir(RAV))

emotion = []
gender = []
path = []
for actor_dir in dir_list:
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore the seeded train/test split) reproducible.
    for wav_name in sorted(os.listdir(RAV + actor_dir)):
        if not wav_name.lower().endswith('.wav'):
            continue  # skip stray non-audio files instead of crashing on the parse below
        part = wav_name.split('.')[0].split('-')
        emotion.append(ravdess_emotions[int(part[2])])
        gender.append("female" if int(part[6]) % 2 == 0 else "male")
        path.append(RAV + actor_dir + '/' + wav_name)

# Assemble the final three columns directly instead of concatenating,
# renaming and dropping intermediate frames.
RAV_df = pd.DataFrame({'labels': [g + '_' + e for g, e in zip(gender, emotion)]})
RAV_df['source'] = 'RAVDESS'
RAV_df['path'] = path
RAV_df.labels.value_counts()

In [112]:
# Pick a fearful track (RAVDESS, Actor_14, emotion code 06)
fname = RAV + 'Actor_14/03-01-06-02-02-02-14.wav'
data, sampling_rate = librosa.load(fname)

# librosa.display.waveplot is deprecated in favour of waveshow and is missing
# from recent librosa releases; use whichever this installation provides.
plot_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

plt.figure(figsize=(15, 5))
plot_waveform(data, sr=sampling_rate)
plt.title('RAVDESS - fear (Actor_14)')

# Lets play the audio (last expression of the cell, so the player is rendered)
ipd.Audio(fname)

In [113]:
# Pick a happy track (RAVDESS, Actor_14, emotion code 03)
fname = RAV + 'Actor_14/03-01-03-02-02-02-14.wav'
data, sampling_rate = librosa.load(fname)

# librosa.display.waveplot is deprecated in favour of waveshow and is missing
# from recent librosa releases; use whichever this installation provides.
plot_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

plt.figure(figsize=(15, 5))
plot_waveform(data, sr=sampling_rate)
plt.title('RAVDESS - happy (Actor_14)')

# Lets play the audio (last expression of the cell, so the player is rendered)
ipd.Audio(fname)

In [114]:
# Entries of the TESS root folder, in sorted order
dir_list = sorted(os.listdir(TESS))
dir_list

In [115]:
# Build the TESS metadata table: one row per audio file with its label,
# the dataset name and the file path.
#
# The folder names are not capitalised consistently ('OAF_Fear' vs 'YAF_fear'),
# so every known folder is listed explicitly. Folders that are not listed
# are labelled 'Unknown'.
tess_labels = {
    'OAF_angry': 'female_angry',
    'YAF_angry': 'female_angry',
    'OAF_disgust': 'female_disgust',
    'YAF_disgust': 'female_disgust',
    'OAF_Fear': 'female_fear',
    'YAF_fear': 'female_fear',
    'OAF_happy': 'female_happy',
    'YAF_happy': 'female_happy',
    'OAF_neutral': 'female_neutral',
    'YAF_neutral': 'female_neutral',
    'OAF_Pleasant_surprise': 'female_surprise',
    'YAF_pleasant_surprised': 'female_surprise',
    'OAF_Sad': 'female_sad',
    'YAF_sad': 'female_sad',
}

path = []
emotion = []
for folder in dir_list:
    label = tess_labels.get(folder, 'Unknown')
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore the seeded train/test split) reproducible.
    for wav_name in sorted(os.listdir(TESS + folder)):
        emotion.append(label)
        path.append(TESS + folder + "/" + wav_name)

TESS_df = pd.DataFrame({'labels': emotion})
TESS_df['source'] = 'TESS'
TESS_df['path'] = path
TESS_df.labels.value_counts()

In [116]:
# lets play a fearful track (TESS, folder YAF_fear)
fname = TESS + 'YAF_fear/YAF_dog_fear.wav'
data, sampling_rate = librosa.load(fname)

# librosa.display.waveplot is deprecated in favour of waveshow and is missing
# from recent librosa releases; use whichever this installation provides.
plot_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

plt.figure(figsize=(15, 5))
plot_waveform(data, sr=sampling_rate)
plt.title('TESS - fear (YAF_dog_fear)')

# Lets play the audio (last expression of the cell, so the player is rendered)
ipd.Audio(fname)

In [117]:
# lets play a happy track (TESS, folder YAF_happy)
fname = TESS + 'YAF_happy/YAF_dog_happy.wav'
data, sampling_rate = librosa.load(fname)

# librosa.display.waveplot is deprecated in favour of waveshow and is missing
# from recent librosa releases; use whichever this installation provides.
plot_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

plt.figure(figsize=(15, 5))
plot_waveform(data, sr=sampling_rate)
plt.title('TESS - happy (YAF_dog_happy)')

# Lets play the audio (last expression of the cell, so the player is rendered)
ipd.Audio(fname)

In [118]:
# Stack the RAVDESS and TESS metadata tables into one frame.
# ignore_index=True gives the result a single 0..n-1 index; without it the
# row labels of the two source frames overlap, so label-based lookups on
# EmotionData would return rows from both datasets.
EmotionData = pd.concat([RAV_df, TESS_df], axis=0, ignore_index=True)
print(EmotionData.labels.value_counts())
#EmotionData.head()
# Persist the table (without the index) so later cells can reload it
EmotionData.to_csv("Data_path.csv", index=False)

In [119]:
# Show the last rows of the combined metadata table
EmotionData.tail()

In [120]:
# Reload the metadata table (labels, source, path) that the earlier cell
# wrote to Data_path.csv, and preview its first rows.
ref = pd.read_csv("Data_path.csv")
ref.head()

**FEATURE EXTRACTION**
Many features can be extracted from audio data, such as:
* Spectral Centroid
* Zero Crossing Rate
* Chroma Frequencies
* Mel-Frequency Cepstral Coefficients (MFCC)
* Spectral Roll off
For characterising and modelling the human voice, MFCCs are generally regarded as the most suitable feature, which is why we use them here and extract them for every audio file in the dataset.

Let's extract the MFCC features for one example recording.

In [121]:
# Source - RAVDESS; Gender - Male; Emotion - Happy
path = "/kaggle/input/ravdess-emotional-speech-audio/audio_speech_actors_01-24/Actor_11/03-01-03-01-02-02-11.wav"
# Load 2.5 s of audio starting 0.5 s into the file, resampled to 44100 Hz
X, sample_rate = librosa.load(path, res_type='kaiser_fast', duration=2.5, sr=22050*2, offset=0.5)
mfcc = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=13)

# librosa.display.waveplot is deprecated in favour of waveshow and is missing
# from recent librosa releases; use whichever this installation provides.
plot_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

# audio wave
plt.figure(figsize=(20, 15))
plt.subplot(3,1,1)
plot_waveform(X, sr=sample_rate)
plt.title('Audio sampled at 44100 Hz')

# MFCC
plt.figure(figsize=(20, 15))
plt.subplot(3,1,1)
# Heat map of the 13 MFCCs over time for the happy sample.
# sr must be passed explicitly: specshow otherwise assumes 22050 Hz and the
# time axis would be stretched for this 44100 Hz signal.
librosa.display.specshow(mfcc, x_axis='time', sr=sample_rate)
plt.ylabel('MFCC')
plt.colorbar()

ipd.Audio(path)

Now let's extract this feature for the entire dataset and then concatenate the feature columns onto our dataframe.

In [122]:
# Extract one feature vector per audio file listed in `ref`.
# Each file contributes the same 2.5 s window (starting at 0.5 s, resampled to
# 44100 Hz) that was used for the single-file example.
feature_rows = []
for path in ref.path:
    X, sample_rate = librosa.load(path
                                  , res_type='kaiser_fast'
                                  , duration=2.5
                                  , sr=44100
                                  , offset=0.5
                                 )

    # Mean over axis 0 of the MFCC matrix is the feature.
    # Min, max etc. could be used as well.
    mfccs = np.mean(librosa.feature.mfcc(y=X,
                                         sr=sample_rate,
                                         n_mfcc=13),
                    axis=0)
    feature_rows.append(mfccs)

# Build the frame once from the collected rows; appending with
# df.loc[counter] = ... re-allocates the frame on every iteration.
df = pd.DataFrame({'feature': pd.Series(feature_rows, dtype=object)})

# Check a few records to make sure its processed successfully


In [123]:
# Spread each feature vector over its own set of columns and attach those
# columns to the metadata table
mfcc_columns = pd.DataFrame(list(df['feature'].values))
df = pd.concat([ref, mfcc_columns], axis=1)
df.head(5)

In [124]:
# Replace missing values with 0.
# NOTE(review): the NaNs presumably come from clips that yielded fewer feature
# values than the longest clip, leaving their trailing columns empty - confirm.
df=df.fillna(0)
print(df.shape)
df[:5]

In [125]:
from sklearn.model_selection import train_test_split

In [126]:
# Hold out 25 % of the rows for testing; the fixed seed keeps the split repeatable
feature_matrix = df.drop(columns=['path', 'labels', 'source'])
X_train, X_test, y_train, y_test = train_test_split(
    feature_matrix,
    df.labels,
    test_size=0.25,
    shuffle=True,
    random_state=42,
)

# Peek at a few training rows before normalisation
X_train[150:160]

In [127]:
# Lets do data normalization
# z-score normalisation: the mean and standard deviation are computed on the
# training split only and then applied to both splits.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
# A column that is constant in the training split has std 0 and would turn
# into NaN/inf after the division; leave such columns unscaled instead.
std[std == 0] = 1.0

X_train = (X_train - mean)/std
X_test = (X_test - mean)/std

# Check the dataset now
X_train[150:160]

In [128]:
from keras.utils import np_utils, to_categorical
from sklearn.preprocessing import LabelEncoder
import pickle

In [129]:
# Convert the splits to plain numpy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

# Label encode the target.
# The encoder is fitted on the training labels only and then re-used for the
# test labels. Re-fitting on y_test (as the original did) can assign different
# integers to the same class, or yield a different one-hot width, whenever the
# test split does not contain exactly the same set of classes.
lb = LabelEncoder()
y_train = np_utils.to_categorical(lb.fit_transform(y_train), num_classes=len(lb.classes_))
y_test = np_utils.to_categorical(lb.transform(y_test), num_classes=len(lb.classes_))

print(X_train.shape)
print(lb.classes_)
#print(y_train[0:10])
#print(y_test[0:10])

# Pickle the lb object for future use; the context manager closes the file
# even if pickling fails.
filename = 'labels'
with open(filename, 'wb') as outfile:
    pickle.dump(lb, outfile)

In [130]:
# Insert a new axis of length 1 at position 2 of both feature arrays
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]
X_train.shape

In [131]:
import keras
from keras import regularizers
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Embedding, LSTM
from keras.layers import Input, Flatten, Dropout, Activation, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

In [132]:
# Baseline 1-D CNN: three groups of Conv1D layers (256 -> 128 -> 64 filters,
# kernel size 8) separated by max-pooling, followed by a softmax classifier.

# Width of the one-hot targets; replaces the previously hard-coded 14 so the
# output layer always matches the encoded labels.
num_classes = y_train.shape[1]

model = Sequential()
# X_train.shape[1] = No. of Columns
model.add(Conv1D(256, 8, padding='same', input_shape=(X_train.shape[1], 1)))
model.add(Activation('relu'))
model.add(Conv1D(256, 8, padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.25))
model.add(MaxPooling1D(pool_size=(8)))

model.add(Conv1D(128, 8, padding='same'))
model.add(Activation('relu'))
model.add(Conv1D(128, 8, padding='same'))
model.add(Activation('relu'))
model.add(Conv1D(128, 8, padding='same'))
model.add(Activation('relu'))
model.add(Conv1D(128, 8, padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.25))
model.add(MaxPooling1D(pool_size=(8)))

model.add(Conv1D(64, 8, padding='same'))
model.add(Activation('relu'))
model.add(Conv1D(64, 8, padding='same'))
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dense(num_classes)) # Target class number
model.add(Activation('softmax'))

# Optimizer used when this model is compiled in the next cell
opt = keras.optimizers.Adam(lr=0.0001)
model.summary()

In [133]:
# Compile and train the baseline model; the held-out split is passed as validation data
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model_history = model.fit(
    X_train,
    y_train,
    batch_size=20,
    epochs=100,
    validation_data=(X_test, y_test),
)

In [134]:
# Training vs. validation loss per epoch for the baseline model
for curve in ('loss', 'val_loss'):
    plt.plot(model_history.history[curve])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [135]:
# Smaller CNN: two Conv1D layers, max-pooling, one more Conv1D layer and a
# softmax classifier.
# The input length and class count are taken from the data (as the first
# model does for its input) instead of the hard-coded 216 and 14.
model2 = Sequential()

model2.add(Conv1D(256, 5, padding='same',
                  input_shape=(X_train.shape[1], 1)))
model2.add(Activation('relu'))
model2.add(Conv1D(128, 5, padding='same'))
model2.add(Activation('relu'))
model2.add(Dropout(0.1))
model2.add(MaxPooling1D(pool_size=(8)))
model2.add(Activation('relu'))
model2.add(Conv1D(128, 5, padding='same'))
model2.add(Activation('relu'))
model2.add(Flatten())
model2.add(Dense(y_train.shape[1]))  # one output unit per one-hot target column
model2.add(Activation('softmax'))

# Optimizer used when model2 is compiled in a later cell
opt = keras.optimizers.Adam(lr=0.00001, decay=1e-6)

In [136]:
# Print the layer-by-layer summary of model2
model2.summary()

In [137]:
# Compile and train model2; the held-out split is passed as validation data
model2.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model_history_2 = model2.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=200,
    validation_data=(X_test, y_test),
)

In [138]:
# Training vs. validation loss per epoch for model2; the figure is also saved to disk
for curve in ('loss', 'val_loss'):
    plt.plot(model_history_2.history[curve])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('loss2.png')
plt.show()

**Observation**

On the training set the accuracy is around 0.9901 and the loss is 0.0291.
On the validation set, however, we get val_loss: 1.0878 and val_acc: 0.7398.
This large gap (val_loss > train_loss) indicates that the model is overfitting.

In [139]:
# model3: same layer stack as model2, built as a fresh model.
# The input length and class count are taken from the data instead of the
# hard-coded 216 and 14.
model3 = Sequential()

model3.add(Conv1D(256, 5, padding='same',
                  input_shape=(X_train.shape[1], 1)))
model3.add(Activation('relu'))
model3.add(Conv1D(128, 5, padding='same'))
model3.add(Activation('relu'))
model3.add(Dropout(0.1))
model3.add(MaxPooling1D(pool_size=(8)))
model3.add(Activation('relu'))
model3.add(Conv1D(128, 5, padding='same'))
model3.add(Activation('relu'))
model3.add(Flatten())
model3.add(Dense(y_train.shape[1]))  # one output unit per one-hot target column
model3.add(Activation('softmax'))

# Fresh optimizer instance created for this model
opt1 = keras.optimizers.Adam(lr=0.00001, decay=1e-6)
model3.summary()

In [140]:
# Compile model3 with opt1, the optimizer created for it in the previous cell.
# The original passed `opt`, the instance already used to train model2, so
# model3 was trained with a re-used optimizer and opt1 was never used.
model3.compile(loss='categorical_crossentropy', optimizer=opt1, metrics=['accuracy'])
model_history_3 = model3.fit(X_train, y_train, batch_size=16, epochs=200, validation_data=(X_test, y_test))

In [141]:
# Training vs. validation loss per epoch for model3; the figure is also saved to disk
for curve in ('loss', 'val_loss'):
    plt.plot(model_history_3.history[curve])
plt.title('model 3 loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('loss_Adam_200_epochs.png')
plt.show()

In [142]:
# model4: same layer stack as model2/model3, trained for 100 epochs.
# The input length and class count are taken from the data instead of the
# hard-coded 216 and 14.
model4 = Sequential()

model4.add(Conv1D(256, 5, padding='same',
                  input_shape=(X_train.shape[1], 1)))
model4.add(Activation('relu'))
model4.add(Conv1D(128, 5, padding='same'))
model4.add(Activation('relu'))
model4.add(Dropout(0.1))
model4.add(MaxPooling1D(pool_size=(8)))
model4.add(Activation('relu'))
model4.add(Conv1D(128, 5, padding='same'))
model4.add(Activation('relu'))
model4.add(Flatten())
model4.add(Dense(y_train.shape[1]))  # one output unit per one-hot target column
model4.add(Activation('softmax'))
opt1 = keras.optimizers.Adam(lr=0.00001, decay=1e-6)
model4.summary()

# Compile with this cell's own optimizer (opt1). The original passed `opt`,
# an instance created for an earlier model, and left opt1 unused.
model4.compile(loss='categorical_crossentropy', optimizer=opt1, metrics=['accuracy'])
model_history_4 = model4.fit(X_train, y_train, batch_size=16, epochs=100, validation_data=(X_test, y_test))

# Training vs. validation loss per epoch; the figure is also saved to disk
plt.plot(model_history_4.history['loss'])
plt.plot(model_history_4.history['val_loss'])
plt.title('model 4 loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('loss_Adam_100_epochs.png')
plt.show()



In [149]:
# model5: the model2-4 stack with two additional Conv1D layers and a second
# dropout after the pooling stage.
# The input length and class count are taken from the data instead of the
# hard-coded 216 and 14.
model5 = Sequential()
model5.add(Conv1D(256, 5, padding='same',
                  input_shape=(X_train.shape[1], 1)))
model5.add(Activation('relu'))
model5.add(Conv1D(128, 5, padding='same'))
model5.add(Activation('relu'))
model5.add(Dropout(0.1))
model5.add(MaxPooling1D(pool_size=(8)))
model5.add(Activation('relu'))
model5.add(Conv1D(128, 5, padding='same'))
model5.add(Activation('relu'))
model5.add(Conv1D(128, 5, padding='same'))
model5.add(Activation('relu'))
model5.add(Dropout(0.2))
model5.add(Conv1D(128, 5, padding='same'))
model5.add(Activation('relu'))
model5.add(Flatten())
model5.add(Dense(y_train.shape[1]))  # one output unit per one-hot target column
model5.add(Activation('softmax'))
opt1 = keras.optimizers.Adam(lr=0.00001, decay=1e-6)
model5.summary()

# Compile with this cell's own optimizer (opt1). The original passed `opt`,
# an instance created for an earlier model, and left opt1 unused.
model5.compile(loss='categorical_crossentropy', optimizer=opt1, metrics=['accuracy'])
model_history_5 = model5.fit(X_train, y_train, batch_size=16, epochs=100, validation_data=(X_test, y_test))

# Training vs. validation loss per epoch; the figure is also saved to disk
plt.plot(model_history_5.history['loss'])
plt.plot(model_history_5.history['val_loss'])
plt.title('model 5 loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('full_loss_Adam_100_epochs.png')
plt.show()

In [150]:
# model6: the model5 stack with batch normalisation after every Conv1D layer
# except the first, and dropout raised to 0.2 throughout.
# The input length and class count are taken from the data instead of the
# hard-coded 216 and 14. The layer sequence itself is unchanged.
model6 = Sequential()
model6.add(Conv1D(256, 5, padding='same',
                  input_shape=(X_train.shape[1], 1)))
model6.add(Activation('relu'))
model6.add(Conv1D(128, 5, padding='same'))
model6.add(BatchNormalization())
model6.add(Activation('relu'))
model6.add(Dropout(0.2))
model6.add(Activation('relu'))
model6.add(MaxPooling1D(pool_size=(8)))
model6.add(Activation('relu'))
model6.add(Dropout(0.2))
model6.add(Conv1D(128, 5, padding='same'))
model6.add(BatchNormalization())
model6.add(Activation('relu'))
model6.add(Dropout(0.2))
model6.add(Conv1D(128, 5, padding='same'))
model6.add(BatchNormalization())
model6.add(Activation('relu'))
model6.add(Dropout(0.2))
model6.add(Conv1D(128, 5, padding='same'))
model6.add(BatchNormalization())
model6.add(Activation('relu'))
model6.add(Flatten())
model6.add(Dense(y_train.shape[1]))  # one output unit per one-hot target column
model6.add(Activation('softmax'))
opt1 = keras.optimizers.Adam(lr=0.00001, decay=1e-6)
model6.summary()

# Compile with this cell's own optimizer (opt1). The original passed `opt`,
# an instance created for an earlier model, and left opt1 unused.
model6.compile(loss='categorical_crossentropy', optimizer=opt1, metrics=['accuracy'])
model_history_6 = model6.fit(X_train, y_train, batch_size=16, epochs=100, validation_data=(X_test, y_test))

# Training vs. validation loss per epoch
plt.plot(model_history_6.history['loss'])
plt.plot(model_history_6.history['val_loss'])
plt.title('model 6 loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
# Saved under its own name: the original reused 'full_loss_Adam_100_epochs.png'
# and thereby overwrote the loss plot written for model5.
plt.savefig('full_loss_Adam_100_epochs_batchnorm.png')
plt.show()