In [12]:
!pip install librosa



In [318]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import IPython.display as ipd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.layers import Dense, Activation, Input, Dropout

import os
import librosa
import librosa.display
import pickle

from tqdm.notebook import tqdm

In [179]:
# two-digit emotion code (third dash-separated field of a file name) -> emotion name
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}


def get_label(filename):
    '''Return the emotion name encoded in an audio file name.

    The name is split on '-' and its third field is looked up in `emotions`,
    e.g. 03-01-01-01-01-01-01.wav -> neutral.
    '''
    emotion_code = filename.split('-')[2]
    return emotions[emotion_code]

In [180]:
# create a dataframe containing the files paths with their labels
#
# The directory tree is walked in sorted order so that row i refers to the same
# file on every run and platform; the cached features loaded later are joined to
# this table purely by row position, so a stable order matters.

records = []
for dirname, dirnames, filenames in os.walk('data'):
    dirnames.sort()  # in-place sort makes os.walk descend in a deterministic order
    for filename in sorted(filenames):
        # skip stray non-audio files (e.g. OS metadata) that get_label cannot parse
        if not filename.lower().endswith('.wav'):
            continue
        records.append({'filepath': os.path.join(dirname, filename),
                        'label': get_label(filename)})

# explicit columns keep the schema stable even when no file was found
files_paths = pd.DataFrame(records, columns=['filepath', 'label'])

In [181]:
# preview the file path / label table built above
files_paths

Unnamed: 0,filepath,label
0,data\Actor_01\03-01-01-01-01-01-01.wav,neutral
1,data\Actor_01\03-01-01-01-01-02-01.wav,neutral
2,data\Actor_01\03-01-01-01-02-01-01.wav,neutral
3,data\Actor_01\03-01-01-01-02-02-01.wav,neutral
4,data\Actor_01\03-01-02-01-01-01-01.wav,calm
...,...,...
1435,data\Actor_24\03-01-08-01-02-02-24.wav,surprised
1436,data\Actor_24\03-01-08-02-01-01-24.wav,surprised
1437,data\Actor_24\03-01-08-02-01-02-24.wav,surprised
1438,data\Actor_24\03-01-08-02-02-01-24.wav,surprised


In [134]:
# create a dataframe with the features (mfccs) calculated
#
# NOTE: with the loop below commented out, this cell only creates an empty
# one-column frame; `df` is reassigned from the saved file in a later cell.

df = pd.DataFrame(columns=['feature'])

# The following block is commented out because the results of it are saved in a file
#
# NOTE(review): np.mean(mfccs, axis=0) averages over the n_mfcc=40 coefficients and
# keeps one value per frame, so the length of each vector depends on the clip
# duration (presumably why the combined table later contains NaN padding).
# Confirm this is intended rather than axis=1 (one mean per coefficient).
# NOTE(review): wrapping sample_rate in np.array looks unnecessary -- confirm.

# i = 0
# for filepath in tqdm(files_paths.filepath):
#     audio, sample_rate = librosa.load(filepath)
    
#     sample_rate = np.array(sample_rate)
#     mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
#     mfccs_mean = np.mean(mfccs, axis=0)
#     df.loc[i] = [mfccs_mean]
    
#     i += 1

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1440.0), HTML(value='')))




In [136]:
# saving the results of the last block in a file
# df.to_pickle('RAVDESS-mfccs')

In [311]:
# load the saved results from the file
# NOTE: unpickling can execute arbitrary code, so only load a file that you
# generated yourself or otherwise trust.
df = pd.read_pickle('RAVDESS-mfccs')
df.head()

Unnamed: 0,feature
0,"[-21.432735, -21.432735, -21.432735, -21.43273..."
1,"[-21.622257, -21.622257, -21.622257, -21.62225..."
2,"[-18.101141, -17.714779, -16.729706, -17.67694..."
3,"[-19.077229, -19.447943, -19.401802, -18.92272..."
4,"[-20.582228, -19.17782, -18.60321, -19.964561,..."


In [312]:
# create one big dataframe containing the file paths, their labels and their features
#
# The two tables are joined purely by row position, so they must have the same
# number of rows; otherwise pd.concat would silently pad the shorter one with NaN
# and features would end up attached to the wrong (or no) label.
if len(df) != len(files_paths):
    raise ValueError(
        f'cached features have {len(df)} rows but {len(files_paths)} audio files were found'
    )

# expand each per-file feature vector into its own numeric columns (0, 1, 2, ...)
feature_columns = pd.DataFrame(df['feature'].values.tolist())
df = pd.concat([files_paths, feature_columns], axis=1)

In [313]:
# inspect the combined table: file path, label, then one column per feature position
df

Unnamed: 0,filepath,label,0,1,2,3,4,5,6,7,...,218,219,220,221,222,223,224,225,226,227
0,data\Actor_01\03-01-01-01-01-01-01.wav,neutral,-21.432735,-21.432735,-21.432735,-21.432735,-21.432735,-21.432735,-21.432735,-21.277515,...,,,,,,,,,,
1,data\Actor_01\03-01-01-01-01-02-01.wav,neutral,-21.622257,-21.622257,-21.622257,-21.622257,-20.095755,-19.467009,-20.704380,-20.392015,...,,,,,,,,,,
2,data\Actor_01\03-01-01-01-02-01-01.wav,neutral,-18.101141,-17.714779,-16.729706,-17.676943,-18.284256,-17.920685,-18.152332,-17.445875,...,,,,,,,,,,
3,data\Actor_01\03-01-01-01-02-02-01.wav,neutral,-19.077229,-19.447943,-19.401802,-18.922726,-18.963850,-18.131964,-19.041887,-18.886808,...,,,,,,,,,,
4,data\Actor_01\03-01-02-01-01-01-01.wav,calm,-20.582228,-19.177820,-18.603210,-19.964561,-20.390726,-20.453844,-21.788517,-21.550198,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,data\Actor_24\03-01-08-01-02-02-24.wav,surprised,-19.122814,-19.122814,-19.122814,-19.122814,-19.122814,-19.122814,-19.122814,-19.122814,...,,,,,,,,,,
1436,data\Actor_24\03-01-08-02-01-01-24.wav,surprised,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,...,,,,,,,,,,
1437,data\Actor_24\03-01-08-02-01-02-24.wav,surprised,-18.535404,-18.535404,-18.535404,-18.535404,-18.535404,-18.535404,-18.535404,-18.535404,...,,,,,,,,,,
1438,data\Actor_24\03-01-08-02-02-01-24.wav,surprised,-17.836590,-17.836590,-17.836590,-17.836590,-17.836590,-17.824371,-17.803312,-17.827566,...,,,,,,,,,,


In [314]:
# replace NaN by 0
# (feature vectors shorter than the longest one have NaN in their trailing columns
# after the expansion into columns; 0 is used here as the padding value)
df = df.fillna(0)
df

Unnamed: 0,filepath,label,0,1,2,3,4,5,6,7,...,218,219,220,221,222,223,224,225,226,227
0,data\Actor_01\03-01-01-01-01-01-01.wav,neutral,-21.432735,-21.432735,-21.432735,-21.432735,-21.432735,-21.432735,-21.432735,-21.277515,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,data\Actor_01\03-01-01-01-01-02-01.wav,neutral,-21.622257,-21.622257,-21.622257,-21.622257,-20.095755,-19.467009,-20.704380,-20.392015,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,data\Actor_01\03-01-01-01-02-01-01.wav,neutral,-18.101141,-17.714779,-16.729706,-17.676943,-18.284256,-17.920685,-18.152332,-17.445875,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,data\Actor_01\03-01-01-01-02-02-01.wav,neutral,-19.077229,-19.447943,-19.401802,-18.922726,-18.963850,-18.131964,-19.041887,-18.886808,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,data\Actor_01\03-01-02-01-01-01-01.wav,calm,-20.582228,-19.177820,-18.603210,-19.964561,-20.390726,-20.453844,-21.788517,-21.550198,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,data\Actor_24\03-01-08-01-02-02-24.wav,surprised,-19.122814,-19.122814,-19.122814,-19.122814,-19.122814,-19.122814,-19.122814,-19.122814,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1436,data\Actor_24\03-01-08-02-01-01-24.wav,surprised,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1437,data\Actor_24\03-01-08-02-01-02-24.wav,surprised,-18.535404,-18.535404,-18.535404,-18.535404,-18.535404,-18.535404,-18.535404,-18.535404,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1438,data\Actor_24\03-01-08-02-02-01-24.wav,surprised,-17.836590,-17.836590,-17.836590,-17.836590,-17.836590,-17.824371,-17.803312,-17.827566,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [315]:
# hold out 20% of the rows for validation; the remaining 80% are used for training
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['filepath', 'label']),  # numeric feature columns only
    df['label'],                             # emotion name of each row
    test_size=0.2,
    shuffle=True,
    random_state=42,                         # fixed seed keeps the split reproducible
)

X_train.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,218,219,220,221,222,223,224,225,226,227
1148,-25.653189,-25.653189,-25.653189,-25.653189,-25.653189,-25.653189,-25.653189,-25.653189,-25.653189,-25.653189,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
677,-16.186077,-16.186077,-16.186077,-16.186077,-16.186077,-16.186077,-16.186077,-16.186077,-16.186077,-16.186077,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
221,-15.161586,-15.161586,-15.161586,-15.161586,-15.161586,-15.161586,-15.161586,-15.161586,-15.161586,-15.161586,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
113,-19.642263,-19.642263,-19.642263,-19.642263,-19.642263,-19.642263,-19.642263,-19.642263,-19.642263,-19.642263,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
845,-13.947101,-14.531044,-15.090982,-14.969928,-14.906607,-15.177971,-14.709198,-14.394396,-14.194595,-14.864087,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1293,-16.140579,-16.444485,-16.444485,-16.302029,-16.444485,-16.444485,-16.444485,-15.763693,-15.243236,-16.430103,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
174,-19.254173,-19.254173,-19.254173,-19.254173,-19.254173,-19.254173,-19.254173,-19.254173,-19.254173,-19.254173,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
479,-14.281309,-14.782087,-15.179446,-14.877653,-15.576762,-15.649673,-14.3766,-13.651395,-13.175242,-13.773949,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
346,-24.008503,-24.008503,-24.008503,-24.008503,-24.008503,-24.008503,-23.46834,-22.172907,-22.580055,-23.948896,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1436,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,-18.309284,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [316]:
# Normalizing the values
#
# The mean and standard deviation are estimated on the training split only and
# then applied to both splits, so no statistics leak from the held-out data.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)

# A column that is constant in the training split (e.g. a zero-padded tail position
# that no training clip reaches) has std == 0 and would become NaN/inf after the
# division. Scale such columns by 1 instead so their training values stay at 0.
std[std == 0] = 1.0

X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

X_train[:10]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,218,219,220,221,222,223,224,225,226,227
1148,-2.606499,-2.647716,-2.684357,-2.695999,-2.708532,-2.711116,-2.733128,-2.731972,-2.740366,-2.735461,...,0.041458,0.041499,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476
677,0.452697,0.404067,0.400542,0.39215,0.388615,0.386758,0.37239,0.37381,0.373541,0.364191,...,0.041458,0.041499,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476
221,0.783751,0.734318,0.734376,0.726337,0.723775,0.721997,0.708456,0.709905,0.710515,0.699622,...,0.041458,0.041499,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476
113,-0.664133,-0.710056,-0.725671,-0.73525,-0.74207,-0.744192,-0.761351,-0.760027,-0.763262,-0.767408,...,0.041458,0.041499,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476
845,1.176199,0.937577,0.757383,0.788855,0.807191,0.716636,0.856854,0.961589,1.028576,0.797027,...,0.041458,0.041499,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476
1293,0.467399,0.320768,0.316338,0.354327,0.304078,0.302201,0.287624,0.512378,0.683659,0.284294,...,0.041458,0.041499,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476
174,-0.538725,-0.584953,-0.599211,-0.608656,-0.615107,-0.617199,-0.634045,-0.63271,-0.635612,-0.640343,...,0.041458,0.041499,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476
479,1.068203,0.856652,0.728556,0.818955,0.587951,0.562283,0.965957,1.205338,1.36386,1.153952,...,0.041458,0.041499,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476
346,-2.075037,-2.117541,-2.148429,-2.159506,-2.170476,-2.172934,-2.016427,-1.59023,-1.729556,-2.177454,...,0.041458,0.041499,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476
1436,-0.233395,-0.280362,-0.291314,-0.300435,-0.305988,-0.308008,-0.324091,-0.32273,-0.324821,-0.330974,...,0.041458,0.041499,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476,0.029476


In [254]:
# convert every split from a pandas object into a plain NumPy array
X_train, X_test, y_train, y_test = (
    np.array(split) for split in (X_train, X_test, y_train, y_test)
)

In [266]:
# mapping the labels into numbers: angry -> 0, calm -> 1, etc.
encoder = LabelEncoder()
encoder.fit(y_train)

# Pin the one-hot width to the number of classes the encoder learned. Without it,
# to_categorical infers the width from the largest index present in the array it is
# given, so a split that happens to lack the last class would get narrower vectors
# than the other split.
num_classes = len(encoder.classes_)

# encoding the mapped values into vectors (one hot encoding), e.g.:
# 0 -> [1, 0, 0, 0, 0, 0, 0, 0]
# 1 -> [0, 1, 0, 0, 0, 0, 0, 0]
# 5 -> [0, 0, 0, 0, 0, 1, 0, 0]
y_train_encoded = tf.keras.utils.to_categorical(encoder.transform(y_train), num_classes=num_classes)
y_test_encoded = tf.keras.utils.to_categorical(encoder.transform(y_test), num_classes=num_classes)

print(encoder.classes_)
print(y_test)
print(y_test_encoded)

['angry' 'calm' 'disgust' 'fearful' 'happy' 'neutral' 'sad' 'surprised']
['disgust' 'calm' 'calm' 'calm' 'angry' 'calm' 'calm' 'fearful'
 'surprised' 'calm' 'disgust' 'surprised' 'sad' 'fearful' 'sad' 'disgust'
 'angry' 'fearful' 'calm' 'disgust' 'surprised' 'neutral' 'neutral' 'sad'
 'fearful' 'sad' 'surprised' 'surprised' 'disgust' 'angry' 'surprised'
 'angry' 'happy' 'fearful' 'surprised' 'calm' 'disgust' 'angry' 'happy'
 'angry' 'calm' 'neutral' 'calm' 'angry' 'surprised' 'calm' 'calm'
 'neutral' 'fearful' 'sad' 'sad' 'surprised' 'surprised' 'angry' 'neutral'
 'calm' 'calm' 'surprised' 'happy' 'neutral' 'surprised' 'fearful'
 'surprised' 'surprised' 'fearful' 'happy' 'calm' 'disgust' 'disgust'
 'sad' 'happy' 'surprised' 'calm' 'happy' 'surprised' 'calm' 'angry'
 'surprised' 'sad' 'disgust' 'happy' 'happy' 'surprised' 'calm' 'happy'
 'angry' 'fearful' 'calm' 'happy' 'calm' 'neutral' 'calm' 'angry' 'angry'
 'neutral' 'disgust' 'surprised' 'happy' 'happy' 'surprised' 'disgust'
 'fearf

In [319]:
# creating the neural network model
model = tf.keras.Sequential()

# specifying the size of the input: one value per feature column.
# The shape is passed as an explicit tuple keyword, which is the documented form;
# a bare integer is not accepted by every Keras version.
model.add(Input(shape=(X_train.shape[1],)))

# two L2-regularised ReLU hidden layers, then an 8-unit softmax output layer
model.add(Dense(units=128, activation='relu', kernel_regularizer=tf.keras.regularizers.L2(0.04)))
model.add(Dense(units=64, activation='relu', kernel_regularizer=tf.keras.regularizers.L2(0.04)))
model.add(Dense(units=8, activation='softmax'))

model.summary()

Model: "sequential_45"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_87 (Dense)             (None, 128)               29312     
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_88 (Dense)             (None, 64)                8256      
_________________________________________________________________
dropout_4 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_89 (Dense)             (None, 8)                 520       
Total params: 38,088
Trainable params: 38,088
Non-trainable params: 0
_________________________________________________________________


In [320]:
# RMSprop with a slowly decaying learning rate: lr_t = 0.001 / (1 + 1e-6 * step).
# `learning_rate` replaces the deprecated `lr` alias, and the explicit schedule
# reproduces the former `decay=1e-6` argument, which newer Keras optimizers no
# longer accept.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=1e-6,
)
opt = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule)

# one-hot targets + softmax output -> categorical cross-entropy
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])


In [321]:
# train for 50 epochs in mini-batches of 16; the held-out split is passed as validation data
history = model.fit(
    X_train,
    y_train_encoded,
    batch_size=16,
    epochs=50,
    validation_data=(X_test, y_test_encoded),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [214]:
# sanity check: shapes of the training and held-out features and labels
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((1152, 228), (1152,), (288, 228), (288,))

In [229]:
# NOTE(review): with the cells above, y_test holds the string labels; the integer
# array stored as this cell's output appears to come from an earlier execution.
# Re-run after a kernel restart to refresh it.
y_test

array([2, 1, 1, 1, 0, 1, 1, 3, 7, 1, 2, 7, 6, 3, 6, 2, 0, 3, 1, 2, 7, 5,
       5, 6, 3, 6, 7, 7, 2, 0, 7, 0, 4, 3, 7, 1, 2, 0, 4, 0, 1, 5, 1, 0,
       7, 1, 1, 5, 3, 6, 6, 7, 7, 0, 5, 1, 1, 7, 4, 5, 7, 3, 7, 7, 3, 4,
       1, 2, 2, 6, 4, 7, 1, 4, 7, 1, 0, 7, 6, 2, 4, 4, 7, 1, 4, 0, 3, 1,
       4, 1, 5, 1, 0, 0, 5, 2, 7, 4, 4, 7, 2, 3, 0, 3, 6, 7, 4, 2, 3, 5,
       7, 0, 7, 1, 2, 0, 3, 2, 2, 3, 2, 1, 7, 3, 6, 0, 7, 4, 3, 3, 4, 4,
       6, 3, 4, 6, 1, 1, 2, 2, 0, 0, 4, 4, 0, 4, 0, 1, 7, 3, 3, 6, 1, 3,
       4, 5, 3, 6, 7, 3, 6, 0, 0, 6, 4, 5, 0, 1, 6, 7, 1, 3, 0, 2, 3, 1,
       0, 7, 4, 6, 4, 3, 4, 6, 5, 5, 7, 0, 0, 6, 2, 3, 6, 1, 6, 3, 7, 7,
       2, 1, 3, 6, 7, 6, 1, 0, 2, 6, 4, 7, 4, 2, 7, 3, 5, 3, 0, 0, 7, 7,
       6, 7, 2, 3, 6, 2, 1, 6, 6, 5, 6, 7, 6, 2, 7, 0, 0, 2, 0, 5, 2, 1,
       7, 5, 4, 4, 5, 1, 1, 6, 0, 6, 0, 1, 0, 2, 6, 7, 1, 1, 0, 1, 1, 1,
       0, 5, 2, 7, 7, 1, 4, 5, 6, 7, 6, 0, 0, 4, 0, 4, 0, 4, 6, 2, 6, 2,
       4, 6])