In [1]:
import os
import time
import joblib
import librosa
import numpy as np
import pandas as pd

In [4]:
wav = []

for subdir, dirs, files in os.walk('data/RawData/Ravdess'):
    for file in files:
        try:
            X, sample_rate = librosa.load(os.path.join(subdir,file),
                                         res_type='kaiser_fast')
            mfccs = np.mean(librosa.feature.mfcc(y = X, sr = sample_rate,
                                                n_mfcc = 40).T, axis=0)
            file_class = int(file[7:8]) - 1
            arr = mfccs, file_class
            wav.append(arr)
        except ValueError as err:
            print(err)
            continue

In [5]:
X, y = zip(*wav)
X, y = np.asarray(X), np.asarray(y)
print(X.shape, y.shape)

if not os.path.isdir('new/meta'):
    os.makedirs('new/meta')
joblib.dump(X, os.path.join('new/meta', 'X.joblib'))
joblib.dump(y, os.path.join('new/meta', 'y.joblib'))

(2452, 40) (2452,)


['new/meta\\y.joblib']

In [35]:
X = pd.DataFrame(X)

In [36]:
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,-700.398926,58.630211,-3.025852,16.040241,4.248529,3.869935,-6.381716,-0.188635,-13.735004,-0.319724,...,-1.411359,-2.769772,-2.042009,-2.522663,-2.507448,-2.250499,-0.381506,-2.481059,-2.791022,-2.244865
1,-653.169006,58.028076,-12.581207,11.818786,-7.681562,-0.617142,-8.337758,-5.823570,-6.547592,1.458057,...,0.788761,2.570493,2.558634,3.708506,2.790020,2.201920,-1.021456,0.819200,-0.277811,0.207586
2,-617.052307,60.103371,-5.984258,13.886285,1.120427,0.511750,-14.841356,-4.016369,-5.575839,-6.309851,...,-1.133367,-2.019846,-1.024786,0.331097,0.531833,-1.621019,-2.158077,-2.502774,-0.676133,2.089097
3,-698.721130,47.088486,-11.333614,12.963089,-8.005651,-1.252229,-10.009068,-9.434125,-10.318874,-0.864674,...,-0.299306,-0.004270,-1.408216,0.958172,2.574661,1.877764,1.608618,0.222401,4.338524,4.266795
4,-715.922607,71.976791,4.457525,18.602497,8.404046,3.621954,-1.974365,0.022673,-2.678083,4.232803,...,0.505697,0.424780,0.972097,0.210004,0.667769,1.918957,0.174543,-1.768463,-1.512280,-0.253280
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2447,-405.257812,41.294205,-30.586042,-0.538227,-18.484348,-14.656284,-16.151356,-13.920813,-16.820213,3.324283,...,-1.249055,0.077632,-0.483442,-0.436015,0.735576,1.786816,1.513320,3.222856,4.312741,5.172601
2448,-473.056061,53.553688,-20.607430,10.907098,-1.008979,-5.573545,-19.404833,5.218593,-28.027699,-6.464190,...,1.640660,-2.040031,-0.220178,-0.151539,-3.876288,1.615301,4.084427,9.134243,11.374112,14.116317
2449,-474.610382,24.998922,-24.003815,2.117282,-15.958261,-8.645466,-19.333958,-6.487021,-26.644005,0.500758,...,-1.764503,2.385815,-0.887787,-4.043890,-1.560124,0.833699,1.932070,-1.613097,1.777461,7.443244
2450,-463.701447,60.063808,-25.744654,10.314075,4.596226,-9.529220,-17.214312,-7.799369,-18.088699,-0.637278,...,-0.438399,-0.092735,-1.187605,-1.340231,-0.190074,1.682774,7.134923,9.182275,12.354645,11.913918


In [37]:
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout, Activation
from tensorflow.keras.models import Sequential
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

In [38]:
model = Sequential()

model.add(Conv1D(64, 5, padding='same',
                input_shape=(40,1)))

model.add(Activation('relu'))

model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(8))
model.add(Activation('softmax'))

In [39]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 40, 64)            384       
_________________________________________________________________
activation_2 (Activation)    (None, 40, 64)            0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 40, 64)            0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2560)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 20488     
_________________________________________________________________
activation_3 (Activation)    (None, 8)                 0         
Total params: 20,872
Trainable params: 20,872
Non-trainable params: 0
__________________________________________________

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,random_state=42)

In [41]:
x_traincnn = np.expand_dims(X_train, axis = 2)
x_testcnn = np.expand_dims(X_test, axis = 2)

In [42]:
print(x_traincnn.shape, x_testcnn.shape)

(1716, 40, 1) (736, 40, 1)


In [43]:
print(y_train.shape, y_test.shape)

(1716,) (736,)


In [44]:
from tensorflow.keras.optimizers import RMSprop

In [51]:
model.compile(loss = 'sparse_categorical_crossentropy',
                  optimizer = 'rmsprop',
                  metrics = ['accuracy'])

In [52]:
y_train

array([4, 4, 3, ..., 6, 7, 4])

In [53]:
history = model.fit(x_traincnn, y_train,
                    batch_size = 64,
                    epochs = 100,
                    verbose = 1,
                    validation_data = (x_testcnn, y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [143]:
X, sample_rate = librosa.load(os.path.join('test/surprised.wav'), res_type='kaiser_fast')

mfccs = np.mean(librosa.feature.mfcc(y = X, sr = sample_rate, n_mfcc = 40).T, axis=0)

In [144]:
mfccs

array([-6.64376404e+02,  6.04857864e+01, -4.18858099e+00,  1.69400635e+01,
       -4.00226861e-01,  2.59444976e+00, -7.45956182e+00, -4.03830099e+00,
       -1.40770035e+01,  2.06803262e-01, -3.32057571e+00, -1.55312514e+00,
       -3.92186618e+00, -3.76724315e+00, -3.78725290e+00,  2.12958837e+00,
       -7.63181829e+00, -2.46205378e+00, -1.55082119e+00, -6.30556107e-01,
       -8.21463680e+00, -2.07641864e+00, -3.25763845e+00, -4.19562912e+00,
       -3.53725982e+00, -2.67386293e+00, -4.61865854e+00, -1.77279270e+00,
       -1.58636880e+00, -1.73208976e+00, -2.31740212e+00, -2.67540956e+00,
       -2.37380433e+00, -4.09476566e+00, -3.43976617e+00, -3.96298289e+00,
       -4.75864887e-01, -2.83636737e+00, -2.15631056e+00, -2.89204311e+00],
      dtype=float32)

In [145]:
mfccs = pd.DataFrame([mfccs])

In [146]:
mfccs

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,-664.376404,60.485786,-4.188581,16.940063,-0.400227,2.59445,-7.459562,-4.038301,-14.077003,0.206803,...,-2.317402,-2.67541,-2.373804,-4.094766,-3.439766,-3.962983,-0.475865,-2.836367,-2.156311,-2.892043


In [147]:
test_x = np.expand_dims(mfccs, axis = 2)

In [148]:
test_x.shape

(1, 40, 1)

In [149]:
emotion = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful', 6: 'disgust', 7: 'surprised'}

In [150]:
pred = model.predict(test_x)

In [151]:
emotion[pred.argmax()]

'calm'