In [1]:
import matplotlib.pyplot as plt
import librosa.display as disp
import numpy as np

from data_utils.data_loader import Data_loader, EMO_DICT

import tensorflow as tf
from tensorflow.keras import layers


from sklearn.model_selection import train_test_split, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# 1) Dataflair reproduce (MLP)

In [2]:
dl= Data_loader()
data = dl.load_data()

##### Load only samples only express 'calm', 'happy', 'fearful' or 'disgust'

In [101]:
observed_emotions=['calm', 'happy', 'fearful', 'disgust']
N_observed = 4
EMO_DICT= {0:'neutral', 1:'calm', 2:'happy', 3:'sad', 4:'angry', 5:'fearful', 6:'disgust', 7:'surprised'}
NR_TO_NR = {1:0, 2:1, 5:2, 6:3}
x = []
y = []
for d in data:
    emot_nr = np.argmax(d['emotion'])
    if EMO_DICT[emot_nr] in observed_emotions:
        x.append(np.hstack((np.mean(d['mfcc'],axis=0), np.mean(d['chroma'],axis=0), np.mean(d['mel'],axis=0))))
        
        y.append(np.eye(N_observed, dtype=np.int32)[NR_TO_NR[emot_nr]-1])
    
    
    
# splitt for train
X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2)


## Keras

In [76]:
model_keras = tf.keras.Sequential([layers.Dense(300,input_shape=[len(X_train[0])], kernel_initializer="he_normal", activation="relu") 
                             ,layers.Dense(4,activation='softmax')])

optimizer = tf.keras.optimizers.Adam(lr=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.1)
model_keras.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
        
model_keras.summary()

Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 300)               54300     
_________________________________________________________________
dense_8 (Dense)              (None, 4)                 1204      
Total params: 55,504
Trainable params: 55,504
Non-trainable params: 0
_________________________________________________________________


In [77]:
_ = model_keras.fit(np.array(X_train),np.array(y_train),batch_size=256, epochs=300,shuffle=True, verbose=0)

In [78]:
model_keras.evaluate(np.array(X_test),np.array(y_test))



[0.7620943188667297, 0.7792207598686218]

## Sklearn

In [79]:
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)
model.fit(X_train,y_train)

MLPClassifier(activation='relu', alpha=0.01, batch_size=256, beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(300,), learning_rate='adaptive',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [80]:
y_pred=model.predict(X_test)

#DataFlair - Calculate the accuracy of our model
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)
#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 68.18%


# 2) CNN 

In [118]:
X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2)

X_train = np.expand_dims(np.array(X_train), axis=2)
X_test = np.expand_dims(np.array(X_test), axis=2)

y_train, y_test = np.array(y_train), np.array(y_test)

In [119]:
# Normalize
#X_train = np.expand_dims(np.array(X_train), axis=2)
#X_test = np.expand_dims(np.array(X_test), axis=2)

mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)

X_train = (X_train - mean)/std
X_test = (X_test - mean)/std


In [120]:
print(X_train.shape)
print(X_test.shape)

(614, 180, 1)
(154, 180, 1)


In [123]:
# New model
model = tf.keras.Sequential()
model.add(layers.Conv1D(256, 8, padding='same',input_shape=(X_train.shape[1],1), name="C1"))  # X_train.shape[1] = No. of Columns
model.add(layers.Activation('relu'))

model.add(layers.Conv1D(256, 8, padding='same', name="C2"))
model.add(layers.Activation('relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.25))
model.add(layers.MaxPooling1D(pool_size=(8)))

for i in range(3):
    model.add(layers.Conv1D(128, 8, padding='same', name="C"+str(i+3)))
    model.add(layers.Activation('relu'))

model.add(layers.Conv1D(128, 8, padding='same', name="C6"))
model.add(layers.Activation('relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.25))
model.add(layers.MaxPooling1D(pool_size=(8)))

model.add(layers.Conv1D(64, 8, padding='same', name="C7"))
model.add(layers.Activation('relu'))

model.add(layers.Conv1D(64, 8, padding='same', name="C8"))
model.add(layers.Activation('relu'))
model.add(layers.Flatten())

model.add(layers.Dense(4, name="OUT")) # Target class number
model.add(layers.Activation('softmax'))

model.summary()

Model: "sequential_28"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
C1 (Conv1D)                  (None, 180, 256)          2304      
_________________________________________________________________
activation_169 (Activation)  (None, 180, 256)          0         
_________________________________________________________________
C2 (Conv1D)                  (None, 180, 256)          524544    
_________________________________________________________________
activation_170 (Activation)  (None, 180, 256)          0         
_________________________________________________________________
batch_normalization_40 (Batc (None, 180, 256)          1024      
_________________________________________________________________
dropout_40 (Dropout)         (None, 180, 256)          0         
_________________________________________________________________
max_pooling1d_40 (MaxPooling (None, 22, 256)         

In [124]:
# opt = tf.keras.optimizers.SGD(lr=0.0001, momentum=0.0, decay=0.0, nesterov=False)
# opt = tf.keras.optimizers.Adam(lr=0.0001)
opt = tf.keras.optimizers.RMSprop(lr=0.00001, decay=1e-6)

model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model_history=model.fit(X_train, y_train, batch_size=16, epochs=50, validation_data=(X_test, y_test), shuffle=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
model.evaluate(X_test,y_test)

# 3) RNN/LSTM/GRU

##### Load only samples only express 'calm', 'happy', 'fearful' or 'disgust'

In [187]:
observed_emotions=['calm', 'happy', 'fearful', 'disgust']
N_observed = 4
EMO_DICT= {0:'neutral', 1:'calm', 2:'happy', 3:'sad', 4:'angry', 5:'fearful', 6:'disgust', 7:'surprised'}
NR_TO_NR = {1:0, 2:1, 5:2, 6:3}
x = []
y = []
for d in data:
    emot_nr = np.argmax(d['emotion'])
    if EMO_DICT[emot_nr] in observed_emotions:      
        x.append(np.hstack((d['mfcc'], d['chroma'], d['mel'])))
        
        y.append(np.eye(N_observed, dtype=np.int32)[NR_TO_NR[emot_nr]-1])
    
# splitt for train
X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2)

In [188]:
# transform lists to numpy arrays
X_train, X_test, y_train, y_test = np.array(X_train), np.array(X_test), np.array(y_train), np.array(y_test)

# pad
X_train = tf.keras.preprocessing.sequence.pad_sequences(X_train)
X_test = tf.keras.preprocessing.sequence.pad_sequences(X_test,X_train.shape[1])

In [189]:
# Normalize
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)

X_train = np.nan_to_num((X_train - mean)/std, nan=0.0)
X_test = np.nan_to_num((X_test - mean)/std, nan=0.0)
# X_test = (X_test - mean)/std

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

  """


(614, 165, 180) (154, 165, 180) (614, 4) (154, 4)


  
  


In [196]:
model = tf.keras.Sequential()

# Add a LSTM layer with 300 internal units.
model.add( tf.keras.layers.LSTM(300, input_shape=(None, X_train.shape[2])))

# Add a Dense layer with 64 units.
model.add(layers.Dense(64, kernel_initializer="he_normal"))
model.add(layers.BatchNormalization())

model.add(layers.Dense(4, activation='softmax'))

model.summary()

Model: "sequential_41"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_12 (LSTM)               (None, 300)               577200    
_________________________________________________________________
dense_27 (Dense)             (None, 64)                19264     
_________________________________________________________________
batch_normalization_52 (Batc (None, 64)                256       
_________________________________________________________________
dense_28 (Dense)             (None, 4)                 260       
Total params: 596,980
Trainable params: 596,852
Non-trainable params: 128
_________________________________________________________________


In [197]:
opt = tf.keras.optimizers.Adam(lr=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.1)
#opt = tf.keras.optimizers.SGD(lr=0.0001, momentum=0.0, decay=0.0, nesterov=False)
model.compile(loss='categorical_crossentropy', 
              optimizer=opt,
              metrics=['accuracy'])


In [198]:
_=model.fit(X_train, y_train, batch_size=16, epochs=25, validation_data=(X_test, y_test), shuffle=True)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [199]:
model.evaluate(X_test,y_test)



[nan, 0.649350643157959]

# Kfold Cross Validation
### Use either the data from CNN to prepare, or use the data from LSTM from previos sections

In [8]:
def get_model():
    model = tf.keras.Sequential()
    model.add(layers.Conv1D(256, 8, padding='same',input_shape=(inputs[train].shape[1],1), name="C1"))  
    model.add(layers.Activation('relu'))

    model.add(layers.Conv1D(256, 8, padding='same', name="C2"))
    model.add(layers.Activation('relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.25))
    model.add(layers.MaxPooling1D(pool_size=(8)))

    for i in range(3):
        model.add(layers.Conv1D(128, 8, padding='same', name="C"+str(i+3)))
        model.add(layers.Activation('relu'))

    model.add(layers.Conv1D(128, 8, padding='same', name="C6"))
    model.add(layers.Activation('relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.25))
    model.add(layers.MaxPooling1D(pool_size=(8)))

    model.add(layers.Conv1D(64, 8, padding='same', name="C7"))
    model.add(layers.Activation('relu'))

    model.add(layers.Conv1D(64, 8, padding='same', name="C8"))
    model.add(layers.Activation('relu'))
    model.add(layers.Flatten())

    model.add(layers.Dense(4, name="OUT")) # Target class number
    model.add(layers.Activation('softmax'))

    # compile model
    opt = tf.keras.optimizers.RMSprop(lr=0.00001, decay=1e-6)
    model.compile(optimizer=opt,
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return model



In [None]:
def LSTM_model():
  model = tf.keras.Sequential()

  # Add a LSTM layer with 300 internal units.
  model.add( tf.keras.layers.LSTM(300, input_shape=(None, 180)))

  # Add a Dense layer with 64 units.
  model.add(layers.Dense(64, kernel_initializer="he_normal"))
  model.add(layers.BatchNormalization())

  model.add(layers.Dense(4, activation='softmax'))

  opt = tf.keras.optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0, amsgrad=False, clipnorm=1.)
 
  model.compile(loss='categorical_crossentropy', 
                optimizer=opt,
                metrics=['accuracy'])
  return model

In [6]:
inputs = np.concatenate((X_train,X_test))
targets = np.concatenate((y_train,y_test))

In [7]:

seed =7
np.random.seed(seed)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
tscv = TimeSeriesSplit(n_splits=3)
TimeSeriesSplit(max_train_size=None, n_splits=3)



## Kfold for CNN

In [9]:

CV_scores = []
acc_per_fold = []
loss_per_fold = []
fold_no =1
for train, test in kfold.split(x,y):
    model = get_model()
    
    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ')
    model_history=model.fit(inputs[train], targets[train], batch_size=10, epochs=150, verbose=0, validation_split=0.15)
    
    #evaluate model
    scores = model.evaluate(inputs[test], targets[test], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])
    fold_no+=1
            

------------------------------------------------------------------------
Training for fold 1 
Score for fold 1: loss of 1.2654606103897095; accuracy of 46.75324559211731%
------------------------------------------------------------------------
Training for fold 2 
Score for fold 2: loss of 1.1848477125167847; accuracy of 57.14285969734192%
------------------------------------------------------------------------
Training for fold 3 
Score for fold 3: loss of 1.2830265760421753; accuracy of 48.051947355270386%
------------------------------------------------------------------------
Training for fold 4 
Score for fold 4: loss of 1.2638294696807861; accuracy of 45.45454680919647%
------------------------------------------------------------------------
Training for fold 5 
Score for fold 5: loss of 1.262800693511963; accuracy of 50.64935088157654%
------------------------------------------------------------------------
Training for fold 6 
Score for fold 6: loss of 1.2728854417800903; accur

In [13]:
av_loss = np.mean(loss_per_fold)
av_acc = np.mean(acc_per_fold) 
print("Kfold average:\n loss:", av_loss, "\n accuracy:", av_acc)

Kfold average:
 loss: 1.2558023810386658 
 accuracy: 48.70300829410553


In [None]:
CV_scores = []
acc_per_fold = []
loss_per_fold = []
fold_no =1
plt.figure(figsize=(20,10))
for train, test in tscv.split(x,y):
    
    model = LSTM_model()
    
    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ')
    model_history=model.fit(inputs[train], targets[train], batch_size=16, epochs=30)

    # plot
    plt.subplot(121)
    plt.plot(model_history.history['loss'],label="training data, fold:" +str(fold_no))
    plt.grid()
    plt.subplot(122)
    plt.plot(model_history.history['accuracy'],label="training data, fold:" +str(fold_no))
    plt.grid()
    

    #evaluate model
    scores = model.evaluate(inputs[test], targets[test], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])
    fold_no+=1
plt.grid()
plt.legend()
plt.show()       

In [None]:
av_loss = np.mean(loss_per_fold)
av_acc = np.mean(acc_per_fold) 
print("Kfold average:\n loss:", av_loss, "\n accuracy:", av_acc)