In [None]:
import tensorflow
import tensorflow.keras as keras
from keras.models import Sequential
from keras.layers import Reshape, Permute, GRU
from keras.layers import Dense, Activation, Conv2D, MaxPool2D, BatchNormalization, Flatten, Dropout, LSTM
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau

In [None]:
from google.colab import drive

# Project directory on Google Drive; the data paths used later in the notebook
# are built relative to this prefix.
root_path = "/content/drive/My Drive/Audio_Mood_Classification"

# Attach Google Drive to the Colab runtime so that root_path is reachable.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import os

In [None]:
# Load the train, validation and test splits of the 30-second MFCC features.
# Every split is stored as its own .npy file inside root_path/folder.

folder = "train_test_data/mfcc30s"

# NOTE: the order of this list must match the unpacking order below.
file_names = ['X_train_mfcc30s.npy',
              'X_val_mfcc30s.npy',
              'X_test_mfcc30s.npy',
              'y_train_mfcc30s.npy',
              'y_val_mfcc30s.npy',
              'y_test_mfcc30s.npy']

# Bind each array to an explicitly written variable name instead of injecting
# names through globals(): the definitions stay visible to readers, linters
# and auto-completion, and a typo fails loudly right here.
(X_train_mfcc30s, X_val_mfcc30s, X_test_mfcc30s,
 y_train_mfcc30s, y_val_mfcc30s, y_test_mfcc30s) = [
    np.load(os.path.join(root_path, folder, name)) for name in file_names
]

# Fail early, with a readable message, if features and labels do not line up.
assert len(X_train_mfcc30s) == len(y_train_mfcc30s), "train: feature/label count mismatch"
assert len(X_val_mfcc30s) == len(y_val_mfcc30s), "val: feature/label count mismatch"
assert len(X_test_mfcc30s) == len(y_test_mfcc30s), "test: feature/label count mismatch"

In [None]:
# Report the shape of every loaded split: a "<name>:" line followed by the
# shape, with one blank line between consecutive splits.
split_arrays = {
    'X_train_mfcc30s': X_train_mfcc30s,
    'X_val_mfcc30s': X_val_mfcc30s,
    'X_test_mfcc30s': X_test_mfcc30s,
    'y_train_mfcc30s': y_train_mfcc30s,
    'y_val_mfcc30s': y_val_mfcc30s,
    'y_test_mfcc30s': y_test_mfcc30s,
}

print("\n\n".join(f"{label}:\n{array.shape}" for label, array in split_arrays.items()))


X_train_mfcc30s:
(46, 1292, 20, 1)

X_val_mfcc30s:
(16, 1292, 20, 1)

X_test_mfcc30s:
(16, 1292, 20, 1)

y_train_mfcc30s:
(46, 10)

y_val_mfcc30s:
(16, 10)

y_test_mfcc30s:
(16, 10)


In [None]:

# CNN 1: Shallow CNN

def get_CNN_shallow(input_shape, num_classes=10, output_activation='softmax'):
    """Build the (uncompiled) shallow CNN: three conv blocks plus a dense head.

    Every convolutional block is Conv2D (ReLU) -> MaxPool2D (stride 2, 'same'
    padding) -> BatchNormalization -> Dropout(0.1). The flattened feature maps
    then go through Dense(128, ReLU), Dropout(0.3) and the output layer.

    Args:
        input_shape: shape of a single input sample without the batch axis.
        num_classes: number of units in the output layer. Defaults to 10, the
            value that was previously hard-coded.
        output_activation: activation of the output layer. The default
            'softmax' produces one probability distribution over the classes,
            i.e. it models exactly one label per sample. For multi-label
            targets pass 'sigmoid' and compile the model with a binary
            cross-entropy loss.

    Returns:
        The uncompiled Sequential model.
    """
    model = Sequential()

    # 1st conv block. Only the first layer of a Sequential model needs the
    # input shape; later layers infer theirs from the preceding layer.
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))  # num_filters, kernel size
    model.add(MaxPool2D((3, 3), strides=(2, 2), padding='same'))
    model.add(BatchNormalization())  # quicker convergence and more reliable model
    model.add(Dropout(0.1))

    # 2nd conv block
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPool2D((3, 3), strides=(2, 2), padding='same'))
    model.add(BatchNormalization())
    model.add(Dropout(0.1))

    # 3rd conv block
    model.add(Conv2D(128, (2, 2), activation='relu'))
    model.add(MaxPool2D((2, 2), strides=(2, 2), padding='same'))
    model.add(BatchNormalization())
    model.add(Dropout(0.1))

    # Collapse the feature maps into one vector per sample
    model.add(Flatten())

    # Fully connected layer: combines the extracted features for classification
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.3))  # dropout prob = 30 %, to reduce overfitting

    # Output layer: one unit per class
    model.add(Dense(num_classes, activation=output_activation, name='output'))

    return model

In [None]:
# TODO (Julian): please check whether the model is set up for multi-label classification :)

In [None]:
# Instantiate the shallow CNN for the shape of one training sample (axes 1-3
# of the training array) and compile it with Adam and categorical cross-entropy.
input_shape = X_train_mfcc30s.shape[1:4]
CNN_shallow = get_CNN_shallow(input_shape)

optimizer = keras.optimizers.Adam(learning_rate=0.001)

CNN_shallow.compile(
    optimizer=optimizer,
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Fit the shallow CNN: 30 epochs, mini-batches of 32, monitored on the
# validation split.
CNN_shallow.fit(
    x=X_train_mfcc30s,
    y=y_train_mfcc30s,
    batch_size=32,
    epochs=30,
    validation_data=(X_val_mfcc30s, y_val_mfcc30s),
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f0eeb2eedd0>

In [None]:
# Print the layer-by-layer overview of the model (layer type, output shape,
# parameter count).
CNN_shallow.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 1290, 18, 32)      320       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 645, 9, 32)       0         
 2D)                                                             
                                                                 
 batch_normalization_6 (Batc  (None, 645, 9, 32)       128       
 hNormalization)                                                 
                                                                 
 dropout_8 (Dropout)         (None, 645, 9, 32)        0         
                                                                 
 conv2d_7 (Conv2D)           (None, 643, 7, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 322, 4, 64)      

In [None]:
# Score the trained model on the held-out test split; evaluate() returns the
# loss first, followed by the metric passed to compile().
test_error, test_accuracy = CNN_shallow.evaluate(
    x=X_test_mfcc30s,
    y=y_test_mfcc30s,
    verbose=1,
)



In [None]:
# CNN 2: VGG-like CNN

In [None]:
# CNN 3 (FCN-4): FCN structure from Choi et al. 2016
# input shape in paper: (96x1366x1)
# our input shape: (1292x20x1), i.e. X_train_mfcc30s.shape[1:]
#
# Changes needed to make the network trainable on our data:
#   * every convolution uses padding="same": with unpadded 3x3 kernels the
#     20-wide MFCC axis shrinks to 1 before the last convolution and the
#     model cannot be built.
#   * a global pooling layer and a Dense output layer form the classification
#     head, so the output is (batch, num_labels) like the targets instead of
#     a 4-D feature map.
#   * the last convolution uses 2048 feature maps (was 2024, presumably a typo
#     for the value used in the paper -- TODO confirm).

cnn3 = keras.models.Sequential([
    # Declaring the input shape up front builds the model immediately, so
    # shape problems surface here rather than inside fit().
    keras.Input(shape=X_train_mfcc30s.shape[1:]),
    keras.layers.Conv2D(128, kernel_size=3, padding="same", activation="relu"),
    keras.layers.MaxPool2D((4,1), strides=(2,2), padding="same"),
    keras.layers.Conv2D(384, kernel_size=3, padding="same", activation="relu"),
    keras.layers.MaxPool2D((5,3), strides=(2,2), padding="same"),
    keras.layers.Conv2D(768, kernel_size=3, padding="same", activation="relu"),
    keras.layers.MaxPool2D((8,3), strides=(2,2), padding="same"),
    keras.layers.Conv2D(2048, kernel_size=3, padding="same", activation="relu"),
    keras.layers.MaxPool2D((8,4), strides=(2,2), padding="same"),
    # Collapse the remaining spatial grid to one value per feature map.
    keras.layers.GlobalMaxPooling2D(),
    # output in paper: (50x1); here one sigmoid unit per label column of y.
    # Sigmoid units are independent, which matches the binary cross-entropy
    # loss below (multi-label setup).
    keras.layers.Dense(y_train_mfcc30s.shape[1], activation="sigmoid"),
])

# NOTE(review): with a multi-unit output, 'accuracy' may not measure per-label
# correctness; consider 'binary_accuracy' for multi-label targets.
cnn3.compile(loss=keras.losses.binary_crossentropy,
              optimizer="adam",
              metrics=['accuracy'])

# Train on the MFCC training split (the previous call referenced the undefined
# name X_train).
cnn3.fit(X_train_mfcc30s, y_train_mfcc30s, batch_size=64, epochs=30, verbose=1, validation_data=(X_val_mfcc30s, y_val_mfcc30s))