## Exercise - DL Tutorial 06

### student names:

Submit your solution by 16 June, 23:59, to manuel.milling@informatik.uni-augsburg.de or maurice.gerczuk@informatik.uni-augsburg.de

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import librosa
from os.path import basename
from glob import glob
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Conv2D, MaxPooling2D, BatchNormalization, Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix

In [None]:
# Speaker-based split: every speaker id belongs to exactly one partition.
# (An earlier variant also trained on speakers "12" and "13" instead of
# holding them out for validation.)
train_speakers = ["03", "08", "09", "10", "11"]
validation_speakers = ["12", "13"]
test_speakers = ["14", "15", "16"]

# Directory holding the .wav recordings (trailing slash is relied upon below).
data_path = "emodb/wav/"

# Maps the single-letter emotion code found in each file name to a class index.
# NOTE(review): presumably the EmoDB emotion initials - confirm against the
# dataset documentation.
label_dict = {
    "A": 0,
    "F": 1,
    "L": 2,
    "N": 3,
    "T": 4,
    "W": 5,
    "E": 6,
}
num_classes = len(label_dict)

# Every recording is cut or zero-padded to this many seconds.
cutoff_length = 4

2. Prepare the data and labels: cut or zero-pad every recording to 4 seconds, then extract log-mel spectrograms.

In [None]:
# Collect every recording in data_path. The "*" wildcard is required: without
# it the pattern only matches a file literally named ".wav", so nothing would
# be loaded. sorted() makes the processing order deterministic, because glob
# returns matches in arbitrary order.
files = sorted(glob(data_path + "*.wav"))
if not files:
    # Fail early with a clear message instead of an opaque np.stack error below.
    raise FileNotFoundError(
        "No .wav files found in '{}' - check data_path.".format(data_path)
    )

x_train = []
y_train = []
x_test = []
y_test = []
x_validation = []
y_validation = []

for file in files:
    filename = basename(file)
    # The first two characters of the file name are the speaker id; the
    # character at index 5 is the emotion code used as key into label_dict.
    speaker_id = filename[0:2]
    label_num = label_dict[filename[5]]

    signal, sr = librosa.load(file)

    # Bring every recording to exactly cutoff_length seconds:
    # zero-pad short signals at the end, then truncate long ones.
    target_len = cutoff_length * sr
    signal_len = signal.shape[0]
    if signal_len < target_len:
        signal = np.concatenate((signal, np.zeros(target_len - signal_len)))
    signal = signal[0:target_len]

    # Pass the actual sampling rate explicitly so the mel filter bank matches
    # the loaded audio instead of relying on matching library defaults.
    melspectrogram = librosa.feature.melspectrogram(y=signal, sr=sr)
    log_melspectrogram = librosa.power_to_db(melspectrogram)

    # Append a trailing channel axis so each sample is 3-D, as Conv2D expects.
    log_melspectrogram = np.expand_dims(log_melspectrogram, 2)

    if speaker_id in train_speakers:
        x_train.append(log_melspectrogram)
        y_train.append(label_num)
    elif speaker_id in validation_speakers:
        x_validation.append(log_melspectrogram)
        y_validation.append(label_num)
    else:
        # Any speaker not listed for training or validation goes to the test set.
        x_test.append(log_melspectrogram)
        y_test.append(label_num)

# Turn the per-file lists into arrays with the sample index as first axis.
x_train = np.stack(x_train)
x_validation = np.stack(x_validation)
x_test = np.stack(x_test)
y_train = np.array(y_train)
y_validation = np.array(y_validation)
y_test = np.array(y_test)

# One-hot encode the integer labels for the categorical cross-entropy loss.
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_validation = tf.keras.utils.to_categorical(y_validation, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [None]:
# Sanity check: report the shape of every feature and label array.
# The templates carry their own tab padding so the columns line up.
for shape_template, array in (
    ("x_train shape:\t\t{}", x_train),
    ("x_validation shape:\t{}", x_validation),
    ("x_test shape:\t\t{}", x_test),
    ("y_train shape:\t\t{}", y_train),
    ("y_validation shape:\t{}", y_validation),
    ("y_test shape:\t\t{}", y_test),
):
    print(shape_template.format(array.shape))

3. Implement a CNN-based model.

4. Train the model with early stopping. Evaluate it on the test data and plot the training history.

In [None]:
# Small CNN classifier; the input shape is taken from the training
# spectrograms (everything but the leading sample axis).
model = Sequential([
    # Block 1: 8 filters, 'same' padding, batch norm applied before the ReLU.
    Conv2D(8, (3, 3), padding='same', input_shape=x_train.shape[1:]),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Block 2: 16 filters, default padding, no batch norm.
    Conv2D(16, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Classifier head: flatten, then one softmax output per class.
    Flatten(),
    Dense(num_classes),
    Activation('softmax'),
])

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    optimizer=Adam(),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)
model.summary()