In [None]:
import os
from scipy.io import wavfile
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import keras
from keras.layers import Conv2D, MaxPool2D, Flatten, LSTM
from keras.layers import Dropout, Dense, TimeDistributed
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
from keras.callbacks import ModelCheckpoint
import librosa
from tqdm import tqdm

In [None]:
# Number of target classes: used as the one-hot width when the labels are
# encoded and as the unit count of the model's final softmax layer.
num_outputs = 8

In [None]:
# Load every female-speaker clip from the RAVDESS folder, turn it into a mel
# spectrogram and collect (spectrogram, emotion_index) pairs. Once all clips
# are loaded, the spectrograms are min-max scaled with the global extrema.
data_dir = 'ravdess'
spectrograms = []
first = True

for filename in tqdm(os.listdir(data_dir)):
    # splitext tolerates entries without any '.' (indexing split('.')[1]
    # raised IndexError on those) and only accepts a genuine '.wav' suffix.
    stem, extension = os.path.splitext(filename)
    if extension != '.wav':
        continue

    fields = stem.split('-')
    gender_id = int(fields[-1])
    # only use female voices (even value in the last filename field)
    if gender_id % 2 != 0:
        continue

    # sr=None keeps the file's native sampling rate; only the first 2.5 s are read.
    data, rate = librosa.load(os.path.join(data_dir, filename), duration=2.5, sr=None)
    spectrogram = librosa.feature.melspectrogram(y=data, sr=rate)

    # Track the global minimum / maximum over all clips for the scaling below.
    if first:
        # `shape` comes from the first clip only; the later reshape assumes
        # every clip produces the same shape -- TODO confirm no clip is
        # shorter than 2.5 s.
        shape = spectrogram.shape
        _min = np.amin(spectrogram)
        _max = np.amax(spectrogram)
        first = False
    else:
        _min = min(np.amin(spectrogram), _min)
        _max = max(np.amax(spectrogram), _max)

    # Third filename field is the 1-based emotion code; shift it to 0-based.
    emotion = int(fields[2]) - 1
    spectrograms.append((spectrogram, emotion))

if not spectrograms:
    # Without this guard `_min`, `_max` and `shape` would be undefined and the
    # failure would surface later with a far less helpful message.
    raise ValueError("no matching .wav files found in '%s'" % data_dir)

# Min-max scale each spectrogram with the global extrema. The previous loop
# iterated over the (spectrogram, emotion) tuples and threw the result away,
# so the data reached the model unnormalised.
value_range = _max - _min
spectrograms = [
    ((spec - _min) / value_range, label)
    for spec, label in spectrograms
]

In [None]:
# Randomise the sample order in place (no seed is set in the visible cells,
# so the order differs from run to run).
np.random.shuffle(spectrograms)

# Every sample is used for training; a separate hold-out test split is
# currently disabled.
train = spectrograms

# Split the (spectrogram, emotion) pairs into two parallel tuples.
x_train, y_train = zip(*train)

# CNN input: append a trailing channel axis to each spectrogram.
# (For an RNN the spectrograms would instead stay 2-D: (shape[0], shape[1]).)
cnn_sample_shape = (shape[0], shape[1], 1)
x_train = np.array([spec.reshape(cnn_sample_shape) for spec in x_train])

# One-hot encode the integer emotion labels into `num_outputs` columns.
y_train = np.array(to_categorical(y_train, num_outputs))

In [None]:
# CNN classifier: four stacked 3x3 'same'-padded ReLU convolutions with
# doubling filter counts, one max-pool, dropout, then a dense head.
conv_kwargs = dict(kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')

model = Sequential()
# The first layer declares the input: one single-channel spectrogram.
model.add(Conv2D(16, input_shape=(shape[0], shape[1], 1), **conv_kwargs))
for n_filters in (32, 64, 128):
    model.add(Conv2D(n_filters, **conv_kwargs))
model.add(MaxPool2D(pool_size=2, strides=2))
model.add(Dropout(0.5))
model.add(Flatten())
for n_units in (128, 64):
    model.add(Dense(n_units, activation='relu'))
# Output layer: one softmax unit per class.
model.add(Dense(num_outputs, activation='softmax'))
model.summary()

# The optimizer is given by name; to tune the learning rate, pass a
# keras.optimizers.Adam instance here instead of the string.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [None]:
# Train the model and keep the per-epoch metrics for the plots that follow.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

In [None]:
# Accuracy per epoch for the training data and the validation split.
for metric_key, curve_label in (
    ('categorical_accuracy', 'train'),
    ('val_categorical_accuracy', 'validation'),
):
    plt.plot(history.history[metric_key], label=curve_label)

plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('model accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Loss per epoch for the training data and the validation split.
for metric_key, curve_label in (
    ('loss', 'train'),
    ('val_loss', 'validation'),
):
    plt.plot(history.history[metric_key], label=curve_label)

plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('model loss')
plt.legend(loc='upper left')
plt.show()