Based on https://github.com/Hbbbbbby/EmotionRecognition_2Dcnn-lstm. 

In [1]:
import os
import numpy as np
import pandas as pd
from sklearn import preprocessing
import librosa

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.utils import normalize, to_categorical
from keras.models import load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint

# import own functions
from ipynb.fs.full.Functions import load_train_test_data, model_eval, load_model

In [2]:
# Directory holding the audio files; passed to load_data() further down.
data_path = '../data/emodb/wav'

In [3]:
def get_log_mel_spectrogram(path, n_fft=2048, hop_length=512, sample_rate=16000, duration=8, n_mels=128):
    """
    Extract a flattened log mel spectrogram from a fixed-length excerpt of an audio file.

    The signal is loaded with the given sample rate and duration and then zero-padded
    (or trimmed) to exactly int(sr * duration) samples, so every file produces a
    feature vector of the same length.

    path: path of the audio file to load.
    n_fft: FFT window size passed to librosa.feature.melspectrogram.
    hop_length: hop length passed to librosa.feature.melspectrogram.
    sample_rate: sampling rate passed to librosa.load.
    duration: length of the excerpt in seconds; may be an int or a float.
    n_mels: number of mel bands passed to librosa.feature.melspectrogram.
    returns: 1-D numpy array holding the dB-scaled mel spectrogram, flattened.
    """
    y, sr = librosa.load(path, sr=sample_rate, duration=duration)

    # sr * duration is a float for fractional durations, but np.zeros and slicing need an int
    target_length = int(sr * duration)
    file_length = np.size(y)

    # pad shorter signals with zeros and trim longer ones to ensure the same length for every file
    if file_length < target_length:
        y = np.concatenate((y, np.zeros(target_length - file_length)), axis=0)
    elif file_length > target_length:
        y = y[:target_length]

    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)

    # NOTE(review): melspectrogram returns a power spectrogram by default, for which librosa
    # provides power_to_db. amplitude_to_db is kept so the features stay identical to the
    # ones earlier results were produced with -- confirm which conversion is intended.
    log_mel_spectrogram = librosa.amplitude_to_db(mel_spectrogram)

    # flatten to one dimension so the spectrogram fits into a single DataFrame cell
    log_mel_spectrogram = log_mel_spectrogram.reshape((-1,))

    return log_mel_spectrogram

In [40]:
def load_data(path, train_test_val='ours'):
    """
    Loads the log mel spectrograms and labels of all .wav files in the path and splits
    them into train, test and validation sets.

    path: directory containing the audio files. The sixth character of each file name
          is looked up in the emotion code table to obtain the label.
    train_test_val: which split scheme to use.
          'paper': load_train_test_data with test_size=0.2, then the train part is split
                   again with test_size=0.2 into train and validation.
          'ours':  load_train_test_data with test_size=0.3, then the test part is split
                   with test_size=0.5 into validation and test.
    returns: X_train, X_test, X_val, y_train, y_test, y_val. Each X is a numpy array of
             shape (samples, 128, 251, 1); each y is the `.values` of the matching label split.
    raises: ValueError if train_test_val is neither 'paper' nor 'ours'.
    """
    # fail fast: without this check an unknown scheme only surfaces as an unbound
    # variable after all spectrograms have already been computed
    if train_test_val not in ('paper', 'ours'):
        raise ValueError("train_test_val must be 'paper' or 'ours', got {!r}".format(train_test_val))

    emotion_dict = {'W':'anger', 'L':'boredom', 'E':'disgust', 'A':'fear', 'F':'happiness', 'N':'neutral', 'T':'sadness'}

    # keep audio files only (stray directory entries would break the label lookup) and sort
    # them, because os.listdir returns the entries in arbitrary order
    audio_files = sorted(file for file in os.listdir(path) if file.lower().endswith('.wav'))

    # labels first: a file name with an unknown emotion code fails before any audio is processed
    labels = [emotion_dict[file[5]] for file in audio_files]
    spectrograms = [get_log_mel_spectrogram(os.path.join(path, file)) for file in audio_files]

    df = pd.DataFrame.from_dict({'file': audio_files, 'log_mel_spec': spectrograms, 'label': labels})
    label_enc = preprocessing.LabelEncoder()
    df['label'] = label_enc.fit_transform(df['label'])

    # load train, test, and validation data
    if train_test_val == 'paper':
        # load the train/test data
        X_train, X_test, y_train, y_test = load_train_test_data(df, test_size = 0.2, verbose=False)

        # split train set into validation and train data (this is not completely clear in the paper, follows github repo)
        df_train = pd.concat([X_train, y_train], axis = 1)
        X_train, X_val, y_train, y_val = load_train_test_data(df_train, test_size=0.2, split_type='train/val', verbose=False)

    else:
        # load the train/test data
        X_train, X_test, y_train, y_test = load_train_test_data(df, test_size = 0.3, verbose=False)

        # split test set into validation and test data
        # NOTE(review): unlike the other three calls this one does not pass verbose=False -- confirm intent
        df_test = pd.concat([X_test, y_test], axis = 1)
        X_val, X_test, y_val, y_test = load_train_test_data(df_test, test_size=0.5, split_type='val/test')

    print("There are {} entries in the training data.".format(X_train.shape[0]))
    print("There are {} entries in the testing data.".format(X_test.shape[0]))
    print("There are {} entries in the validation data.".format(X_val.shape[0]))

    # reshape the data
    X_train = _spectrograms_to_array(X_train)
    y_train = y_train.values

    X_test = _spectrograms_to_array(X_test)
    y_test = y_test.values

    X_val = _spectrograms_to_array(X_val)
    y_val = y_val.values

    return X_train, X_test, X_val, y_train, y_test, y_val


def _spectrograms_to_array(features, n_mels=128, n_frames=251):
    """
    Turn the flattened spectrograms in the 'log_mel_spec' column of a split into one
    array of shape (samples, n_mels, n_frames, 1).

    The defaults of 128 mel bands and 251 frames match what get_log_mel_spectrogram
    produces with its default arguments (8 s at 16 kHz with a hop length of 512).
    """
    flat = np.concatenate([np.array(row) for row in features['log_mel_spec']])
    return flat.reshape(-1, n_mels, n_frames, 1)

In [41]:
# Build the train/test/validation arrays and labels with the 'paper' split scheme.
X_train, X_test, X_val, y_train, y_test, y_val = load_data(data_path, train_test_val='paper')

There are 342 entries in the training data.
There are 107 entries in the testing data.
There are 86 entries in the validation data.
