## Importing the required libraries

In [None]:
import os
import glob
import librosa
import numpy as np
import pandas as pd
from emotenn import constants as ct, gen_utils as gu, load_utils as lu

## Downloading the dataset and setting the labels

In [None]:
# Fetch the two RAVDESS archives (speech and song) and unpack each one
# right after it has been downloaded.
ravdess_archives = (ct.RAVDESS_SPEECH, ct.RAVDESS_SONG)
for archive in ravdess_archives:
    # NOTE(review): exist_ok=True presumably tolerates an already-downloaded
    # file instead of failing -- confirm in load_utils.
    lu.download_file_from_google_drive(archive, exist_ok=True)
    gu.unpack(archive.path)

In [None]:
# Pattern matching every .wav file at any depth below the unpacked
# 'Audio_*_Actors_01-24' folders ('**' recurses because recursive=True).
sound_files_pattern = os.path.join(
    ct.UNPACKED_DIR,
    'Audio_*_Actors_01-24',
    '**',
    '*.wav',
)
sound_files = list(glob.iglob(sound_files_pattern, recursive=True))

In [None]:
# Lookup table from the two-character emotion code found at positions 6-7 of
# each file name to the class label used for training. Codes '01' and '02'
# are deliberately merged into a single 'Neutral' class ('02' is "calm" in
# the RAVDESS naming scheme).
EMOTION_LABELS = {
    '01': 'Neutral',
    '02': 'Neutral',
    '03': 'Happy',
    '04': 'Sad',
    '05': 'Angry',
    '06': 'Fear',
    '07': 'Disgust',
    '08': 'Surprise',
}

# Build one label per entry of sound_files, in the same order, so that the
# rows of Y line up with the features extracted from the same list.
labels = []
for sound_path in sound_files:
    name = os.path.basename(sound_path)
    emotion = name[6:8]
    try:
        label = EMOTION_LABELS[emotion]
    except KeyError:
        # Fail loudly: an if/elif chain without an else would either crash
        # with NameError or silently reuse the previous file's label here.
        raise ValueError(
            f"Unrecognised emotion code {emotion!r} in file name {name!r}"
        ) from None
    labels.append(label)

# One-hot encode the labels; selecting ct.CLASS_NAMES fixes the column order.
dummies = pd.get_dummies(labels)[ct.CLASS_NAMES]
Y = dummies.to_numpy(dtype='float32')

## Getting the MFCC features of audio files using librosa

In [None]:
# Compute a 13-coefficient MFCC matrix for every recording in sound_files.
# Each file is loaded at 44.1 kHz, skipping the first 0.5 s and reading at
# most 2.5 s of audio.
load_options = dict(res_type='kaiser_fast', duration=2.5, sr=22050*2, offset=0.5)
mfcc_features = []
for sound_path in sound_files:
    signal, sample_rate = librosa.load(sound_path, **load_options)
    mfcc_features.append(
        librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    )

In [None]:
# Right-pad every MFCC matrix with zeros along axis 1 so that all of them
# have the same number of columns, then stack them into a single array.
# The target width is taken from mfcc_features itself; reading it from X
# would fail because X does not exist until this cell has run.
max_frames = max(mfcc.shape[1] for mfcc in mfcc_features)
X = []
for mfcc in mfcc_features:
    missing = max_frames - mfcc.shape[1]
    # np.pad defaults to constant zero padding and keeps the input dtype.
    X.append(np.pad(mfcc, ((0, 0), (0, missing))))
X = np.asarray(X)

In [None]:
# Hand the feature array X and the one-hot label array Y to gen_utils.save
# under the name 'ravdess'.
# NOTE(review): presumably this persists the dataset to disk -- confirm the
# output location and format in gen_utils.
gu.save(X, Y, 'ravdess')