# Data creation and loading

In [None]:
import librosa
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

In [None]:
# List every entry of the dataset folder and wrap the names in a
# single-column DataFrame whose column is called 'file'.
dir_data = os.listdir('Dataset/Actor')
audio_df = pd.DataFrame(dir_data).rename(columns={0: 'file'})
audio_df.head()

Unnamed: 0,file
0,03-01-06-01-02-02-02.wav
1,03-01-05-01-02-01-16.wav
2,03-01-08-01-01-01-14.wav
3,03-01-06-01-02-02-16.wav
4,03-01-05-01-02-01-02.wav


In [None]:
# Two-digit emotion code -> emotion name.  load_data() looks up the third
# dash-separated field of each file name in this mapping.
# Codes run '01'..'08' in the order the names are listed below.
emotions_list = {
    format(code, '02d'): name
    for code, name in enumerate(
        (
            'neutral',
            'calm',
            'happy',
            'sad',
            'angry',
            'fearful',
            'disgust',
            'surprised',
        ),
        start=1,
    )
}


In [None]:
def extract_feature(wav_file):
    """Build one flat feature vector for a single audio file.

    The file is loaded with ``librosa.load`` and five descriptors are
    computed: 13 MFCCs, chroma (from the magnitude STFT), mel spectrogram,
    spectral contrast (from the magnitude STFT) and tonnetz (from the
    harmonic component of the signal).  Each descriptor matrix is transposed
    and averaged along axis 0, and the averages are concatenated.

    Parameters
    ----------
    wav_file
        Path handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        1-D array laid out as MFCC, chroma, mel, contrast, tonnetz averages.
    """
    def _averaged(matrix):
        # Same reduction for every descriptor: transpose, then mean over axis 0.
        return np.mean(matrix.T, axis=0)

    signal, sr = librosa.load(wav_file, res_type='kaiser_fast')
    # Magnitude spectrogram shared by the chroma and contrast descriptors.
    magnitude = np.abs(librosa.stft(signal))

    mfcc_avg = _averaged(librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13))
    chroma_avg = _averaged(librosa.feature.chroma_stft(y=signal, S=magnitude, sr=sr))
    mel_avg = _averaged(librosa.feature.melspectrogram(y=signal, sr=sr))
    contrast_avg = _averaged(librosa.feature.spectral_contrast(S=magnitude, sr=sr))
    tonnetz_avg = _averaged(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(signal), sr=sr)
    )

    # The leading empty array (NumPy's default float64) takes part in dtype
    # promotion, so the result dtype matches stacking onto an empty
    # accumulator one piece at a time.
    return np.hstack(
        (np.array([]), mfcc_avg, chroma_avg, mel_avg, contrast_avg, tonnetz_avg)
    )

In [None]:
def load_data(test_size=0.2, data_dir=audio_df, label=emotions_list):
    """Extract features for every non-neutral recording and split them.

    Parameters
    ----------
    test_size
        Forwarded to ``train_test_split`` as ``test_size``.
    data_dir
        DataFrame with a ``'file'`` column of audio file names.  The names
        are resolved against the hard-coded ``Dataset/Actor`` folder.
    label
        Mapping from the emotion code (third dash-separated field of the
        file name) to the emotion name.

    Returns
    -------
    list
        The output of ``train_test_split`` for the feature matrix and the
        label array: train features, test features, train labels, test labels.

    Raises
    ------
    IndexError
        If a file name has fewer than three dash-separated fields.
    KeyError
        If the emotion code of a file is not a key of ``label``.
    """
    audio_root = os.path.abspath('Dataset/Actor')
    X, y = [], []
    # Iterate over the column values directly instead of ``data_dir.file[i]``
    # with ``i in range(n)``: the latter is a label-based lookup and breaks
    # (or picks the wrong row) when the frame does not carry the default
    # 0..n-1 index, e.g. after filtering or shuffling.
    for raw_name in data_dir['file']:
        file_name = str(raw_name)
        emotion = label[file_name.split("-")[2]]
        # 'neutral' recordings are excluded from the data set.
        if emotion == 'neutral':
            continue
        # Let os.path.join insert the separator rather than concatenating '/'.
        wav_file = os.path.join(audio_root, file_name)
        X.append(extract_feature(wav_file))
        y.append(emotion)
    return train_test_split(np.array(X), np.array(y), test_size=test_size, random_state=49)

In [None]:
# Run feature extraction over the whole file list; 10% of the samples are
# held out as the test set.
X_train, X_test, y_train, y_test = load_data(
    test_size=0.1,
    data_dir=audio_df,
    label=emotions_list,
)



In [None]:
# Shapes of the train/test features and labels, one per line.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

(1209, 166)
(1209,)
(135, 166)
(135,)


In [None]:
# Carve a 20% validation set out of the training portion, stratified on the
# training labels.
X_train_data, X_val, y_train_data, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=49,
    stratify=y_train,
)

In [None]:
# Shapes of the final train / test / validation arrays, one per line.
print(
    X_train_data.shape,
    y_train_data.shape,
    X_test.shape,
    y_test.shape,
    X_val.shape,
    y_val.shape,
    sep='\n',
)

(967, 166)
(967,)
(135, 166)
(135,)
(242, 166)
(242,)


In [None]:
# np.savetxt does not create missing directories, so make sure the output
# folder exists before writing anything into it.
os.makedirs('Data_files', exist_ok=True)

# Unscaled feature matrices, one CSV per split.
np.savetxt('Data_files/x_train.csv',X_train_data, delimiter=',')
np.savetxt('Data_files/x_val.csv',X_val, delimiter=',')
np.savetxt('Data_files/x_test.csv',X_test, delimiter=',')

# String labels; fmt="%s" is needed because the arrays hold text, not numbers.
np.savetxt('Data_files/y_train.csv',y_train_data, delimiter=',', fmt="%s")
np.savetxt('Data_files/y_val.csv',y_val, delimiter=',', fmt="%s")
np.savetxt('Data_files/y_test.csv',y_test, delimiter=',', fmt="%s")

In [None]:
# Fit the min-max scaler (target range [-1, 1]) on the training features
# only, then apply that same fitted scaler to all three splits.
Scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X_train_data)
X_train_scaled, X_val_scaled, X_test_scaled = (
    Scaler.transform(features) for features in (X_train_data, X_val, X_test)
)

# Likewise fit the label encoder on the training labels and reuse it for the
# validation and test labels.
lenc = LabelEncoder().fit(y_train_data)
y_train_enc, y_val_enc, y_test_enc = (
    lenc.transform(labels) for labels in (y_train_data, y_val, y_test)
)

In [None]:
# Container types of the scaled features and the encoded labels, one per line.
print(type(X_train_scaled), type(y_train_enc), sep='\n')

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [None]:
# Scaled feature matrices, one CSV per split.
for _csv_path, _matrix in (
    ('Data_files/x_train_scaled.csv', X_train_scaled),
    ('Data_files/x_val_scaled.csv', X_val_scaled),
    ('Data_files/x_test_scaled.csv', X_test_scaled),
):
    np.savetxt(_csv_path, _matrix, delimiter=',')

# Encoded labels.  The files are named "*_scaled" but hold the LabelEncoder
# output, not scaled values.
for _csv_path, _encoded in (
    ('Data_files/y_train_scaled.csv', y_train_enc),
    ('Data_files/y_val_scaled.csv', y_val_enc),
    ('Data_files/y_test_scaled.csv', y_test_enc),
):
    np.savetxt(_csv_path, _encoded, delimiter=',', fmt="%s")