In [1]:
#Import data packages
import os
import sys
import glob
import numpy as np
import pandas as pd

#Import audio packages
import librosa
import librosa.display
from scipy.io import wavfile
import scipy.io.wavfile
import sys

# Read Files

In [2]:
rawdata = []

for root, dirs, files in os.walk('data/RawData/Ravdess'):
    for filename in files:
        rawdata.append(filename)

In [3]:
len(rawdata)

2452

In [4]:
rawdata

['03-01-01-01-01-01-01.wav',
 '03-01-01-01-01-01-02.wav',
 '03-01-01-01-01-01-03.wav',
 '03-01-01-01-01-01-04.wav',
 '03-01-01-01-01-01-05.wav',
 '03-01-01-01-01-01-06.wav',
 '03-01-01-01-01-01-07.wav',
 '03-01-01-01-01-01-08.wav',
 '03-01-01-01-01-01-09.wav',
 '03-01-01-01-01-01-10.wav',
 '03-01-01-01-01-01-11.wav',
 '03-01-01-01-01-01-12.wav',
 '03-01-01-01-01-01-13.wav',
 '03-01-01-01-01-01-14.wav',
 '03-01-01-01-01-01-15.wav',
 '03-01-01-01-01-01-16.wav',
 '03-01-01-01-01-01-17.wav',
 '03-01-01-01-01-01-18.wav',
 '03-01-01-01-01-01-19.wav',
 '03-01-01-01-01-01-20.wav',
 '03-01-01-01-01-01-21.wav',
 '03-01-01-01-01-01-22.wav',
 '03-01-01-01-01-01-23.wav',
 '03-01-01-01-01-01-24.wav',
 '03-01-01-01-01-02-01.wav',
 '03-01-01-01-01-02-02.wav',
 '03-01-01-01-01-02-03.wav',
 '03-01-01-01-01-02-04.wav',
 '03-01-01-01-01-02-05.wav',
 '03-01-01-01-01-02-06.wav',
 '03-01-01-01-01-02-07.wav',
 '03-01-01-01-01-02-08.wav',
 '03-01-01-01-01-02-09.wav',
 '03-01-01-01-01-02-10.wav',
 '03-01-01-01-

## LIbrosa & MFCC configuration
In order to analyze and standardize how each audio file feature was built, the following configurations were determined:

In [5]:
#sample feature
#librosa.core.load(path, sr=22050, mono=True, offset=0.0, duration=None, dtype=<class 'numpy.float32'>, res_type='kaiser_best')
res_type_s = 'kaiser_best'
duration_s = None
sample_rate_s = 22050
offset_s = 0.5

#Mfcc
#librosa.feature.mfcc(y=None, sr=22050, S=None, n_mfcc=20, dct_type=2, norm='ortho', lifter=0, **kwargs)
mfcc_sample_rate = 22050
n_mfcc = 40
axis_mfcc = 1

### RAVDESS get emotion features

File naming convention

Each of the 7356 RAVDESS files has a unique filename. The filename consists of a 7-part numerical identifier (e.g., 02-01-06-01-02-01-12.mp4). These identifiers define the stimulus characteristics: 

Filename identifiers 

- Modality (01 = full-AV, 02 = video-only, 03 = audio-only).
- Vocal channel (01 = speech, 02 = song).
- Emotion (01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised).
- Emotional intensity (01 = normal, 02 = strong). NOTE: There is no strong intensity for the 'neutral' emotion.
- Statement (01 = "Kids are talking by the door", 02 = "Dogs are sitting by the door").
- Repetition (01 = 1st repetition, 02 = 2nd repetition).
- Actor (01 to 24. Odd numbered actors are male, even numbered actors are female).

Filename example: 02-01-06-01-02-01-12.mp4 
- Video-only (02)
- Speech (01)
- Fearful (06)
- Normal intensity (01)
- Statement "dogs" (02)
- 1st Repetition (01)
- 12th Actor (12)
- Female, as the actor ID number is even.

In [6]:
#Build list with target variables for each file
feeling_list=[]

#Emotion (01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fear, 07 = disgust, 08 = surprised) 

for emotion_path in rawdata:
    if emotion_path.split('-')[2] == '01':
        feeling_list.append("neutral")
    elif emotion_path.split('-')[2] == '02':
        feeling_list.append("calm")
    elif emotion_path.split('-')[2] == '03':
        feeling_list.append("happy")
    elif emotion_path.split('-')[2] == '04':
        feeling_list.append("sad")
    elif emotion_path.split('-')[2] == '05':
        feeling_list.append("angry")
    elif emotion_path.split('-')[2] == '06':
        feeling_list.append("fear")
    elif emotion_path.split('-')[2] == '07':
        feeling_list.append("disgust")
    elif emotion_path.split('-')[2] == '08':
        feeling_list.append("surprised")
    else:
        feeling_list.append("unknown")

In [7]:
#Check list
print(feeling_list)

['neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral'

In [8]:
#Turn list into dataframe
labels = pd.DataFrame(feeling_list)

In [9]:
labels.shape

(2452, 1)

In [10]:
labels.head()

Unnamed: 0,0
0,neutral
1,neutral
2,neutral
3,neutral
4,neutral


In [11]:
#Change index name to "emotion"
labels = labels.rename({0: 'emotion'}, axis=1)

In [12]:
labels.head()

Unnamed: 0,emotion
0,neutral
1,neutral
2,neutral
3,neutral
4,neutral


In [13]:
#Count the number of files per emotion
labels_total = pd.DataFrame(labels.groupby(['emotion']).size())
labels_total

Unnamed: 0_level_0,0
emotion,Unnamed: 1_level_1
angry,376
calm,376
disgust,192
fear,376
happy,376
neutral,188
sad,376
surprised,192


### RAVDESS get audio features with librosa library

In [21]:
ravdess = pd.DataFrame(columns=['feature'])
bookmark=0

for y in rawdata:
    #Change to kaiser_best & 22050 kHz
    #sr > target sampling rate
    #offset=0.5
    X, sample_rate = librosa.load('data/RawData/Ravdess/'+y, 
                                  res_type = res_type_s,
                                  duration = duration_s,
                                  sr = sample_rate_s,
                                  offset = offset_s)
    sample_rate = np.array(sample_rate)
    
    #Get MFCCs from each file
    mfccs = librosa.feature.mfcc(   y=X, 
                                    sr = mfcc_sample_rate, 
                                    n_mfcc = n_mfcc)
    
    #Calculate mean of MFCCs
    mfccs_mean = np.mean(    mfccs, 
                             axis = axis_mfcc)
    feature = mfccs_mean
    
    #Add MFCCs feature results to list
    ravdess.loc[bookmark] = [feature]
    bookmark=bookmark+1

In [22]:
#Verity data results
ravdess.shape

(2452, 1)

In [23]:
#Verify that there are no null values
ravdess.isnull().values.any()

False

In [24]:
# See array sample of features
ravdess

Unnamed: 0,feature
0,"[-670.4301, 65.463234, 0.48822185, 15.117002, ..."
1,"[-623.4058, 63.381435, -10.818053, 10.145172, ..."
2,"[-585.484, 66.34704, -3.7503064, 12.671173, 4...."
3,"[-663.4812, 50.858315, -9.176056, 11.07772, -5..."
4,"[-688.7739, 78.38483, 7.591097, 16.431046, 11...."
...,...
2447,"[-387.8787, 36.04285, -23.398796, -8.214959, -..."
2448,"[-444.3181, 56.227222, -19.693079, 8.307607, 2..."
2449,"[-449.08875, 22.074923, -20.714376, -4.0529246..."
2450,"[-438.63586, 62.3395, -23.957375, 6.800054, 9...."


In [25]:
#Turn array into dataframe
ravdess_final = pd.DataFrame(ravdess['feature'].values.tolist())

In [26]:
#Analyze new dataframe shape
ravdess_final.shape

(2452, 40)

In [27]:
# Check data sample
ravdess_final.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,-670.430115,65.463234,0.488222,15.117002,8.783389,1.056395,-4.240677,-3.193044,-13.346265,-2.917735,...,-2.543452,-2.323639,-3.321014,-2.033034,-3.949001,-1.677335,-1.385899,-2.031007,-4.255268,-1.686932
1,-623.405823,63.381435,-10.818053,10.145172,-5.461401,-3.885505,-6.538383,-9.566403,-5.01394,-0.690976,...,0.175067,3.802194,2.135208,5.054154,2.365537,3.358405,-1.978289,1.73288,-1.106719,1.04456
2,-585.484009,66.347038,-3.750306,12.671173,4.536705,-2.493158,-14.470141,-7.46761,-4.01441,-9.637287,...,-1.901974,-1.697298,-1.790364,1.086149,-0.032909,-1.257177,-3.268337,-2.29821,-1.423142,3.082723
3,-663.481201,50.858315,-9.176056,11.07772,-5.359027,-5.426731,-8.111016,-14.475863,-9.056971,-3.899186,...,-1.19886,0.879547,-2.542073,1.964959,2.155827,3.020328,1.052251,1.071099,4.351066,5.806594
4,-688.773926,78.384827,7.591097,16.431046,11.740804,-0.595977,-0.485841,-3.879308,-1.697953,1.260449,...,-0.804726,0.795291,-0.247736,0.821141,-0.357659,2.644244,-1.098661,-1.577293,-2.700699,0.252261


## Ravdess join features and target

In [28]:
#Join labels with features
ravdess_df = pd.concat([ravdess_final,labels], axis=1)

In [29]:
#Rename dataframe
ravdess_df = ravdess_df.rename(index=str, columns={"0": "label"})

In [30]:
#Analyze dataframe shape
ravdess_df.shape

(2452, 41)

In [31]:
#Anayze dataframe sample
ravdess_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,emotion
0,-670.430115,65.463234,0.488222,15.117002,8.783389,1.056395,-4.240677,-3.193044,-13.346265,-2.917735,...,-2.323639,-3.321014,-2.033034,-3.949001,-1.677335,-1.385899,-2.031007,-4.255268,-1.686932,neutral
1,-623.405823,63.381435,-10.818053,10.145172,-5.461401,-3.885505,-6.538383,-9.566403,-5.01394,-0.690976,...,3.802194,2.135208,5.054154,2.365537,3.358405,-1.978289,1.73288,-1.106719,1.04456,neutral
2,-585.484009,66.347038,-3.750306,12.671173,4.536705,-2.493158,-14.470141,-7.46761,-4.01441,-9.637287,...,-1.697298,-1.790364,1.086149,-0.032909,-1.257177,-3.268337,-2.29821,-1.423142,3.082723,neutral
3,-663.481201,50.858315,-9.176056,11.07772,-5.359027,-5.426731,-8.111016,-14.475863,-9.056971,-3.899186,...,0.879547,-2.542073,1.964959,2.155827,3.020328,1.052251,1.071099,4.351066,5.806594,neutral
4,-688.773926,78.384827,7.591097,16.431046,11.740804,-0.595977,-0.485841,-3.879308,-1.697953,1.260449,...,0.795291,-0.247736,0.821141,-0.357659,2.644244,-1.098661,-1.577293,-2.700699,0.252261,neutral


In [32]:
#Datafram drop Nan values
ravdess_df.dropna(inplace=True)

In [33]:
from sklearn.utils import shuffle

#Shuffle dataframe
ravdess_df = shuffle(ravdess_df)
ravdess_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,emotion
2146,-389.017883,52.395432,-22.033159,10.453345,0.275597,-0.286044,-3.675208,-7.437788,-17.401249,3.7786,...,0.758388,1.805288,0.879782,-0.693537,-1.896684,1.157221,5.588692,11.383379,14.470024,angry
1522,-584.343201,28.755703,-21.978504,6.203398,-19.09215,-15.651931,-11.186831,-23.063335,-5.513908,-10.048711,...,1.012252,-4.841632,-4.877862,-3.246335,1.560375,1.482806,0.503226,2.057159,8.947983,neutral
1150,-632.233826,50.419388,4.495446,7.642652,3.926707,1.383908,-1.987386,-7.958145,-7.860272,-3.527152,...,-1.701575,-3.043911,-3.751471,-3.674377,-2.477098,-2.756687,-1.086796,-2.42892,-0.857626,disgust
1729,-497.894318,37.361317,-31.664379,5.139538,-9.740922,-19.250357,-4.44662,-16.469189,-5.265702,-3.690193,...,2.461368,-4.139584,-2.422719,-1.771385,0.638308,0.20466,1.863322,0.150455,2.180856,happy
1460,-494.695129,21.106981,-14.390797,-8.524015,-17.840214,-15.993177,-9.437501,-8.402895,-27.335264,-3.107215,...,1.321094,-5.2527,-4.642137,-2.825211,1.554649,2.489509,0.6924,0.615476,11.936675,neutral
1649,-447.018951,64.222572,-16.019062,13.306705,-3.811141,-1.411737,-12.825457,-6.72301,-11.300441,-10.789612,...,6.884084,-0.215744,0.864332,-2.99273,2.607578,4.416861,12.476767,13.951736,15.314069,calm
1102,-614.381042,55.399303,-2.706339,11.015399,4.783836,-0.198596,-3.313485,-4.904955,-7.153729,-3.756768,...,-0.82205,-3.359003,-3.19514,-4.168027,-0.449905,-0.709052,0.167568,-1.473405,-0.565748,disgust
2043,-551.061768,25.775564,-43.437393,3.111761,-16.176998,-27.410635,-10.545733,-23.728813,-11.61309,2.486735,...,6.814993,-1.373797,-2.708674,0.227154,5.965232,2.134248,0.47151,6.064524,15.115532,sad
1175,-585.192322,61.776909,-12.739568,-0.276975,-21.161785,-8.128613,-13.754605,-17.378832,-11.45783,0.58046,...,-0.500184,-2.17516,0.076556,-1.023516,-0.202732,-0.21005,0.203295,-1.377845,-0.825895,disgust
514,-678.94751,71.909492,25.969122,26.969381,14.522541,8.765557,5.553268,3.765746,-3.901671,-1.029346,...,2.055247,0.08925,1.26257,-2.240326,0.257333,-3.96652,0.19855,-3.226501,-1.415789,sad


In [34]:
#Verify that there are no null values
ravdess_df.isnull().values.any()

False

In [35]:
# Check dataframe sample
ravdess_df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,emotion
2146,-389.017883,52.395432,-22.033159,10.453345,0.275597,-0.286044,-3.675208,-7.437788,-17.401249,3.7786,...,0.758388,1.805288,0.879782,-0.693537,-1.896684,1.157221,5.588692,11.383379,14.470024,angry
1522,-584.343201,28.755703,-21.978504,6.203398,-19.09215,-15.651931,-11.186831,-23.063335,-5.513908,-10.048711,...,1.012252,-4.841632,-4.877862,-3.246335,1.560375,1.482806,0.503226,2.057159,8.947983,neutral
1150,-632.233826,50.419388,4.495446,7.642652,3.926707,1.383908,-1.987386,-7.958145,-7.860272,-3.527152,...,-1.701575,-3.043911,-3.751471,-3.674377,-2.477098,-2.756687,-1.086796,-2.42892,-0.857626,disgust
1729,-497.894318,37.361317,-31.664379,5.139538,-9.740922,-19.250357,-4.44662,-16.469189,-5.265702,-3.690193,...,2.461368,-4.139584,-2.422719,-1.771385,0.638308,0.20466,1.863322,0.150455,2.180856,happy
1460,-494.695129,21.106981,-14.390797,-8.524015,-17.840214,-15.993177,-9.437501,-8.402895,-27.335264,-3.107215,...,1.321094,-5.2527,-4.642137,-2.825211,1.554649,2.489509,0.6924,0.615476,11.936675,neutral


In [36]:
#Analyz shape of dataframe
ravdess_df.shape

(2452, 41)

In [37]:
# see number of emotions
ravdess_df[ravdess_df.columns[-1]].nunique()

8

In [38]:
ravdess_df['emotion'].value_counts()

calm         376
happy        376
fear         376
sad          376
angry        376
disgust      192
surprised    192
neutral      188
Name: emotion, dtype: int64

In [39]:
ravdess_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,emotion
2146,-389.017883,52.395432,-22.033159,10.453345,0.275597,-0.286044,-3.675208,-7.437788,-17.401249,3.7786,...,0.758388,1.805288,0.879782,-0.693537,-1.896684,1.157221,5.588692,11.383379,14.470024,angry
1522,-584.343201,28.755703,-21.978504,6.203398,-19.09215,-15.651931,-11.186831,-23.063335,-5.513908,-10.048711,...,1.012252,-4.841632,-4.877862,-3.246335,1.560375,1.482806,0.503226,2.057159,8.947983,neutral
1150,-632.233826,50.419388,4.495446,7.642652,3.926707,1.383908,-1.987386,-7.958145,-7.860272,-3.527152,...,-1.701575,-3.043911,-3.751471,-3.674377,-2.477098,-2.756687,-1.086796,-2.42892,-0.857626,disgust
1729,-497.894318,37.361317,-31.664379,5.139538,-9.740922,-19.250357,-4.44662,-16.469189,-5.265702,-3.690193,...,2.461368,-4.139584,-2.422719,-1.771385,0.638308,0.20466,1.863322,0.150455,2.180856,happy
1460,-494.695129,21.106981,-14.390797,-8.524015,-17.840214,-15.993177,-9.437501,-8.402895,-27.335264,-3.107215,...,1.321094,-5.2527,-4.642137,-2.825211,1.554649,2.489509,0.6924,0.615476,11.936675,neutral


In [33]:
#Move dataframe into separate file
ravdess_df.to_csv('data/ravdess_data.csv')

In [34]:
rawdata_list = os.listdir('data/RawData/tess')

In [35]:
rawdata_list

['.ipynb_checkpoints',
 'OAF_back_angry.wav',
 'OAF_back_disgust.wav',
 'OAF_back_fear.wav',
 'OAF_back_happy.wav',
 'OAF_back_neutral.wav',
 'OAF_back_ps.wav',
 'OAF_back_sad.wav',
 'OAF_bar_angry.wav',
 'OAF_bar_disgust.wav',
 'OAF_bar_fear.wav',
 'OAF_bar_happy.wav',
 'OAF_bar_neutral.wav',
 'OAF_bar_ps.wav',
 'OAF_bar_sad.wav',
 'OAF_base_angry.wav',
 'OAF_base_disgust.wav',
 'OAF_base_fear.wav',
 'OAF_base_happy.wav',
 'OAF_base_neutral.wav',
 'OAF_base_ps.wav',
 'OAF_base_sad.wav',
 'OAF_bath_angry.wav',
 'OAF_bath_disgust.wav',
 'OAF_bath_fear.wav',
 'OAF_bath_happy.wav',
 'OAF_bath_neutral.wav',
 'OAF_bath_ps.wav',
 'OAF_bath_sad.wav',
 'OAF_bean_angry.wav',
 'OAF_bean_disgust.wav',
 'OAF_bean_fear.wav',
 'OAF_bean_happy.wav',
 'OAF_bean_neutral.wav',
 'OAF_bean_ps.wav',
 'OAF_bean_sad.wav',
 'OAF_beg_angry.wav',
 'OAF_beg_disgust.wav',
 'OAF_beg_fear.wav',
 'OAF_beg_happy.wav',
 'OAF_beg_neutral.wav',
 'OAF_beg_ps.wav',
 'OAF_beg_sad.wav',
 'OAF_bite_angry.wav',
 'OAF_bite_dis

## Read TESS Dataset

In [36]:
parent = os.getcwd()

In [37]:
parent

'C:\\Users\\arsal\\Documents\\FINAL YEAR PROJECTS\\SpeechModel'

In [38]:
# Build list of audio files
raw_data_tess_path = r"data\RawData\tess\\"

folder_list_tess = os.listdir(raw_data_tess_path)
folder_list_tess

['.ipynb_checkpoints',
 'OAF_back_angry.wav',
 'OAF_back_disgust.wav',
 'OAF_back_fear.wav',
 'OAF_back_happy.wav',
 'OAF_back_neutral.wav',
 'OAF_back_ps.wav',
 'OAF_back_sad.wav',
 'OAF_bar_angry.wav',
 'OAF_bar_disgust.wav',
 'OAF_bar_fear.wav',
 'OAF_bar_happy.wav',
 'OAF_bar_neutral.wav',
 'OAF_bar_ps.wav',
 'OAF_bar_sad.wav',
 'OAF_base_angry.wav',
 'OAF_base_disgust.wav',
 'OAF_base_fear.wav',
 'OAF_base_happy.wav',
 'OAF_base_neutral.wav',
 'OAF_base_ps.wav',
 'OAF_base_sad.wav',
 'OAF_bath_angry.wav',
 'OAF_bath_disgust.wav',
 'OAF_bath_fear.wav',
 'OAF_bath_happy.wav',
 'OAF_bath_neutral.wav',
 'OAF_bath_ps.wav',
 'OAF_bath_sad.wav',
 'OAF_bean_angry.wav',
 'OAF_bean_disgust.wav',
 'OAF_bean_fear.wav',
 'OAF_bean_happy.wav',
 'OAF_bean_neutral.wav',
 'OAF_bean_ps.wav',
 'OAF_bean_sad.wav',
 'OAF_beg_angry.wav',
 'OAF_beg_disgust.wav',
 'OAF_beg_fear.wav',
 'OAF_beg_happy.wav',
 'OAF_beg_neutral.wav',
 'OAF_beg_ps.wav',
 'OAF_beg_sad.wav',
 'OAF_bite_angry.wav',
 'OAF_bite_dis

In [39]:
tess_list = []

for folder in folder_list_tess:
    folder_path = raw_data_tess_path
os.chdir(folder_path)
for file in glob.glob("*.wav"):
    tess_list.append(folder_path+file)
os.chdir(parent)
#Check results
tess_list[:10]

['data\\RawData\\tess\\\\OAF_back_angry.wav',
 'data\\RawData\\tess\\\\OAF_back_disgust.wav',
 'data\\RawData\\tess\\\\OAF_back_fear.wav',
 'data\\RawData\\tess\\\\OAF_back_happy.wav',
 'data\\RawData\\tess\\\\OAF_back_neutral.wav',
 'data\\RawData\\tess\\\\OAF_back_ps.wav',
 'data\\RawData\\tess\\\\OAF_back_sad.wav',
 'data\\RawData\\tess\\\\OAF_bar_angry.wav',
 'data\\RawData\\tess\\\\OAF_bar_disgust.wav',
 'data\\RawData\\tess\\\\OAF_bar_fear.wav']

### TESS Get emotion features

In [40]:
#Build list of emotions for Tess
feeling_list_tess = []

#'angry', 'disgust', 'fear', 'happy', 'neutral', 'sad' and 'surprised' emotion classes respectively. 
#E.g., 'd03.wav' is the 3rd disgust sentence.  

emotion_dic = {"angry":'angry', 
               "disgust":'disgust', 
               "fear":'fear', 
               "happy":'happy', 
               "neutral":'neutral', 
               "sad":'sad', 
               "ps":'surprised'}

for file_path in tess_list:
    file = file_path.split('\\')[-1]
    file_name = file.split('.')[0]
    emotion = file_name.split('_')[-1]
    feeling_list_tess.append(emotion_dic[emotion])

In [41]:
feeling_list_tess

['angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'f

In [42]:
#Build dataframe from array
labels_tess = pd.DataFrame(feeling_list_tess)

In [43]:
#Check results
labels_tess.head()

Unnamed: 0,0
0,angry
1,disgust
2,fear
3,happy
4,neutral


In [44]:
#Rename column to emotion
labels_tess = labels_tess.rename({0: 'emotion'}, axis=1)

In [45]:
#Check shape
labels_tess.shape

(2800, 1)

In [46]:
#Check results
labels_tess.head()

Unnamed: 0,emotion
0,angry
1,disgust
2,fear
3,happy
4,neutral


In [47]:
#Check emotion size
labels_tess_total = pd.DataFrame(labels_tess.groupby(['emotion']).size())
labels_tess_total

Unnamed: 0_level_0,0
emotion,Unnamed: 1_level_1
angry,400
disgust,400
fear,400
happy,400
neutral,400
sad,400
surprised,400


### TESS Get audio features

In [48]:
rawdata_tess = pd.DataFrame(columns=['feature'])
bookmark=0

for y in tess_list:
    #Get audio features
    X, sample_rate = librosa.load(y, 
                                  res_type = res_type_s,
                                  duration = duration_s,
                                  sr = sample_rate_s,
                                  offset=offset_s)
    
    #Get MFFC features
    mfccs = librosa.feature.mfcc(   y=X, 
                                    sr = mfcc_sample_rate, 
                                    n_mfcc = n_mfcc)
    #Get MFFCs average features
    mfccs_mean = np.mean(    mfccs, 
                             axis = axis_mfcc)
    feature = mfccs_mean
    rawdata_tess.loc[bookmark] = [feature]
    bookmark=bookmark+1

In [49]:
#Verify Tess features shape
rawdata_tess.shape

(2800, 1)

In [50]:
#Check that there are no nan values
rawdata_tess.isnull().values.any()

False

In [51]:
#Get sample data
rawdata_tess.head()

Unnamed: 0,feature
0,"[-396.60403, 78.25556, -19.569338, -21.489851,..."
1,"[-491.92722, 120.23254, 9.670667, -7.0598683, ..."
2,"[-447.49503, 91.5542, -0.39023134, -17.291504,..."
3,"[-494.20566, 108.41832, 12.990735, -13.772038,..."
4,"[-532.54865, 108.74479, 13.694969, -4.8464985,..."


In [52]:
#Build list
rawdata_tess_final = pd.DataFrame(rawdata_tess['feature'].values.tolist())

In [53]:
#Check dataframe
rawdata_tess_final

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,-396.604034,78.255562,-19.569338,-21.489851,-2.525299,10.445654,-21.044718,-5.894850,-7.459246,-0.524694,...,0.204293,5.107435,9.377555,5.378602,2.489727,-1.628691,4.714848,9.084726,2.038231,-1.809616
1,-491.927216,120.232536,9.670667,-7.059868,16.235134,3.580734,-5.178472,2.561268,-17.117437,1.335359,...,-0.393715,1.222405,-0.874633,3.228373,1.478694,3.743068,2.234108,3.861484,7.015639,10.103461
2,-447.495026,91.554199,-0.390231,-17.291504,2.782623,4.717841,-13.548041,0.557419,-10.805155,3.248377,...,0.053630,2.425200,-2.861225,-4.485274,0.616731,3.227551,-3.117861,-1.251210,-2.262123,2.087229
3,-494.205658,108.418320,12.990735,-13.772038,8.819846,12.204424,-9.161975,1.516832,-7.936755,1.373845,...,1.711510,8.710872,10.475064,10.166164,8.878898,8.305769,7.912946,10.890725,11.227779,14.162820
4,-532.548645,108.744789,13.694969,-4.846498,16.790934,13.777178,-7.775505,-3.357900,-11.597085,4.466738,...,-8.034263,-4.047399,-10.091186,-1.731224,7.314921,19.364264,30.353188,35.723701,31.090689,21.279510
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,-348.233398,35.602421,-4.365129,15.534871,6.135697,0.063725,-4.578150,-6.932275,-20.723263,1.774053,...,-3.321148,7.392859,-2.018038,-6.862062,-12.835851,3.390870,1.486814,-0.507122,0.197945,9.505314
2796,-346.212616,37.196987,-12.763891,36.840496,-3.720520,-0.910189,-11.683501,-17.155199,-2.398546,-11.626377,...,9.798115,1.436242,-1.915311,5.417030,7.406669,7.149210,-3.331374,-6.135561,-2.562752,1.432900
2797,-420.886383,80.779999,5.512202,33.144695,1.982193,-3.874799,0.712508,-17.866940,-4.412733,-0.012554,...,-8.026308,9.106145,18.160496,32.811478,34.057686,27.130341,9.327121,-0.924105,-5.404797,2.861058
2798,-373.980774,61.241165,-4.412523,14.750440,-15.815634,3.444529,-7.860944,-19.820478,-13.926143,-7.034667,...,5.387750,10.827277,4.194105,8.494205,3.632763,10.727443,4.604687,6.022658,2.831079,2.852623


## Join TESS features with targets

In [54]:
#Concat both feature table and target table
newdf_tess = pd.concat([rawdata_tess_final,labels_tess], axis=1)
newdf_tess

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,emotion
0,-396.604034,78.255562,-19.569338,-21.489851,-2.525299,10.445654,-21.044718,-5.894850,-7.459246,-0.524694,...,5.107435,9.377555,5.378602,2.489727,-1.628691,4.714848,9.084726,2.038231,-1.809616,angry
1,-491.927216,120.232536,9.670667,-7.059868,16.235134,3.580734,-5.178472,2.561268,-17.117437,1.335359,...,1.222405,-0.874633,3.228373,1.478694,3.743068,2.234108,3.861484,7.015639,10.103461,disgust
2,-447.495026,91.554199,-0.390231,-17.291504,2.782623,4.717841,-13.548041,0.557419,-10.805155,3.248377,...,2.425200,-2.861225,-4.485274,0.616731,3.227551,-3.117861,-1.251210,-2.262123,2.087229,fear
3,-494.205658,108.418320,12.990735,-13.772038,8.819846,12.204424,-9.161975,1.516832,-7.936755,1.373845,...,8.710872,10.475064,10.166164,8.878898,8.305769,7.912946,10.890725,11.227779,14.162820,happy
4,-532.548645,108.744789,13.694969,-4.846498,16.790934,13.777178,-7.775505,-3.357900,-11.597085,4.466738,...,-4.047399,-10.091186,-1.731224,7.314921,19.364264,30.353188,35.723701,31.090689,21.279510,neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,-348.233398,35.602421,-4.365129,15.534871,6.135697,0.063725,-4.578150,-6.932275,-20.723263,1.774053,...,7.392859,-2.018038,-6.862062,-12.835851,3.390870,1.486814,-0.507122,0.197945,9.505314,fear
2796,-346.212616,37.196987,-12.763891,36.840496,-3.720520,-0.910189,-11.683501,-17.155199,-2.398546,-11.626377,...,1.436242,-1.915311,5.417030,7.406669,7.149210,-3.331374,-6.135561,-2.562752,1.432900,happy
2797,-420.886383,80.779999,5.512202,33.144695,1.982193,-3.874799,0.712508,-17.866940,-4.412733,-0.012554,...,9.106145,18.160496,32.811478,34.057686,27.130341,9.327121,-0.924105,-5.404797,2.861058,neutral
2798,-373.980774,61.241165,-4.412523,14.750440,-15.815634,3.444529,-7.860944,-19.820478,-13.926143,-7.034667,...,10.827277,4.194105,8.494205,3.632763,10.727443,4.604687,6.022658,2.831079,2.852623,surprised


In [55]:
newdf_tess = newdf_tess.rename(index=str, columns={"0": "label"})

In [56]:
#Verify table shape
newdf_tess.shape

(2800, 41)

In [57]:
#Get dataframe sample data
newdf_tess.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,emotion
0,-396.604034,78.255562,-19.569338,-21.489851,-2.525299,10.445654,-21.044718,-5.89485,-7.459246,-0.524694,...,5.107435,9.377555,5.378602,2.489727,-1.628691,4.714848,9.084726,2.038231,-1.809616,angry
1,-491.927216,120.232536,9.670667,-7.059868,16.235134,3.580734,-5.178472,2.561268,-17.117437,1.335359,...,1.222405,-0.874633,3.228373,1.478694,3.743068,2.234108,3.861484,7.015639,10.103461,disgust
2,-447.495026,91.554199,-0.390231,-17.291504,2.782623,4.717841,-13.548041,0.557419,-10.805155,3.248377,...,2.4252,-2.861225,-4.485274,0.616731,3.227551,-3.117861,-1.25121,-2.262123,2.087229,fear
3,-494.205658,108.41832,12.990735,-13.772038,8.819846,12.204424,-9.161975,1.516832,-7.936755,1.373845,...,8.710872,10.475064,10.166164,8.878898,8.305769,7.912946,10.890725,11.227779,14.16282,happy
4,-532.548645,108.744789,13.694969,-4.846498,16.790934,13.777178,-7.775505,-3.3579,-11.597085,4.466738,...,-4.047399,-10.091186,-1.731224,7.314921,19.364264,30.353188,35.723701,31.090689,21.27951,neutral


In [58]:
#Drop nan values
newdf_tess.dropna(inplace=True)
newdf_tess.shape

(2800, 41)

In [59]:
#Shuffle rows
newdf_tess = shuffle(newdf_tess)
newdf_tess.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,emotion
303,-439.209839,52.031967,14.808918,-9.496803,5.101288,8.61673,-1.320742,-0.686291,-23.771204,-1.638695,...,-1.071538,3.15822,2.097135,-2.885954,-5.593198,-7.087986,4.418566,-3.481195,3.663387,fear
2540,-390.70755,48.434856,30.143837,34.541302,-6.814882,8.196797,-2.632373,-11.593469,-2.810791,0.898264,...,10.700237,7.250145,12.502523,3.03437,6.233582,-1.553688,3.280564,1.164667,3.929083,sad
1672,-380.585236,107.327866,23.79994,19.740412,-20.588615,13.702047,4.824707,-14.648274,0.192977,-3.029135,...,13.63417,6.417684,6.043452,3.010067,6.284101,8.991129,13.317131,8.057784,5.717334,sad
918,-483.565186,96.419746,13.606753,-3.497384,17.959509,6.561676,-13.423718,1.070405,-22.64588,0.32424,...,3.212639,-0.148176,8.982528,8.857074,10.762841,14.506783,11.366557,11.552303,11.609197,disgust
731,-453.229675,107.998215,35.394962,-10.678308,4.511348,0.189389,-3.419339,-3.951507,-23.19804,-16.357641,...,7.806617,3.902833,-0.324019,2.555919,7.294698,8.366813,11.241141,6.364999,7.89728,happy
2503,-406.610809,74.639946,-0.072521,19.760883,-5.596318,4.479149,-4.612859,-14.098087,-3.075858,9.918381,...,1.06096,4.851324,18.581367,28.72333,32.624786,23.410536,12.395446,-3.261205,-0.303659,neutral
220,-513.16156,95.641846,31.466316,-1.226158,13.259369,6.726179,-6.056936,0.237161,-11.969058,-2.559626,...,2.954829,3.225525,5.049396,4.55851,8.272676,6.495747,7.902458,9.144543,13.8107,happy
1642,-380.923309,78.359909,-6.194743,19.479153,-15.990236,-6.435923,4.033202,-22.290525,-3.806813,4.977145,...,9.125403,16.548077,34.852245,37.831814,33.619522,15.809753,0.515893,-7.908116,2.302252,neutral
976,-504.324249,127.999405,40.026131,-23.777229,0.510662,13.182938,1.834589,-1.294213,-19.061615,-13.6938,...,3.247662,5.603178,2.463166,3.595618,5.772469,3.080873,6.309603,5.597229,10.934939,happy
1843,-285.254211,86.242668,-2.773583,22.61731,-15.214632,11.602871,11.93178,-2.531818,0.659862,11.627561,...,4.473824,-11.377713,-8.931878,-3.848209,4.950994,-1.725497,2.659218,11.390565,11.332727,fear


In [60]:
#Verify there are no nan values
newdf_tess.isnull().values.any()

False

In [61]:
#Check shape
newdf_tess.shape

(2800, 41)

In [62]:
# See number of emotions
newdf_tess[newdf_tess.columns[-1]].nunique()

7

In [63]:
newdf_tess['emotion'].value_counts()

surprised    400
neutral      400
disgust      400
angry        400
happy        400
sad          400
fear         400
Name: emotion, dtype: int64

In [64]:
#Move dataframe into separate file
newdf_tess.to_csv('data/tess_data.csv')

# Join RAVDESS + TESS dataframes

In [65]:
ravdess_df.columns

Index([        0,         1,         2,         3,         4,         5,
               6,         7,         8,         9,        10,        11,
              12,        13,        14,        15,        16,        17,
              18,        19,        20,        21,        22,        23,
              24,        25,        26,        27,        28,        29,
              30,        31,        32,        33,        34,        35,
              36,        37,        38,        39, 'emotion'],
      dtype='object')

In [66]:
newdf_tess.columns

Index([        0,         1,         2,         3,         4,         5,
               6,         7,         8,         9,        10,        11,
              12,        13,        14,        15,        16,        17,
              18,        19,        20,        21,        22,        23,
              24,        25,        26,        27,        28,        29,
              30,        31,        32,        33,        34,        35,
              36,        37,        38,        39, 'emotion'],
      dtype='object')

In [67]:
frames = [ravdess_df,newdf_tess]

final_data = pd.concat(frames, ignore_index=True)
final_data.shape

(5252, 41)

In [68]:
#Check new and final dataframe
final_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,emotion
0,-587.907898,36.178154,-17.241106,-9.742208,-27.773447,-14.733717,-11.477688,-15.569857,-14.273412,-8.294398,...,3.212901,-1.833511,-2.232787,-4.604078,0.343586,-1.242408,-0.375359,3.929535,10.683189,sad
1,-509.770386,43.354965,-24.751860,8.810980,-8.211090,-24.347290,-9.572964,-15.536777,-10.063596,4.296363,...,1.836776,-8.388222,-7.970921,-3.082049,3.787357,-0.004449,2.402939,7.500911,7.930544,happy
2,-608.325256,91.134865,-8.422993,24.703096,1.783348,-23.475849,-8.582272,-3.728264,-11.954734,0.899885,...,5.030650,0.232143,2.143103,-1.058072,-1.230369,-0.428278,6.082251,10.974966,18.634037,calm
3,-504.740112,55.126217,-20.908773,13.133360,-6.350088,-7.488061,-18.778595,-4.136145,-18.639141,-2.384829,...,2.723440,2.149348,0.681944,-4.007479,-1.396597,-1.036337,5.015131,10.885223,16.854820,happy
4,-637.394531,43.984432,-17.269554,-3.644861,-15.109137,-18.893116,-2.212192,-20.164730,-10.050422,2.773326,...,15.225397,0.327097,-4.202260,-5.842893,1.823740,-2.011576,3.554226,11.488029,20.341881,calm
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5247,-433.188965,57.363720,4.908972,13.024331,-3.378766,-0.960908,-15.769565,4.778440,-14.686361,-8.497721,...,6.618638,6.445676,-1.611167,-1.723101,1.382978,4.430180,5.747354,-2.161623,3.657368,angry
5248,-559.998840,101.993713,33.605961,12.951871,21.204279,13.413920,4.372688,-7.319698,-9.703876,1.709915,...,16.355019,8.957122,11.558165,4.750055,4.040524,2.418961,6.325723,7.394610,15.063540,sad
5249,-496.139191,133.011856,45.438461,0.555025,10.132165,11.738538,-6.431499,7.704493,-19.552839,-16.369890,...,0.150162,3.208148,6.247221,12.822376,13.872787,10.784860,12.104943,9.085450,10.249683,disgust
5250,-353.824036,98.714981,-1.281175,8.937387,-3.878722,-3.579316,-3.132740,-20.462267,-5.338827,8.199279,...,-2.974227,-11.691386,-2.699654,3.988725,22.577427,33.460434,37.656155,27.965282,13.931880,neutral


In [69]:
#Move dataframe into separate file
final_data.to_csv('data/final_data.csv')

# Thats All for Data Preperation