In [173]:
#Import data packages
import os
import sys
import glob
import numpy as np
import pandas as pd

#Import audio packages
import librosa
import librosa.display
from scipy.io import wavfile
import scipy.io.wavfile
import sys

# Read Files

In [174]:
# Collect the RAVDESS audio file names (names only, not paths; later cells
# prefix them with 'data/RawData/Ravdess/').
# - Only *.wav entries are kept, so stray files in the folder never reach
#   librosa.load or the filename-based label parsing.
# - The result is sorted because os.walk gives no ordering guarantee, and the
#   label list and feature table are both built by iterating this list.
rawdata = sorted(
    filename
    for _root, _dirs, files in os.walk('data/RawData/Ravdess')
    for filename in files
    if filename.lower().endswith('.wav')
)

In [175]:
len(rawdata)

2452

In [176]:
rawdata

['03-01-01-01-01-01-01.wav',
 '03-01-01-01-01-01-02.wav',
 '03-01-01-01-01-01-03.wav',
 '03-01-01-01-01-01-04.wav',
 '03-01-01-01-01-01-05.wav',
 '03-01-01-01-01-01-06.wav',
 '03-01-01-01-01-01-07.wav',
 '03-01-01-01-01-01-08.wav',
 '03-01-01-01-01-01-09.wav',
 '03-01-01-01-01-01-10.wav',
 '03-01-01-01-01-01-11.wav',
 '03-01-01-01-01-01-12.wav',
 '03-01-01-01-01-01-13.wav',
 '03-01-01-01-01-01-14.wav',
 '03-01-01-01-01-01-15.wav',
 '03-01-01-01-01-01-16.wav',
 '03-01-01-01-01-01-17.wav',
 '03-01-01-01-01-01-18.wav',
 '03-01-01-01-01-01-19.wav',
 '03-01-01-01-01-01-20.wav',
 '03-01-01-01-01-01-21.wav',
 '03-01-01-01-01-01-22.wav',
 '03-01-01-01-01-01-23.wav',
 '03-01-01-01-01-01-24.wav',
 '03-01-01-01-01-02-01.wav',
 '03-01-01-01-01-02-02.wav',
 '03-01-01-01-01-02-03.wav',
 '03-01-01-01-01-02-04.wav',
 '03-01-01-01-01-02-05.wav',
 '03-01-01-01-01-02-06.wav',
 '03-01-01-01-01-02-07.wav',
 '03-01-01-01-01-02-08.wav',
 '03-01-01-01-01-02-09.wav',
 '03-01-01-01-01-02-10.wav',
 '03-01-01-01-

## Librosa & MFCC configuration
In order to analyze and standardize how each audio file feature was built, the following configurations were determined:

In [177]:
# Audio loading settings, passed unchanged to every librosa.load call below.
# Reference signature:
#librosa.core.load(path, sr=22050, mono=True, offset=0.0, duration=None, dtype=<class 'numpy.float32'>, res_type='kaiser_best')
res_type_s = 'kaiser_best'   # resampling method
duration_s = None            # None is the default shown in the signature above
sample_rate_s = 22050        # target sampling rate handed to librosa.load as `sr`
offset_s = 0.5               # start offset handed to librosa.load (seconds, per the librosa docs)

# MFCC settings
# Reference signature:
#librosa.feature.mfcc(y=None, sr=22050, S=None, n_mfcc=20, dct_type=2, norm='ortho', lifter=0, **kwargs)
# The MFCCs are computed on the signal returned by librosa.load, so the rate is
# derived from the loading rate instead of being a second literal that could
# drift out of sync with it.
mfcc_sample_rate = sample_rate_s
n_mfcc = 40
# NOTE(review): axis_mfcc is not referenced by any cell visible in this notebook - confirm before removing.
axis_mfcc = 1

### RAVDESS get emotion features

File naming convention

Each of the 7356 RAVDESS files has a unique filename. The filename consists of a 7-part numerical identifier (e.g., 02-01-06-01-02-01-12.mp4). These identifiers define the stimulus characteristics: 

Filename identifiers 

- Modality (01 = full-AV, 02 = video-only, 03 = audio-only).
- Vocal channel (01 = speech, 02 = song).
- Emotion (01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised).
- Emotional intensity (01 = normal, 02 = strong). NOTE: There is no strong intensity for the 'neutral' emotion.
- Statement (01 = "Kids are talking by the door", 02 = "Dogs are sitting by the door").
- Repetition (01 = 1st repetition, 02 = 2nd repetition).
- Actor (01 to 24. Odd numbered actors are male, even numbered actors are female).

Filename example: 02-01-06-01-02-01-12.mp4 
- Video-only (02)
- Speech (01)
- Fearful (06)
- Normal intensity (01)
- Statement "dogs" (02)
- 1st Repetition (01)
- 12th Actor (12)
- Female, as the actor ID number is even.

In [178]:
#Build list with target variables for each file
#The third dash-separated field of a RAVDESS file name is the emotion code;
#codes that are not in the table are labelled "unknown".
emotion_by_code = {
    '01': "neutral",
    '02': "calm",
    '03': "happy",
    '04': "sad",
    '05': "angry",
    '06': "fear",
    '07': "disgust",
    '08': "surprised",
}

feeling_list = [
    emotion_by_code.get(name.split('-')[2], "unknown")
    for name in rawdata
]

In [179]:
#Check list
print(feeling_list)

['neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral'

In [180]:
#Turn list into dataframe
labels = pd.DataFrame(feeling_list)

In [181]:
labels.shape

(2452, 1)

In [182]:
labels.head()

Unnamed: 0,0
0,neutral
1,neutral
2,neutral
3,neutral
4,neutral


In [183]:
#Rename the single label column from 0 to "emotion"
labels = labels.rename(columns={0: 'emotion'})

In [184]:
labels.head()

Unnamed: 0,emotion
0,neutral
1,neutral
2,neutral
3,neutral
4,neutral


In [185]:
#Count the number of files per emotion (one row per emotion, counts in column 0)
labels_total = labels.groupby('emotion').size().to_frame()
labels_total

Unnamed: 0_level_0,0
emotion,Unnamed: 1_level_1
angry,376
calm,376
disgust,192
fear,376
happy,376
neutral,188
sad,376
surprised,192


### RAVDESS get audio features with librosa library

In [186]:
# Build one feature vector per RAVDESS file.
# Vectors are accumulated in a plain list and the DataFrame is created once at
# the end; assigning rows through .loc grows the frame on every iteration.
# Vector layout: pitch mean, pitch range, intensity, n_mfcc MFCC means,
# n_mfcc MFCC standard deviations, chroma mean, chroma standard deviation.
ravdess_feature_rows = []

for filename in rawdata:
    # Decode with the shared loading configuration (resampler, duration,
    # target sampling rate and start offset).
    signal, sample_rate = librosa.load('data/RawData/Ravdess/' + filename,
                                       res_type=res_type_s,
                                       duration=duration_s,
                                       sr=sample_rate_s,
                                       offset=offset_s)

    features = []

    # Pitch and Pitch Range
    # NOTE(review): librosa documents piptrack output as zero-valued wherever
    # no pitch peak is found, so these statistics are taken over mostly-zero
    # bins - confirm this is the intended definition of pitch mean/range.
    pitches, _magnitudes = librosa.piptrack(y=signal, sr=sample_rate)
    features.append(np.mean(pitches))
    features.append(np.max(pitches) - np.min(pitches))

    # Intensity: mean absolute amplitude of the waveform
    features.append(np.mean(np.abs(signal)))

    # MFCCs (Mel-frequency cepstral coefficients): per-coefficient mean over
    # frames, followed by per-coefficient standard deviation over frames
    mfcc = librosa.feature.mfcc(y=signal, sr=mfcc_sample_rate, n_mfcc=n_mfcc)
    features.extend(np.mean(coefficient) for coefficient in mfcc)
    features.extend(np.std(coefficient) for coefficient in mfcc)

    # Chroma features: global mean and standard deviation
    chroma = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    features.append(np.mean(chroma))
    features.append(np.std(chroma))

    ravdess_feature_rows.append(features)

# One row per file; the single 'feature' column holds the whole vector as a list
ravdess = pd.DataFrame({'feature': ravdess_feature_rows})

In [187]:
#Verify data results
ravdess.shape

(2452, 1)

In [188]:
#Verify that there are no null values
ravdess.isnull().values.any()

False

In [189]:
# See array sample of features
ravdess

Unnamed: 0,feature
0,"[49.068134, 3996.804, 0.0019592696, -670.4301,..."
1,"[52.45128, 3996.4348, 0.002887319, -623.4058, ..."
2,"[41.690105, 3998.2512, 0.003375546, -585.484, ..."
3,"[66.95653, 3999.1055, 0.002120519, -663.4812, ..."
4,"[15.9838295, 3997.3025, 0.0012016749, -688.773..."
...,...
2447,"[65.428215, 3999.1863, 0.02469888, -387.8787, ..."
2448,"[27.232674, 3998.338, 0.020712128, -444.3181, ..."
2449,"[42.529827, 3999.453, 0.02207093, -449.08875, ..."
2450,"[48.66791, 3999.7268, 0.018891487, -438.63586,..."


In [190]:
#Turn array into dataframe
ravdess_final = pd.DataFrame(ravdess['feature'].values.tolist())

In [191]:
#Analyze new dataframe shape
ravdess_final.shape

(2452, 85)

In [192]:
# Check data sample
ravdess_final.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,75,76,77,78,79,80,81,82,83,84
0,49.068134,3996.803955,0.001959,-670.430115,65.463234,0.488222,15.117002,8.783389,1.056395,-4.240677,...,4.73979,4.150108,4.769588,3.86572,3.045947,3.879615,5.004361,2.84866,0.568589,0.303148
1,52.451279,3996.434814,0.002887,-623.405823,63.381435,-10.818053,10.145172,-5.461401,-3.885505,-6.538383,...,9.312,10.647473,10.248365,9.154744,7.05687,5.472805,5.345921,5.03735,0.457342,0.322681
2,41.690105,3998.251221,0.003376,-585.484009,66.347038,-3.750306,12.671173,4.536705,-2.493158,-14.470141,...,7.081253,8.082842,6.293633,6.731339,7.455569,8.0654,6.958014,8.449804,0.461698,0.315767
3,66.956528,3999.105469,0.002121,-663.481201,50.858315,-9.176056,11.07772,-5.359027,-5.426731,-8.111016,...,9.933121,8.101486,7.335652,6.591033,5.912118,7.441578,8.615579,9.000319,0.428384,0.350066
4,15.983829,3997.30249,0.001202,-688.773926,78.384827,7.591097,16.431046,11.740804,-0.595977,-0.485841,...,4.543953,3.562203,3.662074,3.590326,5.032635,4.081418,4.49736,3.394016,0.527983,0.299023


## Ravdess join features and target

In [193]:
#Join labels with features
ravdess_df = pd.concat([ravdess_final,labels], axis=1)

In [194]:
#Convert the row index labels to strings.
#The former column mapping {"0": "label"} matched no column (the feature columns
#are the integers 0-84 and the target column is already named 'emotion'), so it
#has been removed; the resulting frame is unchanged.
ravdess_df = ravdess_df.rename(index=str)

In [195]:
#Analyze dataframe shape
ravdess_df.shape

(2452, 86)

In [196]:
#Analyze dataframe sample
ravdess_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,76,77,78,79,80,81,82,83,84,emotion
0,49.068134,3996.803955,0.001959,-670.430115,65.463234,0.488222,15.117002,8.783389,1.056395,-4.240677,...,4.150108,4.769588,3.86572,3.045947,3.879615,5.004361,2.84866,0.568589,0.303148,neutral
1,52.451279,3996.434814,0.002887,-623.405823,63.381435,-10.818053,10.145172,-5.461401,-3.885505,-6.538383,...,10.647473,10.248365,9.154744,7.05687,5.472805,5.345921,5.03735,0.457342,0.322681,neutral
2,41.690105,3998.251221,0.003376,-585.484009,66.347038,-3.750306,12.671173,4.536705,-2.493158,-14.470141,...,8.082842,6.293633,6.731339,7.455569,8.0654,6.958014,8.449804,0.461698,0.315767,neutral
3,66.956528,3999.105469,0.002121,-663.481201,50.858315,-9.176056,11.07772,-5.359027,-5.426731,-8.111016,...,8.101486,7.335652,6.591033,5.912118,7.441578,8.615579,9.000319,0.428384,0.350066,neutral
4,15.983829,3997.30249,0.001202,-688.773926,78.384827,7.591097,16.431046,11.740804,-0.595977,-0.485841,...,3.562203,3.662074,3.590326,5.032635,4.081418,4.49736,3.394016,0.527983,0.299023,neutral


In [197]:
#Drop dataframe rows that contain NaN values
ravdess_df = ravdess_df.dropna()

In [198]:
from sklearn.utils import shuffle

#Shuffle dataframe rows.
#A fixed random_state makes the row order, and therefore the CSV saved below,
#identical on every Restart & Run All.
ravdess_df = shuffle(ravdess_df, random_state=42)
ravdess_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,76,77,78,79,80,81,82,83,84,emotion
1460,40.755844,3998.540771,0.016281,-494.695129,21.106981,-14.390797,-8.524015,-17.840214,-15.993177,-9.437501,...,15.505001,11.635184,8.597304,6.602968,6.760071,12.745721,19.217274,0.344926,0.349399,neutral
2212,87.427071,3999.430176,0.024899,-403.938202,36.262287,-41.81337,5.657478,-27.468971,-16.871235,0.037341,...,6.164628,5.792684,4.838076,4.070064,5.289231,5.589336,6.913059,0.444162,0.319737,angry
585,38.043034,3995.927002,0.011388,-484.794891,62.422585,-11.896454,14.755815,7.94448,-13.96347,-7.956229,...,9.050076,7.524532,8.427584,8.299865,8.247096,7.541253,4.993912,0.393158,0.332756,sad
1544,10.065931,3989.753174,0.005435,-577.055359,69.674881,-14.483016,14.063886,-2.686594,-3.753006,-15.049387,...,9.239218,9.124434,6.464316,9.620693,12.556333,14.276609,17.946737,0.379075,0.341756,calm
1107,54.398445,3999.153076,0.007179,-563.696594,38.974346,-10.978131,-2.156035,-5.07476,-10.062261,-9.639794,...,7.758176,6.876032,6.71654,7.187038,5.51199,4.943897,4.870331,0.382393,0.352424,disgust
1105,48.66721,3996.908691,0.004249,-590.35199,59.348392,-10.462002,5.037456,-6.530917,-2.77228,-9.637831,...,7.297721,5.453188,6.197719,5.447929,6.717255,8.904503,10.3675,0.434874,0.348797,disgust
2329,54.838623,3999.008057,0.007795,-554.447388,45.411404,-9.499786,11.634337,-3.235129,-12.162117,-13.339204,...,11.649852,8.502145,6.304353,4.620742,6.269948,9.220433,9.998088,0.307788,0.336562,fear
1332,14.212524,3996.065186,0.002984,-653.457642,54.171574,-1.864815,6.170718,-3.516242,0.336341,-9.592955,...,13.572635,9.38687,8.303733,6.11437,6.228092,5.05965,5.195548,0.513406,0.346239,surprised
400,12.56193,3996.63916,0.016377,-477.610046,47.384655,1.183854,7.481524,-2.448664,-1.256853,-4.782849,...,8.880488,7.793181,7.097712,5.661459,5.384609,5.228882,7.260744,0.442501,0.331954,happy
90,8.673202,3986.409424,0.002173,-677.506348,72.204964,11.515165,24.783993,7.019105,10.029251,-1.520974,...,3.938437,4.481961,3.874146,7.226377,5.894233,5.903655,7.98481,0.507016,0.339959,neutral


In [199]:
#Verify that there are no null values
ravdess_df.isnull().values.any()

False

In [200]:
# Check dataframe sample
ravdess_df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,76,77,78,79,80,81,82,83,84,emotion
1460,40.755844,3998.540771,0.016281,-494.695129,21.106981,-14.390797,-8.524015,-17.840214,-15.993177,-9.437501,...,15.505001,11.635184,8.597304,6.602968,6.760071,12.745721,19.217274,0.344926,0.349399,neutral
2212,87.427071,3999.430176,0.024899,-403.938202,36.262287,-41.81337,5.657478,-27.468971,-16.871235,0.037341,...,6.164628,5.792684,4.838076,4.070064,5.289231,5.589336,6.913059,0.444162,0.319737,angry
585,38.043034,3995.927002,0.011388,-484.794891,62.422585,-11.896454,14.755815,7.94448,-13.96347,-7.956229,...,9.050076,7.524532,8.427584,8.299865,8.247096,7.541253,4.993912,0.393158,0.332756,sad
1544,10.065931,3989.753174,0.005435,-577.055359,69.674881,-14.483016,14.063886,-2.686594,-3.753006,-15.049387,...,9.239218,9.124434,6.464316,9.620693,12.556333,14.276609,17.946737,0.379075,0.341756,calm
1107,54.398445,3999.153076,0.007179,-563.696594,38.974346,-10.978131,-2.156035,-5.07476,-10.062261,-9.639794,...,7.758176,6.876032,6.71654,7.187038,5.51199,4.943897,4.870331,0.382393,0.352424,disgust


In [201]:
#Analyze shape of dataframe
ravdess_df.shape

(2452, 86)

In [202]:
# see number of emotions
ravdess_df[ravdess_df.columns[-1]].nunique()

8

In [203]:
ravdess_df['emotion'].value_counts()

fear         376
angry        376
sad          376
calm         376
happy        376
surprised    192
disgust      192
neutral      188
Name: emotion, dtype: int64

In [204]:
ravdess_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,76,77,78,79,80,81,82,83,84,emotion
1460,40.755844,3998.540771,0.016281,-494.695129,21.106981,-14.390797,-8.524015,-17.840214,-15.993177,-9.437501,...,15.505001,11.635184,8.597304,6.602968,6.760071,12.745721,19.217274,0.344926,0.349399,neutral
2212,87.427071,3999.430176,0.024899,-403.938202,36.262287,-41.81337,5.657478,-27.468971,-16.871235,0.037341,...,6.164628,5.792684,4.838076,4.070064,5.289231,5.589336,6.913059,0.444162,0.319737,angry
585,38.043034,3995.927002,0.011388,-484.794891,62.422585,-11.896454,14.755815,7.94448,-13.96347,-7.956229,...,9.050076,7.524532,8.427584,8.299865,8.247096,7.541253,4.993912,0.393158,0.332756,sad
1544,10.065931,3989.753174,0.005435,-577.055359,69.674881,-14.483016,14.063886,-2.686594,-3.753006,-15.049387,...,9.239218,9.124434,6.464316,9.620693,12.556333,14.276609,17.946737,0.379075,0.341756,calm
1107,54.398445,3999.153076,0.007179,-563.696594,38.974346,-10.978131,-2.156035,-5.07476,-10.062261,-9.639794,...,7.758176,6.876032,6.71654,7.187038,5.51199,4.943897,4.870331,0.382393,0.352424,disgust


In [205]:
#Move dataframe into separate file
ravdess_df.to_csv('data/diff_ravdess_data.csv')

In [206]:
rawdata_list = os.listdir('data/RawData/tess')

In [207]:
rawdata_list

['OAF_back_angry.wav',
 'OAF_back_disgust.wav',
 'OAF_back_fear.wav',
 'OAF_back_happy.wav',
 'OAF_back_neutral.wav',
 'OAF_back_ps.wav',
 'OAF_back_sad.wav',
 'OAF_bar_angry.wav',
 'OAF_bar_disgust.wav',
 'OAF_bar_fear.wav',
 'OAF_bar_happy.wav',
 'OAF_bar_neutral.wav',
 'OAF_bar_ps.wav',
 'OAF_bar_sad.wav',
 'OAF_base_angry.wav',
 'OAF_base_disgust.wav',
 'OAF_base_fear.wav',
 'OAF_base_happy.wav',
 'OAF_base_neutral.wav',
 'OAF_base_ps.wav',
 'OAF_base_sad.wav',
 'OAF_bath_angry.wav',
 'OAF_bath_disgust.wav',
 'OAF_bath_fear.wav',
 'OAF_bath_happy.wav',
 'OAF_bath_neutral.wav',
 'OAF_bath_ps.wav',
 'OAF_bath_sad.wav',
 'OAF_bean_angry.wav',
 'OAF_bean_disgust.wav',
 'OAF_bean_fear.wav',
 'OAF_bean_happy.wav',
 'OAF_bean_neutral.wav',
 'OAF_bean_ps.wav',
 'OAF_bean_sad.wav',
 'OAF_beg_angry.wav',
 'OAF_beg_disgust.wav',
 'OAF_beg_fear.wav',
 'OAF_beg_happy.wav',
 'OAF_beg_neutral.wav',
 'OAF_beg_ps.wav',
 'OAF_beg_sad.wav',
 'OAF_bite_angry.wav',
 'OAF_bite_disgust.wav',
 'OAF_bite_f

## Read TESS Dataset

In [208]:
parent = os.getcwd()

In [209]:
parent

'C:\\Users\\arsal\\Documents\\FINAL YEAR PROJECTS\\SpeechModel'

In [140]:
# Build list of audio files
# The directory is assembled with os.path.join so it resolves on any platform.
# The trailing separator is kept because later cells concatenate file names
# directly onto this string.
raw_data_tess_path = os.path.join("data", "RawData", "tess") + os.sep

folder_list_tess = os.listdir(raw_data_tess_path)
folder_list_tess

['OAF_back_angry.wav',
 'OAF_back_disgust.wav',
 'OAF_back_fear.wav',
 'OAF_back_happy.wav',
 'OAF_back_neutral.wav',
 'OAF_back_ps.wav',
 'OAF_back_sad.wav',
 'OAF_bar_angry.wav',
 'OAF_bar_disgust.wav',
 'OAF_bar_fear.wav',
 'OAF_bar_happy.wav',
 'OAF_bar_neutral.wav',
 'OAF_bar_ps.wav',
 'OAF_bar_sad.wav',
 'OAF_base_angry.wav',
 'OAF_base_disgust.wav',
 'OAF_base_fear.wav',
 'OAF_base_happy.wav',
 'OAF_base_neutral.wav',
 'OAF_base_ps.wav',
 'OAF_base_sad.wav',
 'OAF_bath_angry.wav',
 'OAF_bath_disgust.wav',
 'OAF_bath_fear.wav',
 'OAF_bath_happy.wav',
 'OAF_bath_neutral.wav',
 'OAF_bath_ps.wav',
 'OAF_bath_sad.wav',
 'OAF_bean_angry.wav',
 'OAF_bean_disgust.wav',
 'OAF_bean_fear.wav',
 'OAF_bean_happy.wav',
 'OAF_bean_neutral.wav',
 'OAF_bean_ps.wav',
 'OAF_bean_sad.wav',
 'OAF_beg_angry.wav',
 'OAF_beg_disgust.wav',
 'OAF_beg_fear.wav',
 'OAF_beg_happy.wav',
 'OAF_beg_neutral.wav',
 'OAF_beg_ps.wav',
 'OAF_beg_sad.wav',
 'OAF_bite_angry.wav',
 'OAF_bite_disgust.wav',
 'OAF_bite_f

In [141]:
# Paths of every *.wav file directly inside the TESS folder.
# The pattern is matched against the folder itself, so the kernel's working
# directory is never changed (the previous chdir round-trip left the kernel in
# the data folder if anything failed in between). Each path is rebuilt from
# raw_data_tess_path plus the bare file name so it keeps the exact prefix used
# before, and the list is sorted because glob gives no ordering guarantee.
tess_list = sorted(
    os.path.join(raw_data_tess_path, os.path.basename(match))
    for match in glob.glob(os.path.join(raw_data_tess_path, "*.wav"))
)
#Check results
tess_list[:10]

['data\\RawData\\tess\\\\OAF_back_angry.wav',
 'data\\RawData\\tess\\\\OAF_back_disgust.wav',
 'data\\RawData\\tess\\\\OAF_back_fear.wav',
 'data\\RawData\\tess\\\\OAF_back_happy.wav',
 'data\\RawData\\tess\\\\OAF_back_neutral.wav',
 'data\\RawData\\tess\\\\OAF_back_ps.wav',
 'data\\RawData\\tess\\\\OAF_back_sad.wav',
 'data\\RawData\\tess\\\\OAF_bar_angry.wav',
 'data\\RawData\\tess\\\\OAF_bar_disgust.wav',
 'data\\RawData\\tess\\\\OAF_bar_fear.wav']

### TESS Get emotion features

In [142]:
#Build list of emotions for Tess
#File names look like 'OAF_back_angry.wav': the text after the last underscore
#(before the first dot) is the emotion tag. Tags map to the label names used
#for RAVDESS; 'ps' becomes 'surprised'. An unrecognised tag raises KeyError.

emotion_dic = {
    "angry": 'angry',
    "disgust": 'disgust',
    "fear": 'fear',
    "happy": 'happy',
    "neutral": 'neutral',
    "sad": 'sad',
    "ps": 'surprised',
}

feeling_list_tess = [
    emotion_dic[path.split('\\')[-1].split('.')[0].split('_')[-1]]
    for path in tess_list
]

In [143]:
feeling_list_tess

['angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'fear',
 'happy',
 'neutral',
 'surprised',
 'sad',
 'angry',
 'disgust',
 'f

In [144]:
#Build dataframe from array
labels_tess = pd.DataFrame(feeling_list_tess)

In [145]:
#Check results
labels_tess.head()

Unnamed: 0,0
0,angry
1,disgust
2,fear
3,happy
4,neutral


In [146]:
#Rename the single label column from 0 to "emotion"
labels_tess = labels_tess.rename(columns={0: 'emotion'})

In [147]:
#Check shape
labels_tess.shape

(2800, 1)

In [148]:
#Check results
labels_tess.head()

Unnamed: 0,emotion
0,angry
1,disgust
2,fear
3,happy
4,neutral


In [149]:
#Count the number of files per emotion (one row per emotion, counts in column 0)
labels_tess_total = labels_tess.groupby('emotion').size().to_frame()
labels_tess_total

Unnamed: 0_level_0,0
emotion,Unnamed: 1_level_1
angry,400
disgust,400
fear,400
happy,400
neutral,400
sad,400
surprised,400


### TESS Get audio features

In [151]:
# Build one feature vector per TESS file.
# Vectors are accumulated in a plain list and the DataFrame is created once at
# the end; assigning rows through .loc grows the frame on every iteration.
# Vector layout: pitch mean, pitch range, intensity, n_mfcc MFCC means,
# n_mfcc MFCC standard deviations, chroma mean, chroma standard deviation.
tess_feature_rows = []

for audio_path in tess_list:
    #Get audio features
    # Decode with the shared loading configuration (resampler, duration,
    # target sampling rate and start offset).
    signal, sample_rate = librosa.load(audio_path,
                                       res_type=res_type_s,
                                       duration=duration_s,
                                       sr=sample_rate_s,
                                       offset=offset_s)

    features = []

    # Pitch and Pitch Range
    # NOTE(review): librosa documents piptrack output as zero-valued wherever
    # no pitch peak is found, so these statistics are taken over mostly-zero
    # bins - confirm this is the intended definition of pitch mean/range.
    pitches, _magnitudes = librosa.piptrack(y=signal, sr=sample_rate)
    features.append(np.mean(pitches))
    features.append(np.max(pitches) - np.min(pitches))

    # Intensity: mean absolute amplitude of the waveform
    features.append(np.mean(np.abs(signal)))

    # MFCCs (Mel-frequency cepstral coefficients): per-coefficient mean over
    # frames, followed by per-coefficient standard deviation over frames
    mfcc = librosa.feature.mfcc(y=signal, sr=mfcc_sample_rate, n_mfcc=n_mfcc)
    features.extend(np.mean(coefficient) for coefficient in mfcc)
    features.extend(np.std(coefficient) for coefficient in mfcc)

    # Chroma features: global mean and standard deviation
    chroma = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    features.append(np.mean(chroma))
    features.append(np.std(chroma))

    tess_feature_rows.append(features)

# One row per file; the single 'feature' column holds the whole vector as a list
rawdata_tess = pd.DataFrame({'feature': tess_feature_rows})

In [152]:
#Verify Tess features shape
rawdata_tess.shape

(2800, 1)

In [153]:
#Check that there are no nan values
rawdata_tess.isnull().values.any()

False

In [154]:
#Get sample data
rawdata_tess.head()

Unnamed: 0,feature
0,"[10.062884, 2700.9187, 0.03264154, -396.60403,..."
1,"[15.867102, 2612.6333, 0.009034442, -491.92722..."
2,"[5.3227735, 2354.3735, 0.022158107, -447.49503..."
3,"[10.871197, 2816.5632, 0.011239446, -494.20566..."
4,"[5.2253914, 2614.5654, 0.0080889175, -532.5486..."


In [155]:
#Build list
rawdata_tess_final = pd.DataFrame(rawdata_tess['feature'].values.tolist())

In [156]:
#Check dataframe
rawdata_tess_final

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,75,76,77,78,79,80,81,82,83,84
0,10.062884,2700.918701,0.032642,-396.604034,78.255562,-19.569338,-21.489851,-2.525299,10.445654,-21.044718,...,7.858108,11.119743,10.812220,10.045583,8.272487,8.852215,5.614707,10.299593,0.301287,0.330004
1,15.867102,2612.633301,0.009034,-491.927216,120.232536,9.670667,-7.059868,16.235134,3.580734,-5.178472,...,9.534755,8.984271,6.148209,11.074634,8.206832,8.656436,10.747535,14.910752,0.320630,0.339395
2,5.322773,2354.373535,0.022158,-447.495026,91.554199,-0.390231,-17.291504,2.782623,4.717841,-13.548041,...,6.780420,6.550632,6.684436,5.101778,9.185796,7.942301,9.709521,8.008375,0.286010,0.337941
3,10.871197,2816.563232,0.011239,-494.205658,108.418320,12.990735,-13.772038,8.819846,12.204424,-9.161975,...,19.253401,15.252623,16.195330,11.147857,12.416432,11.534579,14.863925,13.977903,0.296864,0.330037
4,5.225391,2614.565430,0.008089,-532.548645,108.744789,13.694969,-4.846498,16.790934,13.777178,-7.775505,...,7.906692,9.549625,8.805034,14.976290,20.167164,19.595228,17.330147,15.090613,0.264286,0.344044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,8.618461,3987.424561,0.037362,-348.233398,35.602421,-4.365129,15.534871,6.135697,0.063725,-4.578150,...,10.840261,10.172109,13.005855,9.719935,6.564490,7.937893,13.299180,13.587028,0.284101,0.343324
2796,14.121445,3996.757324,0.052072,-346.212616,37.196987,-12.763891,36.840496,-3.720520,-0.910189,-11.683501,...,20.022799,17.421070,12.904879,10.620160,11.383688,10.656541,8.089457,7.006220,0.289706,0.359303
2797,6.569857,3975.404541,0.019707,-420.886383,80.779999,5.512202,33.144695,1.982193,-3.874799,0.712508,...,13.048112,18.396620,21.589054,15.731396,10.418027,9.548436,8.023066,8.207495,0.289175,0.343892
2798,6.375298,3986.336914,0.021445,-373.980774,61.241165,-4.412523,14.750440,-15.815634,3.444529,-7.860944,...,15.642973,15.663296,13.617710,11.625752,10.183398,9.303178,6.482743,6.609823,0.308730,0.351475


## Join TESS features with targets

In [157]:
#Concat both feature table and target table
newdf_tess = pd.concat([rawdata_tess_final,labels_tess], axis=1)
newdf_tess

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,76,77,78,79,80,81,82,83,84,emotion
0,10.062884,2700.918701,0.032642,-396.604034,78.255562,-19.569338,-21.489851,-2.525299,10.445654,-21.044718,...,11.119743,10.812220,10.045583,8.272487,8.852215,5.614707,10.299593,0.301287,0.330004,angry
1,15.867102,2612.633301,0.009034,-491.927216,120.232536,9.670667,-7.059868,16.235134,3.580734,-5.178472,...,8.984271,6.148209,11.074634,8.206832,8.656436,10.747535,14.910752,0.320630,0.339395,disgust
2,5.322773,2354.373535,0.022158,-447.495026,91.554199,-0.390231,-17.291504,2.782623,4.717841,-13.548041,...,6.550632,6.684436,5.101778,9.185796,7.942301,9.709521,8.008375,0.286010,0.337941,fear
3,10.871197,2816.563232,0.011239,-494.205658,108.418320,12.990735,-13.772038,8.819846,12.204424,-9.161975,...,15.252623,16.195330,11.147857,12.416432,11.534579,14.863925,13.977903,0.296864,0.330037,happy
4,5.225391,2614.565430,0.008089,-532.548645,108.744789,13.694969,-4.846498,16.790934,13.777178,-7.775505,...,9.549625,8.805034,14.976290,20.167164,19.595228,17.330147,15.090613,0.264286,0.344044,neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,8.618461,3987.424561,0.037362,-348.233398,35.602421,-4.365129,15.534871,6.135697,0.063725,-4.578150,...,10.172109,13.005855,9.719935,6.564490,7.937893,13.299180,13.587028,0.284101,0.343324,fear
2796,14.121445,3996.757324,0.052072,-346.212616,37.196987,-12.763891,36.840496,-3.720520,-0.910189,-11.683501,...,17.421070,12.904879,10.620160,11.383688,10.656541,8.089457,7.006220,0.289706,0.359303,happy
2797,6.569857,3975.404541,0.019707,-420.886383,80.779999,5.512202,33.144695,1.982193,-3.874799,0.712508,...,18.396620,21.589054,15.731396,10.418027,9.548436,8.023066,8.207495,0.289175,0.343892,neutral
2798,6.375298,3986.336914,0.021445,-373.980774,61.241165,-4.412523,14.750440,-15.815634,3.444529,-7.860944,...,15.663296,13.617710,11.625752,10.183398,9.303178,6.482743,6.609823,0.308730,0.351475,surprised


In [158]:
#Convert the row index labels to strings.
#The former column mapping {"0": "label"} matched no column (the feature columns
#are the integers 0-84 and the target column is already named 'emotion'), so it
#has been removed; the resulting frame is unchanged.
newdf_tess = newdf_tess.rename(index=str)

In [159]:
#Verify table shape
newdf_tess.shape

(2800, 86)

In [160]:
#Get dataframe sample data
newdf_tess.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,76,77,78,79,80,81,82,83,84,emotion
0,10.062884,2700.918701,0.032642,-396.604034,78.255562,-19.569338,-21.489851,-2.525299,10.445654,-21.044718,...,11.119743,10.81222,10.045583,8.272487,8.852215,5.614707,10.299593,0.301287,0.330004,angry
1,15.867102,2612.633301,0.009034,-491.927216,120.232536,9.670667,-7.059868,16.235134,3.580734,-5.178472,...,8.984271,6.148209,11.074634,8.206832,8.656436,10.747535,14.910752,0.32063,0.339395,disgust
2,5.322773,2354.373535,0.022158,-447.495026,91.554199,-0.390231,-17.291504,2.782623,4.717841,-13.548041,...,6.550632,6.684436,5.101778,9.185796,7.942301,9.709521,8.008375,0.28601,0.337941,fear
3,10.871197,2816.563232,0.011239,-494.205658,108.41832,12.990735,-13.772038,8.819846,12.204424,-9.161975,...,15.252623,16.19533,11.147857,12.416432,11.534579,14.863925,13.977903,0.296864,0.330037,happy
4,5.225391,2614.56543,0.008089,-532.548645,108.744789,13.694969,-4.846498,16.790934,13.777178,-7.775505,...,9.549625,8.805034,14.97629,20.167164,19.595228,17.330147,15.090613,0.264286,0.344044,neutral


In [161]:
#Drop rows that contain NaN values, then confirm the remaining shape
newdf_tess = newdf_tess.dropna()
newdf_tess.shape

(2800, 86)

In [162]:
#Shuffle rows.
#A fixed random_state makes the row order, and therefore the CSV saved below,
#identical on every Restart & Run All.
newdf_tess = shuffle(newdf_tess, random_state=42)
newdf_tess.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,76,77,78,79,80,81,82,83,84,emotion
2138,4.389536,3944.551758,0.057123,-339.634766,64.181877,-17.939482,26.457355,7.6764,-3.768758,-12.599228,...,16.362474,12.414372,12.234034,9.946301,12.549728,11.617042,6.878532,0.173467,0.309926,happy
1015,18.39558,3999.380371,0.029142,-368.873657,65.829399,-7.490242,-10.531256,-20.769943,4.158392,-25.107798,...,11.930583,13.701681,12.388184,11.702263,10.962331,8.676499,10.368308,0.280505,0.330424,angry
240,2.469429,3732.161621,0.03188,-414.249725,83.013596,7.881763,-20.455198,-10.560904,3.968034,-5.785289,...,8.387837,9.964905,5.838369,11.433331,5.774076,5.679412,10.811961,0.220907,0.325948,fear
1650,10.022429,3988.878662,0.022184,-371.3237,99.342133,7.054325,-17.336226,-40.062748,1.568912,-3.301582,...,14.158523,17.398338,16.263157,14.735069,12.050506,10.845682,9.008085,0.316777,0.323382,surprised
1995,10.302295,3993.914551,0.033623,-335.81546,61.750175,-5.037285,21.75811,-29.805979,-2.06541,-3.750561,...,17.960112,18.853142,14.116278,13.875124,10.729192,12.391649,16.265953,0.25079,0.317289,angry
1662,19.147631,3990.072998,0.06901,-277.307434,62.181252,-27.49947,14.996569,-16.18082,-12.212072,-6.022898,...,17.337158,15.690388,9.900699,8.949929,12.345491,9.6786,8.058136,0.23996,0.320515,happy
2216,15.065917,3999.382568,0.014478,-443.493652,74.1036,2.150172,42.248398,3.153545,-5.967714,-2.773653,...,8.453822,8.4259,10.978436,16.255165,19.488625,16.623407,9.204816,0.284304,0.333647,neutral
2249,42.377419,3998.285645,0.028614,-345.497284,35.544926,-12.675355,2.405415,-9.869371,11.07544,-7.526254,...,8.94276,6.692133,5.412963,8.068698,9.122081,6.837036,12.077929,0.383079,0.340098,fear
607,4.009905,2825.45874,0.015029,-459.788635,99.622353,14.145724,-3.657416,5.03071,0.930784,-2.375072,...,17.29203,17.93668,13.578833,15.826643,18.259428,19.182848,16.935219,0.330448,0.357068,surprised
50,3.564288,3925.186768,0.016402,-488.093323,110.24662,42.144588,6.774078,2.373889,2.147609,-4.454261,...,12.209483,11.224138,12.176056,10.224042,14.58921,13.619342,17.350914,0.272247,0.328873,disgust


In [163]:
#Verify there are no nan values
newdf_tess.isnull().values.any()

False

In [210]:
#Check shape
newdf_tess.shape

(2800, 86)

In [211]:
# See number of emotions
newdf_tess[newdf_tess.columns[-1]].nunique()

7

In [212]:
newdf_tess['emotion'].value_counts()

surprised    400
disgust      400
neutral      400
fear         400
angry        400
sad          400
happy        400
Name: emotion, dtype: int64

In [213]:
#Move dataframe into separate file
newdf_tess.to_csv('data/diff_tess_data.csv')

# Join RAVDESS + TESS dataframes

In [214]:
ravdess_df.columns

Index([        0,         1,         2,         3,         4,         5,
               6,         7,         8,         9,        10,        11,
              12,        13,        14,        15,        16,        17,
              18,        19,        20,        21,        22,        23,
              24,        25,        26,        27,        28,        29,
              30,        31,        32,        33,        34,        35,
              36,        37,        38,        39,        40,        41,
              42,        43,        44,        45,        46,        47,
              48,        49,        50,        51,        52,        53,
              54,        55,        56,        57,        58,        59,
              60,        61,        62,        63,        64,        65,
              66,        67,        68,        69,        70,        71,
              72,        73,        74,        75,        76,        77,
              78,        79,        80,        81, 

In [215]:
newdf_tess.columns

Index([        0,         1,         2,         3,         4,         5,
               6,         7,         8,         9,        10,        11,
              12,        13,        14,        15,        16,        17,
              18,        19,        20,        21,        22,        23,
              24,        25,        26,        27,        28,        29,
              30,        31,        32,        33,        34,        35,
              36,        37,        38,        39,        40,        41,
              42,        43,        44,        45,        46,        47,
              48,        49,        50,        51,        52,        53,
              54,        55,        56,        57,        58,        59,
              60,        61,        62,        63,        64,        65,
              66,        67,        68,        69,        70,        71,
              72,        73,        74,        75,        76,        77,
              78,        79,        80,        81, 

In [216]:
# Stack the RAVDESS rows on top of the TESS rows and renumber the index from 0
final_data = pd.concat([ravdess_df, newdf_tess], ignore_index=True)
final_data.shape

(5252, 86)

In [217]:
#Check new and final dataframe
final_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,76,77,78,79,80,81,82,83,84,emotion
0,40.755844,3998.540771,0.016281,-494.695129,21.106981,-14.390797,-8.524015,-17.840214,-15.993177,-9.437501,...,15.505001,11.635184,8.597304,6.602968,6.760071,12.745721,19.217274,0.344926,0.349399,neutral
1,87.427071,3999.430176,0.024899,-403.938202,36.262287,-41.813370,5.657478,-27.468971,-16.871235,0.037341,...,6.164628,5.792684,4.838076,4.070064,5.289231,5.589336,6.913059,0.444162,0.319737,angry
2,38.043034,3995.927002,0.011388,-484.794891,62.422585,-11.896454,14.755815,7.944480,-13.963470,-7.956229,...,9.050076,7.524532,8.427584,8.299865,8.247096,7.541253,4.993912,0.393158,0.332756,sad
3,10.065931,3989.753174,0.005435,-577.055359,69.674881,-14.483016,14.063886,-2.686594,-3.753006,-15.049387,...,9.239218,9.124434,6.464316,9.620693,12.556333,14.276609,17.946737,0.379075,0.341756,calm
4,54.398445,3999.153076,0.007179,-563.696594,38.974346,-10.978131,-2.156035,-5.074760,-10.062261,-9.639794,...,7.758176,6.876032,6.716540,7.187038,5.511990,4.943897,4.870331,0.382393,0.352424,disgust
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5247,3.279994,3886.642578,0.008712,-523.227356,98.282227,24.993261,1.426068,4.992365,5.870661,-4.103897,...,10.838665,9.931822,13.493686,18.375092,18.494427,19.303026,15.102575,0.249018,0.337536,neutral
5248,6.157344,3993.888916,0.028506,-374.418457,72.787247,11.771193,19.231989,2.808780,1.326450,-1.996125,...,7.711277,7.392266,11.471171,15.474387,19.060448,18.629265,18.505571,0.270754,0.318052,neutral
5249,2.153737,3542.910645,0.010828,-506.534454,110.276695,32.196453,39.890236,11.506146,0.194724,-2.213565,...,8.423018,9.692572,10.315808,11.501614,9.799644,7.944196,5.697847,0.242058,0.335333,neutral
5250,2.775037,1703.064819,0.015334,-483.341156,119.828064,30.630983,-8.823859,4.329844,-0.235005,-2.335994,...,16.352425,13.966016,11.487831,13.101591,15.659996,16.100912,12.642600,0.265564,0.333570,happy


In [218]:
#Move dataframe into separate file
final_data.to_csv('data/diff_final_data.csv')

# That's All for Data Preparation