In [2]:
import pandas as pd, numpy as np
import librosa, os

## Create initial data frame
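**Store each file's path, its filename, and the emotion code parsed from the filename (used as the label); keep only the neutral, happy, and sad clips.**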

In [3]:
#create full filepaths
#os.path.join keeps the dataset root portable; on Windows it yields the same backslash path as before
d = os.path.join("..", "SER_IGP_Project", "RAVDESS", "audio_speech_actors_01-24")


def list_wav_files(root):
    """Return the full path of every .wav file found under ``root``.

    Directories and filenames are visited in sorted order so the result
    (and therefore the row order of every table built from it) does not
    depend on the platform's directory listing order. Files without a
    ``.wav`` extension are skipped. A missing ``root`` yields an empty list.
    """
    found = []
    for folder, subdirs, names in os.walk(root):
        subdirs.sort()  #in-place sort makes os.walk descend in a stable order
        found.extend(
            os.path.join(folder, name)
            for name in sorted(names)
            if name.lower().endswith('.wav')
        )
    return found


def build_file_index(paths, emotions=('01', '03', '04')):
    """Build the path / file / label table for the given audio paths.

    Parameters
    ----------
    paths : iterable of str
        Full paths of the audio files.
    emotions : iterable of str
        Emotion codes to keep; the default keeps neutral ('01'),
        happy ('03') and sad ('04').

    Returns
    -------
    pandas.DataFrame
        Columns ``path``, ``file`` (the basename) and ``label`` (third
        dash-separated field of the filename), restricted to ``emotions``
        and re-indexed from 0.
    """
    paths = list(paths)
    files = [os.path.basename(p) for p in paths]  #no fixed filename length assumed
    labels = []
    for name in files:
        fields = name.split('-')
        #a filename without an emotion field gets no label and is filtered out below
        labels.append(fields[2] if len(fields) > 2 else None)
    index = pd.DataFrame({'path': paths, 'file': files, 'label': labels})
    return index.loc[index['label'].isin(list(emotions))].reset_index(drop=True)


filepaths = list_wav_files(d)

#filenames and labels
df = build_file_index(filepaths) #filter neutral, happy, sad

## Raw MFCC data
**Extract the raw MFCCs of every clip, flatten them into one row per file, and write the resulting table to CSV.**

In [6]:
#extract raw mfcc data
def stack_ragged_rows(rows, prefix='mfcc_'):
    """Stack variable-length feature arrays into one zero-padded DataFrame.

    Each array in ``rows`` is flattened (row-major) into a single vector;
    shorter vectors are right-padded with 0 up to the longest one. Columns
    are named ``prefix + '1'``, ``prefix + '2'``, ... An empty ``rows``
    returns an empty DataFrame.

    The table is assembled in one allocation instead of growing a
    DataFrame with ``pd.concat`` on every iteration, which is quadratic.
    """
    flat = [np.asarray(r).reshape(-1) for r in rows]
    if not flat:
        return pd.DataFrame()
    width = max(v.size for v in flat)
    #keep the input float precision; anything non-float is promoted to float
    dtype = np.result_type(np.float32, *[v.dtype for v in flat])
    table = np.zeros((len(flat), width), dtype=dtype)
    for i, v in enumerate(flat):
        table[i, :v.size] = v
    #auto-increment column names
    names = [prefix + str(k + 1) for k in range(width)]
    return pd.DataFrame(table, columns=names)


raw_rows = []
for x in df['path']:
    y, sr = librosa.load(x) #set file and  sample rate
    raw_rows.append(librosa.feature.mfcc(y=y, sr=sr)) #extract mfcc

#NOTE(review): each MFCC matrix is flattened before padding, so for clips of
#different length the same column does not address the same position of the
#matrix in every row (librosa documents the result as (n_mfcc, frames) - confirm).
#Padding along the frame axis before flattening would align them; the layout is
#left unchanged here so the CSV keeps its existing column meaning.
df1 = stack_ragged_rows(raw_rows)

#concatenate label + all vectors to one df
vectordf = pd.concat([df['label'], df1], axis=1)

#replace NaN with 0
vectordf = vectordf.fillna(0)

In [10]:
#write to CSV
#Rebuild the raw table from df + df1 rather than reading the shared `vectordf`
#name: the mean-MFCC cell reassigns `vectordf`, and the recorded execution
#counts show this cell was run after it, which would write the mean table here.
pd.concat([df['label'], df1], axis=1).fillna(0).to_csv('raw_mfcc.csv',index=False)

In [7]:
#display the label + feature table currently bound to `vectordf` (pandas truncates the rendering)
vectordf

Unnamed: 0,label,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,...,mfcc_4091,mfcc_4092,mfcc_4093,mfcc_4094,mfcc_4095,mfcc_4096,mfcc_4097,mfcc_4098,mfcc_4099,mfcc_4100
0,01,-857.309448,-857.309448,-857.309448,-857.309448,-857.309448,-857.309448,-857.309448,-857.165649,-857.309448,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,01,-864.890259,-864.890259,-864.890259,-864.890259,-861.939392,-860.492126,-863.877686,-862.146851,-860.825317,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,01,-849.792480,-847.553406,-844.977417,-848.275757,-847.952209,-847.026367,-849.022888,-848.187866,-851.179871,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,01,-838.594116,-837.025696,-834.580566,-827.005920,-830.058838,-831.195984,-839.331665,-838.307556,-834.128906,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,03,-830.168213,-830.081482,-829.513245,-830.168213,-830.168213,-830.168213,-830.168213,-830.168213,-830.168213,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,04,-869.806824,-869.806824,-869.806824,-869.806824,-869.806824,-869.806824,-869.806824,-869.806824,-869.806824,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
476,04,-714.589478,-714.589478,-714.589478,-714.589478,-714.589478,-714.589478,-714.589478,-711.529785,-705.080750,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
477,04,-748.102051,-748.102051,-748.102051,-748.102051,-748.102051,-748.102051,-748.102051,-748.102051,-748.102051,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
478,04,-789.986755,-789.986755,-789.986755,-789.986755,-789.986755,-789.589172,-784.815491,-778.694397,-775.755493,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Mean MFCC data
**Average each MFCC coefficient over time to get one fixed-length row per file, and write the resulting table to CSV.**

In [8]:
#extract mean mfcc data
#Collect one averaged vector per file and build the frame once; growing a
#DataFrame with pd.concat inside the loop copies all previous rows every time.
mean_rows = []
for x in df['path']:
    y, sr = librosa.load(x) #set file and  sample rate
    #transpose, then average over axis 0: one mean value per MFCC coefficient
    mean_rows.append(np.mean(librosa.feature.mfcc(y=y, sr=sr).T, axis=0))

#np.vstack keeps the float dtype of the inputs; guard the no-files case, where it would raise
if mean_rows:
    df2 = pd.DataFrame(np.vstack(mean_rows))
else:
    df2 = pd.DataFrame()

#auto-increment column names
#(assigned directly: set_axis(..., inplace=True) no longer exists in pandas 2.x)
df2.columns = ['mfcc_' + str(k + 1) for k in range(len(df2.columns))]

#concatenate label + all vectors to one df
vectordf = pd.concat([df['label'], df2], axis=1)

#replace NaN with 0
vectordf = vectordf.fillna(0)

In [11]:
#save the mean-MFCC table (label + averaged coefficients) without the row index
vectordf.to_csv(
    'mean_mfcc.csv',
    index=False,
)

In [9]:
#display the label + feature table currently bound to `vectordf` (pandas truncates the rendering)
vectordf

Unnamed: 0,label,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,...,mfcc_11,mfcc_12,mfcc_13,mfcc_14,mfcc_15,mfcc_16,mfcc_17,mfcc_18,mfcc_19,mfcc_20
0,01,-697.984192,55.228489,0.323863,12.775377,7.396148,0.866224,-3.550276,-2.828331,-11.305533,...,0.496104,-2.722926,1.645800,-3.431964,0.140357,1.073160,-4.355672,-1.583771,-1.593009,-0.659361
1,01,-693.069702,55.734566,-1.919739,16.408995,8.449355,0.220736,-1.738342,-4.931291,-11.981820,...,0.494107,-2.296898,0.678700,-2.939321,-0.450768,0.734229,-4.824006,-0.677381,-2.233056,0.827009
2,01,-691.770508,58.350018,-0.166346,13.950068,5.050162,1.484918,-2.403114,-5.065946,-10.645894,...,0.255538,-2.538125,1.539981,-3.140177,-2.291840,1.174088,-5.226076,-2.830360,-0.170291,0.329760
3,01,-685.284668,56.200539,2.462238,13.572750,6.669916,3.299325,-1.901864,-6.648471,-10.658185,...,0.874369,-1.201197,0.259234,-2.465910,-1.450006,2.123519,-4.646614,-1.757404,-0.356890,-0.364099
4,03,-646.131653,63.434139,-4.706915,12.905929,3.924454,-2.587290,-5.984715,-6.313463,-11.839384,...,-1.590220,-6.508152,0.893351,-4.388482,-2.692124,2.438276,-8.272143,-2.603855,-2.699693,-2.328228
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,04,-691.838379,51.025204,1.528606,7.234895,-7.146982,-2.796479,-10.606523,-6.303048,-8.571804,...,-5.746953,-3.333542,-4.633162,-3.792846,-0.687691,-5.407513,-3.394704,-2.586328,-5.807498,-4.718655
476,04,-566.070190,38.573711,-21.961164,0.774209,-11.924103,-16.088345,-13.564362,-18.070082,-14.756430,...,-10.749922,-5.929782,3.348024,-7.870169,-0.924233,-2.566499,-5.875726,4.050747,2.000071,4.271482
477,04,-578.961243,50.822678,-12.166848,3.364594,-10.721197,-13.571438,-5.704255,-13.109696,-10.658675,...,-8.932396,-1.846142,0.661773,-6.163666,2.208789,-4.292562,-2.556764,1.581657,-2.721096,2.370033
478,04,-576.196533,51.361618,-5.730363,3.922425,-6.612978,-10.168428,-17.951244,-18.211090,-15.969697,...,-16.820774,-4.536894,-0.192583,-8.280168,-1.331849,-7.257451,-6.240160,0.133657,-5.337532,-0.754052
