In [1]:
import warnings
warnings.filterwarnings('ignore')

import IPython.display as ipd
import librosa
import librosa.display
import pandas as pd
import numpy as np
import os

In [None]:
# Mount Google Drive into the Colab runtime so the audio files are reachable
# under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Root folder of the audio dataset. The trailing '/' is required: later cells
# build paths by plain string concatenation (dataset + folder name).
dataset = '/content/drive/MyDrive/Dataset/audio_speech_actors_01-24/'

In [None]:
# Names of the entries directly under the dataset root (one folder per actor),
# sorted so that later iteration over them happens in a stable order.
actor_folders = sorted(os.listdir(dataset))
actor_folders

In [17]:
# Build one metadata row per audio file.
#
# File names look like '03-01-01-01-01-01-01.wav': seven dash-separated numeric
# fields. Only two of them are used here:
#   * field index 2 -> emotion code (mapped to a label via EMOTION_LABELS)
#   * field index 6 -> actor ID (even IDs are female, odd IDs are male)

# Emotion code -> human-readable label.
EMOTION_LABELS = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad',
                  5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprise'}

emotions = []   # integer emotion code per file
gender = []     # 'male' / 'female' per file
actor_ID = []   # integer actor ID per file
file_path = []  # full path per file

for i in actor_folders:  # iterate over Actor folders
    # NOTE: os.listdir returns names in arbitrary order, so the row order
    # inside one actor's folder is not guaranteed to be stable across runs.
    for f in os.listdir(dataset + i):  # files in the corresponding Actor's folder
        # Skip anything that is not an audio clip (hidden/system files would
        # otherwise crash the int() parsing below).
        if not f.lower().endswith('.wav'):
            continue

        # '03-01-01-01-01-01-01.wav' -> ['03', '01', '01', '01', '01', '01', '01']
        part = f.split('.')[0].split('-')
        actor = int(part[6])

        emotions.append(int(part[2]))
        actor_ID.append(actor)
        gender.append("female" if actor % 2 == 0 else "male")
        file_path.append(dataset + i + '/' + f)

# Assemble the dataframe in one step. Unknown emotion codes are kept as-is
# rather than dropped, so unexpected values stay visible.
audio_df = pd.DataFrame({
    'Gender': gender,
    'Emotion': [EMOTION_LABELS.get(code, code) for code in emotions],
    'Actor_ID': actor_ID,
    'Path': file_path,
})
audio_df

Unnamed: 0,Gender,Emotion,Actor_ID,Path
0,male,angry,1,/content/drive/MyDrive/Dataset/audio_speech_ac...
1,male,calm,1,/content/drive/MyDrive/Dataset/audio_speech_ac...
2,male,sad,1,/content/drive/MyDrive/Dataset/audio_speech_ac...
3,male,fear,1,/content/drive/MyDrive/Dataset/audio_speech_ac...
4,male,sad,1,/content/drive/MyDrive/Dataset/audio_speech_ac...
...,...,...,...,...
1435,female,surprise,24,/content/drive/MyDrive/Dataset/audio_speech_ac...
1436,female,surprise,24,/content/drive/MyDrive/Dataset/audio_speech_ac...
1437,female,disgust,24,/content/drive/MyDrive/Dataset/audio_speech_ac...
1438,female,disgust,24,/content/drive/MyDrive/Dataset/audio_speech_ac...


In [35]:
def extract_features(data, sample_rate=22050):
    """Summarise an audio signal as a single flat feature vector.

    Four librosa features are computed frame by frame, each is averaged over
    the time axis, and the averages are concatenated in this order:
    zero-crossing rate, chroma STFT, MFCC, mel spectrogram.

    Parameters
    ----------
    data : np.ndarray
        Audio time series, as returned by ``librosa.load``.
    sample_rate : int, optional
        Sampling rate of ``data``. Defaults to 22050, which is the rate
        ``librosa.load`` resamples to when no ``sr`` is given. Taking it as an
        argument removes the dependency on a module-level ``sample_rate``
        variable having been defined by some other cell.

    Returns
    -------
    np.ndarray
        1-D float array of the concatenated per-feature means.
    """
    # ZCR
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)

    # Chroma_stft (computed from the magnitude spectrogram)
    stft = np.abs(librosa.stft(data))
    chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)

    # MFCC
    mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0)

    # MelSpectogram
    mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0)

    # The leading empty float array keeps the result dtype the same as when
    # the vector was grown from np.array([]) one feature at a time.
    return np.hstack((np.array([]), zcr, chroma_stft, mfcc, mel))

def get_features(path):
    """Load one audio file and return its feature vector.

    Parameters
    ----------
    path : str
        Path of the audio file to load.

    Returns
    -------
    np.ndarray
        Feature vector produced by ``extract_features`` for the loaded clip.
    """
    # Skip the first 0.6 s and read at most 2.5 s, to trim the silence at the
    # start and the end of each recording.
    data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)

    # without augmentation: pass the rate the clip was actually loaded at
    return extract_features(data, sample_rate)

In [37]:
# Load a single reference clip with librosa's default settings; this leaves
# `data` and `sample_rate` defined at module level.
path = audio_df['Path'].iloc[1]
data, sample_rate = librosa.load(path)

# Extract one feature vector per audio file. X (features) and Y (emotion
# labels) are filled in lockstep, so they stay index-aligned.
X = []
Y = []
for path, emotion in zip(audio_df['Path'], audio_df['Emotion']):
    feature = get_features(path)
    X.append(feature)
    Y.append(emotion)

In [38]:
# Preview only the first feature vectors; X holds one array per audio file and
# displaying the whole list floods the notebook output.
X[:2]

[array([ 2.72266529e-01,  6.89450800e-01,  7.08028257e-01,  6.66472554e-01,
         7.15468287e-01,  6.94820106e-01,  6.27660632e-01,  6.32560372e-01,
         6.87715113e-01,  7.12156713e-01,  7.06115842e-01,  6.96561396e-01,
         6.66424036e-01, -5.05009247e+02,  6.40000992e+01, -2.74965978e+00,
         1.69503708e+01, -1.08946717e+00, -2.04643250e+00, -7.82998085e+00,
        -8.71675110e+00, -1.92733173e+01, -5.29409122e+00, -5.58445549e+00,
        -5.78362846e+00, -1.87099063e+00, -7.14663792e+00, -3.67526293e+00,
        -4.51763481e-01, -1.12534103e+01, -3.52127671e+00, -3.48284173e+00,
        -5.80235529e+00,  3.61985149e-05,  1.72553962e-04,  1.30933290e-03,
         1.80951525e-02,  1.34510517e-01,  2.29614004e-01,  1.27857670e-01,
         1.72488794e-01,  1.52339548e-01,  1.39209196e-01,  1.38478443e-01,
         1.31716475e-01,  9.31021050e-02,  9.75392684e-02,  2.89877594e-01,
         4.89141107e-01,  6.45108044e-01,  3.02384138e-01,  1.99851289e-01,
         1.3

In [39]:
# One row per audio file: the feature columns are numbered 0..N-1 and the
# emotion label is appended as the last column, 'labels'.
Features = pd.DataFrame(X).assign(labels=Y)

# Persist the extracted features (without the index column), then show the
# first few rows.
Features.to_csv('features.csv', index=False)
Features.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,labels
0,0.272267,0.689451,0.708028,0.666473,0.715468,0.69482,0.627661,0.63256,0.687715,0.712157,0.706116,0.696561,0.666424,-505.009247,64.000099,-2.74966,16.950371,-1.089467,-2.046432,-7.829981,-8.716751,-19.273317,-5.294091,-5.584455,-5.783628,-1.870991,-7.146638,-3.675263,-0.451763,-11.25341,-3.521277,-3.482842,-5.802355,3.6e-05,0.000173,0.001309,0.018095,0.134511,0.229614,0.127858,...,0.000877,0.001587,0.00146,0.00196,0.001981,0.002306,0.001958,0.002031,0.001465,0.001945,0.003857,0.003231,0.002353,0.003053,0.002795,0.002131,0.001839,0.001425,0.000817,0.000714,0.000658,0.001076,0.001073,0.001062,0.000639,0.000917,0.001054,0.001587,0.001744,0.001006,0.000687,0.000502,0.000372,0.000197,0.000137,0.000288,0.000349,0.000143,1.498768e-05,angry
1,0.262035,0.603476,0.668302,0.692199,0.709884,0.658301,0.605176,0.609343,0.640842,0.689348,0.702884,0.687124,0.663653,-626.262817,93.897247,-0.691273,17.833763,9.502007,2.030928,-2.721135,-8.514406,-12.427499,-6.575863,-0.015912,-2.750585,0.777975,-5.365466,-0.337154,1.482861,-8.703282,-2.764846,-1.618086,-1.523441,6e-06,0.000112,0.008725,0.090577,0.060794,0.002684,0.004591,...,4.7e-05,3.8e-05,5.5e-05,9.8e-05,0.000262,0.000406,0.000398,0.000671,0.00065,0.000318,0.000125,0.000135,0.000105,9.3e-05,0.000109,0.000126,0.000105,5.3e-05,5.1e-05,3.4e-05,2.7e-05,2.1e-05,1.8e-05,4.5e-05,4.4e-05,2e-05,1e-05,4e-06,4e-06,4e-06,9e-06,1.2e-05,2.8e-05,3.4e-05,3.6e-05,3.5e-05,3.2e-05,1.1e-05,8.432723e-07,calm
2,0.195466,0.628032,0.687169,0.651985,0.621273,0.604192,0.640623,0.626136,0.65243,0.685134,0.653014,0.649654,0.6324,-535.881226,82.281357,-9.010551,20.842283,5.421832,-3.754339,-10.541499,-13.465772,-27.917681,-6.894572,-3.809465,-10.429282,0.157545,-7.953777,-6.011678,2.456674,-10.448029,-6.485257,-4.68783,-3.553447,2e-06,2.4e-05,0.00049,0.016251,0.11055,0.186236,0.199403,...,9.9e-05,0.000113,0.000261,0.000204,0.000287,0.00027,0.000436,0.000757,0.000782,0.000723,0.000844,0.000456,0.000389,0.000483,0.000514,0.000573,0.000368,0.000192,0.00017,0.000132,8.4e-05,5.5e-05,5.7e-05,7.6e-05,6.2e-05,6.1e-05,5.1e-05,4.6e-05,5.2e-05,7.6e-05,8.6e-05,0.000107,6.1e-05,5.2e-05,5.9e-05,9.5e-05,9e-05,3.1e-05,2.326331e-06,sad
3,0.173769,0.720864,0.685492,0.655122,0.652557,0.587786,0.550012,0.63817,0.707171,0.648498,0.604207,0.638241,0.707306,-526.520569,84.466164,-6.822329,22.75692,8.021371,-0.83671,-6.375116,-13.950517,-15.801805,-1.701238,-3.240356,-2.12092,-1.001574,-5.576652,-0.277861,0.180505,-5.214784,-4.889361,-1.206443,2.497521,0.000392,0.000137,0.001436,0.052773,0.284222,0.078999,0.00575,...,0.000138,0.000183,0.000524,0.000483,0.000608,0.000806,0.001164,0.001016,0.001356,0.000967,0.000642,0.000433,0.000357,0.000369,0.00031,0.00032,0.000237,0.000182,0.000108,8.6e-05,8.8e-05,8.1e-05,5.9e-05,0.000119,0.000216,0.000222,0.000176,0.000146,0.000119,9.7e-05,6.4e-05,5.1e-05,7.4e-05,0.000129,0.000198,0.000243,0.00019,7.4e-05,4.691918e-06,fear
4,0.207284,0.692981,0.737456,0.726056,0.685032,0.636497,0.568223,0.528898,0.598124,0.635435,0.643268,0.671737,0.665797,-591.298523,92.935883,-4.376369,22.136271,9.728477,-3.868228,-4.231765,-12.517565,-17.417633,-6.273466,-7.159021,-2.124696,-2.085358,-9.489192,-3.802913,-1.608241,-9.055273,-6.693238,-5.338201,-0.922801,2e-06,4e-05,0.001787,0.018344,0.063987,0.03972,0.003364,...,3.4e-05,4.6e-05,0.000155,0.000217,0.00038,0.000631,0.000542,0.000424,0.000528,0.000116,0.000108,9e-05,0.000116,0.00012,7.5e-05,5.2e-05,5.3e-05,2.1e-05,1.2e-05,9e-06,7e-06,7e-06,8e-06,9e-06,8e-06,8e-06,8e-06,4e-06,6e-06,1e-05,1.8e-05,1.4e-05,1.4e-05,5e-06,7e-06,1.1e-05,1.6e-05,8e-06,4.218449e-07,sad


In [40]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler

# Feature matrix: every column except the trailing 'labels' column.
X = Features.iloc[:, :-1].to_numpy()

# One-hot encode the emotion labels. The encoder needs a 2-D column, hence the
# double brackets; .toarray() turns the sparse result into a dense array.
encoder = OneHotEncoder()
y = encoder.fit_transform(Features[['labels']].to_numpy()).toarray()

# Hold out 20% of the samples for testing (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=0
)

# Standardise features using statistics learned from the training split only,
# then apply the same transform to the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [41]:
# Sanity check: sample counts and feature/label widths of the train and test splits.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((1152, 161), (1152, 8), (288, 161), (288, 8))