In [0]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import pandas as pd
from keras.preprocessing import sequence
from keras.models import Sequential
from sklearn.utils import shuffle
from keras.layers import Input, Flatten, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from sklearn import model_selection
from sklearn.metrics import confusion_matrix
from keras import regularizers
import os

In [0]:
# load data from google drive
from google.colab import drive
drive.mount('/content/gdrive')
path = ('/content/gdrive/My Drive/ProjectDataSet/RAVDESS/Speech')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
dataset = os.listdir(path)
def emo_labels(rawEmo,gender):
  switcher = {
      '02': 'calm_female' if gender else 'calm_male',
      '03': 'happy_female' if gender else 'happy_male',
      '04': 'sad_female' if gender else 'sad_male',
      '05': 'angry_female' if gender else 'angry_male',
      '06': 'fearful_female' if gender else 'fearful_male',
      'a': 'angry_male',
      'f': 'fearful_male',
      'h': 'happy_male',
      'sa': 'sad_male',
  }
  return switcher.get(emo)
def isFemale(s):
  if int(s[-6:-4])%2==0:
    return True
  else:
    return False

In [0]:
datafeatures = pd.DataFrame(columns=['feature'])
num_row = 0 # number of items
emotions = []
signal_count=0
for s in (dataset):
  emo = s[6:8]
  if (emo in notAvai):
    continue
  else: 
    # Dataset sample rate is 44100 Hz
    emotions.append(emo_labels(emo,isFemale(s)))
    signal,sample_rate = librosa.load(path+'/'+s,res_type='kaiser_fast',duration=2.5,offset=0.5,sr=44100)
    signal_count +=1
    sample_rate = np.array(sample_rate)
    mfcc_result = np.mean(librosa.feature.mfcc(y=signal,sr=sample_rate),axis=0)
    features = mfcc_result
    # add features to the data fure
    datafeatures.loc[num_row]=[features]
    num_row += 1
labels = pd.DataFrame(emotions)
print(datafeatures)

                                               feature
0    [-31.225976359050843, -30.360910952614223, -29...
1    [-36.727920763174176, -36.19586083434094, -34....
2    [-34.11293679338285, -33.303795125524694, -33....
3    [-35.04491932887174, -35.43746065352777, -36.8...
4    [-28.260846852118636, -28.19863450174745, -29....
..                                                 ...
955  [-32.57304124240317, -32.47509560331276, -32.3...
956  [-33.05663428442073, -33.39110548216278, -32.1...
957  [-33.92867534096095, -32.353367106973074, -31....
958  [-31.50154619188452, -30.856772960319915, -29....
959  [-33.03304830484642, -33.874237913030235, -34....

[960 rows x 1 columns]


In [0]:
# divide features to each columns
df = pd.DataFrame(datafeatures['feature'].values.tolist())
# combine features and it labels
temp = pd.concat([df,labels],axis=1)
# Change the name of colums 0(respresent labels) to labels
# also suffle the data
total_feat = shuffle(temp.rename(index=str, columns={"0": "label"}))
print(total_feat)

           0          1          2    ...        214        215             0  
41  -29.232220 -30.733137 -31.110382  ... -26.807050 -27.604548        sad_male
123 -36.272136 -37.108761 -38.335186  ... -34.272618 -36.529412        sad_male
506 -34.271076 -33.812120 -34.628319  ... -19.728301 -19.309484       calm_male
339 -34.331044 -34.331044 -34.331044  ... -24.408461 -23.008585    angry_female
103 -32.034278 -31.912527 -32.937392  ... -29.036553 -26.648856  fearful_female
..         ...        ...        ...  ...        ...        ...             ...
715 -42.709974 -42.709974 -40.212790  ... -33.989821 -32.443143     calm_female
102 -26.626321 -25.969556 -25.813515  ... -28.304442 -29.306849  fearful_female
359 -33.578089 -32.091351 -32.425807  ... -30.472274 -33.751325       calm_male
856 -23.922212 -24.199619 -26.462817  ... -30.624032 -31.073882    fearful_male
857 -40.142145 -38.325250 -34.713256  ... -34.834427 -33.524597    fearful_male

[960 rows x 217 columns]


In [0]:
total_feat = total_feat.fillna(0)
# divide test set and train set
# pop 80% as training
train = total_feat.head(int(len(total_feat)*(80/100)))
test = total_feat.tail(int(len(total_feat)*(20/100)))
print(train)

           0          1          2    ...        214        215             0  
41  -29.232220 -30.733137 -31.110382  ... -26.807050 -27.604548        sad_male
123 -36.272136 -37.108761 -38.335186  ... -34.272618 -36.529412        sad_male
506 -34.271076 -33.812120 -34.628319  ... -19.728301 -19.309484       calm_male
339 -34.331044 -34.331044 -34.331044  ... -24.408461 -23.008585    angry_female
103 -32.034278 -31.912527 -32.937392  ... -29.036553 -26.648856  fearful_female
..         ...        ...        ...  ...        ...        ...             ...
794 -41.943492 -41.943492 -41.943492  ... -26.608460 -26.711110     calm_female
484 -36.351764 -36.177740 -33.860721  ... -40.428649 -40.428649      sad_female
354 -31.012344 -29.942650 -29.589789  ... -31.225634 -31.747886      happy_male
323 -41.818690 -41.818690 -37.644385  ... -39.980077 -39.662173      sad_female
672 -40.368154 -40.368154 -40.368154  ... -40.368154 -40.368154        sad_male

[768 rows x 217 columns]


In [0]:
# get the feature (0~len-2 th column)
train_feat = train.iloc[:,:-1]
# label is the last column
train_labels = train.iloc[:,-1:]
# similar for test set
test_feat = test.iloc[:,:-1]
test_labels = test.iloc[:,-1:]

In [0]:
x_train,x_test,y_train,y_test = np.array(train_feat),np.array(test_feat),np.array(train_labels),np.array(test_labels)

In [0]:
# convert data to numberic type (model understandable numerical data)
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
labelencoder = LabelEncoder()
y_test = to_categorical(labelencoder.fit_transform(y_test))
y_train = to_categorical(labelencoder.fit_transform(y_train))

  y = column_or_1d(y, warn=True)


In [0]:
# Expand dimension through axis 2 for x_train and x_test
x_test_expended = np.expand_dims(x_test, axis = 2)
x_train_expended = np.expand_dims(x_train, axis = 2)

In [0]:
# CNN
model = Sequential()
model.add(Conv1D(256, 5,padding='same', input_shape=(216,1)))
model.add(Activation('relu'))
model.add(Conv1D(128, 5,padding='same'))
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(MaxPooling1D(pool_size=(8)))
model.add(Conv1D(128, 5,padding='same',))
model.add(Activation('relu'))
model.add(Conv1D(128, 5,padding='same',))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(10))
model.add(Activation('softmax'))
opt = keras.optimizers.rmsprop(lr=0.00001, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=opt,metrics=['accuracy'])

In [0]:
model.summary()

In [0]:
# train the model
savemodel = model.fit(x_train_expended,y_train,batch_size=16,epochs=700,validation_data=(x_test_expended,y_test))