In [25]:
import os
import librosa
import matplotlib.pyplot as plt
import soundfile
import numpy as np
import librosa.display
import glob
import sklearn.model_selection as model_selection
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.metrics import accuracy_score

In [26]:
def extract_feature(file_name, mfcc, chroma, mel, n_mfcc=40):
    """Build a 1-D feature vector for a single audio file.

    Parameters
    ----------
    file_name : str
        Full path of the audio file to analyse.
    mfcc : bool
        When true, append the mean of each MFCC coefficient (the MFCCs form a
        representation of the short-term power spectrum of the sound).
    chroma : bool
        Accepted so existing callers keep working; no chroma features are
        produced by this function.
    mel : bool
        Accepted so existing callers keep working; no mel-spectrogram features
        are produced by this function.
    n_mfcc : int, optional
        Number of MFCC coefficients to request from librosa (default 40,
        the value that was previously hard-coded).

    Returns
    -------
    numpy.ndarray
        One value per MFCC coefficient when ``mfcc`` is true, otherwise an
        empty array.
    """
    # librosa.load opens and decodes the file on its own, so the extra
    # soundfile handle the function used to hold open was never read from.
    X, sample_rate = librosa.load(file_name)

    # The STFT magnitude that used to be computed when `chroma` was set was
    # never consumed, so that (expensive) step has been dropped.
    result = np.array([])
    if mfcc:
        coefficients = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=n_mfcc)
        # Average along the second axis of librosa's output (same as the
        # previous transpose-then-mean over axis 0): one mean per coefficient.
        mfccs = np.mean(coefficients, axis=1)
        result = np.hstack((result, mfccs))
    return result

In [27]:
# Two-digit emotion code (third dash-separated field of a file name, as read
# by load_data) -> human-readable label.
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}

# Only recordings carrying one of these labels are kept for training/testing.
observed_emotions = ['calm', 'happy', 'sad', 'angry']

In [28]:
def load_data(test_size, data_glob="C:\\Users\\Rishabh Tiwari\\Desktop\\SER\\Actor_*\\*"):
    """Extract features for every observed-emotion recording and split them.

    Parameters
    ----------
    test_size : float
        Fraction of the samples to hold out for testing; the training
        fraction is ``1 - test_size``.
    data_glob : str, optional
        Glob pattern matching the audio files. Defaults to the location that
        was previously hard-coded, so existing calls behave as before.

    Returns
    -------
    list
        ``[x_train, x_test, y_train, y_test]`` as returned by
        ``train_test_split`` (seeded with ``random_state=101``).

    Raises
    ------
    ValueError
        If no file matching ``data_glob`` carries an observed emotion.
    """
    x, y = [], []
    # Sort the paths so the sample order -- and therefore the seeded split --
    # does not depend on the order in which the OS happens to list files.
    for file in sorted(glob.glob(data_glob)):
        fields = os.path.basename(file).split("-")
        # Stray files (too few fields, or an unknown emotion code) resolve to
        # None and are skipped instead of raising IndexError/KeyError.
        emotion = emotions.get(fields[2]) if len(fields) > 2 else None
        if emotion not in observed_emotions:
            continue
        feature = extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    if not x:
        raise ValueError(
            "No audio files with an observed emotion matched {!r}".format(data_glob)
        )
    return model_selection.train_test_split(x, y, train_size=1-test_size, test_size=test_size, random_state=101)

In [29]:
# Extract features for the whole dataset and hold out 20% of it for testing.
# (The empty-list placeholders that used to precede this line were overwritten
# immediately, so they have been removed.)
X_train, X_test, y_train, y_test = load_data(test_size=0.20)

In [30]:
# Number of samples in the training split, then in the test split (one per line).
print(len(X_train), len(X_test), sep="\n")

614
154


In [31]:
# Pre-processing: the features are standardised before PCA is applied.
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split only...
sc = StandardScaler().fit(X_train)

# ...then rescale both splits with those same training statistics.
X_train_norm = sc.transform(X_train)
X_test_norm = sc.transform(X_test)

print(X_train_norm)

[[-0.82293001  0.89432591  0.71564485 ... -0.07101651 -0.45758372
  -0.22359131]
 [ 0.35891905 -1.23954895 -0.49543797 ... -0.32590528 -0.10573632
  -0.23294874]
 [-1.6820654   0.80941904  2.0017516  ... -1.23899756 -0.70526135
  -0.58666644]
 ...
 [ 1.96949821 -3.12406592 -2.06976256 ...  0.19817993 -0.17371296
   0.49340017]
 [-1.06041136  0.86089555  2.35094504 ... -0.40144094 -0.89088884
  -1.04129511]
 [ 0.35161732  0.85362681  0.06508395 ... -0.53246428 -0.05419897
  -0.07885826]]


In [32]:
# Dimensionality reduction with PCA on the standardised features.
from sklearn.decomposition import PCA

# n_components='mle' leaves the choice of the number of components to PCA itself.
pca = PCA(n_components='mle')

# Fit the projection on the training split, then apply it unchanged to the test split.
X_train_pca, X_test_pca = pca.fit_transform(X_train_norm), pca.transform(X_test_norm)

# Share of the total variance captured by each retained component.
explained_variance = pca.explained_variance_ratio_
print(explained_variance)

[0.24421205 0.14919659 0.13137659 0.07387014 0.04728523 0.03927546
 0.0341891  0.02797482 0.02505939 0.02335726 0.01974018 0.01764763
 0.01585538 0.01301684 0.01292695 0.01182376 0.01037964 0.01010761
 0.00918408 0.00844163 0.00718018 0.00709679 0.00602028 0.00553482
 0.00514399 0.00502929 0.00442475 0.00417603 0.00371105 0.00349578
 0.00332897 0.00319241 0.00276215 0.00261272 0.00239888]


# Applying SVM

In [33]:
from sklearn.svm import SVC  # support vector classifier

# Baseline: RBF-kernel SVM trained directly on X_train, i.e. the features as
# returned by load_data (neither standardised nor PCA-projected).
model_SVM = SVC(C=3, gamma=0.01, kernel='rbf', random_state=0).fit(X_train, y_train)

# Predict the held-out split and report the fraction classified correctly.
y_pred_SVM = model_SVM.predict(X_test)

print ("Accuracy of SVM : ", accuracy_score(y_true=y_test, y_pred=y_pred_SVM))

Accuracy of SVM :  0.5909090909090909


In [34]:
from sklearn.svm import SVC  # support vector classifier

# Same SVM configuration as the baseline, trained on the standardised + PCA-projected features.
model_SVM = SVC(C=3, gamma=0.01, kernel='rbf', random_state=0).fit(X_train_pca, y_train)

# Predict the held-out split (projected with the same PCA) and report accuracy.
y_pred_SVM = model_SVM.predict(X_test_pca)
print ("Accuracy of SVM : ", accuracy_score(y_true=y_test, y_pred=y_pred_SVM))

Accuracy of SVM :  0.7662337662337663



# Applying MLP

In [35]:
from sklearn.neural_network import MLPClassifier

# random_state pins the weight initialisation and batch shuffling; without it
# every run of this cell reports a different accuracy.
# NOTE(review): learning_rate='adaptive' is documented as only used by the
# 'sgd' solver, and no solver is set here -- confirm whether it is intended.
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500, random_state=0)
# Train on the features as returned by load_data (no scaling, no PCA)
model.fit(X_train,y_train)
# Predict for the test set
y_pred=model.predict(X_test)
# Fraction of test samples classified correctly
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)
# Report it as a percentage
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 57.14%


In [36]:
# Same MLP configuration as the raw-feature run, now on the PCA-projected
# features. random_state pins the weight initialisation and batch shuffling so
# the reported accuracy is reproducible from run to run.
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500, random_state=0)
# Train the model
model.fit(X_train_pca,y_train)
# Predict for the test set
y_pred=model.predict(X_test_pca)
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 85.06%


# Applying KNN

In [37]:
from sklearn.neighbors import KNeighborsClassifier as knn


In [38]:
from sklearn.neighbors import KNeighborsClassifier

# Build the estimator from the class's real name. The previous form,
# `knn = knn(...)`, overwrote the class alias with an instance, so the cell
# only worked because the alias was re-imported immediately before it.
# `knn` still ends up bound to the fitted classifier, as before.
knn = KNeighborsClassifier(n_neighbors=4)
# Train on the features as returned by load_data (no scaling, no PCA)
knn.fit(X_train, y_train)
y_pred=knn.predict(X_test)
# Fraction of test samples classified correctly
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)
# Report it as a percentage
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 70.13%


In [39]:
from sklearn.neighbors import KNeighborsClassifier

# Build the estimator from the class's real name. The previous form,
# `knn = knn(...)`, overwrote the class alias with an instance, so the cell
# only worked because the alias was re-imported immediately before it.
# `knn` still ends up bound to the fitted classifier, as before.
knn = KNeighborsClassifier(n_neighbors=3)
# Train on the PCA-projected features
knn.fit(X_train_pca, y_train)
# Predict for the test set
y_pred=knn.predict(X_test_pca)
# Fraction of test samples classified correctly
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)
# Report it as a percentage
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 73.38%


# Random Forest

In [40]:
from sklearn.ensemble import RandomForestClassifier

# Seeded forest with trees capped at depth 20, trained on the features as
# returned by load_data (no scaling, no PCA).
randomForest = RandomForestClassifier(random_state=0, max_depth=20)
randomForest.fit(X=X_train, y=y_train)

RandomForestClassifier(max_depth=20, random_state=0)

In [41]:
# Score the forest fitted on the raw features against the held-out split.
y_pred_forest = randomForest.predict(X=X_test)
print("Accuracy: ", accuracy_score(y_true=y_test, y_pred=y_pred_forest))

Accuracy:  0.7337662337662337


In [42]:
# Refit the same estimator object on the PCA-projected features; from here on
# `randomForest` expects PCA-projected input.
randomForest.fit(X=X_train_pca, y=y_train)

RandomForestClassifier(max_depth=20, random_state=0)

In [43]:
# Score the forest on the PCA-projected held-out split.
y_pred_forest = randomForest.predict(X=X_test_pca)
print("Accuracy: ", accuracy_score(y_true=y_test, y_pred=y_pred_forest))

Accuracy:  0.8051948051948052


# Decision Tree

In [44]:
# Decision tree (entropy split criterion, seeded) on the features as returned
# by load_data (no scaling, no PCA).
from sklearn.tree import DecisionTreeClassifier

classifier = DecisionTreeClassifier(random_state=0, criterion='entropy').fit(X_train, y_train)

# Predict the held-out split and report accuracy as a percentage.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 61.69%


In [45]:
# Decision tree (entropy split criterion, seeded) on the PCA-projected features.
from sklearn.tree import DecisionTreeClassifier

classifier = DecisionTreeClassifier(random_state=0, criterion='entropy').fit(X_train_pca, y_train)

# Predict the projected held-out split and report accuracy as a percentage.
y_pred = classifier.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 53.90%


# AdaBoost

In [46]:
from sklearn.ensemble import AdaBoostClassifier

# Seeded AdaBoost ensemble of 100 estimators using the SAMME algorithm,
# trained on the features as returned by load_data (no scaling, no PCA).
ada = AdaBoostClassifier(algorithm='SAMME', random_state=0, n_estimators=100)
ada.fit(X=X_train, y=y_train)

AdaBoostClassifier(algorithm='SAMME', n_estimators=100, random_state=0)

In [47]:
# Predict labels for the held-out split with the fitted AdaBoost ensemble.
y_pred_ada = ada.predict(X=X_test)

In [48]:
# Fraction of held-out samples AdaBoost classified correctly.
print("Accuracy: ", accuracy_score(y_true=y_test, y_pred=y_pred_ada))

Accuracy:  0.4675324675324675
