In [2]:
from sklearn.metrics import confusion_matrix 
from sklearn.model_selection import train_test_split 
from sklearn.naive_bayes import GaussianNB
import pandas as pd

In [3]:
df = pd.read_csv("mfccFeatures_emotion.csv")
# print(set(df.label))

In [4]:
from sklearn.utils import shuffle
df = shuffle(df)

In [5]:
print(df.columns)

Index(['Unnamed: 0', 'feature1', 'feature2', 'feature3', 'feature4',
       'feature5', 'feature6', 'feature7', 'feature8', 'feature9', 'feature10',
       'feature11', 'feature12', 'feature13', 'label'],
      dtype='object')


In [6]:
X = df[df.columns[1:14]]
y = df[df.columns[14]]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0)

# print(y_train)

## Naive Bayes

In [7]:
gnb = GaussianNB().fit(X_train, y_train) 
gnb_predictions = gnb.predict(X_test) 
  
# accuracy on X_test 
accuracy = gnb.score(X_test, y_test) 
print(accuracy) 
  
# creating a confusion matrix 
cm = confusion_matrix(y_test, gnb_predictions)
print(cm)

0.33611111111111114
[[ 9  9  0  2  0  0  2  7]
 [12 24  1  4  0  0  2 13]
 [ 9  8 12  3  2  6  1  7]
 [ 9 12  3  5  0  3  1  8]
 [ 2  1  2  1 28  2  1  8]
 [ 3  7  2  0 10 10  2 10]
 [ 6  9  4  2  4  4  9  9]
 [ 7  3  5  1  3  4  3 24]]


## KNN

In [8]:
from sklearn.neighbors import KNeighborsClassifier 

knn = KNeighborsClassifier(n_neighbors = 7).fit(X_train, y_train) 
  
# accuracy on X_test 
accuracy = knn.score(X_test, y_test) 
print(accuracy) 
  
# creating a confusion matrix 
knn_predictions = knn.predict(X_test)  
cm = confusion_matrix(y_test, knn_predictions) 
print(cm)

0.41944444444444445
[[16  7  0  3  0  0  1  2]
 [13 35  0  2  0  3  2  1]
 [ 0  3 20  4  7  6  4  4]
 [ 4 11  7 12  0  4  2  1]
 [ 1  0  4  2 27  5  3  3]
 [ 4  1  7  4  8 15  3  2]
 [ 2  6  5  1  8  6 14  5]
 [ 5  2  7  5  6  7  6 12]]


## Decision Tree

In [9]:
from sklearn.tree import DecisionTreeClassifier 
dtree_model = DecisionTreeClassifier(max_depth = 18).fit(X_train, y_train) 
dtree_predictions = dtree_model.predict(X_test)

accuracy = dtree_model.score(X_test, y_test)
print(accuracy)
  
# creating a confusion matrix 
cm = confusion_matrix(y_test, dtree_predictions)
print(cm)

0.375
[[ 9  8  0  4  2  2  2  2]
 [ 9 22  0 12  3  6  3  1]
 [ 0  3 16  4  4  7  5  9]
 [ 4  3  6 18  2  1  3  4]
 [ 0  0  3  3 27  3  6  3]
 [ 1  0  5  5  6 15  2 10]
 [ 1  4  9  5  4  7 13  4]
 [ 1  1  5  4  6 12  6 15]]


In [10]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
adaboost = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    learning_rate=1.5,
    n_estimators=10)
model = adaboost.fit(X_train,y_train)


predictions = model.predict(X_test)
accuracy = model.score(X_test, y_test)
print(accuracy)
  
# creating a confusion matrix 
cm = confusion_matrix(y_test, dtree_predictions)
print(cm)

0.2361111111111111
[[ 9  8  0  4  2  2  2  2]
 [ 9 22  0 12  3  6  3  1]
 [ 0  3 16  4  4  7  5  9]
 [ 4  3  6 18  2  1  3  4]
 [ 0  0  3  3 27  3  6  3]
 [ 1  0  5  5  6 15  2 10]
 [ 1  4  9  5  4  7 13  4]
 [ 1  1  5  4  6 12  6 15]]


## Random Forest

In [4]:
import time
import os
import pandas as pd
import glob 
import librosa
from librosa import display
import numpy as np



path = "../Dataset/Audio_Speech_Actors_01-24"
lst = []

start_time = time.time()

for subdir, dirs, files in os.walk(path):
  for file in files:
      try:
        #Load librosa array, obtain mfcss, store the file and the mcss information in a new array
        X, sample_rate = librosa.load(os.path.join(subdir,file), res_type='kaiser_fast')
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0) 
        file = int(file[7:8]) - 1 
        arr = mfccs, file
        lst.append(arr)
      except ValueError:
        continue

print("--- Data loaded. Loading time: %s seconds ---" % (time.time() - start_time))

--- Data loaded. Loading time: 135.81854939460754 seconds ---


In [5]:
# Creating X and y: zip makes a list of all the first elements, and a list of all the second elements.
X, y = zip(*lst)

In [6]:
import numpy as np
X = np.asarray(X)
y = np.asarray(y)


X.shape, y.shape

((1440, 40), (1440,))

In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [9]:
from sklearn.ensemble import RandomForestClassifier
rforest = RandomForestClassifier(criterion="gini", max_depth=10, max_features="log2", 
                                 max_leaf_nodes = 100, min_samples_leaf = 3, min_samples_split = 20, 
                                 n_estimators= 22000, random_state= 5)
rforest.fit(X_train, y_train)
predictions = rforest.predict(X_test)
print(classification_report(y_test,predictions))

NameError: name 'classification_report' is not defined

In [10]:
accuracy = rforest.score(X_test, y_test)
print(accuracy)

0.5147058823529411
