In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import BaggingClassifier
from xgboost import XGBClassifier

In [2]:
df = pd.read_csv('C:/Users/grees/OneDrive/Documents/Computer Science/Data Analytics/Audio-Emotion-Recognition/extractedData/details.csv')

In [3]:
X = df[list(df.columns)[3:]] #to get the relevant columnns (tonnetz, MFCC, chroma etc.)
X = preprocessing.normalize(X) #to normalize the data
y = df['emotion']
X = np.asarray(X)
y = np.asarray(y)
#X.shape, y.shape

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
#to split data into training and testing => 75:25 ratio

In [5]:
#decision tree classifier
dtree = DecisionTreeClassifier()
dtree.fit(X_train, y_train)
predictions = dtree.predict(X_test)
accuracy=metrics.accuracy_score(y_test, predictions)
print(classification_report(y_test,predictions))
print("Confusion matrix for decision trees:")
print(confusion_matrix(y_test,predictions))
scores1 = cross_val_score(dtree, X, y, cv=10)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores1.mean(), scores1.std()))

              precision    recall  f1-score   support

         1.0       0.24      0.18      0.21        22
         2.0       0.57      0.46      0.51        56
         3.0       0.23      0.26      0.25        42
         4.0       0.35      0.36      0.35        50
         5.0       0.34      0.48      0.40        50
         6.0       0.32      0.41      0.36        39
         7.0       0.29      0.26      0.27        46
         8.0       0.37      0.24      0.29        55

   micro avg       0.34      0.34      0.34       360
   macro avg       0.34      0.33      0.33       360
weighted avg       0.35      0.34      0.34       360

Confusion matrix for decision trees:
[[ 4  4  1  5  1  3  2  2]
 [ 6 26  4 12  2  2  1  3]
 [ 0  0 11  3 13  2  7  6]
 [ 3 11  5 18  3  4  5  1]
 [ 1  0  5  5 24  7  6  2]
 [ 0  2 10  2  6 16  2  1]
 [ 0  3  5  3  8  8 12  7]
 [ 3  0  6  4 14  8  7 13]]
Accuracy: 0.27 (+/- 0.05)


In [6]:
#random forest classifier
rfor = RandomForestClassifier(n_estimators=10, random_state = 45)
rfor = rfor.fit(X_train, y_train)
predictions2 = rfor.predict(X_test)
print(classification_report(y_test,predictions2))
accuracy=metrics.accuracy_score(y_test, predictions2)
print("Confusion matrix for random forest:")
print(confusion_matrix(y_test,predictions2))
scores2 = cross_val_score(rfor, X, y, cv=5)      
print("Accuracy: %0.2f (+/- %0.2f)" % (scores2.mean(), scores2.std()))

              precision    recall  f1-score   support

         1.0       0.27      0.32      0.29        22
         2.0       0.53      0.73      0.62        56
         3.0       0.27      0.43      0.33        42
         4.0       0.31      0.26      0.28        50
         5.0       0.58      0.50      0.54        50
         6.0       0.57      0.51      0.54        39
         7.0       0.29      0.22      0.25        46
         8.0       0.50      0.33      0.40        55

   micro avg       0.42      0.42      0.42       360
   macro avg       0.42      0.41      0.41       360
weighted avg       0.43      0.42      0.42       360

Confusion matrix for random forest:
[[ 7  9  2  2  0  0  2  0]
 [ 3 41  0  9  0  0  3  0]
 [ 2  2 18  2  4  8  3  3]
 [ 4 17  6 13  2  1  3  4]
 [ 1  0 10  2 25  1  6  5]
 [ 2  0  7  3  4 20  1  2]
 [ 4  4 13  6  3  2 10  4]
 [ 3  4 11  5  5  3  6 18]]
Accuracy: 0.31 (+/- 0.02)


In [7]:
#Extra trees classifier
clf = ExtraTreesClassifier(n_estimators=10, max_depth=None,min_samples_split=2, random_state=45)
clf.fit(X_train, y_train)
predictions6 = clf.predict(X_test)
accuracy=metrics.accuracy_score(y_test, predictions6)
print(classification_report(y_test,predictions6))
print("Confusion matrix for Extra Trees Classifier:")
print(confusion_matrix(y_test,predictions2))
scores3 = cross_val_score(clf, X, y, cv=10)
print("\nAccuracy for Extra Trees: %0.2f (+/- %0.2f)" % (scores3.mean(), scores3.std()))

              precision    recall  f1-score   support

         1.0       0.35      0.50      0.42        22
         2.0       0.48      0.59      0.53        56
         3.0       0.18      0.24      0.20        42
         4.0       0.28      0.22      0.24        50
         5.0       0.47      0.52      0.50        50
         6.0       0.36      0.41      0.39        39
         7.0       0.35      0.24      0.29        46
         8.0       0.33      0.20      0.25        55

   micro avg       0.36      0.36      0.36       360
   macro avg       0.35      0.36      0.35       360
weighted avg       0.36      0.36      0.35       360

Confusion matrix for Extra Trees Classifier:
[[ 7  9  2  2  0  0  2  0]
 [ 3 41  0  9  0  0  3  0]
 [ 2  2 18  2  4  8  3  3]
 [ 4 17  6 13  2  1  3  4]
 [ 1  0 10  2 25  1  6  5]
 [ 2  0  7  3  4 20  1  2]
 [ 4  4 13  6  3  2 10  4]
 [ 3  4 11  5  5  3  6 18]]

Accuracy for Extra Trees: 0.31 (+/- 0.02)


In [8]:
#AdaBoost Classifier
clf = AdaBoostClassifier(n_estimators=20)
clf.fit(X_train, y_train)
predictions7 = clf.predict(X_test)
accuracy=metrics.accuracy_score(y_test, predictions7)
print(classification_report(y_test,predictions7))
print("Confusion matrix for AdaBoost Classifier:")
print(confusion_matrix(y_test,predictions2))
scores4 = cross_val_score(clf, X, y, cv=10)
print("\nAccuracy for AdaBoost: %0.2f (+/- %0.2f)" % (scores4.mean(), scores4.std()))

              precision    recall  f1-score   support

         1.0       0.00      0.00      0.00        22
         2.0       0.46      0.62      0.53        56
         3.0       0.08      0.07      0.08        42
         4.0       0.27      0.20      0.23        50
         5.0       0.32      0.74      0.45        50
         6.0       0.08      0.05      0.06        39
         7.0       0.30      0.39      0.34        46
         8.0       0.67      0.07      0.13        55

   micro avg       0.30      0.30      0.30       360
   macro avg       0.27      0.27      0.23       360
weighted avg       0.31      0.30      0.26       360

Confusion matrix for AdaBoost Classifier:
[[ 7  9  2  2  0  0  2  0]
 [ 3 41  0  9  0  0  3  0]
 [ 2  2 18  2  4  8  3  3]
 [ 4 17  6 13  2  1  3  4]
 [ 1  0 10  2 25  1  6  5]
 [ 2  0  7  3  4 20  1  2]
 [ 4  4 13  6  3  2 10  4]
 [ 3  4 11  5  5  3  6 18]]

Accuracy for AdaBoost: 0.27 (+/- 0.03)


In [9]:
#Gradient boosting classifier
gb_clf = GradientBoostingClassifier(n_estimators=20, learning_rate=0.05, max_features=2, max_depth=2, random_state=0)
gb = gb_clf.fit(X_train, y_train)
predictions3 = gb.predict(X_test)
print(classification_report(y_test,predictions3))
accuracy=metrics.accuracy_score(y_test, predictions3)
print("Confusion matrix for gradient boosting:")
print(confusion_matrix(y_test,predictions3))
scores5 = cross_val_score(gb_clf, X, y, cv=10)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores5.mean(), scores5.std()))

              precision    recall  f1-score   support

         1.0       0.00      0.00      0.00        22
         2.0       0.47      0.82      0.60        56
         3.0       0.26      0.36      0.30        42
         4.0       0.50      0.28      0.36        50
         5.0       0.62      0.48      0.54        50
         6.0       0.46      0.33      0.39        39
         7.0       0.36      0.46      0.40        46
         8.0       0.37      0.29      0.33        55

   micro avg       0.41      0.41      0.41       360
   macro avg       0.38      0.38      0.36       360
weighted avg       0.41      0.41      0.40       360

Confusion matrix for gradient boosting:
[[ 0 18  1  0  0  0  2  1]
 [ 3 46  2  3  0  0  2  0]
 [ 1  2 15  2  7  2  8  5]
 [ 0 20  3 14  0  1  6  6]
 [ 0  1  9  0 24  3  5  8]
 [ 0  2  7  4  7 13  4  2]
 [ 1  5  8  3  1  2 21  5]
 [ 4  4 12  2  0  7 10 16]]
Accuracy: 0.32 (+/- 0.03)


In [10]:
#Bagging using Decision Trees
bagging = BaggingClassifier(DecisionTreeClassifier(), max_samples=0.3, max_features=0.7) #more dependent on features than samples
bagging.fit(X_train, y_train)
predictions3 = bagging.predict(X_test)
accuracy=metrics.accuracy_score(y_test, predictions3)
print(classification_report(y_test,predictions3))
print("Confusion matrix for bagging with Decision Trees:")
print(confusion_matrix(y_test,predictions3))
scores6 = cross_val_score(bagging, X, y, cv=10)
print("\nAccuracy after bagging with Decision Trees: %0.2f (+/- %0.2f)" % (scores6.mean(), scores6.std()))

              precision    recall  f1-score   support

         1.0       0.20      0.23      0.21        22
         2.0       0.49      0.66      0.56        56
         3.0       0.30      0.31      0.30        42
         4.0       0.33      0.26      0.29        50
         5.0       0.47      0.56      0.51        50
         6.0       0.37      0.41      0.39        39
         7.0       0.28      0.28      0.28        46
         8.0       0.38      0.20      0.26        55

   micro avg       0.38      0.38      0.38       360
   macro avg       0.35      0.36      0.35       360
weighted avg       0.37      0.38      0.37       360

Confusion matrix for bagging with Decision Trees:
[[ 5 10  1  3  1  0  2  0]
 [ 6 37  0  9  0  2  2  0]
 [ 2  1 13  0  9  7  6  4]
 [ 1 18  4 13  3  3  5  3]
 [ 0  1  9  2 28  5  2  3]
 [ 1  1  7  5  4 16  2  3]
 [ 5  4  6  4  5  4 13  5]
 [ 5  3  4  3  9  6 14 11]]

Accuracy after bagging with Decision Trees: 0.29 (+/- 0.03)


In [11]:
xg = XGBClassifier()
xg.fit(X_train, y_train)
predictions4 = xg.predict(X_test)
accuracy=metrics.accuracy_score(y_test, predictions4)
print(classification_report(y_test,predictions4))
print("Confusion matrix for decision trees:")
print(confusion_matrix(y_test,predictions4))
scores7 = cross_val_score(xg, X, y, cv=10)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores7.mean(), scores7.std()))

              precision    recall  f1-score   support

         1.0       0.33      0.27      0.30        22
         2.0       0.49      0.66      0.56        56
         3.0       0.27      0.36      0.31        42
         4.0       0.40      0.34      0.37        50
         5.0       0.54      0.52      0.53        50
         6.0       0.59      0.41      0.48        39
         7.0       0.32      0.35      0.33        46
         8.0       0.47      0.36      0.41        55

   micro avg       0.42      0.42      0.42       360
   macro avg       0.43      0.41      0.41       360
weighted avg       0.43      0.42      0.42       360

Confusion matrix for decision trees:
[[ 6  9  0  3  0  0  4  0]
 [ 3 37  1  8  0  0  7  0]
 [ 1  3 15  4  9  1  5  4]
 [ 1 16  3 17  0  3  4  6]
 [ 0  1 11  0 26  2  4  6]
 [ 1  3  6  4  4 16  2  3]
 [ 2  6  8  6  3  1 16  4]
 [ 4  1 11  1  6  4  8 20]]
Accuracy: 0.33 (+/- 0.04)
