In [None]:
from sklearn.model_selection import cross_val_score, cross_val_predict, GridSearchCV, train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
import pandas as pd
import numpy as np
from time import time
from sklearn.preprocessing import MinMaxScaler

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn import metrics

# WINDOW_SIZE = 257, CODEBOOK_SIZE = 10000
# NOTE(review): presumably the settings used to derive the MFCC CSVs below - confirm.

# When True, the normalization cell rescales the features before any model is trained.
enable_norm = False

# Read the train/test feature matrices and label vectors (comma-separated text),
# in the same order as the names on the left-hand side.
X_train, y_train, X_test, y_test = (
    np.loadtxt(csv_path, delimiter=",")
    for csv_path in (
        "MFCC/derived_feature_train.csv",
        "MFCC/derived_label_train.csv",
        "MFCC/derived_feature_test.csv",
        "MFCC/derived_label_test.csv",
    )
)

In [None]:
if enable_norm:
    # Stage 1 ("horizontal"): MinMaxScaler works column-wise, so each matrix is
    # transposed first; every original row is then rescaled on its own, and the
    # result is transposed back. Train and test each get a fresh scaler because
    # the transposed matrices have different column counts.
    model_normalizer_horizontal = MinMaxScaler()
    X_train = model_normalizer_horizontal.fit_transform(X_train.T).T

    model_normalizer_horizontal = MinMaxScaler()
    X_test = model_normalizer_horizontal.fit_transform(X_test.T).T

    # Stage 2 ("vertical"): column-wise scaling fitted on the training matrix
    # only, then applied unchanged to both train and test.
    model_normalizer_vertical = MinMaxScaler().fit(X_train)

    X_train, X_test = (
        model_normalizer_vertical.transform(X_train),
        model_normalizer_vertical.transform(X_test),
    )
    # NOTE(review): X_train / X_test are overwritten, so re-running this cell
    # normalizes already-normalized data - reload the CSVs before re-running.

In [None]:
# Display names passed as `target_names` (axis tick labels) to plot_confusion_matrix.
# NOTE(review): presumably listed in the same order as the numeric class labels - confirm.
label_names = [
    'Anger',
    'Disgust',
    'Fear',
    'Happy',
    'Neutral',
    'sad',
]

def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    Draw a confusion matrix as an annotated heat map and display it.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix
                  (rows are indexed as true labels, columns as predicted labels)

    target_names: the class names used as tick labels on both axes,
                  for example: ['high', 'medium', 'low'];
                  pass None to keep the default numeric ticks

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  (defaults to 'Blues' when None)

    normalize:    If False, plot the raw numbers
                  If True, plot per-row proportions (each row divided by its sum);
                  a row whose sum is zero is shown as all zeros

    Returns
    -------
    None. The figure is shown with plt.show().

    Notes
    -----
    The accuracy / misclassification rate printed under the x axis is always
    computed from the raw counts. The heat-map colours, the colorbar and the
    cell annotations all use the same matrix (raw or normalized), so the colour
    scale agrees with the numbers written in the cells.

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    cm = np.asarray(cm)

    # Summary metrics come from the raw counts, before any normalization.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # Normalize BEFORE drawing so the image and colorbar show proportions too.
        # `where=` skips rows with no samples, leaving them at 0 instead of NaN.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than this threshold get white text so the number stays readable.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Lay out after the labels exist so the two-line x label is accounted for.
    plt.tight_layout()
    plt.show()

In [None]:
# LR
# Logistic regression: 10-fold grid search over C and penalty on the training
# set (scored with micro-averaged F1), then evaluation of the refit best model
# on the held-out test set.
param_grid_ = {'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000], "penalty":["l1","l2"]}
print('-> Processing 10-Fold Cross Validation and Grid Search\n')

# The solver is pinned to 'liblinear' because it accepts both 'l1' and 'l2'.
# The default solver in current scikit-learn ('lbfgs') rejects 'l1', which
# makes every 'l1' candidate in the grid fail to fit.
bow_search = GridSearchCV(LogisticRegression(solver='liblinear'), cv=10, param_grid=param_grid_, scoring='f1_micro', n_jobs=-1, verbose=10)
t0 = time()
bow_search.fit(X_train, y_train)
training_time = round(time()-t0, 3)  # wall-clock seconds for the whole search
print('-> Done! Show Grid scores\n')

print(bow_search.cv_results_,'\n\n')

print("Best parameters set found on development set:\n")
print(bow_search.best_params_,'\n')
print("Grid scores on development set:\n")
means = bow_search.cv_results_['mean_test_score']
stds = bow_search.cv_results_['std_test_score']
# One line per candidate: mean CV score, +/- two standard deviations, parameters.
for mean, std, params in zip(means, stds, bow_search.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
print('\n\n')
print("Detailed classification report:\n")
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.\n\n")
t0 = time()
# predict() on the search object delegates to the refit best estimator.
y_true, y_pred = y_test, bow_search.predict(X_test)
test_time = round(time()-t0, 3)  # wall-clock seconds for test-set prediction
cmat = confusion_matrix(y_true, y_pred)
# Raw counts first, then row-normalized proportions.
plot_confusion_matrix(cm           = cmat, 
                      normalize    = False,
                      target_names = label_names,
                      cmap = plt.get_cmap('Blues'),
                      title        = "Confusion Matrix LR Dataset_Norm = %s" % str(enable_norm))
plot_confusion_matrix(cm           = cmat, 
                      target_names = label_names,
                      cmap = plt.get_cmap('Blues'),
                      title        = "Normalized Confusion Matrix LR Dataset_Norm = %s" % str(enable_norm))
print('\n\n')
print(classification_report(y_true, y_pred))
print()
# Arguments in the documented (y_true, y_pred) order.
print('Accuracy', metrics.accuracy_score(y_test, y_pred))
print("Training time : {}\n".format(training_time))
print("Test time : {}\n".format(test_time))
print()

In [None]:
# NB
# Multinomial naive Bayes: 10-fold grid search over the smoothing parameter
# alpha (micro-averaged F1), then evaluation of the best model on the test set.
parameters = {'alpha': (1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 10, 100, 1000)}
bow_search = GridSearchCV(
    estimator=MultinomialNB(),
    param_grid=parameters,
    scoring='f1_micro',
    cv=10,
    n_jobs=-1,
    verbose=10,
)

fit_started = time()
bow_search.fit(X_train, y_train)
training_time = round(time() - fit_started, 3)  # seconds spent in the search
print('-> Done! Show Grid scores\n')

cv_results = bow_search.cv_results_
print(cv_results, '\n\n')

print("Best parameters set found on development set:\n")
print(bow_search.best_params_, '\n')
print("Grid scores on development set:\n")
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
# Report each candidate as: mean score, +/- two standard deviations, parameters.
for idx, params in enumerate(cv_results['params']):
    print("%0.3f (+/-%0.03f) for %r" % (means[idx], stds[idx] * 2, params))
print('\n\n')
print("Detailed classification report:\n")
print(
    "The model is trained on the full development set.",
    "The scores are computed on the full evaluation set.\n\n",
    sep="\n",
)

predict_started = time()
y_pred = bow_search.predict(X_test)  # uses the refit best estimator
test_time = round(time() - predict_started, 3)  # seconds spent predicting
y_true = y_test

print(confusion_matrix(y_true, y_pred))
print('\n\n')
print(classification_report(y_true, y_pred))
print()
print('Accuracy', metrics.accuracy_score(y_pred, y_test))
print("Training time : {}\n".format(training_time))
print("Test time : {}\n".format(test_time))
print()

In [None]:
# SVM
# Linear-kernel support vector classifier: 10-fold grid search over C
# (micro-averaged F1), then evaluation of the best model on the test set.
param_grid_ = {'C': [0.001, 0.01, 0.1, 1, 10], "kernel":["linear"]}
print('-> Processing 10-Fold Cross Validation and Grid Search\n')

bow_search = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid_,
    scoring='f1_micro',
    cv=10,
    n_jobs=-1,
    verbose=10,
)

fit_started = time()
bow_search.fit(X_train, y_train)
training_time = round(time() - fit_started, 3)  # seconds spent in the search
print('-> Done! Show Grid scores\n')

cv_results = bow_search.cv_results_
print(cv_results, '\n\n')

print("Best parameters set found on development set:\n")
print(bow_search.best_params_, '\n')
print("Grid scores on development set:\n")
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
# Report each candidate as: mean score, +/- two standard deviations, parameters.
for idx, params in enumerate(cv_results['params']):
    print("%0.3f (+/-%0.03f) for %r" % (means[idx], stds[idx] * 2, params))
print('\n\n')
print("Detailed classification report:\n")
print(
    "The model is trained on the full development set.",
    "The scores are computed on the full evaluation set.\n\n",
    sep="\n",
)

predict_started = time()
y_pred = bow_search.predict(X_test)  # uses the refit best estimator
test_time = round(time() - predict_started, 3)  # seconds spent predicting
y_true = y_test

cmat = confusion_matrix(y_true, y_pred)
# Two figures from the same matrix: raw counts, then row-normalized proportions.
for title_template, show_proportions in (
    ("Confusion Matrix SVC Dataset_Norm = %s", False),
    ("Normalized Confusion Matrix SVC Dataset_Norm = %s", True),
):
    plot_confusion_matrix(
        cm=cmat,
        normalize=show_proportions,
        target_names=label_names,
        cmap=plt.get_cmap('Greys'),
        title=title_template % str(enable_norm),
    )
print('\n\n')
print(classification_report(y_true, y_pred))
print()
print('Accuracy', metrics.accuracy_score(y_pred, y_test))
print("Training time : {}\n".format(training_time))
print("Test time : {}\n".format(test_time))
print()

In [None]:
# SGD
# Linear classifier trained with stochastic gradient descent: 10-fold grid
# search over the regularization strength alpha (micro-averaged F1), then
# evaluation of the refit best model on the held-out test set.
param_grid_ = [
  {'alpha': [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01]} ]
# random_state is fixed (same seed as the MLP cell) because SGD shuffles the
# training data; without it the scores change on every run.
# NOTE(review): max_iter=2 caps training at two passes over the data - confirm
# this is intentional rather than a leftover from an older scikit-learn API.
bow_search = GridSearchCV(SGDClassifier(max_iter=2, random_state=48), cv=10, param_grid=param_grid_, scoring='f1_micro', n_jobs=-1, verbose=10)
t0 = time()
bow_search.fit(X_train, y_train)
training_time = round(time()-t0, 3)  # wall-clock seconds for the whole search
print('-> Done! Show Grid scores\n')

print(bow_search.cv_results_,'\n\n')

print("Best parameters set found on development set:\n")
print(bow_search.best_params_,'\n')
print("Grid scores on development set:\n")
means = bow_search.cv_results_['mean_test_score']
stds = bow_search.cv_results_['std_test_score']
# One line per candidate: mean CV score, +/- two standard deviations, parameters.
for mean, std, params in zip(means, stds, bow_search.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
print('\n\n')
print("Detailed classification report:\n")
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.\n\n")
t0 = time()
# predict() on the search object delegates to the refit best estimator.
y_true, y_pred = y_test, bow_search.predict(X_test)
test_time = round(time()-t0, 3)  # wall-clock seconds for test-set prediction
print(confusion_matrix(y_true, y_pred))
print('\n\n')
print(classification_report(y_true, y_pred))
print()
# Arguments in the documented (y_true, y_pred) order.
print('Accuracy', metrics.accuracy_score(y_test, y_pred))
print("Training time : {}\n".format(training_time))
print("Test time : {}\n".format(test_time))
print()

In [None]:
# RF
# Random forest: 10-fold grid search over min_samples_leaf (micro-averaged F1),
# then evaluation of the refit best model on the held-out test set.
param_grid_ = {"min_samples_leaf": [2, 3, 5, 7, 10, 100, 250, 500, 1000]}
# random_state is fixed (same seed as the MLP cell) because the forest uses
# bootstrap samples and random feature subsets; without it the scores and the
# selected parameters can change on every run.
bow_search = GridSearchCV(RandomForestClassifier(random_state=48), param_grid=param_grid_, cv=10, scoring='f1_micro', n_jobs=-1, verbose=10)
t0 = time()
bow_search.fit(X_train, y_train)
training_time = round(time()-t0, 3)  # wall-clock seconds for the whole search
print('-> Done! Show Grid scores\n')

print(bow_search.cv_results_,'\n\n')

print("Best parameters set found on development set:\n")
print(bow_search.best_params_,'\n')
print("Grid scores on development set:\n")
means = bow_search.cv_results_['mean_test_score']
stds = bow_search.cv_results_['std_test_score']
# One line per candidate: mean CV score, +/- two standard deviations, parameters.
for mean, std, params in zip(means, stds, bow_search.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
print('\n\n')
print("Detailed classification report:\n")
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.\n\n")
t0 = time()
# predict() on the search object delegates to the refit best estimator.
y_true, y_pred = y_test, bow_search.predict(X_test)
test_time = round(time()-t0, 3)  # wall-clock seconds for test-set prediction
print(confusion_matrix(y_true, y_pred))
print('\n\n')
print(classification_report(y_true, y_pred))
print()
# Arguments in the documented (y_true, y_pred) order.
print('Accuracy', metrics.accuracy_score(y_test, y_pred))
print("Training time : {}\n".format(training_time))
print("Test time : {}\n".format(test_time))
print()

In [None]:
# DT
# Decision tree: 10-fold grid search over min_samples_leaf (micro-averaged F1),
# then evaluation of the refit best model on the held-out test set.
param_grid_ = {"min_samples_leaf": [2, 3, 5, 7, 10, 100, 250, 500, 1000]}
# random_state is fixed (same seed as the MLP cell) because the tree builder
# permutes features when searching for splits, so equally good splits can be
# resolved differently between runs unless it is seeded.
bow_search = GridSearchCV(DecisionTreeClassifier(random_state=48), param_grid=param_grid_, cv=10, scoring='f1_micro', n_jobs=-1, verbose=10)
t0 = time()
bow_search.fit(X_train, y_train)
training_time = round(time()-t0, 3)  # wall-clock seconds for the whole search
print('-> Done! Show Grid scores\n')

print(bow_search.cv_results_,'\n\n')

print("Best parameters set found on development set:\n")
print(bow_search.best_params_,'\n')
print("Grid scores on development set:\n")
means = bow_search.cv_results_['mean_test_score']
stds = bow_search.cv_results_['std_test_score']
# One line per candidate: mean CV score, +/- two standard deviations, parameters.
for mean, std, params in zip(means, stds, bow_search.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
print('\n\n')
print("Detailed classification report:\n")
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.\n\n")
t0 = time()
# predict() on the search object delegates to the refit best estimator.
y_true, y_pred = y_test, bow_search.predict(X_test)
test_time = round(time()-t0, 3)  # wall-clock seconds for test-set prediction
print(confusion_matrix(y_true, y_pred))
print('\n\n')
print(classification_report(y_true, y_pred))
print()
# Arguments in the documented (y_true, y_pred) order.
print('Accuracy', metrics.accuracy_score(y_test, y_pred))
print("Training time : {}\n".format(training_time))
print("Test time : {}\n".format(test_time))
print()

In [None]:
# KNN
# Nearest-neighbour classifier (k fixed at 1): 10-fold grid search over the
# weighting scheme and distance metric (micro-averaged F1), then evaluation of
# the best model on the test set.
param_grid_ = {'n_neighbors': [1], 'weights': ['uniform', 'distance'], 'metric': ['euclidean', 'manhattan']}
bow_search = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid_,
    scoring='f1_micro',
    cv=10,
    n_jobs=-1,
    verbose=10,
)

fit_started = time()
bow_search.fit(X_train, y_train)
training_time = round(time() - fit_started, 3)  # seconds spent in the search
print('-> Done! Show Grid scores\n')

cv_results = bow_search.cv_results_
print(cv_results, '\n\n')

print("Best parameters set found on development set:\n")
print(bow_search.best_params_, '\n')
print("Grid scores on development set:\n")
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
# Report each candidate as: mean score, +/- two standard deviations, parameters.
for idx, params in enumerate(cv_results['params']):
    print("%0.3f (+/-%0.03f) for %r" % (means[idx], stds[idx] * 2, params))
print('\n\n')
print("Detailed classification report:\n")
print(
    "The model is trained on the full development set.",
    "The scores are computed on the full evaluation set.\n\n",
    sep="\n",
)

predict_started = time()
y_pred = bow_search.predict(X_test)  # uses the refit best estimator
test_time = round(time() - predict_started, 3)  # seconds spent predicting
y_true = y_test

print(confusion_matrix(y_true, y_pred))
print('\n\n')
print(classification_report(y_true, y_pred))
print()
print('Accuracy', metrics.accuracy_score(y_pred, y_test))
print("Training time : {}\n".format(training_time))
print("Test time : {}\n".format(test_time))
print()

In [None]:
# MLPClassifier
# Multi-layer perceptron with three hidden layers of 64 tanh units, trained
# with the L-BFGS solver using fixed hyper-parameters (no grid search), then
# evaluated on the held-out test set.
clf = MLPClassifier(activation='tanh', alpha=0.03, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(64, 64, 64), learning_rate='constant',
       learning_rate_init=0.001, max_iter=100000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=48, shuffle=True,
       solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)


clf.fit(X_train, y_train) 

y_pred = clf.predict(X_test)

# Compare against y_test directly. The previous code read `y_true`, which is
# only defined as a side effect of the earlier model cells, so this cell failed
# with NameError when run on a fresh kernel without them.
cmat = confusion_matrix(y_test, y_pred)
# Raw counts first, then row-normalized proportions.
plot_confusion_matrix(cm           = cmat, 
                      normalize    = False,
                      target_names = label_names,
                      cmap = plt.get_cmap('Greens'),
                      title        = "Confusion Matrix MLP Dataset_Norm = %s" % str(enable_norm))
plot_confusion_matrix(cm           = cmat, 
                      target_names = label_names,
                      cmap = plt.get_cmap('Greens'),
                      title        = "Normalized Confusion Matrix MLP Dataset_Norm = %s" % str(enable_norm))
print(classification_report(y_test, y_pred))
# Arguments in the documented (y_true, y_pred) order.
print('Accuracy', metrics.accuracy_score(y_test, y_pred))