In [1]:
# import general packages
import warnings
import pickle
import pandas as pd
import numpy as np

# import sklearn functionalities
import sklearn
from sklearn import pipeline
from sklearn import model_selection
from sklearn import svm
from sklearn import metrics
from sklearn import preprocessing
from sklearn import linear_model
from sklearn import neighbors
from sklearn import naive_bayes
from sklearn import ensemble
from sklearn import neural_network
from sklearn import decomposition

In [2]:
# toggle warnings
# Silence every warning category for the rest of the session. Swap in the
# commented line below to restore Python's default warning behaviour.
warnings.simplefilter('ignore')
# warnings.simplefilter('default')

In [3]:
# load features
# Read the three feature tables (basic, engineered, neural network) in one pass.
basic_features, eng_features, nn_features = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/features.csv",
        "../data/engineered_features.csv",
        "../data/nn_features.csv",
    )
)

In [4]:
# add mood to the neural network dataframe
# Column-wise concat pairs rows by index, so the 'song' column coming from the
# neural network table is compared against "title - artist" rebuilt from the
# basic table to prove that both tables list the same songs in the same order.
combined_nn = pd.concat([basic_features, nn_features], axis = 1)
combined_nn['song_check'] = combined_nn['title'] + ' - ' + combined_nn['artist']

# Stop here on a mismatch: the helper columns are only dropped when the check
# passes, and the temporary 'song_check' column (appended last) would otherwise
# be swept into the positional feature slice used for the neural-net features.
if not combined_nn['song'].equals(combined_nn['song_check']):
    raise ValueError("Neural network features DO NOT match.")

print ("Neural network features match.")
combined_nn = combined_nn.drop(columns = ['song', 'song_check'])

Neural network features match.


In [5]:
# function to separate features and labels
def build_xy(features, feature_type):
    """Split a feature table into model inputs and mood labels.

    Parameters
    ----------
    features : pandas.DataFrame
        Table that contains the 'primary' and 'secondary' mood columns plus the
        feature columns required by ``feature_type``.
    feature_type : str
        "basic" selects the five named basic columns, "eng" selects every
        column except the first two and the last two, "neural_net" selects
        every column from position 9 onwards.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` holds the selected feature columns and ``y`` is a
        new frame with 'primary', 'secondary' and 'combined' (the two moods
        concatenated as strings). ``(None, None)`` is returned, after printing
        a message, when ``feature_type`` is not recognised.
    """
    if feature_type == "basic":
        # basic features only include the five listed
        x = features[['tempo', 'chroma_number', 'zero_crossing_rate', 'energy_entropy', 'spectral_centroid']]
    elif feature_type == "eng":
        # 2 to skip the title and artist, -2 to skip last two mood columns
        x = features.iloc[:, 2:(features.shape[1] - 2)]
    elif feature_type == "neural_net":
        # 9 is the index of zcr_mean, the first neural network feature
        x = features.iloc[:, 9:]
    else:
        print ("Invalid feature type.")
        return (None, None)

    # Work on an explicit copy so adding the 'combined' column neither triggers
    # pandas' SettingWithCopyWarning nor depends on how the selection is stored.
    y = features[['primary', 'secondary']].copy()
    # generate a combined label containing both primary and secondary moods
    y['combined'] = y['primary'].astype(str) + y['secondary'].astype(str)
    return (x, y)

In [6]:
# function to generate scoring based on primary and secondary moods
def evaluate_mood(y_true, y_predict):
    """Score each prediction against the moods recorded for the same position.

    ``y_true`` must offer ``tolist()``; every entry is read with ``[0]`` for the
    primary mood and expanded with ``list()`` for the full set of moods
    (presumably the 'combined' label string — TODO confirm against callers).
    Predictions are compared after conversion with ``str``.

    Returns a pandas Series holding 1 for a primary-mood hit, 0.5 when the
    prediction matches any other listed mood, and 0 otherwise.
    """
    expected = y_true.tolist()

    def credit(position, predicted):
        moods = expected[position]
        label = str(predicted)
        if label == moods[0]:
            # full credit: the primary mood was predicted
            return 1
        if label in list(moods):
            # partial credit: the secondary mood was predicted instead
            return 0.5
        return 0

    return pd.Series([credit(position, predicted)
                      for position, predicted in enumerate(y_predict)])

In [7]:
# Support Vector Machine with basic features
# Nested cross-validation: GridSearchCV tunes the min-max scaled SVC inside
# each outer fold and cross_val_score reports accuracy on the primary mood.
x, y = build_xy(basic_features, "basic")
svm_model = svm.SVC()
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('svc', svm_model)])
param_grid = {
    'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'svc__decision_function_shape': ['ovr', 'ovo'],
    'svc__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 5, 10],
}
# The grid holds 4 * 2 * 9 * 7 = 504 candidates, each fitted on 5 inner folds
# for every one of the 5 outer folds; n_jobs = -1 spreads those fits over all
# CPU cores instead of running them one at a time.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))

KeyboardInterrupt: 

In [None]:
# Support Vector Machine with engineered features
# Nested cross-validation: GridSearchCV tunes the min-max scaled SVC inside
# each outer fold and cross_val_score reports accuracy on the primary mood.
x, y = build_xy(eng_features, "eng")
svm_model = svm.SVC()
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('svc', svm_model)])
param_grid = {
    'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'svc__decision_function_shape': ['ovr', 'ovo'],
    'svc__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 5, 10],
}
# 504 candidates x 5 inner folds x 5 outer folds: run the fits on all CPU
# cores (n_jobs = -1) rather than serially.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))

In [None]:
# Support Vector Machine with PCA transformed engineered features
# Nested cross-validation of scaler -> PCA -> SVC; PCA keeps the components
# needed to explain 95% of the variance and is refitted inside every fold.
x, y = build_xy(eng_features, "eng")
svm_model = svm.SVC()
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('pca', pca), ('svc', svm_model)])
param_grid = {
    'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'svc__decision_function_shape': ['ovr', 'ovo'],
    'svc__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 5, 10],
}
# 504 candidates x 5 inner folds x 5 outer folds: run the fits on all CPU
# cores (n_jobs = -1) rather than serially.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))

In [None]:
# final basic SVM model
# Tune the min-max scaled SVC on the full basic feature set and report the
# best hyper-parameters found by 5-fold grid search.
x, y = build_xy(basic_features, "basic")
svm_model = svm.SVC()
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('svc', svm_model)])
param_grid = {
    'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'svc__decision_function_shape': ['ovr', 'ovo'],
    'svc__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 5, 10],
}
# 504 candidates x 5 folds: n_jobs = -1 runs the fits on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])
print ("***SVM Best Parameters***")
print (model.best_params_)

In [None]:
# final engineered SVM model
# Tune the min-max scaled SVC on the full engineered feature set and report
# the best hyper-parameters found by 5-fold grid search.
x, y = build_xy(eng_features, "eng")
svm_model = svm.SVC()
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('svc', svm_model)])
param_grid = {
    'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'svc__decision_function_shape': ['ovr', 'ovo'],
    'svc__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 5, 10],
}
# 504 candidates x 5 folds: n_jobs = -1 runs the fits on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])
print ("***SVM Best Parameters***")
print (model.best_params_)

In [None]:
# final transformed and engineered SVM model
# Tune scaler -> PCA (95% explained variance) -> SVC on the full engineered
# feature set and report the best hyper-parameters from 5-fold grid search.
x, y = build_xy(eng_features, "eng")
svm_model = svm.SVC()
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('pca', pca), ('svc', svm_model)])
param_grid = {
    'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'svc__decision_function_shape': ['ovr', 'ovo'],
    'svc__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 5, 10],
}
# 504 candidates x 5 folds: n_jobs = -1 runs the fits on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])
print ("***SVM Best Parameters***")
print (model.best_params_)

In [None]:
# Logistic Regression with basic features
# Nested cross-validation of a min-max scaled logistic regression, scored on
# the primary mood.
x, y = build_xy(basic_features, "basic")
lr_model = linear_model.LogisticRegression()
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('log_reg', lr_model)])
# Only penalty/solver pairs that LogisticRegression accepts are searched.
# Crossing every penalty with every solver would also produce pairs that can
# never fit: l1 with newton-cg/lbfgs/sag, 'none' with liblinear, and
# 'elasticnet', which needs an l1_ratio that this search does not tune.
# NOTE(review): newer scikit-learn releases spell the unpenalised option as
# None rather than 'none' — confirm against the installed version.
param_grid = [
    {
        'log_reg__penalty': [penalty],
        'log_reg__solver': solvers,
        'log_reg__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    }
    for penalty, solvers in [
        ('l1', ['liblinear', 'saga']),
        ('l2', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
        ('none', ['newton-cg', 'lbfgs', 'sag', 'saga']),
    ]
]
# n_jobs = -1 fits the candidates on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))

In [None]:
# Logistic Regression with engineered features
# Nested cross-validation of a min-max scaled logistic regression, scored on
# the primary mood.
x, y = build_xy(eng_features, "eng")
lr_model = linear_model.LogisticRegression()
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('log_reg', lr_model)])
# Only penalty/solver pairs that LogisticRegression accepts are searched.
# Crossing every penalty with every solver would also produce pairs that can
# never fit: l1 with newton-cg/lbfgs/sag, 'none' with liblinear, and
# 'elasticnet', which needs an l1_ratio that this search does not tune.
# NOTE(review): newer scikit-learn releases spell the unpenalised option as
# None rather than 'none' — confirm against the installed version.
param_grid = [
    {
        'log_reg__penalty': [penalty],
        'log_reg__solver': solvers,
        'log_reg__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    }
    for penalty, solvers in [
        ('l1', ['liblinear', 'saga']),
        ('l2', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
        ('none', ['newton-cg', 'lbfgs', 'sag', 'saga']),
    ]
]
# n_jobs = -1 fits the candidates on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))

In [None]:
# Logistic Regression with PCA transformed engineered features
# Nested cross-validation of scaler -> PCA (95% explained variance) ->
# logistic regression, scored on the primary mood.
x, y = build_xy(eng_features, "eng")
lr_model = linear_model.LogisticRegression()
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('pca', pca), ('log_reg', lr_model)])
# Only penalty/solver pairs that LogisticRegression accepts are searched.
# Crossing every penalty with every solver would also produce pairs that can
# never fit: l1 with newton-cg/lbfgs/sag, 'none' with liblinear, and
# 'elasticnet', which needs an l1_ratio that this search does not tune.
# NOTE(review): newer scikit-learn releases spell the unpenalised option as
# None rather than 'none' — confirm against the installed version.
param_grid = [
    {
        'log_reg__penalty': [penalty],
        'log_reg__solver': solvers,
        'log_reg__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    }
    for penalty, solvers in [
        ('l1', ['liblinear', 'saga']),
        ('l2', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
        ('none', ['newton-cg', 'lbfgs', 'sag', 'saga']),
    ]
]
# n_jobs = -1 fits the candidates on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))

In [None]:
# final basic LR model
# Tune the min-max scaled logistic regression on the full basic feature set
# and report the best hyper-parameters from 5-fold grid search.
x, y = build_xy(basic_features, "basic")
lr_model = linear_model.LogisticRegression()
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('log_reg', lr_model)])
# Only penalty/solver pairs that LogisticRegression accepts are searched.
# Crossing every penalty with every solver would also produce pairs that can
# never fit: l1 with newton-cg/lbfgs/sag, 'none' with liblinear, and
# 'elasticnet', which needs an l1_ratio that this search does not tune.
# NOTE(review): newer scikit-learn releases spell the unpenalised option as
# None rather than 'none' — confirm against the installed version.
param_grid = [
    {
        'log_reg__penalty': [penalty],
        'log_reg__solver': solvers,
        'log_reg__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    }
    for penalty, solvers in [
        ('l1', ['liblinear', 'saga']),
        ('l2', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
        ('none', ['newton-cg', 'lbfgs', 'sag', 'saga']),
    ]
]
# n_jobs = -1 fits the candidates on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])
print ("***LR Best Parameters***")
print (model.best_params_)

In [None]:
# final engineered LR model
# Tune the min-max scaled logistic regression on the full engineered feature
# set and report the best hyper-parameters from 5-fold grid search.
x, y = build_xy(eng_features, "eng")
lr_model = linear_model.LogisticRegression()
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('log_reg', lr_model)])
# Only penalty/solver pairs that LogisticRegression accepts are searched.
# Crossing every penalty with every solver would also produce pairs that can
# never fit: l1 with newton-cg/lbfgs/sag, 'none' with liblinear, and
# 'elasticnet', which needs an l1_ratio that this search does not tune.
# NOTE(review): newer scikit-learn releases spell the unpenalised option as
# None rather than 'none' — confirm against the installed version.
param_grid = [
    {
        'log_reg__penalty': [penalty],
        'log_reg__solver': solvers,
        'log_reg__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    }
    for penalty, solvers in [
        ('l1', ['liblinear', 'saga']),
        ('l2', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
        ('none', ['newton-cg', 'lbfgs', 'sag', 'saga']),
    ]
]
# n_jobs = -1 fits the candidates on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])
print ("***LR Best Parameters***")
print (model.best_params_)

In [None]:
# final transformed and engineered LR model
# Tune scaler -> PCA (95% explained variance) -> logistic regression on the
# full engineered feature set and report the best hyper-parameters.
x, y = build_xy(eng_features, "eng")
lr_model = linear_model.LogisticRegression()
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('pca', pca), ('log_reg', lr_model)])
# Only penalty/solver pairs that LogisticRegression accepts are searched.
# Crossing every penalty with every solver would also produce pairs that can
# never fit: l1 with newton-cg/lbfgs/sag, 'none' with liblinear, and
# 'elasticnet', which needs an l1_ratio that this search does not tune.
# NOTE(review): newer scikit-learn releases spell the unpenalised option as
# None rather than 'none' — confirm against the installed version.
param_grid = [
    {
        'log_reg__penalty': [penalty],
        'log_reg__solver': solvers,
        'log_reg__C': [0.25, 0.5, 0.75, 1, 3, 5, 10, 25, 50],
    }
    for penalty, solvers in [
        ('l1', ['liblinear', 'saga']),
        ('l2', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
        ('none', ['newton-cg', 'lbfgs', 'sag', 'saga']),
    ]
]
# n_jobs = -1 fits the candidates on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])
print ("***LR Best Parameters***")
print (model.best_params_)

In [None]:
# K Nearest Neighbor with basic features
# Nested cross-validation of a min-max scaled KNN classifier, scored on the
# primary mood.
# NOTE(review): 'wminkowski', 'seuclidean' and 'mahalanobis' normally need
# extra metric_params (w / V / VI) — confirm those candidates actually fit.
x, y = build_xy(basic_features, "basic")
scaler = preprocessing.MinMaxScaler()
knn_model = neighbors.KNeighborsClassifier()
pipe = pipeline.Pipeline([('scaler', scaler), ('knn', knn_model)])
param_grid = dict(
    knn__n_neighbors = list(range(1, 16, 2)),
    knn__weights = ['uniform', 'distance'],
    knn__metric = ['euclidean', 'manhattan', 'chebyshev', 'minkowski', 'wminkowski', 'seuclidean', 'mahalanobis'],
)
model = model_selection.GridSearchCV(estimator = pipe, param_grid = param_grid, cv = 5)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", accuracies.mean())

In [None]:
# K Nearest Neighbor with engineered features
# Nested cross-validation of a min-max scaled KNN classifier, scored on the
# primary mood.
# NOTE(review): 'wminkowski', 'seuclidean' and 'mahalanobis' normally need
# extra metric_params (w / V / VI) — confirm those candidates actually fit.
x, y = build_xy(eng_features, "eng")
scaler = preprocessing.MinMaxScaler()
knn_model = neighbors.KNeighborsClassifier()
pipe = pipeline.Pipeline([('scaler', scaler), ('knn', knn_model)])
param_grid = dict(
    knn__n_neighbors = list(range(1, 16, 2)),
    knn__weights = ['uniform', 'distance'],
    knn__metric = ['euclidean', 'manhattan', 'chebyshev', 'minkowski', 'wminkowski', 'seuclidean', 'mahalanobis'],
)
model = model_selection.GridSearchCV(estimator = pipe, param_grid = param_grid, cv = 5)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", accuracies.mean())

In [None]:
# K Nearest Neighbor with PCA transformed engineered features
# Nested cross-validation of scaler -> PCA (95% explained variance) -> KNN,
# scored on the primary mood.
# NOTE(review): 'wminkowski', 'seuclidean' and 'mahalanobis' normally need
# extra metric_params (w / V / VI) — confirm those candidates actually fit.
x, y = build_xy(eng_features, "eng")
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
knn_model = neighbors.KNeighborsClassifier()
pipe = pipeline.Pipeline([('scaler', scaler), ('pca', pca), ('knn', knn_model)])
param_grid = dict(
    knn__n_neighbors = list(range(1, 16, 2)),
    knn__weights = ['uniform', 'distance'],
    knn__metric = ['euclidean', 'manhattan', 'chebyshev', 'minkowski', 'wminkowski', 'seuclidean', 'mahalanobis'],
)
model = model_selection.GridSearchCV(estimator = pipe, param_grid = param_grid, cv = 5)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", accuracies.mean())

In [None]:
# final basic KNN model
# Grid-search the min-max scaled KNN classifier on the full basic feature set
# and print the winning hyper-parameters.
# NOTE(review): 'wminkowski', 'seuclidean' and 'mahalanobis' normally need
# extra metric_params (w / V / VI) — confirm those candidates actually fit.
x, y = build_xy(basic_features, "basic")
scaler = preprocessing.MinMaxScaler()
knn_model = neighbors.KNeighborsClassifier()
pipe = pipeline.Pipeline([('scaler', scaler), ('knn', knn_model)])
param_grid = dict(
    knn__n_neighbors = list(range(1, 16, 2)),
    knn__weights = ['uniform', 'distance'],
    knn__metric = ['euclidean', 'manhattan', 'chebyshev', 'minkowski', 'wminkowski', 'seuclidean', 'mahalanobis'],
)
model = model_selection.GridSearchCV(estimator = pipe, param_grid = param_grid, cv = 5)
model.fit(x, y['primary'])
print("***KNN Best Parameters***", model.best_params_, sep = "\n")

In [None]:
# final engineered KNN model
# Grid-search the min-max scaled KNN classifier on the full engineered feature
# set and print the winning hyper-parameters.
# NOTE(review): 'wminkowski', 'seuclidean' and 'mahalanobis' normally need
# extra metric_params (w / V / VI) — confirm those candidates actually fit.
x, y = build_xy(eng_features, "eng")
scaler = preprocessing.MinMaxScaler()
knn_model = neighbors.KNeighborsClassifier()
pipe = pipeline.Pipeline([('scaler', scaler), ('knn', knn_model)])
param_grid = dict(
    knn__n_neighbors = list(range(1, 16, 2)),
    knn__weights = ['uniform', 'distance'],
    knn__metric = ['euclidean', 'manhattan', 'chebyshev', 'minkowski', 'wminkowski', 'seuclidean', 'mahalanobis'],
)
model = model_selection.GridSearchCV(estimator = pipe, param_grid = param_grid, cv = 5)
model.fit(x, y['primary'])
print("***KNN Best Parameters***", model.best_params_, sep = "\n")

In [None]:
# final transformed and engineered KNN model
# Grid-search scaler -> PCA (95% explained variance) -> KNN on the full
# engineered feature set and print the winning hyper-parameters.
# NOTE(review): 'wminkowski', 'seuclidean' and 'mahalanobis' normally need
# extra metric_params (w / V / VI) — confirm those candidates actually fit.
x, y = build_xy(eng_features, "eng")
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
knn_model = neighbors.KNeighborsClassifier()
pipe = pipeline.Pipeline([('scaler', scaler), ('pca', pca), ('knn', knn_model)])
param_grid = dict(
    knn__n_neighbors = list(range(1, 16, 2)),
    knn__weights = ['uniform', 'distance'],
    knn__metric = ['euclidean', 'manhattan', 'chebyshev', 'minkowski', 'wminkowski', 'seuclidean', 'mahalanobis'],
)
model = model_selection.GridSearchCV(estimator = pipe, param_grid = param_grid, cv = 5)
model.fit(x, y['primary'])
print("***KNN Best Parameters***", model.best_params_, sep = "\n")

In [None]:
# Gaussian Naive Bayes with basic features
# Nested cross-validation on the unscaled features; only var_smoothing is
# tuned and accuracy is measured on the primary mood.
x, y = build_xy(basic_features, "basic")
nb_model = naive_bayes.GaussianNB()
param_grid = dict(var_smoothing = [1e-9, 1e-8, 1e-7, 1e-6])
model = model_selection.GridSearchCV(estimator = nb_model, param_grid = param_grid, cv = 5)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", accuracies.mean())

In [None]:
# Gaussian Naive Bayes with engineered features
# Nested cross-validation on the unscaled features; only var_smoothing is
# tuned and accuracy is measured on the primary mood.
x, y = build_xy(eng_features, "eng")
nb_model = naive_bayes.GaussianNB()
param_grid = dict(var_smoothing = [1e-9, 1e-8, 1e-7, 1e-6])
model = model_selection.GridSearchCV(estimator = nb_model, param_grid = param_grid, cv = 5)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", accuracies.mean())

In [None]:
# Gaussian Naive Bayes with PCA transformed engineered features
# Nested cross-validation of scaler -> PCA (95% explained variance) ->
# Gaussian NB, scored on the primary mood.
x, y = build_xy(eng_features, "eng")
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
nb_model = naive_bayes.GaussianNB()
pipe = pipeline.Pipeline([('scaler', scaler), ('pca', pca), ('gnb', nb_model)])
param_grid = dict(gnb__var_smoothing = [1e-9, 1e-8, 1e-7, 1e-6])
model = model_selection.GridSearchCV(estimator = pipe, param_grid = param_grid, cv = 5)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", accuracies.mean())

In [None]:
# final basic GNB model
# Grid-search var_smoothing on the full basic feature set and print the
# winning value.
x, y = build_xy(basic_features, "basic")
nb_model = naive_bayes.GaussianNB()
param_grid = dict(var_smoothing = [1e-9, 1e-8, 1e-7, 1e-6])
model = model_selection.GridSearchCV(estimator = nb_model, param_grid = param_grid, cv = 5)
model.fit(x, y['primary'])
print("***GNB Best Parameters***", model.best_params_, sep = "\n")

In [None]:
# final engineered GNB model
# Grid-search var_smoothing on the full engineered feature set and print the
# winning value.
x, y = build_xy(eng_features, "eng")
nb_model = naive_bayes.GaussianNB()
param_grid = dict(var_smoothing = [1e-9, 1e-8, 1e-7, 1e-6])
model = model_selection.GridSearchCV(estimator = nb_model, param_grid = param_grid, cv = 5)
model.fit(x, y['primary'])
print("***GNB Best Parameters***", model.best_params_, sep = "\n")

In [None]:
# final transformed and engineered GNB model
# Grid-search scaler -> PCA (95% explained variance) -> Gaussian NB on the
# full engineered feature set and print the winning var_smoothing.
x, y = build_xy(eng_features, "eng")
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
nb_model = naive_bayes.GaussianNB()
pipe = pipeline.Pipeline([('scaler', scaler), ('pca', pca), ('gnb', nb_model)])
param_grid = dict(gnb__var_smoothing = [1e-9, 1e-8, 1e-7, 1e-6])
model = model_selection.GridSearchCV(estimator = pipe, param_grid = param_grid, cv = 5)
model.fit(x, y['primary'])
print("***GNB Best Parameters***", model.best_params_, sep = "\n")

In [None]:
# Random Forest with basic features
# Nested cross-validation of a random forest on the unscaled features, scored
# on the primary mood.
x, y = build_xy(basic_features, "basic")
# Fixed seed so the reported accuracy is the same on every run of the cell.
rf_model = ensemble.RandomForestClassifier(random_state = 0)
param_grid = {
    'max_depth': [5, 10, 15, 20, 25, 50, 75, None],
    'min_samples_leaf': [1, 2, 5, 7, 10, 12, 15],
    'max_features': ["sqrt", "log2"]
}
# 8 * 7 * 2 = 112 forests per inner fold; n_jobs = -1 fits them on all cores.
model = model_selection.GridSearchCV(rf_model, param_grid, cv = 5, n_jobs = -1)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))

In [None]:
# Random Forest with engineered features
# Nested cross-validation of a random forest on the unscaled features, scored
# on the primary mood.
x, y = build_xy(eng_features, "eng")
# Fixed seed so the reported accuracy is the same on every run of the cell.
rf_model = ensemble.RandomForestClassifier(random_state = 0)
param_grid = {
    'max_depth': [5, 10, 15, 20, 25, 50, 75, None],
    'min_samples_leaf': [1, 2, 5, 7, 10, 12, 15],
    'max_features': ["sqrt", "log2"]
}
# 8 * 7 * 2 = 112 forests per inner fold; n_jobs = -1 fits them on all cores.
model = model_selection.GridSearchCV(rf_model, param_grid, cv = 5, n_jobs = -1)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))

In [None]:
# Random Forest with PCA transformed engineered features
# Nested cross-validation of scaler -> PCA (95% explained variance) -> random
# forest, scored on the primary mood.
x, y = build_xy(eng_features, "eng")
# Fixed seed so the reported accuracy is the same on every run of the cell.
rf_model = ensemble.RandomForestClassifier(random_state = 0)
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('pca', pca), ('forest', rf_model)])
param_grid = {
    'forest__max_depth': [5, 10, 15, 20, 25, 50, 75, None],
    'forest__min_samples_leaf': [1, 2, 5, 7, 10, 12, 15],
    'forest__max_features': ["sqrt", "log2"]
}
# 8 * 7 * 2 = 112 forests per inner fold; n_jobs = -1 fits them on all cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))

In [None]:
# final basic RF model
# Grid-search the random forest on the full basic feature set and print the
# winning hyper-parameters.
x, y = build_xy(basic_features, "basic")
# Fixed seed so the selected parameters are the same on every run of the cell.
rf_model = ensemble.RandomForestClassifier(random_state = 0)
param_grid = {
    'max_depth': [5, 10, 15, 20, 25, 50, 75, None],
    'min_samples_leaf': [1, 2, 5, 7, 10, 12, 15],
    'max_features': ["sqrt", "log2"]
}
# 112 candidates x 5 folds; n_jobs = -1 fits them on all CPU cores.
model = model_selection.GridSearchCV(rf_model, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])
print ("***RF Best Parameters***")
print (model.best_params_)

In [None]:
# final engineered RF model
# Grid-search the random forest on the full engineered feature set and print
# the winning hyper-parameters.
x, y = build_xy(eng_features, "eng")
# Fixed seed so the selected parameters are the same on every run of the cell.
rf_model = ensemble.RandomForestClassifier(random_state = 0)
param_grid = {
    'max_depth': [5, 10, 15, 20, 25, 50, 75, None],
    'min_samples_leaf': [1, 2, 5, 7, 10, 12, 15],
    'max_features': ["sqrt", "log2"]
}
# 112 candidates x 5 folds; n_jobs = -1 fits them on all CPU cores.
model = model_selection.GridSearchCV(rf_model, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])
print ("***RF Best Parameters***")
print (model.best_params_)

In [None]:
# final transformed and engineered RF model
# Grid-search scaler -> PCA (95% explained variance) -> random forest on the
# full engineered feature set and print the winning hyper-parameters.
x, y = build_xy(eng_features, "eng")
# Fixed seed so the selected parameters are the same on every run of the cell.
rf_model = ensemble.RandomForestClassifier(random_state = 0)
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('pca', pca), ('forest', rf_model)])
param_grid = {
    'forest__max_depth': [5, 10, 15, 20, 25, 50, 75, None],
    'forest__min_samples_leaf': [1, 2, 5, 7, 10, 12, 15],
    'forest__max_features': ["sqrt", "log2"]
}
# 112 candidates x 5 folds; n_jobs = -1 fits them on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])
print ("***RF Best Parameters***")
print (model.best_params_)

In [None]:
# Multi-Layer Perceptron with neural network features
# Nested cross-validation of a min-max scaled MLP on the neural network
# feature columns, scored on the primary mood.
x, y = build_xy(combined_nn, "neural_net")
# Fixed seed so weight initialisation, and therefore the reported accuracy,
# is the same on every run of the cell.
mlp_model = neural_network.MLPClassifier(random_state = 0)
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('mlp', mlp_model)])
param_grid = {
    'mlp__hidden_layer_sizes': [(10,), (20,), (30,), (40,), (50,), (60,)],
    'mlp__activation': ['logistic', 'tanh', 'relu', 'identity'],
    'mlp__solver': ['lbfgs', 'sgd', 'adam'],
    'mlp__alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1],
}
# 6 * 4 * 3 * 5 = 360 networks per inner fold, for each of the 5 outer folds;
# n_jobs = -1 trains them on all CPU cores instead of one at a time.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))

In [None]:
# final MLP model with neural network features
# Grid-search the min-max scaled MLP on the full neural network feature set
# and print the winning hyper-parameters.
x, y = build_xy(combined_nn, "neural_net")
# Fixed seed so the selected parameters are the same on every run of the cell.
mlp_model = neural_network.MLPClassifier(random_state = 0)
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('mlp', mlp_model)])
param_grid = {
    'mlp__hidden_layer_sizes': [(10,), (20,), (30,), (40,), (50,), (60,)],
    'mlp__activation': ['logistic', 'tanh', 'relu', 'identity'],
    'mlp__solver': ['lbfgs', 'sgd', 'adam'],
    'mlp__alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1],
}
# 360 candidates x 5 folds; n_jobs = -1 trains them on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])
print ("***MLP Best Parameters***")
print (model.best_params_)

In [None]:
# export the final MLP model
# Refit the grid search on the full neural network feature set and persist the
# fitted search object (which predicts with its best pipeline) to disk.
x, y = build_xy(combined_nn, "neural_net")
# Fixed seed so the exported model can be regenerated identically.
mlp_model = neural_network.MLPClassifier(random_state = 0)
scaler = preprocessing.MinMaxScaler()
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('mlp', mlp_model)])
param_grid = {
    'mlp__hidden_layer_sizes': [(10,), (20,), (30,), (40,), (50,), (60,)],
    'mlp__activation': ['logistic', 'tanh', 'relu', 'identity'],
    'mlp__solver': ['lbfgs', 'sgd', 'adam'],
    'mlp__alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1],
}
# 360 candidates x 5 folds; n_jobs = -1 trains them on all CPU cores.
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5, n_jobs = -1)
model.fit(x, y['primary'])

# use pickle to save the model
final_mlp_model = model
# The with-block closes the file even if pickling fails, so the handle is not
# leaked and the bytes are flushed before the cell returns.
with open('mood_mlp_model.sav', 'wb') as model_file:
    pickle.dump(final_mlp_model, model_file)

In [None]:
# export the stacking model with best parameters from each individual model
# use engineered features, which performed the best on all individual models when compared to basic features
x, y = build_xy(eng_features, "eng")

# Each base learner gets its own preprocessing pipeline, so scaling (and PCA
# where used) is fitted separately for every estimator in the stack.
svc_pipe = pipeline.make_pipeline(preprocessing.MinMaxScaler(), 
                                  svm.SVC(C = 5, decision_function_shape = 'ovr', gamma = 0.1, kernel = 'sigmoid'))
lr_pipe = pipeline.make_pipeline(preprocessing.MinMaxScaler(),
                                 decomposition.PCA(n_components = 0.95, svd_solver = "full"),
                                 linear_model.LogisticRegression(C = 0.25, penalty = 'l2', solver = 'liblinear'))
knn_pipe = pipeline.make_pipeline(preprocessing.MinMaxScaler(),
                                  decomposition.PCA(n_components = 0.95, svd_solver = "full"),
                                  neighbors.KNeighborsClassifier(metric = 'chebyshev', n_neighbors = 9, weights = 'distance'))
gnb_pipe = pipeline.make_pipeline(preprocessing.MinMaxScaler(),
                                  decomposition.PCA(n_components = 0.95, svd_solver = "full"),
                                  naive_bayes.GaussianNB(var_smoothing = 1e-09))
# Fixed seed on the forest so the exported stack can be regenerated identically.
rf_pipe = pipeline.make_pipeline(preprocessing.MinMaxScaler(),
                                 ensemble.RandomForestClassifier(max_depth = None, max_features = 'log2',
                                                                 min_samples_leaf = 7, random_state = 0))

estimators = [('svc', svc_pipe),
              ('lr', lr_pipe),
              ('knn', knn_pipe),
              ('gnb', gnb_pipe),
              ('rf', rf_pipe)]

# passthrough = True feeds the original features to the final logistic
# regression alongside the base learners' predictions.
stacker = ensemble.StackingClassifier(estimators = estimators, 
                                      final_estimator = linear_model.LogisticRegression(),
                                      passthrough = True)
stacker.fit(x, y['primary'])

# use pickle to save the model
final_stacking_model = stacker
# The with-block closes the file even if pickling fails, so the handle is not
# leaked and the bytes are flushed before the cell returns.
with open('mood_stacking_model.sav', 'wb') as model_file:
    pickle.dump(final_stacking_model, model_file)