In [1]:
import pandas as pd
import numpy as np
import sklearn
import pickle
from sklearn import pipeline
from sklearn import model_selection
from sklearn import metrics
from sklearn import preprocessing
from sklearn import neural_network
from sklearn import decomposition

In [None]:
# Read both CSV tables in one pass; the unpacking order matches the path order.
raw_features, basic_features = (
    pd.read_csv(csv_path)
    for csv_path in ("data/raw_features.csv", "data/features.csv")
)

In [2]:
# Place the two tables side by side (column-wise concat, aligned on the row
# index), then verify row alignment: a "title - artist" key is rebuilt from the
# 'title' and 'artist' columns and compared with the 'song' column.
combined_raw = pd.concat([basic_features, raw_features], axis = 1)
combined_raw['song_check'] = combined_raw['title'] + ' - ' + combined_raw['artist']
if not combined_raw['song'].equals(combined_raw['song_check']):
    # Fail fast instead of only reporting: if the keys differ, the rows of the
    # two tables are not aligned and any model fitted on the combined frame
    # would pair features with the wrong labels. `combined_raw` still holds
    # both key columns at this point, so the mismatch can be inspected.
    raise ValueError("Raw features DO NOT match.")
# Plain print instead of `!echo`: same text, no shell subprocess needed.
print("Raw features match.")
# The key columns were only needed for the check above.
combined_raw = combined_raw.drop(columns = ['song', 'song_check'])

In [4]:
def build_xy(features, feature_start = 9):
    """Split a combined feature table into model inputs and label columns.

    Parameters
    ----------
    features : pandas.DataFrame
        Table that contains 'primary' and 'secondary' columns and whose model
        inputs start at column position ``feature_start``.
    feature_start : int, default 9
        Position of the first input column; every column from this position
        to the end is returned as ``x``. The default keeps the original
        hard-coded offset.

    Returns
    -------
    (x, y) : tuple of pandas.DataFrame
        ``x`` holds the input columns. ``y`` holds 'primary', 'secondary' and
        'combined', where 'combined' is the string form of 'primary'
        immediately followed by the string form of 'secondary'.

    Raises
    ------
    KeyError
        If 'primary' or 'secondary' is missing from ``features``.
    """
    x = features.iloc[:, feature_start:]
    # Explicit copy: adding a column to the bare column selection triggers
    # SettingWithCopyWarning on pandas without copy-on-write, and the copy
    # guarantees that `features` itself is never modified.
    y = features[['primary', 'secondary']].copy()
    y['combined'] = y['primary'].astype(str) + y['secondary'].astype(str)
    return (x, y)

In [None]:
# Multi-Layer Perceptron with raw features
# Nested cross-validation: the inner 5-fold GridSearchCV picks the
# hyper-parameters, and the outer 5-fold cross_val_score scores the whole
# search procedure on the 'primary' label.
x, y = build_xy(combined_raw)
# Fixed seed so the network's random initialisation is repeatable and the
# reported average does not change from one run to the next.
mlp_model = neural_network.MLPClassifier(random_state = 42)
scaler = preprocessing.MinMaxScaler()
# A float n_components with the "full" solver selects the number of components
# by explained-variance ratio (here 0.95).
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
# Scaling and PCA live inside the pipeline, so they are re-fitted on the
# training part of every fold.
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('pca', pca), ('mlp', mlp_model)])
param_grid = {
    'mlp__hidden_layer_sizes': [(1000,), (10000,), (1000,50), (1000,50,6)],
    'mlp__activation': ['logistic', 'tanh', 'relu', 'identity'],
    'mlp__solver': ['lbfgs', 'sgd', 'adam'],
    'mlp__alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1],
}
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
avg = np.mean(accuracies)
# Plain print instead of `!echo $avg`: no shell round-trip for a Python value.
print("cross validation average")
print(avg)

In [None]:
# final Multi-Layer Perceptron model with raw features
# Runs the grid search once on all rows to obtain the best hyper-parameters
# for the 'primary' label.
x, y = build_xy(combined_raw)
# Fixed seed so the network's random initialisation, and therefore the
# selected parameters, is repeatable across runs.
mlp_model = neural_network.MLPClassifier(random_state = 42)
scaler = preprocessing.MinMaxScaler()
# A float n_components with the "full" solver selects the number of components
# by explained-variance ratio (here 0.95).
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('pca', pca), ('mlp', mlp_model)])
param_grid = {
    'mlp__hidden_layer_sizes': [(1000,), (10000,), (1000,50), (1000,50,6)],
    'mlp__activation': ['logistic', 'tanh', 'relu', 'identity'],
    'mlp__solver': ['lbfgs', 'sgd', 'adam'],
    'mlp__alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1],
}
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5)
model.fit(x, y['primary'])
best = model.best_params_
# Print from Python rather than `!echo $best`: the dict repr contains quotes
# and the parentheses of the hidden_layer_sizes tuple, which a shell would
# strip or reject, so the result of the search would not be displayed.
print("MLP Best Parameters")
print(best)