In [None]:
# Attach Google Drive to the Colab runtime; the feature CSVs are read from it below
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# import general packages
import pandas as pd
import numpy as np
import pickle

# import sklearn utility
import sklearn
from sklearn import pipeline
from sklearn import model_selection
from sklearn import metrics
from sklearn import preprocessing
from sklearn import ensemble
from sklearn import neural_network
from sklearn import decomposition

In [None]:
# Load the raw feature table from the CSV stored under the Drive mount
raw_features_path = "/content/drive/MyDrive/moody_playlist_data/raw_features.csv"
raw_features = pd.read_csv(filepath_or_buffer = raw_features_path)

In [None]:
# Load the basic feature table (used for the mood labels) from the Drive mount
basic_features_path = "/content/drive/MyDrive/moody_playlist_data/features.csv"
basic_features = pd.read_csv(filepath_or_buffer = basic_features_path)

In [None]:
# Place the basic features and the raw features side by side (column-wise concat)
combined_raw = pd.concat([basic_features, raw_features], axis = 1)

# Rebuild a "<title> - <artist>" key so it can be compared against the 'song' column
expected_song = combined_raw['title'] + ' - ' + combined_raw['artist']
combined_raw['song_check'] = expected_song

# Report whether every row's 'song' agrees with the rebuilt key; the two helper
# columns are only dropped when the check passes
songs_match = combined_raw['song'].equals(combined_raw['song_check'])
if not songs_match:
    print("Raw features DO NOT match.")
else:
    print("Raw features match.")
    combined_raw = combined_raw.drop(columns = ['song', 'song_check'])

In [None]:
# function to separate the raw features from the mood labels
def build_xy(features, n_basic_columns = 9):
    """Split a combined table into raw-feature inputs and mood-label targets.

    Parameters
    ----------
    features : pandas.DataFrame
        Table containing 'primary' and 'secondary' columns. Every column from
        position ``n_basic_columns`` onwards is treated as a raw feature.
    n_basic_columns : int, default 9
        Number of leading columns to skip when selecting the raw features.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` holds the columns from position
        ``n_basic_columns`` onwards and ``y`` is a new DataFrame with the
        columns 'primary', 'secondary' and 'combined' (the string
        concatenation of the first two).

    Raises
    ------
    KeyError
        If ``features`` has no 'primary' or 'secondary' column.
    """
    # skip all the basic features to only keep raw features
    x = features.iloc[:, n_basic_columns:]
    # explicit copy: adding a column to a selection of `features` would otherwise
    # trigger pandas' SettingWithCopyWarning
    y = features[['primary', 'secondary']].copy()
    # create a combined label with primary and secondary moods listed
    y['combined'] = y['primary'].astype(str) + y['secondary'].astype(str)
    return (x, y)

In [None]:
# Multi-Layer Perceptron with raw features
# Fixed seed so the MLP's random weight initialisation, and therefore the
# reported accuracy, is reproducible across runs.
RANDOM_STATE = 42

x, y = build_xy(combined_raw)
mlp_model = neural_network.MLPClassifier(random_state = RANDOM_STATE)
# rescale each raw feature, then keep enough principal components to explain 95% of the variance
scaler = preprocessing.MinMaxScaler()
pca = decomposition.PCA(n_components = 0.95, svd_solver = "full")
pipe = pipeline.Pipeline(steps = [('scaler', scaler), ('pca', pca), ('mlp', mlp_model)])
# candidate architectures: one wide hidden layer, optionally followed by a 25-unit layer
param_grid = {
    'mlp__hidden_layer_sizes': [(2000,), (5000,), (10000,), (2000, 25), (5000, 25), (10000, 25)],
}
# nested cross-validation: the inner 5-fold grid search selects the layer sizes,
# the outer 5-fold loop scores the tuned pipeline on the primary mood label
model = model_selection.GridSearchCV(pipe, param_grid, cv = 5)
accuracies = model_selection.cross_val_score(model, x, y['primary'], cv = 5)
print("Average accuracy:", np.mean(accuracies))