In [8]:
# import packages
import warnings
import pandas as pd
import numpy as np
import sklearn
from sklearn import pipeline
from sklearn import model_selection
from sklearn import svm
from sklearn import metrics
from sklearn import preprocessing
from sklearn import linear_model
from sklearn import neighbors
from sklearn import naive_bayes
from sklearn import ensemble
from sklearn import neural_network

In [89]:
# toggle warnings
# Installs an 'ignore' filter with no category or module restriction, so every
# warning raised by later cells is suppressed. To see warnings again, comment
# out the 'ignore' line and uncomment the 'default' line below.
warnings.filterwarnings('ignore')
# warnings.filterwarnings('default')

In [144]:
# Load the feature tables from CSV, using paths relative to the notebook's
# working directory. `features` is the input to build_xy and to the manual
# cross-validation cells below.
features = pd.read_csv("../data/features.csv")
# NOTE(review): `nn_features` is not referenced by any other cell visible here;
# presumably it was meant for the neural-network cell - confirm.
nn_features = pd.read_csv("../data/nn_features.csv")

In [157]:
def build_xy(features):
    """Split the feature table into model inputs and mood labels.

    Parameters
    ----------
    features : pandas.DataFrame
        Must contain the five audio-feature columns selected below plus the
        'primary' and 'secondary' label columns.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` holds the five audio-feature columns and ``y``
        is an independent frame with 'primary', 'secondary' and 'combined'.
        'combined' is the string form of 'primary' followed directly by the
        string form of 'secondary'.
    """
    x = features[['tempo', 'chroma_number', 'zero_crossing_rate', 'energy_entropy', 'spectral_centroid']]
    # Alternative feature set without 'chroma_number':
#     x = features[['tempo', 'zero_crossing_rate', 'energy_entropy', 'spectral_centroid']]
    # Take an explicit copy: adding a column to a bare column-slice of
    # `features` is chained assignment and triggers SettingWithCopyWarning.
    y = features[['primary', 'secondary']].copy()
    y['combined'] = y['primary'].astype(str) + y['secondary'].astype(str)
    return (x, y)

In [158]:
def evaluate_mood(y_true, y_predict):
    """Score each prediction against a combined primary+secondary label.

    ``y_true`` must offer ``.tolist()`` and yield indexable strings; the first
    character of each string is compared with ``str(prediction)`` for full
    credit, and any character of the string for half credit.

    Returns a ``pandas.Series`` holding 1 (matches the first character),
    0.5 (matches some other character) or 0 (no match) per prediction.
    """
    def _credit(combined, predicted):
        # Full credit for the leading character, half for any other one.
        label = str(predicted)
        if label == combined[0]:
            return 1
        if label in list(combined):
            return 0.5
        return 0

    truths = y_true.tolist()
    # Look truths up by position (not zip) so a prediction list longer than
    # the truth list still raises IndexError.
    return pd.Series([_credit(truths[pos], guess) for pos, guess in enumerate(y_predict)])

# mood_scorer = metrics.make_scorer(evaluate_mood, greater_is_better = True)

In [164]:
def score_results(model, x, y):
    """Cross-validate ``model`` under two scalers and print the results.

    For a StandardScaler pipeline and then a MinMaxScaler pipeline, prints the
    mean 5-fold accuracy on ``y['primary']`` followed by the mean
    ``evaluate_mood`` score of the 5-fold cross-validated predictions against
    ``y['combined']``.

    Parameters
    ----------
    model : estimator passed to ``pipeline.make_pipeline`` after the scaler.
    x : feature table passed to the cross-validation helpers.
    y : frame with 'primary' and 'combined' columns.

    Returns
    -------
    The rounded (3 decimals) mean primary-mood accuracy of the StandardScaler
    pipeline only; the Min-Max result is printed but not returned.
    """
    scaler_variants = [
        ("Normal Scaler:", preprocessing.StandardScaler()),
        ("Min-Max Scaler:", preprocessing.MinMaxScaler()),
    ]
    primary_accuracies = []

    for position, (heading, scaler) in enumerate(scaler_variants):
        # A blank separator line goes between sections, not before the first.
        if position > 0:
            print (" ")
        print (heading)
        pipe = pipeline.make_pipeline(scaler, model)

        fold_scores = model_selection.cross_val_score(pipe, x, y['primary'], cv = 5)
        primary_accuracy = round(np.mean(fold_scores), 3)
        print ("Accuracy of", primary_accuracy, "on primary moods")
        primary_accuracies.append(primary_accuracy)

        fold_predictions = model_selection.cross_val_predict(pipe, x, y['primary'], cv = 5)
        mood_scores = evaluate_mood(y['combined'], fold_predictions)
        print ("Accuracy of", round(np.mean(mood_scores), 3), "on both primary and secondary moods")

    # Only the first (StandardScaler) accuracy is handed back to the caller.
    return primary_accuracies[0]

In [171]:
# SVM (specifically SVC)
# Sweep every kernel / decision_function_shape combination and print the
# cross-validated scores reported by score_results for each one.
x, y = build_xy(features)
kernel_list = ['linear', 'poly', 'rbf', 'sigmoid']
decision_list = ['ovr', 'ovo']
for kernel in kernel_list:
    print ("***** KERNEL", kernel.upper(), "*****")
    print (" ")
    for decision in decision_list:
        # NOTE(review): `accuracies` is reset on every iteration and never
        # appended to in this cell, and `accuracy` below is not read again
        # here - looks like leftover scaffolding for collecting results.
        accuracies = []
        print ("Decision Function", decision.upper(), "with SVC")
        # NOTE(review): the recorded output for this cell shows identical
        # scores for OVR and OVO under each kernel, so this setting does not
        # appear to affect the predicted labels - confirm whether the inner
        # loop is needed.
        model = svm.SVC(C = 0.5, kernel = kernel, decision_function_shape = decision)
        accuracy = score_results(model, x, y)
        print (" ")
    print (" ")

***** KERNEL LINEAR *****
 
Decision Function OVR with SVC
Normal Scaler:
Accuracy of 0.425 on primary moods
Accuracy of 0.502 on both primary and secondary moods
 
Min-Max Scaler:
Accuracy of 0.36 on primary moods
Accuracy of 0.465 on both primary and secondary moods
 
Decision Function OVO with SVC
Normal Scaler:
Accuracy of 0.425 on primary moods
Accuracy of 0.502 on both primary and secondary moods
 
Min-Max Scaler:
Accuracy of 0.36 on primary moods
Accuracy of 0.465 on both primary and secondary moods
 
 
***** KERNEL POLY *****
 
Decision Function OVR with SVC
Normal Scaler:
Accuracy of 0.33 on primary moods
Accuracy of 0.438 on both primary and secondary moods
 
Min-Max Scaler:
Accuracy of 0.405 on primary moods
Accuracy of 0.495 on both primary and secondary moods
 
Decision Function OVO with SVC
Normal Scaler:
Accuracy of 0.33 on primary moods
Accuracy of 0.438 on both primary and secondary moods
 
Min-Max Scaler:
Accuracy of 0.405 on primary moods
Accuracy of 0.495 on both pr

In [21]:
# Logistic regression
# Manual k-fold cross-validation on the 'primary' mood label. Folds are
# contiguous, unshuffled row ranges of `features`; the scaler is fitted on the
# training folds only and then applied to the held-out fold.
accuracy = 0
num_folds = 5
feature_columns = ['tempo', 'chroma_number', 'zero_crossing_rate', 'energy_entropy', 'spectral_centroid']
# Split row positions rather than the DataFrame itself: this gives the same
# partition as np.array_split(features, num_folds) without going through
# DataFrame.swapaxes, which recent pandas versions deprecate.
folds = [features.iloc[rows] for rows in np.array_split(np.arange(len(features)), num_folds)]
min_max_scaler = preprocessing.MinMaxScaler()

for i in range(num_folds):
    end = i + 1
    # Every fold except fold i is training data.
    train = pd.concat(folds[0:i] + folds[end:num_folds])
    test = folds[i]

    train_main = train[feature_columns]
    train_mood = train['primary']
    test_main = test[feature_columns]
    test_mood = test['primary']

    train_main = train_main.to_numpy()
    train_main = min_max_scaler.fit_transform(train_main)
    train_mood = train_mood.to_numpy()
    test_main = test_main.to_numpy()
    # transform only: the held-out fold must not influence the scaling.
    test_main = min_max_scaler.transform(test_main)
    test_mood = test_mood.to_numpy()

    clf = linear_model.LogisticRegression(random_state = 0)
    clf.fit(train_main, train_mood)
    predictions = clf.predict(test_main)

    curr_acc = metrics.accuracy_score(test_mood, predictions)
    accuracy += curr_acc
    print ("Accuracy for fold", i, "is:", curr_acc)

# Average over the actual fold count instead of a hard-coded 5.
print ("Average accuracy is:", (accuracy / num_folds) * 100)

Accuracy for fold 0 is: 0.3
Accuracy for fold 1 is: 0.35
Accuracy for fold 2 is: 0.4
Accuracy for fold 3 is: 0.475
Accuracy for fold 4 is: 0.45
Average accuracy is: 39.49999999999999


In [23]:
# K nearest neighbor
# Manual k-fold cross-validation on the 'primary' mood label. Folds are
# contiguous, unshuffled row ranges of `features`; the scaler is fitted on the
# training folds only and then applied to the held-out fold.
accuracy = 0
num_folds = 5
feature_columns = ['tempo', 'chroma_number', 'zero_crossing_rate', 'energy_entropy', 'spectral_centroid']
# Split row positions rather than the DataFrame itself: this gives the same
# partition as np.array_split(features, num_folds) without going through
# DataFrame.swapaxes, which recent pandas versions deprecate.
folds = [features.iloc[rows] for rows in np.array_split(np.arange(len(features)), num_folds)]
min_max_scaler = preprocessing.MinMaxScaler()

for i in range(num_folds):
    end = i + 1
    # Every fold except fold i is training data.
    train = pd.concat(folds[0:i] + folds[end:num_folds])
    test = folds[i]

    train_main = train[feature_columns]
    train_mood = train['primary']
    test_main = test[feature_columns]
    test_mood = test['primary']

    train_main = train_main.to_numpy()
    train_main = min_max_scaler.fit_transform(train_main)
    train_mood = train_mood.to_numpy()
    test_main = test_main.to_numpy()
    # transform only: the held-out fold must not influence the scaling.
    test_main = min_max_scaler.transform(test_main)
    test_mood = test_mood.to_numpy()

    clf = neighbors.KNeighborsClassifier()
    clf.fit(train_main, train_mood)
    predictions = clf.predict(test_main)

    curr_acc = metrics.accuracy_score(test_mood, predictions)
    accuracy += curr_acc
    print ("Accuracy for fold", i, "is:", curr_acc)

# Average over the actual fold count instead of a hard-coded 5.
print ("Average accuracy is:", (accuracy / num_folds) * 100)

Accuracy for fold 0 is: 0.35
Accuracy for fold 1 is: 0.5
Accuracy for fold 2 is: 0.275
Accuracy for fold 3 is: 0.45
Accuracy for fold 4 is: 0.4
Average accuracy is: 39.5


In [25]:
# Naive Bayes
# Manual k-fold cross-validation on the 'primary' mood label. Folds are
# contiguous, unshuffled row ranges of `features`; the scaler is fitted on the
# training folds only and then applied to the held-out fold.
accuracy = 0
num_folds = 5
feature_columns = ['tempo', 'chroma_number', 'zero_crossing_rate', 'energy_entropy', 'spectral_centroid']
# Split row positions rather than the DataFrame itself: this gives the same
# partition as np.array_split(features, num_folds) without going through
# DataFrame.swapaxes, which recent pandas versions deprecate.
folds = [features.iloc[rows] for rows in np.array_split(np.arange(len(features)), num_folds)]
min_max_scaler = preprocessing.MinMaxScaler()

for i in range(num_folds):
    end = i + 1
    # Every fold except fold i is training data.
    train = pd.concat(folds[0:i] + folds[end:num_folds])
    test = folds[i]

    train_main = train[feature_columns]
    train_mood = train['primary']
    test_main = test[feature_columns]
    test_mood = test['primary']

    train_main = train_main.to_numpy()
    train_main = min_max_scaler.fit_transform(train_main)
    train_mood = train_mood.to_numpy()
    test_main = test_main.to_numpy()
    # transform only: the held-out fold must not influence the scaling.
    test_main = min_max_scaler.transform(test_main)
    test_mood = test_mood.to_numpy()

    clf = naive_bayes.GaussianNB()
    clf.fit(train_main, train_mood)
    predictions = clf.predict(test_main)

    curr_acc = metrics.accuracy_score(test_mood, predictions)
    accuracy += curr_acc
    print ("Accuracy for fold", i, "is:", curr_acc)

# Average over the actual fold count instead of a hard-coded 5.
print ("Average accuracy is:", (accuracy / num_folds) * 100)

Accuracy for fold 0 is: 0.35
Accuracy for fold 1 is: 0.4
Accuracy for fold 2 is: 0.45
Accuracy for fold 3 is: 0.375
Accuracy for fold 4 is: 0.45
Average accuracy is: 40.5


In [28]:
# Random forest
# Manual k-fold cross-validation on the 'primary' mood label. Folds are
# contiguous, unshuffled row ranges of `features`; the scaler is fitted on the
# training folds only and then applied to the held-out fold.
accuracy = 0
num_folds = 5
feature_columns = ['tempo', 'chroma_number', 'zero_crossing_rate', 'energy_entropy', 'spectral_centroid']
# Split row positions rather than the DataFrame itself: this gives the same
# partition as np.array_split(features, num_folds) without going through
# DataFrame.swapaxes, which recent pandas versions deprecate.
folds = [features.iloc[rows] for rows in np.array_split(np.arange(len(features)), num_folds)]
min_max_scaler = preprocessing.MinMaxScaler()

for i in range(num_folds):
    end = i + 1
    # Every fold except fold i is training data.
    train = pd.concat(folds[0:i] + folds[end:num_folds])
    test = folds[i]

    train_main = train[feature_columns]
    train_mood = train['primary']
    test_main = test[feature_columns]
    test_mood = test['primary']

    train_main = train_main.to_numpy()
    train_main = min_max_scaler.fit_transform(train_main)
    train_mood = train_mood.to_numpy()
    test_main = test_main.to_numpy()
    # transform only: the held-out fold must not influence the scaling.
    test_main = min_max_scaler.transform(test_main)
    test_mood = test_mood.to_numpy()

    clf = ensemble.RandomForestClassifier(max_depth = 10, random_state = 0)
    clf.fit(train_main, train_mood)
    predictions = clf.predict(test_main)

    curr_acc = metrics.accuracy_score(test_mood, predictions)
    accuracy += curr_acc
    print ("Accuracy for fold", i, "is:", curr_acc)

# Average over the actual fold count instead of a hard-coded 5.
print ("Average accuracy is:", (accuracy / num_folds) * 100)

Accuracy for fold 0 is: 0.375
Accuracy for fold 1 is: 0.4
Accuracy for fold 2 is: 0.425
Accuracy for fold 3 is: 0.425
Accuracy for fold 4 is: 0.45
Average accuracy is: 41.5


In [None]:
# Neural network (MLP) - unfinished scratch cell; it has no execution count.
# NOTE(review): `X` (capital) is not defined in any cell visible here; the SVM
# cell defines lowercase `x`, so this would raise NameError on a fresh run.
# NOTE(review): if `y` is still the frame returned by build_xy it has three
# columns ('primary', 'secondary', 'combined'), and the predict call below
# passes two-value rows while build_xy selects five feature columns.
# Presumably this was pasted from an example and not yet adapted to
# `features` / `nn_features` - confirm the intended inputs before running.
clf = neural_network.MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(X, y)
clf.predict([[2., 2.], [-1., -2.]])