In [157]:
import IPython, nussl, numpy as np, scipy as sp, matplotlib.pyplot as plt, matplotlib, sklearn, librosa, cmath,math,time,vamp
import os
from IPython.display import Audio, display
from scipy.fftpack import fft, ifft
import csv
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.preprocessing import normalize

In [158]:
def createTestAndTrain(path_to_csv, percent_train=70, num_genres=10, seed=None):
    """
    Load a feature table from a csv file and split it into training and
    testing sets, taking the same share of rows from every genre.

    The file is read with one header row skipped. The last column is used as
    the genre label and all other columns as numeric features. The table is
    cut into `num_genres` equal, consecutive slices, so rows are expected to
    be grouped by genre with the same number of rows per genre.

    path_to_csv   -- path to the csv feature table
    percent_train -- percentage (0-100) of each genre's rows used for training
    num_genres    -- number of equal-sized consecutive genre groups in the file
    seed          -- optional seed for the within-genre shuffle; None (the
                     default) uses numpy's global random state

    returns (train, genres_train), (test, genres_test)

    raises ValueError (from np.split) when the number of rows is not
    divisible by num_genres
    """
    # Not named `csv`, so the imported csv module is not shadowed in here.
    rows = np.genfromtxt(path_to_csv, dtype='string', delimiter=',', skip_header=1)

    # Shuffle inside each genre slice only, so every genre keeps its own
    # contiguous block of rows after concatenation.
    rng = np.random if seed is None else np.random.RandomState(seed)
    genre_subsets = np.split(rows, num_genres)
    for subset in genre_subsets:
        rng.shuffle(subset)
    rows = np.concatenate(genre_subsets)

    # separate into vectors and labels
    labels = rows[:, -1]
    feature_vectors = rows[:, :-1].astype(float)
    # NOTE(review): the scaling is computed over all rows before the split,
    # so test rows influence how the training rows are scaled. Kept as is to
    # leave existing results unchanged.
    feature_vectors = normalize(feature_vectors, axis=0)

    # Position of every row inside its genre block. The block size comes from
    # the data instead of a fixed 100, so other dataset sizes split correctly.
    rows_per_genre = rows.shape[0] // num_genres
    position_in_genre = np.arange(rows.shape[0]) % rows_per_genre
    in_train = position_in_genre < rows_per_genre * percent_train / 100.0
    in_test = ~in_train

    train = feature_vectors[in_train]
    test = feature_vectors[in_test]
    genres_train = labels[in_train]
    genres_test = labels[in_test]

    return (train, genres_train), (test, genres_test)

In [159]:
def trainGaussianNB(path_to_csv):
    """
    given a path to the feature vector table (csv file)
    returns a trained Gaussian Naive Bayes model
    """
    # partition out training and testing sets
    training, testing = createTestAndTrain(path_to_csv)
    train = training[0]
    genres_train = training[1]
    test = testing[0]
    genres_test = testing[1]
    
    # train
    gnb = GaussianNB()
    gnb.fit(train, genres_train) 
    
    # output prediction accuracy
    predictions = gnb.predict(test)
    count = 0
    for i, prediction in enumerate(predictions):
        if prediction == genres_test[i]:
            count += 1
    print "Prediction Accuracy: ", float(count)/len(genres_test)

    return gnb

In [160]:
# Gaussian Naive Bayes on control.csv (presumably the unseparated-audio baseline - TODO confirm); prints test accuracy.
gnb_control = trainGaussianNB('features/control.csv')

Prediction Accuracy:  0.43


In [161]:
# Gaussian Naive Bayes on hpss_percussive.csv (presumably features of the HPSS percussive component - TODO confirm).
gnb_hpss_percussive = trainGaussianNB('features/hpss_percussive.csv')

Prediction Accuracy:  0.44


In [162]:
# Gaussian Naive Bayes on hpss_harmonic.csv (presumably features of the HPSS harmonic component - TODO confirm).
gnb_hpss_harmonic = trainGaussianNB('features/hpss_harmonic.csv')

Prediction Accuracy:  0.366666666667


In [163]:
#gnb_hpss_both = trainGaussianNB('features/hpss_h_and_p.csv')

In [164]:
# Gaussian Naive Bayes on repet_foreground.csv (presumably features of the REPET foreground - TODO confirm).
gnb_repet_fg = trainGaussianNB('features/repet_foreground.csv')

Prediction Accuracy:  0.453333333333


In [165]:
# Gaussian Naive Bayes on repet_background.csv (presumably features of the REPET background - TODO confirm).
gnb_repet_bg = trainGaussianNB('features/repet_background.csv')

Prediction Accuracy:  0.43


In [166]:
#gnb_repet_both = trainGaussianNB('features/repet_f_and_b.csv')

In [167]:
def trainNN(path_to_csv, num_neighbors):
    """
    given a path to the feature vector table (csv file)
    returns a trained nearest neighbor model, the validation set and the testing (validation) target labels
    """
    # partition out training and testing sets
    training, testing = createTestAndTrain(path_to_csv)
    train = training[0]
    genres_train = training[1]
    test = testing[0]
    genres_test = testing[1]
    
    # train
    nn = KNeighborsClassifier(n_neighbors=num_neighbors)
    nn.fit(train, genres_train) 
    
    # output prediction accuracy
    predictions = nn.predict(test)
    count = 0
    for i, prediction in enumerate(predictions):
        if prediction == genres_test[i]:
            count += 1
    print "Prediction Accuracy: ", float(count)/len(genres_test)
    
#     print nn.predict_proba(test)

    return nn, test, genres_train

In [168]:
# 4-nearest-neighbors on control.csv. trainNN returns a 3-tuple, so nn_control is (model, test vectors, labels), not the bare model.
nn_control = trainNN('features/control.csv', 4)

Prediction Accuracy:  0.446666666667


In [169]:
# 4-nearest-neighbors on hpss_percussive.csv; the result is the (model, test vectors, labels) tuple from trainNN.
nn_hpss_percussive = trainNN('features/hpss_percussive.csv', 4)

Prediction Accuracy:  0.36


In [170]:
# 4-nearest-neighbors on hpss_harmonic.csv; the result is the (model, test vectors, labels) tuple from trainNN.
nn_hpss_harmonic = trainNN('features/hpss_harmonic.csv', 4)

Prediction Accuracy:  0.37


In [171]:
#nn_hpss_both = trainNN('features/hpss_h_and_p.csv', 4)

In [172]:
# 4-nearest-neighbors on repet_background.csv; the result is the (model, test vectors, labels) tuple from trainNN.
nn_repet_background = trainNN('features/repet_background.csv', 4)

Prediction Accuracy:  0.396666666667


In [173]:
# 4-nearest-neighbors on repet_foreground.csv; the result is the (model, test vectors, labels) tuple from trainNN.
nn_repet_foreground = trainNN('features/repet_foreground.csv', 4)

Prediction Accuracy:  0.373333333333


In [174]:
#nn_repet_both = trainNN('features/repet_f_and_b.csv', 4)

In [175]:
def trainSVC(path_to_csv):
    """
    given a path to the feature vector table (csv file)
    returns trained support vector classifcation, with test vectors and test genre labels
    """
    # partition out training and testing sets
    training, testing = createTestAndTrain(path_to_csv)
    train = training[0]
    genres_train = training[1]
    test = testing[0]
    genres_test = testing[1]
    
    # train
    clf = SVC()
    clf.fit(train, genres_train) 

    # output prediction accuracy
    predictions = clf.predict(test)
    count = 0
    for i, prediction in enumerate(predictions):
        if prediction == genres_test[i]:
            count += 1
    print "Prediction Accuracy: ", float(count)/len(genres_test)
    
#     print clf.predict_proba(test)

    return clf, test, genres_train

In [176]:
# Default SVC on control.csv. trainSVC returns a 3-tuple, so svc_control is (model, test vectors, labels), not the bare model.
svc_control = trainSVC('features/control.csv')

Prediction Accuracy:  0.293333333333


In [177]:
# Default SVC on hpss_harmonic.csv; the result is the (model, test vectors, labels) tuple from trainSVC.
svc_hpss_harmonic = trainSVC('features/hpss_harmonic.csv')

Prediction Accuracy:  0.32


In [178]:
# Default SVC on hpss_percussive.csv; the result is the (model, test vectors, labels) tuple from trainSVC.
svc_hpss_percussive = trainSVC('features/hpss_percussive.csv')

Prediction Accuracy:  0.316666666667


In [179]:
#svc_hpss_both = trainSVC('features/hpss_h_and_p.csv')

In [180]:
# Default SVC on repet_foreground.csv; the result is the (model, test vectors, labels) tuple from trainSVC.
svc_repet_fg = trainSVC('features/repet_foreground.csv')

Prediction Accuracy:  0.326666666667


In [181]:
# Default SVC on repet_background.csv; the result is the (model, test vectors, labels) tuple from trainSVC.
svc_repet_bg = trainSVC('features/repet_background.csv')

Prediction Accuracy:  0.38


In [182]:
#svc_repet_both = trainSVC('features/repet_f_and_b.csv')