In [1]:
import IPython, nussl, numpy as np, scipy as sp, matplotlib.pyplot as plt, matplotlib, sklearn, librosa, cmath,math,time,vamp
import os
from IPython.display import Audio, display
from scipy.fftpack import fft, ifft
import csv
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.preprocessing import normalize

In [2]:
def createTestAndTrain(path_to_csv, percent_train=80, num_genres=10):
    """
    Load a feature table and split it into per-genre train and test sets.

    The CSV must have one header row (skipped), numeric feature columns and
    the genre label in the last column.  The code assumes the rows are grouped
    by genre and that every genre contributes the same number of rows: the
    table is cut into `num_genres` equal, contiguous chunks, each chunk is
    shuffled independently, and the first `percent_train` percent of every
    chunk goes to the training set while the rest goes to the test set.

    Because the shuffle uses numpy's global random state, repeated calls give
    different splits unless the caller seeds `np.random` first.

    Parameters
    ----------
    path_to_csv : str
        Path to the feature table.
    percent_train : number, optional
        Percentage (0-100) of each genre's rows placed in the training set.
    num_genres : int, optional
        Number of equally sized genre chunks in the table.

    Returns
    -------
    (train, genres_train), (test, genres_test)
        `train` / `test` are float arrays of column-normalized feature
        vectors; `genres_train` / `genres_test` are the matching label arrays.

    Raises
    ------
    ValueError
        From `np.split` when the row count is not divisible by `num_genres`.
    """
    # dtype=str is accepted by numpy on both Python 2 and Python 3, unlike the
    # Python-2-only alias 'string'.
    table = np.genfromtxt(path_to_csv, dtype=str, delimiter=',', skip_header=1)

    # Shuffle inside each genre so the split below stays stratified.
    genre_subsets = np.split(table, num_genres)
    for subset in genre_subsets:
        np.random.shuffle(subset)
    table = np.concatenate(genre_subsets)

    # Separate into vectors and labels.
    labels = table[:, -1]
    feature_vectors = table[:, :-1].astype(float)
    # NOTE(review): each column is scaled using all rows, test rows included,
    # so test data influences the scaling. Kept as-is to preserve results.
    feature_vectors = normalize(feature_vectors, axis=0)

    # Position of every row inside its own genre chunk.  Comparing
    # position * 100 against percent_train * rows_per_genre keeps the test in
    # exact integer arithmetic and reduces to `position < percent_train` when
    # a genre has exactly 100 rows (the layout the original code assumed).
    num_rows = table.shape[0]
    rows_per_genre = num_rows // num_genres
    position_in_genre = np.arange(num_rows) % rows_per_genre
    in_train = position_in_genre * 100 < percent_train * rows_per_genre

    train = feature_vectors[in_train]
    test = feature_vectors[~in_train]
    genres_train = labels[in_train]
    genres_test = labels[~in_train]

    return (train, genres_train), (test, genres_test)

In [3]:
def trainGaussianNB(path_to_csv):
    """
    Fit a Gaussian Naive Bayes classifier on one feature table and score it.

    The table is partitioned by `createTestAndTrain` (default split), the
    classifier is fitted on the training part and evaluated on the test part.
    The fitted model itself is discarded.

    Parameters
    ----------
    path_to_csv : str
        Path to the feature vector table (csv file).

    Returns
    -------
    float
        Fraction of test vectors whose predicted genre matches the true one.

    Raises
    ------
    ZeroDivisionError
        If the test partition is empty.
    """
    # partition out training and testing sets
    (train, genres_train), (test, genres_test) = createTestAndTrain(path_to_csv)

    # train
    gnb = GaussianNB()
    gnb.fit(train, genres_train)

    # prediction accuracy: element-wise comparison replaces the counting loop
    predictions = gnb.predict(test)
    num_correct = np.count_nonzero(predictions == genres_test)
    return float(num_correct) / len(genres_test)

In [4]:
# The cells below each score Gaussian Naive Bayes on one feature table.
# NOTE: despite the gnb_* names, trainGaussianNB returns the test-set accuracy
# (a float) for a single shuffled split, not a fitted model.
gnb_control = trainGaussianNB('features/control.csv')

In [5]:
gnb_hpss_percussive = trainGaussianNB('features/hpss_percussive.csv')

In [6]:
gnb_hpss_harmonic = trainGaussianNB('features/hpss_harmonic.csv')

In [7]:
# Disabled run, kept for reference.
#gnb_hpss_both = trainGaussianNB('features/hpss_h_and_p.csv')

In [8]:
gnb_repet_fg = trainGaussianNB('features/repet_foreground.csv')

In [9]:
gnb_repet_bg = trainGaussianNB('features/repet_background.csv')

In [10]:
# Disabled run, kept for reference.
#gnb_repet_both = trainGaussianNB('features/repet_f_and_b.csv')

In [35]:
def trainNN(path_to_csv, num_neighbors = 4):
    """
    Fit a k-nearest-neighbors classifier on one feature table and score it.

    The table is partitioned by `createTestAndTrain` (default split), the
    classifier is fitted on the training part and evaluated on the test part.
    The fitted model itself is discarded.

    Parameters
    ----------
    path_to_csv : str
        Path to the feature vector table (csv file).
    num_neighbors : int, optional
        Passed to `KNeighborsClassifier` as `n_neighbors`.

    Returns
    -------
    float
        Fraction of test vectors whose predicted genre matches the true one.

    Raises
    ------
    ZeroDivisionError
        If the test partition is empty.
    """
    # partition out training and testing sets
    (train, genres_train), (test, genres_test) = createTestAndTrain(path_to_csv)

    # train
    nn = KNeighborsClassifier(n_neighbors=num_neighbors)
    nn.fit(train, genres_train)

    # prediction accuracy: element-wise comparison replaces the counting loop
    predictions = nn.predict(test)
    num_correct = np.count_nonzero(predictions == genres_test)
    return float(num_correct) / len(genres_test)

In [12]:
# The cells below each score the nearest-neighbor classifier on one feature
# table. The explicit 4 matches trainNN's default num_neighbors.
# NOTE: despite the nn_* names, trainNN returns the test-set accuracy
# (a float) for a single shuffled split, not a fitted model.
nn_control = trainNN('features/control.csv', 4)

In [13]:
nn_hpss_percussive = trainNN('features/hpss_percussive.csv', 4)

In [14]:
nn_hpss_harmonic = trainNN('features/hpss_harmonic.csv', 4)

In [15]:
# Disabled run, kept for reference.
#nn_hpss_both = trainNN('features/hpss_h_and_p.csv', 4)

In [16]:
nn_repet_background = trainNN('features/repet_background.csv', 4)

In [17]:
nn_repet_foreground = trainNN('features/repet_foreground.csv', 4)

In [18]:
# Disabled run, kept for reference.
#nn_repet_both = trainNN('features/repet_f_and_b.csv', 4)

In [19]:
def trainSVC(path_to_csv):
    """
    Fit a support vector classifier on one feature table and score it.

    The table is partitioned by `createTestAndTrain` (default split), an
    `SVC` with default settings is fitted on the training part and evaluated
    on the test part.  The fitted model itself is discarded.

    Parameters
    ----------
    path_to_csv : str
        Path to the feature vector table (csv file).

    Returns
    -------
    float
        Fraction of test vectors whose predicted genre matches the true one.

    Raises
    ------
    ZeroDivisionError
        If the test partition is empty.
    """
    # partition out training and testing sets
    (train, genres_train), (test, genres_test) = createTestAndTrain(path_to_csv)

    # train
    clf = SVC()
    clf.fit(train, genres_train)

    # prediction accuracy: element-wise comparison replaces the counting loop
    predictions = clf.predict(test)
    num_correct = np.count_nonzero(predictions == genres_test)
    return float(num_correct) / len(genres_test)

In [29]:
# The cells below each score the support vector classifier on one feature
# table.
# NOTE: despite the svc_* names, trainSVC returns the test-set accuracy
# (a float) for a single shuffled split, not a fitted model.
svc_control = trainSVC('features/control.csv')

In [30]:
svc_hpss_harmonic = trainSVC('features/hpss_harmonic.csv')

In [22]:
svc_hpss_percussive = trainSVC('features/hpss_percussive.csv')

In [23]:
# Disabled run, kept for reference.
#svc_hpss_both = trainSVC('features/hpss_h_and_p.csv')

In [24]:
svc_repet_fg = trainSVC('features/repet_foreground.csv')

In [25]:
svc_repet_bg = trainSVC('features/repet_background.csv')

In [26]:
# Disabled run, kept for reference.
#svc_repet_both = trainSVC('features/repet_f_and_b.csv')

In [33]:
def getAveragePredictionAccuracy(modelFunction, csv_file, num_times):
    """
    Average the score returned by `modelFunction` over repeated runs.

    Parameters
    ----------
    modelFunction : callable
        Called as `modelFunction(csv_file)`; must return a number.
    csv_file : str
        Argument forwarded unchanged to `modelFunction` on every run.
    num_times : int
        Number of runs to average over.

    Returns
    -------
    numpy.float64
        Mean of the returned scores (nan, with a numpy warning, when
        `num_times` is 0).
    """
    # Collect all scores first and convert once, instead of re-allocating the
    # array with np.append on every iteration.
    prediction_scores = [modelFunction(csv_file) for _ in np.arange(num_times)]
    return np.mean(np.asarray(prediction_scores, dtype=float))

In [43]:
# Benchmark every classifier on every feature table and save the mean
# accuracies (100 random splits per classifier/table pair) to results.csv.
datasets = [
    'features/control.csv',
    'features/hpss_harmonic.csv',
    'features/hpss_percussive.csv',
    'features/repet_foreground.csv',
    'features/repet_background.csv',
    'features/repet_f_b_combo.csv',
    'features/repet_avg.csv',
    'features/hpss_avg.csv',
    'features/hpss_h_p_combo.csv',
]
# Order must match the column titles written to the header row below.
modelFunctions = [trainNN, trainGaussianNB, trainSVC]

results = {}
for dataset in datasets:
    results[dataset] = [
        getAveragePredictionAccuracy(model, dataset, 100)
        for model in modelFunctions
    ]

# csv.writer needs a binary-mode file on Python 2 but a text-mode file opened
# with newline='' on Python 3.  Python 2's open() rejects the `newline`
# keyword with TypeError before touching the file, so fall back to 'wb' there.
try:
    csv_file = open('results.csv', 'w', newline='')
except TypeError:
    csv_file = open('results.csv', 'wb')

with csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['Dataset', 'Nearest Neighbor', 'Gaussian', 'SVC'])
    # Iterate over `datasets`, not the dict, so row order is deterministic.
    for dataset in datasets:
        writer.writerow([dataset] + results[dataset])
