In [83]:
import librosa 
import librosa.display as display

import pandas as pd 
import numpy as np 
import scipy as sp 
import matplotlib.pyplot as plt 
%matplotlib inline

import os
import time

from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold, mutual_info_classif, chi2, SelectKBest, SelectPercentile
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [132]:
train_data = pd.read_csv('train_data_librosa.csv')
valid_data = pd.read_csv('valid_data_librosa.csv')

X_train = train_data.drop(['label'],axis=1)
y_train = train_data['label']

X_valid = valid_data.drop(['label'],axis=1)
y_valid = valid_data['label']

In [133]:
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(X_train),columns=X_train.columns)
X_valid = pd.DataFrame(scaler.transform(X_valid),columns=X_valid.columns)

In [134]:
estimators = [GaussianNB(),KNeighborsClassifier(),LogisticRegression(),
              DecisionTreeClassifier(),RandomForestClassifier(),GradientBoostingClassifier()]

In [135]:
param_grids = [{},
               {'n_neighbors':[2,5,7,10,12,15], 'weights':['uniform','distance'],'p':[1,2]},
               {'C':np.logspace(-4,4,9),'penalty':['l1','l2'],'class_weight':[None,'balanced']},
               {'max_depth':np.linspace(2,10,9),'class_weight':[None,'balanced']},
               {'max_depth':np.linspace(2,10,9),'n_estimators':[50,100],'class_weight':[None,'balanced']},
               {'max_depth':np.linspace(2,10,9),'n_estimators':[50,100]}
         ]

In [142]:
X = np.concatenate((X_train,X_valid),axis=0)
y = np.concatenate((y_train,y_valid),axis=0)
test_fold = []
for i in range(len(X_train)):
    test_fold.append(-1)
for i in range(len(X_valid)):
    test_fold.append(0)

cv = PredefinedSplit(test_fold=test_fold)


In [145]:
best_estimator = None
best_score = 0
start=time.time()
print('estimator', 'score' )
for estimator,param_grid in zip(estimators,param_grids):
    gridsearch = GridSearchCV(estimator,param_grid,scoring='accuracy',cv=cv)
    gridsearch.fit(X,y)
    print(str(estimator).split('(')[0], gridsearch.best_score_)
    if gridsearch.best_score_ > best_score:
        best_score = gridsearch.best_score_
        best_estimator = gridsearch.best_estimator_
end=time.time()
print('best: ', str(best_estimator).split('(')[0], best_score)
print('minutes elapsed: ',(end-start)/60)

estimator score
GaussianNB 0.200680272109
KNeighborsClassifier 0.117346938776
LogisticRegression 0.598639455782
DecisionTreeClassifier 0.285714285714
RandomForestClassifier 0.207482993197
GradientBoostingClassifier 0.221088435374
best:  LogisticRegression 0.598639455782
minutes elapsed:  7.391902148723602
