This code runs a grid search for the following classifiers: SVM, KNN, and AdaBoost.
To shorten computation time, hyperparameter tuning is done on a subset of the dataset. The subset has the same distribution as the whole dataset.

In [3]:
import numpy as np
import os
import pandas as pd
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier

# Directory holding the pre-split dataset batches; the result CSVs of the
# grid searches are written to the same place.
data_path = os.path.join('.', 'tests', 'test_output')

# Hyperparameter grids explored by the grid searches.
# SVM: linear kernel only, sweeping the regularisation parameter C.
param_svm = [
    {
        'C': [1, 10, 100, 1000],
        'kernel': ['linear'],
    },
]

# KNN: number of neighbours consulted for each prediction.
param_knn = [
    {'n_neighbors': [1, 3, 5]},
]

# AdaBoost: ensemble size and learning rate.
param_ada = {
    'n_estimators': [50, 100],
    'learning_rate': [0.01, 0.1, 0.3, 1],
}

# Load only the first batch. mmap_mode='r' memory-maps the files read-only
# instead of reading them fully into memory.
x_batch_file = os.path.join(data_path, 'x_split_0.npy')
y_batch_file = os.path.join(data_path, 'y_split_0.npy')
X = np.load(x_batch_file, mmap_mode='r')
Y = np.load(y_batch_file, mmap_mode='r')

In [None]:
# Split the batch into a training part and a held-out test part (33 %),
# stratified on the labels so both parts keep the class proportions of Y.
# A fixed random_state makes the split reproducible, so anything fitted on
# the training part sees the same samples on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    stratify=Y,
    random_state=0,
)

In [None]:
# Grid search over the KNN hyperparameters with 3-fold cross-validation,
# running 4 jobs in parallel and also recording the training scores.
knn = KNeighborsClassifier()
clf_knn = GridSearchCV(
    knn,
    param_knn,
    cv=3,
    n_jobs=4,
    pre_dispatch=4,
    return_train_score=True,
)
# Fit on the training split only (as the AdaBoost search does), so that the
# held-out test samples never take part in hyperparameter selection and the
# results of the three classifiers are computed on the same data.
clf_knn.fit(X_train, y_train)

In [None]:
# Tabulate the KNN cross-validation results and store them as CSV next to
# the input data.
knn_csv_file = os.path.join(data_path, 'knn.csv')
results_knn = pd.DataFrame.from_dict(clf_knn.cv_results_)
results_knn.to_csv(knn_csv_file)

In [None]:
# Grid search over the SVM hyperparameters with 3-fold cross-validation,
# running 4 jobs in parallel and also recording the training scores.
svc = svm.SVC(gamma='auto')
clf_svm = GridSearchCV(
    svc,
    param_svm,
    cv=3,
    n_jobs=4,
    pre_dispatch=4,
    return_train_score=True,
)
# Fit on the training split only (as the AdaBoost search does), so that the
# held-out test samples never take part in hyperparameter selection and the
# results of the three classifiers are computed on the same data.
clf_svm.fit(X_train, y_train)

In [None]:
# Tabulate the SVM cross-validation results and store them as CSV next to
# the input data.
svm_csv_file = os.path.join(data_path, 'svm.csv')
results_svm = pd.DataFrame.from_dict(clf_svm.cv_results_)
results_svm.to_csv(svm_csv_file)

In [None]:
# Grid search over the AdaBoost hyperparameters with 3-fold cross-validation
# on the training split, running 4 jobs in parallel and also recording the
# training scores.
ada = AdaBoostClassifier()
clf_ada = GridSearchCV(
    estimator=ada,
    param_grid=param_ada,
    cv=3,
    n_jobs=4,
    pre_dispatch=4,
    return_train_score=True,
)
clf_ada.fit(X_train, y_train)

In [None]:
# Tabulate the AdaBoost cross-validation results and store them as CSV next
# to the input data.
ada_csv_file = os.path.join(data_path, 'ada.csv')
results_ada = pd.DataFrame.from_dict(clf_ada.cv_results_)
results_ada.to_csv(ada_csv_file)