In [1]:
import numpy as np
import os
import pandas as pd
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
    
data_path = os.path.join('.','tests','test_output')

X = np.load(os.path.join(data_path,'x_split_0.npy'),mmap_mode='r')
Y = np.load(os.path.join(data_path,'y_split_0.npy'),mmap_mode='r')

param_svm = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']}
 ]

param_knn = [
    {'n_neighbors': [1, 3, 5, 11],'n_jobs':[2]}
 ]

param_ada = {
 'n_estimators': [50, 100],
 'learning_rate' : [0.01,0.1,0.3,1]
}


In [2]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, stratify = Y)


In [3]:
knn = KNeighborsClassifier()
clf_knn = GridSearchCV(knn, param_knn, cv=3, n_jobs = 10,pre_dispatch = 15)
clf_knn.fit(X_train,y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid='warn', n_jobs=10,
       param_grid=[{'n_neighbors': [1, 3, 5, 11], 'n_jobs': [2]}],
       pre_dispatch=15, refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [4]:
print(clf_knn.cv_results_)

{'mean_fit_time': array([30.43816606, 32.02195096, 40.83643794, 35.76144552]), 'std_fit_time': array([0.93374964, 1.41825354, 7.1846899 , 1.2191548 ]), 'mean_score_time': array([224.00110054, 254.88515838, 260.44612916, 240.78767816]), 'std_score_time': array([4.82777487, 5.19525943, 2.43101207, 6.88576316]), 'param_n_jobs': masked_array(data=[2, 2, 2, 2],
             mask=[False, False, False, False],
       fill_value='?',
            dtype=object), 'param_n_neighbors': masked_array(data=[1, 3, 5, 11],
             mask=[False, False, False, False],
       fill_value='?',
            dtype=object), 'params': [{'n_jobs': 2, 'n_neighbors': 1}, {'n_jobs': 2, 'n_neighbors': 3}, {'n_jobs': 2, 'n_neighbors': 5}, {'n_jobs': 2, 'n_neighbors': 11}], 'split0_test_score': array([0.47172619, 0.40922619, 0.3735119 , 0.34970238]), 'split1_test_score': array([0.45982143, 0.38392857, 0.3735119 , 0.40625   ]), 'split2_test_score': array([0.45045045, 0.36486486, 0.32582583, 0.35135135]), 'mean_test_s

In [5]:
results_knn = pd.DataFrame.from_dict(clf_knn.cv_results_)
results_knn.to_csv(os.path.join(data_path,'knn.csv'))



In [6]:
ada = AdaBoostClassifier()
clf_ada = GridSearchCV(ada, param_ada, cv=3,n_jobs = 8,pre_dispatch = 10)
clf_ada.fit(X_train,y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None),
       fit_params=None, iid='warn', n_jobs=8,
       param_grid={'n_estimators': [50, 100], 'learning_rate': [0.01, 0.1, 0.3, 1]},
       pre_dispatch=10, refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [7]:
results_ada = pd.DataFrame.from_dict(clf_ada.cv_results_)
results_ada.to_csv(os.path.join(data_path,'ada.csv'))



In [8]:
from sklearn import svm

svc = svm.SVC(gamma='auto')
clf_svm = GridSearchCV(svc, param_svm, cv=3,n_jobs = 10,pre_dispatch = 15)
clf_svm.fit(X_train,y_train)


GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=10,
       param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']}],
       pre_dispatch=15, refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [9]:
results_svm = pd.DataFrame.from_dict(clf_svm.cv_results_)
results_svm .to_csv(os.path.join(data_path,'svm.csv'))

