# Notebook 4 - Building/GridSearching Models

In [39]:
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.feature_selection import SelectKBest, SelectFromModel, RFE, chi2, f_regression
from sklearn.decomposition import PCA

import numpy as np
import pandas as pd
import tqdm

import pickle
from sklearn.externals import joblib

## Fitting Models on UCI

In [40]:
uci_madelon = joblib.load('./pickles/uci_madelonALL.pkl')

In [41]:
feats = joblib.load('./pickles/uci_feats.pkl')

In [42]:
uci_mad_feats = uci_madelon[feats]

In [43]:
y = uci_madelon['target']
X = uci_mad_feats

In [44]:
pipe_tree = Pipeline([('scaler', StandardScaler(with_mean=False)),
                      ('pca', PCA(n_components=5)),
                      ('estimator', DecisionTreeClassifier())])

pipe_logr = Pipeline([('scaler', StandardScaler(with_mean=False)),
                      ('pca', PCA(n_components=5)),
                      ('estimator', LogisticRegression())])

pipe_knc  = Pipeline([('scaler', StandardScaler(with_mean=False)),
                      ('pca', PCA(n_components=5)),
                      ('estimator', KNeighborsClassifier())])

pipe_svc  = Pipeline([('scaler', StandardScaler(with_mean=False)),
                      ('pca', PCA(n_components=5)),
                      ('estimator', SVC())])

pipe_rfc  = Pipeline([('scaler', StandardScaler(with_mean=False)),
                      ('pca', PCA(n_components=5)),
                      ('estimator', RandomForestClassifier())])

tree_params = {'estimator__max_depth' : [2,3,4,5,6,7,8,9,10],
               'estimator__max_features' : ['sqrt', 'log2', 'auto'],}

logr_params = {'estimator__C' : np.logspace(-3,3,7),
               'estimator__penalty' : ['l1', 'l2'],}

knc_params  = {'estimator__n_neighbors' : [3,5,11,],}

svc_params  = {'estimator__C' : np.logspace(-3,3,4),
               'estimator__kernel' : ['rbf']}

rfc_params  = {'estimator__n_estimators' : [25,50,75,100],
               'estimator__max_features' : ['sqrt', 'auto', 'log2'],}

In [45]:
grid_tree = GridSearchCV(pipe_tree, tree_params, verbose = 10)
grid_logr = GridSearchCV(pipe_logr, logr_params, verbose = 10)
grid_knc  = GridSearchCV(pipe_knc, knc_params, verbose = 10)
grid_svc  = GridSearchCV(pipe_svc, svc_params, verbose = 10)
grid_rfc  = GridSearchCV(pipe_rfc, rfc_params, verbose = 10)

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33)

In [55]:
uci_grid_tree = grid_tree.fit(X_train, y_train)
uci_grid_logr = grid_logr.fit(X_train, y_train)
uci_grid_knc  = grid_knc.fit(X_train, y_train)
uci_grid_svc  = grid_svc.fit(X_train, y_train)
uci_grid_rfc  = grid_rfc.fit(X_train, y_train)

uci_grid_tree 
uci_grid_logr 
uci_grid_knc  
uci_grid_svc  
uci_grid_rfc  

Fitting 3 folds for each of 27 candidates, totalling 81 fits
[CV] estimator__max_depth=2, estimator__max_features=sqrt ............
[CV]  estimator__max_depth=2, estimator__max_features=sqrt, score=0.621924, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=sqrt ............
[CV]  estimator__max_depth=2, estimator__max_features=sqrt, score=0.612975, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=sqrt ............
[CV]  estimator__max_depth=2, estimator__max_features=sqrt, score=0.587444, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=log2 ............
[CV]  estimator__max_depth=2, estimator__max_features=log2, score=0.617450, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=log2 ............
[CV]  estimator__max_depth=2, estimator__max_features=log2, score=0.612975, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=log2 ............
[CV]  estimator__max_depth=2, estimator__max_features=log2, sco

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.1s remaining:    0.0s



[CV]  estimator__max_depth=4, estimator__max_features=log2, score=0.659955, total=   0.0s
[CV] estimator__max_depth=4, estimator__max_features=log2 ............
[CV]  estimator__max_depth=4, estimator__max_features=log2, score=0.740492, total=   0.0s
[CV] estimator__max_depth=4, estimator__max_features=log2 ............
[CV]  estimator__max_depth=4, estimator__max_features=log2, score=0.663677, total=   0.0s
[CV] estimator__max_depth=4, estimator__max_features=auto ............
[CV]  estimator__max_depth=4, estimator__max_features=auto, score=0.648770, total=   0.0s
[CV] estimator__max_depth=4, estimator__max_features=auto ............
[CV]  estimator__max_depth=4, estimator__max_features=auto, score=0.655481, total=   0.0s
[CV] estimator__max_depth=4, estimator__max_features=auto ............
[CV]  estimator__max_depth=4, estimator__max_features=auto, score=0.679372, total=   0.0s
[CV] estimator__max_depth=5, estimator__max_features=sqrt ............
[CV]  estimator__max_depth=5, est

[Parallel(n_jobs=1)]: Done  81 out of  81 | elapsed:    0.8s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.1s remaining:    0.0s


Fitting 3 folds for each of 14 candidates, totalling 42 fits
[CV] estimator__C=0.001, estimator__penalty=l1 .......................
[CV]  estimator__C=0.001, estimator__penalty=l1, score=0.494407, total=   0.0s
[CV] estimator__C=0.001, estimator__penalty=l1 .......................
[CV]  estimator__C=0.001, estimator__penalty=l1, score=0.494407, total=   0.0s
[CV] estimator__C=0.001, estimator__penalty=l1 .......................
[CV]  estimator__C=0.001, estimator__penalty=l1, score=0.493274, total=   0.0s
[CV] estimator__C=0.001, estimator__penalty=l2 .......................
[CV]  estimator__C=0.001, estimator__penalty=l2, score=0.619687, total=   0.0s
[CV] estimator__C=0.001, estimator__penalty=l2 .......................
[CV]  estimator__C=0.001, estimator__penalty=l2, score=0.612975, total=   0.0s
[CV] estimator__C=0.001, estimator__penalty=l2 .......................
[CV]  estimator__C=0.001, estimator__penalty=l2, score=0.594170, total=   0.0s
[CV] estimator__C=0.01, estimator__pena

[Parallel(n_jobs=1)]: Done  42 out of  42 | elapsed:    0.4s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.2s finished


[CV]  estimator__C=0.001, estimator__kernel=rbf, score=0.505593, total=   0.1s
[CV] estimator__C=0.001, estimator__kernel=rbf .......................
[CV]  estimator__C=0.001, estimator__kernel=rbf, score=0.505593, total=   0.1s
[CV] estimator__C=0.001, estimator__kernel=rbf .......................
[CV]  estimator__C=0.001, estimator__kernel=rbf, score=0.506726, total=   0.1s
[CV] estimator__C=0.1, estimator__kernel=rbf .........................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.3s remaining:    0.0s


[CV]  estimator__C=0.1, estimator__kernel=rbf, score=0.852349, total=   0.1s
[CV] estimator__C=0.1, estimator__kernel=rbf .........................
[CV]  estimator__C=0.1, estimator__kernel=rbf, score=0.852349, total=   0.1s
[CV] estimator__C=0.1, estimator__kernel=rbf .........................
[CV]  estimator__C=0.1, estimator__kernel=rbf, score=0.831839, total=   0.1s
[CV] estimator__C=10.0, estimator__kernel=rbf ........................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.5s remaining:    0.0s


[CV]  estimator__C=10.0, estimator__kernel=rbf, score=0.897092, total=   0.0s
[CV] estimator__C=10.0, estimator__kernel=rbf ........................
[CV]  estimator__C=10.0, estimator__kernel=rbf, score=0.863535, total=   0.0s
[CV] estimator__C=10.0, estimator__kernel=rbf ........................
[CV]  estimator__C=10.0, estimator__kernel=rbf, score=0.874439, total=   0.0s
[CV] estimator__C=1000.0, estimator__kernel=rbf ......................
[CV]  estimator__C=1000.0, estimator__kernel=rbf, score=0.876957, total=   0.0s
[CV] estimator__C=1000.0, estimator__kernel=rbf ......................


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.6s remaining:    0.0s


[CV]  estimator__C=1000.0, estimator__kernel=rbf, score=0.841163, total=   0.0s
[CV] estimator__C=1000.0, estimator__kernel=rbf ......................
[CV]  estimator__C=1000.0, estimator__kernel=rbf, score=0.852018, total=   0.0s
Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] estimator__max_features=sqrt, estimator__n_estimators=25 ........
[CV]  estimator__max_features=sqrt, estimator__n_estimators=25, score=0.845638, total=   0.1s
[CV] estimator__max_features=sqrt, estimator__n_estimators=25 ........


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.8s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=25, score=0.845638, total=   0.1s
[CV] estimator__max_features=sqrt, estimator__n_estimators=25 ........
[CV]  estimator__max_features=sqrt, estimator__n_estimators=25, score=0.838565, total=   0.1s
[CV] estimator__max_features=sqrt, estimator__n_estimators=50 ........


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.2s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=50, score=0.829978, total=   0.1s
[CV] estimator__max_features=sqrt, estimator__n_estimators=50 ........
[CV]  estimator__max_features=sqrt, estimator__n_estimators=50, score=0.843400, total=   0.1s
[CV] estimator__max_features=sqrt, estimator__n_estimators=50 ........


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.5s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=50, score=0.845291, total=   0.1s
[CV] estimator__max_features=sqrt, estimator__n_estimators=75 ........


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.6s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=75, score=0.845638, total=   0.2s
[CV] estimator__max_features=sqrt, estimator__n_estimators=75 ........
[CV]  estimator__max_features=sqrt, estimator__n_estimators=75, score=0.829978, total=   0.2s
[CV] estimator__max_features=sqrt, estimator__n_estimators=75 ........


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    1.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    1.2s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=75, score=0.840807, total=   0.2s
[CV] estimator__max_features=sqrt, estimator__n_estimators=100 .......
[CV]  estimator__max_features=sqrt, estimator__n_estimators=100, score=0.856823, total=   0.2s
[CV] estimator__max_features=sqrt, estimator__n_estimators=100 .......
[CV]  estimator__max_features=sqrt, estimator__n_estimators=100, score=0.845638, total=   0.2s
[CV] estimator__max_features=sqrt, estimator__n_estimators=100 .......
[CV]  estimator__max_features=sqrt, estimator__n_estimators=100, score=0.840807, total=   0.2s
[CV] estimator__max_features=auto, estimator__n_estimators=25 ........
[CV]  estimator__max_features=auto, estimator__n_estimators=25, score=0.827740, total=   0.1s
[CV] estimator__max_features=auto, estimator__n_estimators=25 ........
[CV]  estimator__max_features=auto, estimator__n_estimators=25, score=0.829978, total=   0.1s
[CV] estimator__max_features=auto, estimator__n_estimators=25 ........
[CV]  e

[Parallel(n_jobs=1)]: Done  36 out of  36 | elapsed:    6.0s finished


GridSearchCV(cv=None, error_score='raise',
       estimator=Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=False, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('estimator', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
     ...imators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False))]),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'estimator__n_estimators': [25, 50, 75, 100], 'estimator__max_features': ['sqrt', 'auto', 'log2']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=10)

In [58]:
grid_logr_train_score_uci = uci_grid_logr.score(X_train, y_train)
grid_logr_test_score_uci  = uci_grid_logr.score(X_test, y_test)

grid_tree_train_score_uci = uci_grid_tree.score(X_train, y_train)
grid_tree_test_score_uci  = uci_grid_tree.score(X_test, y_test)

grid_knc_train_score_uci  = uci_grid_knc.score(X_train, y_train)
grid_knc_test_score_uci   = uci_grid_knc.score(X_test, y_test)

grid_svc_train_score_uci  = uci_grid_svc.score(X_train, y_train)
grid_svc_test_score_uci   = uci_grid_svc.score(X_test, y_test)

grid_rfc_train_score_uci  = uci_grid_rfc.score(X_train, y_train)
grid_rfc_test_score_uci   = uci_grid_rfc.score(X_test, y_test)

In [59]:
uci_grid_results = []

uci_grid_results.append({'model': 'LogisticRegressionGrid',
                         'dataset' : 'train',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_logr_train_score_uci})

uci_grid_results.append({'model': 'LogisticRegressionGrid',
                         'dataset' : 'test',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_logr_test_score_uci})

uci_grid_results.append({'model': 'DecisionTreeClassifierGrid',
                         'dataset' : 'train',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_tree_train_score_uci})

uci_grid_results.append({'model': 'DecisionTreeClassifierGrid',
                         'dataset' : 'test',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_tree_test_score_uci})

uci_grid_results.append({'model': 'KNeighborsClassifierGrid',
                         'dataset' : 'train',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_knc_train_score_uci})

uci_grid_results.append({'model': 'KNeighborsClassifierGrid',
                         'dataset' : 'test',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_knc_test_score_uci})

uci_grid_results.append({'model': 'SVCGrid',
                         'dataset' : 'train',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_svc_train_score_uci})

uci_grid_results.append({'model': 'SVCGrid',
                         'dataset' : 'test',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_svc_test_score_uci})

uci_grid_results.append({'model': 'RandomForestClassifierGrid',
                         'dataset' : 'train',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_rfc_train_score_uci})

uci_grid_results.append({'model': 'RandomForestClassifierGrid',
                         'dataset' : 'test',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_rfc_test_score_uci})

In [60]:
uci_grid_results_df = pd.DataFrame(uci_grid_results)
uci_grid_results_df

Unnamed: 0,dataset,model,preprocessing,score
0,train,LogisticRegressionGrid,"scaled, PCA",0.614925
1,test,LogisticRegressionGrid,"scaled, PCA",0.587879
2,train,DecisionTreeClassifierGrid,"scaled, PCA",0.954478
3,test,DecisionTreeClassifierGrid,"scaled, PCA",0.75
4,train,KNeighborsClassifierGrid,"scaled, PCA",0.931343
5,test,KNeighborsClassifierGrid,"scaled, PCA",0.887879
6,train,SVCGrid,"scaled, PCA",0.979851
7,test,SVCGrid,"scaled, PCA",0.884848
8,train,RandomForestClassifierGrid,"scaled, PCA",1.0
9,test,RandomForestClassifierGrid,"scaled, PCA",0.862121


## Fitting Models on Josh's

In [61]:
j_madelon = joblib.load('./pickles/j_madelon1.pkl')

In [62]:
j_madelon = j_madelon.drop(['_id'], axis = 1)

In [63]:
j_feats = joblib.load('./pickles/josh_feats.pkl')

In [64]:
j_mad_feats = j_madelon[j_feats]

In [65]:
Xj = j_mad_feats
yj = j_madelon['target']

In [66]:
X_train_j, X_test_j, y_train_j, y_test_j = train_test_split(Xj, yj, test_size = 0.33)

In [67]:
j_grid_tree = grid_tree.fit(X_train_j, y_train_j)
j_grid_tree

Fitting 3 folds for each of 27 candidates, totalling 81 fits
[CV] estimator__max_depth=2, estimator__max_features=sqrt ............
[CV]  estimator__max_depth=2, estimator__max_features=sqrt, score=0.602194, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=sqrt ............
[CV]  estimator__max_depth=2, estimator__max_features=sqrt, score=0.601746, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=sqrt ............
[CV]  estimator__max_depth=2, estimator__max_features=sqrt, score=0.607703, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=log2 ............
[CV]  estimator__max_depth=2, estimator__max_features=log2, score=0.599955, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=log2 ............
[CV]  estimator__max_depth=2, estimator__max_features=log2, score=0.609358, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=log2 ............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.2s remaining:    0.0s


[CV]  estimator__max_depth=2, estimator__max_features=log2, score=0.593596, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=auto ............
[CV]  estimator__max_depth=2, estimator__max_features=auto, score=0.599955, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=auto ............
[CV]  estimator__max_depth=2, estimator__max_features=auto, score=0.630177, total=   0.0s
[CV] estimator__max_depth=2, estimator__max_features=auto ............
[CV]  estimator__max_depth=2, estimator__max_features=auto, score=0.603448, total=   0.0s
[CV] estimator__max_depth=3, estimator__max_features=sqrt ............
[CV]  estimator__max_depth=3, estimator__max_features=sqrt, score=0.622565, total=   0.0s
[CV] estimator__max_depth=3, estimator__max_features=sqrt ............
[CV]  estimator__max_depth=3, estimator__max_features=sqrt, score=0.628834, total=   0.0s
[CV] estimator__max_depth=3, estimator__max_features=sqrt ............


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.3s remaining:    0.0s


[CV]  estimator__max_depth=3, estimator__max_features=sqrt, score=0.640618, total=   0.0s
[CV] estimator__max_depth=3, estimator__max_features=log2 ............
[CV]  estimator__max_depth=3, estimator__max_features=log2, score=0.632639, total=   0.0s
[CV] estimator__max_depth=3, estimator__max_features=log2 ............
[CV]  estimator__max_depth=3, estimator__max_features=log2, score=0.628162, total=   0.0s
[CV] estimator__max_depth=3, estimator__max_features=log2 ............
[CV]  estimator__max_depth=3, estimator__max_features=log2, score=0.663905, total=   0.0s
[CV] estimator__max_depth=3, estimator__max_features=auto ............
[CV]  estimator__max_depth=3, estimator__max_features=auto, score=0.622789, total=   0.0s
[CV] estimator__max_depth=3, estimator__max_features=auto ............
[CV]  estimator__max_depth=3, estimator__max_features=auto, score=0.618984, total=   0.0s
[CV] estimator__max_depth=3, estimator__max_features=auto ............
[CV]  estimator__max_depth=3, esti

[CV]  estimator__max_depth=9, estimator__max_features=sqrt, score=0.722409, total=   0.0s
[CV] estimator__max_depth=9, estimator__max_features=sqrt ............
[CV]  estimator__max_depth=9, estimator__max_features=sqrt, score=0.712047, total=   0.0s
[CV] estimator__max_depth=9, estimator__max_features=log2 ............
[CV]  estimator__max_depth=9, estimator__max_features=log2, score=0.721961, total=   0.0s
[CV] estimator__max_depth=9, estimator__max_features=log2 ............
[CV]  estimator__max_depth=9, estimator__max_features=log2, score=0.708305, total=   0.0s
[CV] estimator__max_depth=9, estimator__max_features=log2 ............
[CV]  estimator__max_depth=9, estimator__max_features=log2, score=0.728392, total=   0.0s
[CV] estimator__max_depth=9, estimator__max_features=auto ............
[CV]  estimator__max_depth=9, estimator__max_features=auto, score=0.717931, total=   0.0s
[CV] estimator__max_depth=9, estimator__max_features=auto ............
[CV]  estimator__max_depth=9, esti

[Parallel(n_jobs=1)]: Done  81 out of  81 | elapsed:    3.8s finished


GridSearchCV(cv=None, error_score='raise',
       estimator=Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=False, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('estimator', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
     ...it=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'))]),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'estimator__max_depth': [2, 3, 4, 5, 6, 7, 8, 9, 10], 'estimator__max_features': ['sqrt', 'log2', 'auto']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=10)

In [68]:
j_grid_logr = grid_logr.fit(X_train_j, y_train_j)
j_grid_logr

Fitting 3 folds for each of 14 candidates, totalling 42 fits
[CV] estimator__C=0.001, estimator__penalty=l1 .......................
[CV]  estimator__C=0.001, estimator__penalty=l1, score=0.609134, total=   0.0s
[CV] estimator__C=0.001, estimator__penalty=l1 .......................
[CV]  estimator__C=0.001, estimator__penalty=l1, score=0.587867, total=   0.0s
[CV] estimator__C=0.001, estimator__penalty=l1 .......................
[CV]  estimator__C=0.001, estimator__penalty=l1, score=0.581281, total=   0.0s
[CV] estimator__C=0.001, estimator__penalty=l2 .......................
[CV]  estimator__C=0.001, estimator__penalty=l2, score=0.614283, total=   0.0s
[CV] estimator__C=0.001, estimator__penalty=l2 .......................
[CV]  estimator__C=0.001, estimator__penalty=l2, score=0.616297, total=   0.0s

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.2s remaining:    0.0s



[CV] estimator__C=0.001, estimator__penalty=l2 .......................
[CV]  estimator__C=0.001, estimator__penalty=l2, score=0.610166, total=   0.0s
[CV] estimator__C=0.01, estimator__penalty=l1 ........................
[CV]  estimator__C=0.01, estimator__penalty=l1, score=0.613163, total=   0.0s
[CV] estimator__C=0.01, estimator__penalty=l1 ........................
[CV]  estimator__C=0.01, estimator__penalty=l1, score=0.615178, total=   0.0s
[CV] estimator__C=0.01, estimator__penalty=l1 ........................
[CV]  estimator__C=0.01, estimator__penalty=l1, score=0.611061, total=   0.0s
[CV] estimator__C=0.01, estimator__penalty=l2 ........................
[CV]  estimator__C=0.01, estimator__penalty=l2, score=0.614730, total=   0.0s
[CV] estimator__C=0.01, estimator__penalty=l2 ........................

[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.4s remaining:    0.0s



[CV]  estimator__C=0.01, estimator__penalty=l2, score=0.614059, total=   0.0s
[CV] estimator__C=0.01, estimator__penalty=l2 ........................
[CV]  estimator__C=0.01, estimator__penalty=l2, score=0.609718, total=   0.0s
[CV] estimator__C=0.1, estimator__penalty=l1 .........................
[CV]  estimator__C=0.1, estimator__penalty=l1, score=0.613387, total=   0.0s
[CV] estimator__C=0.1, estimator__penalty=l1 .........................
[CV]  estimator__C=0.1, estimator__penalty=l1, score=0.614283, total=   0.0s
[CV] estimator__C=0.1, estimator__penalty=l1 .........................
[CV]  estimator__C=0.1, estimator__penalty=l1, score=0.609942, total=   0.0s
[CV] estimator__C=0.1, estimator__penalty=l2 .........................
[CV]  estimator__C=0.1, estimator__penalty=l2, score=0.613835, total=   0.0s
[CV] estimator__C=0.1, estimator__penalty=l2 .........................
[CV]  estimator__C=0.1, estimator__penalty=l2, score=0.613835, total=   0.0s
[CV] estimator__C=0.1, estimator

[Parallel(n_jobs=1)]: Done  42 out of  42 | elapsed:    1.7s finished


GridSearchCV(cv=None, error_score='raise',
       estimator=Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=False, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('estimator', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_i...y='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))]),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'estimator__C': array([  1.00000e-03,   1.00000e-02,   1.00000e-01,   1.00000e+00,
         1.00000e+01,   1.00000e+02,   1.00000e+03]), 'estimator__penalty': ['l1', 'l2']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=10)

In [69]:
j_grid_knc = grid_knc.fit(X_train_j, y_train_j)
j_grid_knc

Fitting 3 folds for each of 3 candidates, totalling 9 fits
[CV] estimator__n_neighbors=3 ........................................
[CV] ......... estimator__n_neighbors=3, score=0.816208, total=   0.1s
[CV] estimator__n_neighbors=3 ........................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.3s remaining:    0.0s


[CV] ......... estimator__n_neighbors=3, score=0.807029, total=   0.1s
[CV] estimator__n_neighbors=3 ........................................
[CV] ......... estimator__n_neighbors=3, score=0.811017, total=   0.1s
[CV] estimator__n_neighbors=5 ........................................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.7s remaining:    0.0s


[CV] ......... estimator__n_neighbors=5, score=0.824491, total=   0.1s
[CV] estimator__n_neighbors=5 ........................................
[CV] ......... estimator__n_neighbors=5, score=0.810835, total=   0.1s


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    1.1s remaining:    0.0s


[CV] estimator__n_neighbors=5 ........................................
[CV] ......... estimator__n_neighbors=5, score=0.814375, total=   0.1s
[CV] estimator__n_neighbors=11 .......................................
[CV] ........ estimator__n_neighbors=11, score=0.810611, total=   0.1s
[CV] estimator__n_neighbors=11 .......................................


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    1.4s remaining:    0.0s


[CV] ........ estimator__n_neighbors=11, score=0.811059, total=   0.1s
[CV] estimator__n_neighbors=11 .......................................


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    1.6s remaining:    0.0s


[CV] ........ estimator__n_neighbors=11, score=0.813927, total=   0.1s


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    1.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    1.9s finished


GridSearchCV(cv=None, error_score='raise',
       estimator=Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=False, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('estimator', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'))]),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'estimator__n_neighbors': [3, 5, 11]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=10)

In [70]:
j_grid_svc = grid_svc.fit(X_train_j, y_train_j)
j_grid_svc

Fitting 3 folds for each of 4 candidates, totalling 12 fits
[CV] estimator__C=0.001, estimator__kernel=rbf .......................
[CV]  estimator__C=0.001, estimator__kernel=rbf, score=0.503470, total=   6.1s
[CV] estimator__C=0.001, estimator__kernel=rbf .......................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    8.6s remaining:    0.0s


[CV]  estimator__C=0.001, estimator__kernel=rbf, score=0.503470, total=   6.1s
[CV] estimator__C=0.001, estimator__kernel=rbf .......................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   17.2s remaining:    0.0s


[CV]  estimator__C=0.001, estimator__kernel=rbf, score=0.503359, total=   6.1s
[CV] estimator__C=0.1, estimator__kernel=rbf .........................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   25.8s remaining:    0.0s


[CV]  estimator__C=0.1, estimator__kernel=rbf, score=0.784643, total=   4.3s
[CV] estimator__C=0.1, estimator__kernel=rbf .........................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   31.9s remaining:    0.0s


[CV]  estimator__C=0.1, estimator__kernel=rbf, score=0.779942, total=   4.3s
[CV] estimator__C=0.1, estimator__kernel=rbf .........................


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   38.0s remaining:    0.0s


[CV]  estimator__C=0.1, estimator__kernel=rbf, score=0.784371, total=   4.4s
[CV] estimator__C=10.0, estimator__kernel=rbf ........................


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   44.2s remaining:    0.0s


[CV]  estimator__C=10.0, estimator__kernel=rbf, score=0.835460, total=   4.5s
[CV] estimator__C=10.0, estimator__kernel=rbf ........................


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:   49.9s remaining:    0.0s


[CV]  estimator__C=10.0, estimator__kernel=rbf, score=0.826058, total=   4.5s
[CV] estimator__C=10.0, estimator__kernel=rbf ........................


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:   55.6s remaining:    0.0s


[CV]  estimator__C=10.0, estimator__kernel=rbf, score=0.825571, total=   4.6s
[CV] estimator__C=1000.0, estimator__kernel=rbf ......................


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:  1.0min remaining:    0.0s


[CV]  estimator__C=1000.0, estimator__kernel=rbf, score=0.798746, total=  50.2s
[CV] estimator__C=1000.0, estimator__kernel=rbf ......................
[CV]  estimator__C=1000.0, estimator__kernel=rbf, score=0.801209, total=  43.5s
[CV] estimator__C=1000.0, estimator__kernel=rbf ......................
[CV]  estimator__C=1000.0, estimator__kernel=rbf, score=0.792656, total=  48.2s


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:  3.4min finished


GridSearchCV(cv=None, error_score='raise',
       estimator=Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=False, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('estimator', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))]),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'estimator__C': array([  1.00000e-03,   1.00000e-01,   1.00000e+01,   1.00000e+03]), 'estimator__kernel': ['rbf']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=10)

In [71]:
j_grid_rfc = grid_rfc.fit(X_train_j, y_train_j)
j_grid_rfc

Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] estimator__max_features=sqrt, estimator__n_estimators=25 ........
[CV]  estimator__max_features=sqrt, estimator__n_estimators=25, score=0.807701, total=   0.5s
[CV] estimator__max_features=sqrt, estimator__n_estimators=25 ........


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.6s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=25, score=0.811954, total=   0.5s
[CV] estimator__max_features=sqrt, estimator__n_estimators=25 ........


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.1s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=25, score=0.812360, total=   0.5s
[CV] estimator__max_features=sqrt, estimator__n_estimators=50 ........


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    1.6s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=50, score=0.815536, total=   1.0s
[CV] estimator__max_features=sqrt, estimator__n_estimators=50 ........


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    2.7s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=50, score=0.817327, total=   1.0s
[CV] estimator__max_features=sqrt, estimator__n_estimators=50 ........


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    3.8s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=50, score=0.809001, total=   1.0s
[CV] estimator__max_features=sqrt, estimator__n_estimators=75 ........


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    4.8s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=75, score=0.817103, total=   1.5s
[CV] estimator__max_features=sqrt, estimator__n_estimators=75 ........


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    6.4s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=75, score=0.818446, total=   1.4s
[CV] estimator__max_features=sqrt, estimator__n_estimators=75 ........


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    8.0s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=75, score=0.814823, total=   1.5s
[CV] estimator__max_features=sqrt, estimator__n_estimators=100 .......


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    9.5s remaining:    0.0s


[CV]  estimator__max_features=sqrt, estimator__n_estimators=100, score=0.813521, total=   2.0s
[CV] estimator__max_features=sqrt, estimator__n_estimators=100 .......
[CV]  estimator__max_features=sqrt, estimator__n_estimators=100, score=0.820685, total=   1.9s
[CV] estimator__max_features=sqrt, estimator__n_estimators=100 .......
[CV]  estimator__max_features=sqrt, estimator__n_estimators=100, score=0.815495, total=   2.0s
[CV] estimator__max_features=auto, estimator__n_estimators=25 ........
[CV]  estimator__max_features=auto, estimator__n_estimators=25, score=0.814641, total=   0.5s
[CV] estimator__max_features=auto, estimator__n_estimators=25 ........
[CV]  estimator__max_features=auto, estimator__n_estimators=25, score=0.811954, total=   0.5s
[CV] estimator__max_features=auto, estimator__n_estimators=25 ........
[CV]  estimator__max_features=auto, estimator__n_estimators=25, score=0.807434, total=   0.5s
[CV] estimator__max_features=auto, estimator__n_estimators=50 ........
[CV]  e

[Parallel(n_jobs=1)]: Done  36 out of  36 | elapsed:   47.7s finished


GridSearchCV(cv=None, error_score='raise',
       estimator=Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=False, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('estimator', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
     ...imators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False))]),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'estimator__n_estimators': [25, 50, 75, 100], 'estimator__max_features': ['sqrt', 'auto', 'log2']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=10)

In [72]:
grid_logr_train_score_j = grid_logr.score(X_train_j, y_train_j)
grid_logr_test_score_j  = grid_logr.score(X_test_j, y_test_j)

grid_tree_train_score_j = grid_tree.score(X_train_j, y_train_j)
grid_tree_test_score_j  = grid_tree.score(X_test_j, y_test_j)

grid_knc_train_score_j  = grid_knc.score(X_train_j, y_train_j)
grid_knc_test_score_j   = grid_knc.score(X_test_j, y_test_j)

grid_svc_train_score_j  = grid_svc.score(X_train_j, y_train_j)
grid_svc_test_score_j   = grid_svc.score(X_test_j, y_test_j)

grid_rfc_train_score_j  = grid_rfc.score(X_train_j, y_train_j)
grid_rfc_test_score_j   = grid_rfc.score(X_test_j, y_test_j)

In [73]:
josh_grid_results = []

josh_grid_results.append({'model': 'LogisticRegressionGrid',
                         'dataset' : 'train',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_logr_train_score_j})

josh_grid_results.append({'model': 'LogisticRegressionGrid',
                         'dataset' : 'test',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_logr_test_score_j})

josh_grid_results.append({'model': 'DecisionTreeClassifierGrid',
                         'dataset' : 'train',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_tree_train_score_j})

josh_grid_results.append({'model': 'DecisionTreeClassifierGrid',
                         'dataset' : 'test',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_tree_test_score_j})

josh_grid_results.append({'model': 'KNeighborsClassifierGrid',
                         'dataset' : 'train',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_knc_train_score_j})

josh_grid_results.append({'model': 'KNeighborsClassifierGrid',
                         'dataset' : 'test',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_knc_test_score_j})

josh_grid_results.append({'model': 'SVCGrid',
                         'dataset' : 'train',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_svc_train_score_j})

josh_grid_results.append({'model': 'SVCGrid',
                         'dataset' : 'test',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_svc_test_score_j})

josh_grid_results.append({'model': 'RandomForestClassifierGrid',
                         'dataset' : 'train',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_rfc_train_score_j})

josh_grid_results.append({'model': 'RandomForestClassifierGrid',
                         'dataset' : 'test',
                         'preprocessing': 'scaled, PCA',
                         'score': grid_rfc_test_score_j})

In [76]:
josh_grid_results_df = pd.DataFrame(josh_grid_results)
josh_grid_results_df

Unnamed: 0,dataset,model,preprocessing,score
0,train,LogisticRegressionGrid,"scaled, PCA",0.612612
1,test,LogisticRegressionGrid,"scaled, PCA",0.609091
2,train,DecisionTreeClassifierGrid,"scaled, PCA",0.801119
3,test,DecisionTreeClassifierGrid,"scaled, PCA",0.744091
4,train,KNeighborsClassifierGrid,"scaled, PCA",0.883358
5,test,KNeighborsClassifierGrid,"scaled, PCA",0.827576
6,train,SVCGrid,"scaled, PCA",0.883582
7,test,SVCGrid,"scaled, PCA",0.833939
8,train,RandomForestClassifierGrid,"scaled, PCA",1.0
9,test,RandomForestClassifierGrid,"scaled, PCA",0.833182


small change