_Questions_
- Large and small datasets, what is better: same relative or absolute train size?
  (or choose subset of data a priori)

In [1]:
### Imports

import copy
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import itertools as it
import string
from sklearn import datasets
from sklearn import svm
from sklearn import tree
from sklearn import ensemble
from sklearn import neighbors
from sklearn import neural_network
from sklearn import model_selection
from sklearn import linear_model
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
%config InlineBackend.figure_format = 'retina'

In [2]:
### Config

TRAIN_SIZE = 0.8

MAX_DATA_SIZE = 1000

RND_SEED = 1

CLF_DICT       = {'logreg': linear_model.LogisticRegression(),
                  'knn': neighbors.KNeighborsClassifier(),
                  'rf': ensemble.RandomForestClassifier()}

In [3]:
### Methods and classes

def size_info():
    ### Size info
    print("Data sizes:")
    for data_name, data_tuple in all_data_dict.items():
        print("\n{}:\nX: {}\ny: {}".format(data_name, data_tuple[0].shape, data_tuple[1].shape))

def shuffle(df):
    """
    Shuffles dataset using seed specified in RND_SEED (see config part above).
    
        df:  Dataset to be shuffled.
        
    Returns shuffled dataset.
    """
    
    return(df.sample(frac=1, random_state=np.random.RandomState(seed=RND_SEED)))

def init_clf(clf_name, clf_dict=CLF_DICT):
    return(copy.deepcopy(clf_dict[clf_name]))

class MagicSearcher:
    
    """
    Finds hyperparams for set of datasets and set of classifiers with specified hyperparam grids
    """
    
    data_dict       = None
    clf_param_dict  = None
    cv              = None
    n_jobs          = None
    verbose         = None
    method          = None
    
    # Randomized Search
    n_iter = None
    
    # Results
    searcher_obj_dict = None
    best_params_dict  = None
    
    def __init__(self, clf_param_dict, data_dict=None, cv=5, n_jobs=4, verbose=False, method='grid_search'):
        self.data_dict      = data_dict
        self.clf_param_dict = clf_param_dict
        self.cv             = cv
        self.n_jobs         = n_jobs
        self.verbose        = verbose
        self.method         = method
        
    def search(self, data_dict=None, n_iter=None):
        if self.method == 'randomized_search' and n_iter is None:
            raise Exception('You need to specify n_iter for randomized search')
        self.n_iter = n_iter
            
        if (self.data_dict is None) and (data_dict is None):
            raise Exception('You need to specify data!') 
        
        searcher_obj_dict = dict()
        best_params_dict = dict()
        for data_name, data_tuple in self.data_dict.items():
            X = data_tuple[0]
            y = data_tuple[1]
            
            searcher_obj_dict[data_name] = dict()
            best_params_dict[data_name] = dict()
            for clf_name, param_dict in clf_param_dict.items():
                searcher_obj = ParamSearcher(X, y, clf_name, param_dict, self.method, self.n_jobs, self.cv, self.verbose)
                searcher_obj.search()
                searcher_obj_dict[data_name][clf_name] = searcher_obj
                
                best_params_dict[data_name][clf_name] = searcher_obj.best_params_
        self.searcher_obj_dict = searcher_obj_dict
        self.best_params_dict = best_params_dict

class ParamSearcher:
    
    """
    Finds hyperparams for one dataset and one classifier
    """
    
    X = None
    y = None
    clf_name = None
    param_dict = None
    method = None
    verbose = None
    n_jobs = None
    cv = None
    
    # Randomized Search
    n_iter = None
    
    # Results
    skl_search_obj = None
    best_params_ = None
    
    def __init__(self, X, y, clf_name, param_dict, method='grid_search', n_jobs=4, cv=5, verbose=False):
        self.X = X
        self.y = y
        self.clf_name = clf_name
        self.param_dict = param_dict
        self.method = method
        self.n_jobs = n_jobs
        self.cv = cv
        self.verbose = verbose
        
    def search(self, n_iter=None):
        if self.method == 'grid_search':
            skl_search_obj = GridSearchCV(estimator  = init_clf(self.clf_name),
                                          param_grid = self.param_dict,
                                          n_jobs     = self.n_jobs,
                                          cv         = self.cv,
                                          verbose    = self.verbose)
        elif self.method == 'randomized_search':
            if n_iter is None:
                raise Exception('You need to specify n_iter for randomized search')
            self.n_iter = n_iter
            
            skl_search_obj = RandomizedSearchCV(estimator           = init_clf(self.clf_name),
                                                 param_distributions = self.param_dict,
                                                 n_iter              = n_iter,
                                                 n_jobs              = self.n_jobs,
                                                 cv                  = self.cv,
                                                 verbose             = self.verbose)
        skl_search_obj.fit(self.X, self.y)
        self.skl_search_obj = skl_search_obj
        self.best_params_ = skl_search_obj.best_params_

In [4]:
### Load data
## iris
iris_X = pd.DataFrame(datasets.load_iris()['data'])
iris_y = pd.Series(datasets.load_iris()['target'])


## wdbc
wdbc_X_and_y = pd.read_csv('data/wdbc.data', header = None).iloc[:, 1:] # drop ID, then first col = y
wdbc_y = wdbc_X_and_y.iloc[:, 0]
wdbc_X = wdbc_X_and_y.iloc[:, 1:]

wdbc_y = wdbc_y.map({'B': -1, 'M': 1}) # Transform y from (B, M) to (-1, 1)


## income
# Load, prepare, and shuffle adult income data
income_X_and_y = pd.read_csv('data/adult.data', header=None)
income_X_and_y.columns = ['age', 'workclass', 'fnlwgt', 'education', 'education-num',
                         'marital-status', 'occupation', 'relationship',
                         'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week',
                         'native-country', 'income']

# one-hot encode categorical variables
income_categorical_vars = ['workclass', 'education', 'marital-status', 'occupation',
                           'relationship', 'race', 'sex', 'native-country']
income_X_and_y_onehot = pd.DataFrame()
for var in income_categorical_vars:
    dummy_coded_var_df = pd.get_dummies(income_X_and_y[var], prefix=var)
    income_X_and_y_onehot = pd.concat([income_X_and_y_onehot, dummy_coded_var_df], axis=1)

# add remaining columns to one-hot encoded df
income_X_and_y = pd.concat([income_X_and_y_onehot,
                            income_X_and_y.loc[:, income_X_and_y.columns[
                                np.logical_not(np.in1d(income_X_and_y.columns, income_categorical_vars))]]],
                           axis=1)

income_y = income_X_and_y.loc[:, 'income']
income_X = income_X_and_y.drop('income', axis=1)

# Transform y from (<=50K, >50K) to (-1, 1)
income_y = income_y.map({' <=50K': -1, ' >50K': 1})


## Letter
letter_X_and_y = pd.read_csv('data/letter.data', header=None)
letter_X = letter_X_and_y.iloc[:, 1:]
letter_y = letter_X_and_y.iloc[:, 0]

# Transform y from A:M -> -1 and N:Z -> 1
def alph_to_cat(letter):
    if str.upper(letter) in list(string.ascii_uppercase[:13]):
        return(1)
    elif str.upper(letter) in list(string.ascii_uppercase[13:]):
        return(-1)
    
letter_y = letter_y.map(alph_to_cat)

## covtype
covtype_X_and_y = pd.read_csv('data/covtype.data')
covtype_X = covtype_X_and_y.iloc[:, :-1]
covtype_y = covtype_X_and_y.iloc[:, -1]

covtype_y = covtype_y.map({7:1}).fillna(0)



all_data_dict = {'wdbc':      (wdbc_X, wdbc_y),
                 'income':    (income_X, income_y),
                 'iris':      (iris_X, iris_y),
                 'covtype':   (covtype_X, covtype_y),
                 'letter':    (letter_X, letter_y)}

### Shuffle
for data_name, data_tuple in all_data_dict.items():
    X = data_tuple[0]
    y = data_tuple[1]
    
    X = shuffle(X)
    y = shuffle(y)
    
    all_data_dict[data_name] = (X, y)

size_info()

Data sizes:

wdbc:
X: (569, 30)
y: (569,)

income:
X: (32561, 108)
y: (32561,)

iris:
X: (150, 4)
y: (150,)

covtype:
X: (581011, 54)
y: (581011,)

letter:
X: (20000, 16)
y: (20000,)


In [5]:
### Limit dataset sizes
for data_name, data_tuple in all_data_dict.items():
    X = data_tuple[0]
    y = data_tuple[1]
    
    assert X.shape[0] == y.shape[0]
    
    if y.shape[0] > MAX_DATA_SIZE:
        X = X.sample(MAX_DATA_SIZE, random_state=RND_SEED)
        y = y.sample(MAX_DATA_SIZE, random_state=RND_SEED)

        all_data_dict[data_name] = (X, y)

size_info()

Data sizes:

wdbc:
X: (569, 30)
y: (569,)

income:
X: (1000, 108)
y: (1000,)

iris:
X: (150, 4)
y: (150,)

covtype:
X: (1000, 54)
y: (1000,)

letter:
X: (1000, 16)
y: (1000,)


In [8]:
### Go!
clf_param_dict = {'knn':    {'n_neighbors': np.arange(1, 51)},
                  'logreg': {'C': [1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]},
                  'rf':     {'n_estimators': [1024],
                             'max_features': [1, 2, 4, 6, 8, 12, 16, 20]}}

everything = MagicSearcher(clf_param_dict, all_data_dict, cv=5, n_jobs=None, verbose=10, method='grid_search')
everything.search()

Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV] n_neighbors=1 ...................................................
[CV] .......... n_neighbors=1, score=0.9217391304347826, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] .......... n_neighbors=1, score=0.9043478260869565, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] .......... n_neighbors=1, score=0.9026548672566371, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] .......... n_neighbors=1, score=0.9469026548672567, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] .......... n_neighbors=1, score=0.9292035398230089, total=   0.0s
[CV] n_neighbors=2 ...................................................
[CV] .......... n_neighbors=2, score=0.8869565217391304, total=   0.0s
[CV] n_neighbors=2 ...................................................
[CV] ..........

[Parallel(n_jobs=None)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   6 out of   6 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   7 out of   7 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   8 out of   8 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   9 out of   9 | elapsed:    0.1s remaining:    0.0s


[CV] .......... n_neighbors=7, score=0.9130434782608695, total=   0.0s
[CV] n_neighbors=7 ...................................................
[CV] ........... n_neighbors=7, score=0.911504424778761, total=   0.0s
[CV] n_neighbors=7 ...................................................
[CV] .......... n_neighbors=7, score=0.9557522123893806, total=   0.0s
[CV] n_neighbors=7 ...................................................
[CV] .......... n_neighbors=7, score=0.9203539823008849, total=   0.0s
[CV] n_neighbors=8 ...................................................
[CV] .......... n_neighbors=8, score=0.9304347826086956, total=   0.0s
[CV] n_neighbors=8 ...................................................
[CV] .......... n_neighbors=8, score=0.9217391304347826, total=   0.0s
[CV] n_neighbors=8 ...................................................
[CV] .......... n_neighbors=8, score=0.9292035398230089, total=   0.0s
[CV] n_neighbors=8 ...................................................
[CV] .

[CV] ......... n_neighbors=22, score=0.9217391304347826, total=   0.0s
[CV] n_neighbors=22 ..................................................
[CV] ......... n_neighbors=22, score=0.9292035398230089, total=   0.0s
[CV] n_neighbors=22 ..................................................
[CV] ......... n_neighbors=22, score=0.9557522123893806, total=   0.0s
[CV] n_neighbors=22 ..................................................
[CV] .......... n_neighbors=22, score=0.911504424778761, total=   0.0s
[CV] n_neighbors=23 ..................................................
[CV] ......... n_neighbors=23, score=0.9043478260869565, total=   0.0s
[CV] n_neighbors=23 ..................................................
[CV] ......... n_neighbors=23, score=0.9217391304347826, total=   0.0s
[CV] n_neighbors=23 ..................................................
[CV] ......... n_neighbors=23, score=0.9292035398230089, total=   0.0s
[CV] n_neighbors=23 ..................................................
[CV] .

[CV] ......... n_neighbors=35, score=0.9203539823008849, total=   0.0s
[CV] n_neighbors=35 ..................................................
[CV] ......... n_neighbors=35, score=0.9469026548672567, total=   0.0s
[CV] n_neighbors=35 ..................................................
[CV] ......... n_neighbors=35, score=0.8938053097345132, total=   0.0s
[CV] n_neighbors=36 ..................................................
[CV] ......... n_neighbors=36, score=0.8956521739130435, total=   0.0s
[CV] n_neighbors=36 ..................................................
[CV] ......... n_neighbors=36, score=0.9130434782608695, total=   0.0s
[CV] n_neighbors=36 ..................................................
[CV] ......... n_neighbors=36, score=0.9203539823008849, total=   0.0s
[CV] n_neighbors=36 ..................................................
[CV] ......... n_neighbors=36, score=0.9469026548672567, total=   0.0s
[CV] n_neighbors=36 ..................................................
[CV] .

[CV] .......... n_neighbors=48, score=0.911504424778761, total=   0.0s
[CV] n_neighbors=48 ..................................................
[CV] ......... n_neighbors=48, score=0.9469026548672567, total=   0.0s
[CV] n_neighbors=48 ..................................................
[CV] ......... n_neighbors=48, score=0.8849557522123894, total=   0.0s
[CV] n_neighbors=49 ..................................................
[CV] ......... n_neighbors=49, score=0.8869565217391304, total=   0.0s
[CV] n_neighbors=49 ..................................................
[CV] ......... n_neighbors=49, score=0.9043478260869565, total=   0.0s
[CV] n_neighbors=49 ..................................................
[CV] .......... n_neighbors=49, score=0.911504424778761, total=   0.0s
[CV] n_neighbors=49 ..................................................
[CV] ......... n_neighbors=49, score=0.9469026548672567, total=   0.0s
[CV] n_neighbors=49 ..................................................
[CV] .

[Parallel(n_jobs=None)]: Done 250 out of 250 | elapsed:    2.2s finished
[Parallel(n_jobs=None)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s


[CV] ................ C=1e-05, score=0.9380530973451328, total=   0.0s
[CV] C=0.0001 ........................................................
[CV] ............... C=0.0001, score=0.9217391304347826, total=   0.0s
[CV] C=0.0001 ........................................................
[CV] ............... C=0.0001, score=0.8956521739130435, total=   0.0s
[CV] C=0.0001 ........................................................
[CV] ............... C=0.0001, score=0.9380530973451328, total=   0.0s
[CV] C=0.0001 ........................................................
[CV] ............... C=0.0001, score=0.9203539823008849, total=   0.0s
[CV] C=0.0001 ........................................................
[CV] ............... C=0.0001, score=0.9380530973451328, total=   0.0s
[CV] C=0.001 .........................................................
[CV] ................ C=0.001, score=0.9304347826086956, total=   0.0s
[CV] C=0.001 .........................................................
[CV] .

[Parallel(n_jobs=None)]: Done  65 out of  65 | elapsed:    0.5s finished


[CV]  max_features=1, n_estimators=1024, score=0.9478260869565217, total=   1.4s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   1 out of   1 | elapsed:    1.5s remaining:    0.0s


[CV]  max_features=1, n_estimators=1024, score=0.9565217391304348, total=   1.2s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   2 out of   2 | elapsed:    2.8s remaining:    0.0s


[CV]  max_features=1, n_estimators=1024, score=0.9646017699115044, total=   1.2s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   3 out of   3 | elapsed:    4.1s remaining:    0.0s


[CV]  max_features=1, n_estimators=1024, score=0.9734513274336283, total=   1.1s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   4 out of   4 | elapsed:    5.3s remaining:    0.0s


[CV]  max_features=1, n_estimators=1024, score=0.9646017699115044, total=   1.3s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   5 out of   5 | elapsed:    6.7s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.9478260869565217, total=   1.3s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   6 out of   6 | elapsed:    8.0s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.9565217391304348, total=   1.5s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   7 out of   7 | elapsed:    9.7s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.9380530973451328, total=   1.5s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   8 out of   8 | elapsed:   11.2s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.9734513274336283, total=   1.6s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   9 out of   9 | elapsed:   12.9s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.9646017699115044, total=   1.3s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.9478260869565217, total=   1.3s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.9478260869565217, total=   1.6s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.9469026548672567, total=   1.5s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.9734513274336283, total=   1.6s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.9734513274336283, total=   1.4s
[CV] max_features=6, n_estimators=1024 ...............................
[CV]  max_features=6, n_estimators=1024, score=0.9565217391304348, total=   1.4s
[CV] ma

[Parallel(n_jobs=None)]: Done  40 out of  40 | elapsed:  1.3min finished


Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV] n_neighbors=1 ...................................................
[CV] .......... n_neighbors=1, score=0.6716417910447762, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] ....................... n_neighbors=1, score=0.625, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] ........................ n_neighbors=1, score=0.67, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] ........................ n_neighbors=1, score=0.65, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] .......... n_neighbors=1, score=0.7035175879396985, total=   0.0s
[CV] n_neighbors=2 ...................................................
[CV] ........... n_neighbors=2, score=0.746268656716418, total=   0.0s
[CV] n_neighbors=2 ...................................................
[CV] ..........

[Parallel(n_jobs=None)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   6 out of   6 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   7 out of   7 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   8 out of   8 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   9 out of   9 | elapsed:    0.2s remaining:    0.0s


[CV] .......... n_neighbors=2, score=0.7537688442211056, total=   0.0s
[CV] n_neighbors=3 ...................................................
[CV] .......... n_neighbors=3, score=0.7213930348258707, total=   0.0s
[CV] n_neighbors=3 ...................................................
[CV] ........................ n_neighbors=3, score=0.69, total=   0.0s
[CV] n_neighbors=3 ...................................................
[CV] ....................... n_neighbors=3, score=0.705, total=   0.0s
[CV] n_neighbors=3 ...................................................
[CV] ....................... n_neighbors=3, score=0.695, total=   0.0s
[CV] n_neighbors=3 ...................................................
[CV] .......... n_neighbors=3, score=0.7286432160804021, total=   0.0s
[CV] n_neighbors=4 ...................................................
[CV] .......... n_neighbors=4, score=0.7313432835820896, total=   0.0s
[CV] n_neighbors=4 ...................................................
[CV] .

[CV] ...................... n_neighbors=15, score=0.745, total=   0.0s
[CV] n_neighbors=15 ..................................................
[CV] ......... n_neighbors=15, score=0.7537688442211056, total=   0.0s
[CV] n_neighbors=16 ..................................................
[CV] .......... n_neighbors=16, score=0.746268656716418, total=   0.0s
[CV] n_neighbors=16 ..................................................
[CV] ...................... n_neighbors=16, score=0.755, total=   0.0s
[CV] n_neighbors=16 ..................................................
[CV] ...................... n_neighbors=16, score=0.745, total=   0.0s
[CV] n_neighbors=16 ..................................................
[CV] ....................... n_neighbors=16, score=0.75, total=   0.0s
[CV] n_neighbors=16 ..................................................
[CV] ......... n_neighbors=16, score=0.7537688442211056, total=   0.0s
[CV] n_neighbors=17 ..................................................
[CV] .

[CV] ...................... n_neighbors=27, score=0.755, total=   0.0s
[CV] n_neighbors=27 ..................................................
[CV] ....................... n_neighbors=27, score=0.75, total=   0.0s
[CV] n_neighbors=27 ..................................................
[CV] ....................... n_neighbors=27, score=0.75, total=   0.0s
[CV] n_neighbors=27 ..................................................
[CV] ......... n_neighbors=27, score=0.7537688442211056, total=   0.0s
[CV] n_neighbors=28 ..................................................
[CV] ......... n_neighbors=28, score=0.7512437810945274, total=   0.0s
[CV] n_neighbors=28 ..................................................
[CV] ....................... n_neighbors=28, score=0.75, total=   0.0s
[CV] n_neighbors=28 ..................................................
[CV] ....................... n_neighbors=28, score=0.75, total=   0.0s
[CV] n_neighbors=28 ..................................................
[CV] .

[CV] ....................... n_neighbors=39, score=0.75, total=   0.0s
[CV] n_neighbors=39 ..................................................
[CV] ....................... n_neighbors=39, score=0.75, total=   0.0s
[CV] n_neighbors=39 ..................................................
[CV] ....................... n_neighbors=39, score=0.75, total=   0.0s
[CV] n_neighbors=39 ..................................................
[CV] ......... n_neighbors=39, score=0.7537688442211056, total=   0.0s
[CV] n_neighbors=40 ..................................................
[CV] ......... n_neighbors=40, score=0.7512437810945274, total=   0.0s
[CV] n_neighbors=40 ..................................................
[CV] ....................... n_neighbors=40, score=0.75, total=   0.0s
[CV] n_neighbors=40 ..................................................
[CV] ....................... n_neighbors=40, score=0.75, total=   0.0s
[CV] n_neighbors=40 ..................................................
[CV] .

[Parallel(n_jobs=None)]: Done 250 out of 250 | elapsed:   14.7s finished
[Parallel(n_jobs=None)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   6 out of   6 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   7 out of   7 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   8 out of   8 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   9 out of   9 | elapsed:    0.1s remaining:    0.0s


[CV] .............................. C=1e-07, score=0.76, total=   0.0s
[CV] C=1e-07 .........................................................
[CV] .............................. C=1e-07, score=0.77, total=   0.0s
[CV] C=1e-07 .........................................................
[CV] ............................. C=1e-07, score=0.775, total=   0.0s
[CV] C=1e-07 .........................................................
[CV] ................ C=1e-07, score=0.7487437185929648, total=   0.0s
[CV] C=1e-06 .........................................................
[CV] ................ C=1e-06, score=0.7810945273631841, total=   0.0s
[CV] C=1e-06 .........................................................
[CV] ............................. C=1e-06, score=0.765, total=   0.0s
[CV] C=1e-06 .........................................................
[CV] .............................. C=1e-06, score=0.77, total=   0.0s
[CV] C=1e-06 .........................................................
[CV] .

[Parallel(n_jobs=None)]: Done  65 out of  65 | elapsed:    1.2s finished



[CV] max_features=1, n_estimators=1024 ...............................
[CV]  max_features=1, n_estimators=1024, score=0.835820895522388, total=   3.2s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   1 out of   1 | elapsed:    3.4s remaining:    0.0s


[CV] ... max_features=1, n_estimators=1024, score=0.825, total=   1.3s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   2 out of   2 | elapsed:    4.9s remaining:    0.0s


[CV] ... max_features=1, n_estimators=1024, score=0.815, total=   1.3s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   3 out of   3 | elapsed:    6.4s remaining:    0.0s


[CV] ... max_features=1, n_estimators=1024, score=0.805, total=   1.3s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   4 out of   4 | elapsed:    7.9s remaining:    0.0s


[CV]  max_features=1, n_estimators=1024, score=0.8291457286432161, total=   1.4s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   5 out of   5 | elapsed:    9.5s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.8308457711442786, total=   1.3s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   6 out of   6 | elapsed:   11.0s remaining:    0.0s


[CV] ... max_features=2, n_estimators=1024, score=0.835, total=   1.3s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   7 out of   7 | elapsed:   12.5s remaining:    0.0s


[CV] ... max_features=2, n_estimators=1024, score=0.815, total=   1.3s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   8 out of   8 | elapsed:   13.9s remaining:    0.0s


[CV] .... max_features=2, n_estimators=1024, score=0.82, total=   1.3s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   9 out of   9 | elapsed:   15.4s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.8391959798994975, total=   1.3s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.8308457711442786, total=   1.4s
[CV] max_features=4, n_estimators=1024 ...............................
[CV] .... max_features=4, n_estimators=1024, score=0.83, total=   1.3s
[CV] max_features=4, n_estimators=1024 ...............................
[CV] ... max_features=4, n_estimators=1024, score=0.835, total=   1.3s
[CV] max_features=4, n_estimators=1024 ...............................
[CV] .... max_features=4, n_estimators=1024, score=0.83, total=   1.3s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.8090452261306532, total=   1.3s
[CV] max_features=6, n_estimators=1024 ...............................
[CV]  max_features=6, n_estimators=1024, score=0.835820895522388, total=   1.4s
[CV] max_features=6, n_estimators=1024

[Parallel(n_jobs=None)]: Done  40 out of  40 | elapsed:  1.2min finished


Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV] n_neighbors=1 ...................................................
[CV] ......................... n_neighbors=1, score=1.0, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] .......... n_neighbors=1, score=0.9666666666666667, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] .......... n_neighbors=1, score=0.9333333333333333, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] .......... n_neighbors=1, score=0.9666666666666667, total=   0.0s
[CV] n_neighbors=1 ...................................................
[CV] ......................... n_neighbors=1, score=0.9, total=   0.0s
[CV] n_neighbors=2 ...................................................
[CV] ......................... n_neighbors=2, score=1.0, total=   0.0s
[CV] n_neighbors=2 ...................................................
[CV] ..........

[Parallel(n_jobs=None)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s


[CV] ......... n_neighbors=14, score=0.9333333333333333, total=   0.0s
[CV] n_neighbors=14 ..................................................
[CV] ........................ n_neighbors=14, score=1.0, total=   0.0s
[CV] n_neighbors=14 ..................................................
[CV] ......... n_neighbors=14, score=0.9666666666666667, total=   0.0s
[CV] n_neighbors=15 ..................................................
[CV] ........................ n_neighbors=15, score=1.0, total=   0.0s
[CV] n_neighbors=15 ..................................................
[CV] ......... n_neighbors=15, score=0.9666666666666667, total=   0.0s
[CV] n_neighbors=15 ..................................................
[CV] ......... n_neighbors=15, score=0.9333333333333333, total=   0.0s
[CV] n_neighbors=15 ..................................................
[CV] ........................ n_neighbors=15, score=1.0, total=   0.0s
[CV] n_neighbors=15 ..................................................
[CV] .

[CV] ........................ n_neighbors=27, score=1.0, total=   0.0s
[CV] n_neighbors=27 ..................................................
[CV] ........................ n_neighbors=27, score=0.9, total=   0.0s
[CV] n_neighbors=27 ..................................................
[CV] ......... n_neighbors=27, score=0.8666666666666667, total=   0.0s
[CV] n_neighbors=27 ..................................................
[CV] ......... n_neighbors=27, score=0.9666666666666667, total=   0.0s
[CV] n_neighbors=27 ..................................................
[CV] ......... n_neighbors=27, score=0.9333333333333333, total=   0.0s
[CV] n_neighbors=28 ..................................................
[CV] ........................ n_neighbors=28, score=1.0, total=   0.0s
[CV] n_neighbors=28 ..................................................
[CV] ........................ n_neighbors=28, score=0.9, total=   0.0s
[CV] n_neighbors=28 ..................................................
[CV] .

[CV] n_neighbors=40 ..................................................
[CV] ........................ n_neighbors=40, score=0.9, total=   0.0s
[CV] n_neighbors=40 ..................................................
[CV] ......... n_neighbors=40, score=0.8666666666666667, total=   0.0s
[CV] n_neighbors=40 ..................................................
[CV] ......... n_neighbors=40, score=0.9333333333333333, total=   0.0s
[CV] n_neighbors=40 ..................................................
[CV] ........................ n_neighbors=40, score=0.9, total=   0.0s
[CV] n_neighbors=41 ..................................................
[CV] ........................ n_neighbors=41, score=1.0, total=   0.0s
[CV] n_neighbors=41 ..................................................
[CV] ........................ n_neighbors=41, score=0.9, total=   0.0s
[CV] n_neighbors=41 ..................................................
[CV] ........................ n_neighbors=41, score=0.9, total=   0.0s
[CV] n

[Parallel(n_jobs=None)]: Done 250 out of 250 | elapsed:    1.1s finished
[Parallel(n_jobs=None)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=None)]: Done  65 out of  65 | elapsed:    0.2s finished


Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV] max_features=1, n_estimators=1024 ...............................
[CV] ..... max_features=1, n_estimators=1024, score=1.0, total=   1.2s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   1 out of   1 | elapsed:    1.3s remaining:    0.0s


[CV]  max_features=1, n_estimators=1024, score=0.9333333333333333, total=   1.1s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   2 out of   2 | elapsed:    2.4s remaining:    0.0s


[CV]  max_features=1, n_estimators=1024, score=0.9333333333333333, total=   1.0s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   3 out of   3 | elapsed:    3.5s remaining:    0.0s


[CV]  max_features=1, n_estimators=1024, score=0.9666666666666667, total=   1.1s
[CV] max_features=1, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   4 out of   4 | elapsed:    4.7s remaining:    0.0s


[CV]  max_features=1, n_estimators=1024, score=0.8666666666666667, total=   1.4s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   5 out of   5 | elapsed:    6.1s remaining:    0.0s


[CV] ..... max_features=2, n_estimators=1024, score=1.0, total=   1.1s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   6 out of   6 | elapsed:    7.3s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.9333333333333333, total=   1.1s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   7 out of   7 | elapsed:    8.5s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.9333333333333333, total=   1.1s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   8 out of   8 | elapsed:    9.6s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.9666666666666667, total=   1.0s
[CV] max_features=2, n_estimators=1024 ...............................


[Parallel(n_jobs=None)]: Done   9 out of   9 | elapsed:   10.7s remaining:    0.0s


[CV]  max_features=2, n_estimators=1024, score=0.8666666666666667, total=   0.9s
[CV] max_features=4, n_estimators=1024 ...............................
[CV] ..... max_features=4, n_estimators=1024, score=1.0, total=   1.0s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.9333333333333333, total=   1.3s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.9666666666666667, total=   1.1s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.9666666666666667, total=   1.0s
[CV] max_features=4, n_estimators=1024 ...............................
[CV]  max_features=4, n_estimators=1024, score=0.8666666666666667, total=   1.0s
[CV] max_features=6, n_estimators=1024 ...............................


ValueError: max_features must be in (0, n_features]