In [46]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import KFold, StratifiedKFold
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

from itertools import combinations
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier


In [52]:
def process_data(data):
    '''
    remove redundant columns
    '''
    #rems = ['Id', 'Soil_Type7', 'Soil_Type8', 'Soil_Type15', 'Soil_Type25']
    rems = ['Id', 'Soil_Type7', 'Soil_Type15']
    #rems = ['Id']
#     #Add constant columns as they don't help in prediction process
#     for c in data.columns:
#         if data[c].std() == 0: #standard deviation is zero
#             rem.append(c)

    #drop the columns
    for rem in rems:
        data.drop(rem,axis=1,inplace=True)
    

    return data

In [3]:
def score(y, y_pred):

    y_true = np.array(y, dtype=int)
    y_predict = np.array(y_pred, dtype=int)
    
    from sklearn.metrics import f1_score

    return f1_score(y_true, y_predict, average='micro')

In [59]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer

def normalize_train_data(dataset):
    r, c = dataset.shape
    array = dataset.values
    X_all = array[:,0:(c-1)]
    y_all = array[:,(c-1)]
    size = 10
    X_num = X_all[:,0:size]
    X_cat = X_all[:,size:]

    X_num = StandardScaler().fit_transform(X_num)
    #X_num = MinMaxScaler().fit_transform(X_num)
    X_num = Normalizer().fit_transform(X_num)

    X_all_scaled = np.concatenate((X_num, X_cat), axis=1)
    
    return X_all_scaled, y_all

def normalize_test_data(dataset):
    r, c = dataset.shape
    X_all = dataset.values
    y_all = []
    size = 10
    X_num = X_all[:,0:size]
    X_cat = X_all[:,size:]

    X_num = StandardScaler().fit_transform(X_num)
    #X_num = MinMaxScaler().fit_transform(X_num)
    X_num = Normalizer().fit_transform(X_num)

    X_all_scaled = np.concatenate((X_num, X_cat), axis=1)
    
    return X_all_scaled, y_all

def train_extract(train, test):
    X_train, y_train = normalize_train_data(train)
    X_test, y_test = normalize_train_data(test)
    
    return X_train, y_train, X_test, y_test



In [60]:
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV

def tuner(model, param_grid, dataset):
    X_train, y_train, _, _ = train_extract(dataset, dataset)
    tuning_scorer = make_scorer(score, greater_is_better = True)
    
    tuner_model = GridSearchCV(estimator=model, 
                                param_grid=param_grid, 
                                scoring=tuning_scorer,
                                verbose=10, 
                                n_jobs=-1, 
                                iid=True, 
                                refit=True,
                                cv=5)

    tuner_model.fit(X_train, y_train)
    print("Best score: %0.3f" % tuner_model.best_score_)
    print("Best parameters set:")
    best_parameters = tuner_model.best_estimator_.get_params()
    for param_name in sorted(param_grid.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))


In [61]:
train_raw = pd.read_csv('data/train.csv')
msk = np.random.rand(len(train_raw)) < 0.9
train = train_raw[msk]
validation = train_raw[~msk]

train_clean = process_data(train)
validation_clean = process_data(validation)

X_train, y_train, X_test, y_test = train_extract(train_clean, validation_clean)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [8]:
model = KNeighborsClassifier(n_jobs=-1)
param_grid = {
        #'n_estimators': np.arange(89, 100, 2),
        'algorithm':["auto", "ball_tree", "kd_tree", "brute"],
        'n_neighbors':[1,2,3]
    }

tuner(model, param_grid, train_clean)



Fitting 5 folds for each of 12 candidates, totalling 60 fits




[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] .......... algorithm=auto, n_neighbors=1, score=0.691517 -   1.2s
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] .......... algorithm=auto, n_neighbors=1, score=0.698238 -   1.1s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV] .......... algorithm=auto, n_neighbors=1, score=0.715963 -   1.1s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV] .......... algorithm=auto, n_neighbors=1, score=0.733284 -   1.2s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV] .......... algorithm=auto, n_neighbors=1, score=0.798088 -   0.7s
[CV] algorithm=auto, n_neighbors=2 ...................................


[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   10.6s


[CV] .......... algorithm=auto, n_neighbors=2, score=0.654185 -   1.1s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV] .......... algorithm=auto, n_neighbors=2, score=0.663239 -   1.3s
[CV] algorithm=auto, n_neighbors=3 ...................................
[CV] .......... algorithm=auto, n_neighbors=2, score=0.672294 -   0.9s
[CV] algorithm=auto, n_neighbors=3 ...................................
[CV] .......... algorithm=auto, n_neighbors=2, score=0.692873 -   1.1s
[CV] algorithm=auto, n_neighbors=3 ...................................
[CV] .......... algorithm=auto, n_neighbors=2, score=0.774549 -   0.9s
[CV] algorithm=auto, n_neighbors=3 ...................................


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   18.0s


[CV] .......... algorithm=auto, n_neighbors=3, score=0.662996 -   1.3s
[CV] algorithm=auto, n_neighbors=3 ...................................
[CV] .......... algorithm=auto, n_neighbors=3, score=0.669358 -   0.9s
[CV] algorithm=ball_tree, n_neighbors=1 ..............................
[CV] .......... algorithm=auto, n_neighbors=3, score=0.657363 -   1.0s
[CV] algorithm=ball_tree, n_neighbors=1 ..............................
[CV] .......... algorithm=auto, n_neighbors=3, score=0.695077 -   1.0s
[CV] algorithm=ball_tree, n_neighbors=1 ..............................
[CV] .......... algorithm=auto, n_neighbors=3, score=0.776388 -   0.9s
[CV] algorithm=ball_tree, n_neighbors=1 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=1, score=0.715963 -   2.0s
[CV] algorithm=ball_tree, n_neighbors=1 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=1, score=0.698238 -   1.9s
[CV] algorithm=ball_tree, n_neighbors=2 ..............................


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   32.2s


[CV] ..... algorithm=ball_tree, n_neighbors=1, score=0.733284 -   1.5s
[CV] algorithm=ball_tree, n_neighbors=2 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=1, score=0.691517 -   1.5s
[CV] algorithm=ball_tree, n_neighbors=2 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=1, score=0.798088 -   1.5s
[CV] algorithm=ball_tree, n_neighbors=2 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=2, score=0.672294 -   1.7s
[CV] algorithm=ball_tree, n_neighbors=2 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=2, score=0.663239 -   1.6s
[CV] ..... algorithm=ball_tree, n_neighbors=2, score=0.654185 -   2.1s
[CV] algorithm=ball_tree, n_neighbors=3 ..............................
[CV] algorithm=ball_tree, n_neighbors=3 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=2, score=0.692873 -   1.7s
[CV] algorithm=ball_tree, n_neighbors=3 ..............................


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   43.4s


[CV] ..... algorithm=ball_tree, n_neighbors=2, score=0.774549 -   1.2s
[CV] algorithm=ball_tree, n_neighbors=3 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=3, score=0.662996 -   2.0s
[CV] algorithm=ball_tree, n_neighbors=3 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=3, score=0.669358 -   1.6s
[CV] algorithm=kd_tree, n_neighbors=1 ................................
[CV] ..... algorithm=ball_tree, n_neighbors=3, score=0.657363 -   1.8s
[CV] algorithm=kd_tree, n_neighbors=1 ................................
[CV] ..... algorithm=ball_tree, n_neighbors=3, score=0.695077 -   1.5s
[CV] ....... algorithm=kd_tree, n_neighbors=1, score=0.698238 -   1.1s
[CV] algorithm=kd_tree, n_neighbors=1 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=1, score=0.715963 -   0.8s
[CV] algorithm=kd_tree, n_neighbors=1 ................................
[CV] algorithm=kd_tree, n_neighbors=1 ................................
[CV] .

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  1.0min


[CV] ....... algorithm=kd_tree, n_neighbors=1, score=0.691517 -   0.9s
[CV] algorithm=kd_tree, n_neighbors=2 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=1, score=0.733284 -   1.0s
[CV] algorithm=kd_tree, n_neighbors=2 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=2, score=0.672294 -   1.1s
[CV] algorithm=kd_tree, n_neighbors=2 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=2, score=0.654185 -   1.0s
[CV] algorithm=kd_tree, n_neighbors=3 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=2, score=0.663239 -   1.1s
[CV] algorithm=kd_tree, n_neighbors=3 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=2, score=0.692873 -   1.2s
[CV] algorithm=kd_tree, n_neighbors=3 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=2, score=0.774549 -   1.0s
[CV] algorithm=kd_tree, n_neighbors=3 ................................
[CV] .

  **self._backend_args)
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  1.2min


[CV] ....... algorithm=kd_tree, n_neighbors=3, score=0.657363 -   1.0s
[CV] algorithm=brute, n_neighbors=1 ..................................


  **self._backend_args)


[CV] ....... algorithm=kd_tree, n_neighbors=3, score=0.695077 -   1.3s
[CV] algorithm=brute, n_neighbors=1 ..................................


  **self._backend_args)


[CV] ....... algorithm=kd_tree, n_neighbors=3, score=0.776388 -   1.1s
[CV] algorithm=brute, n_neighbors=1 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=1, score=0.733284 -   7.9s
[CV] algorithm=brute, n_neighbors=1 ..................................
[CV] ......... algorithm=brute, n_neighbors=1, score=0.691517 -   4.2s
[CV] algorithm=brute, n_neighbors=2 ..................................


  **self._backend_args)
  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=1, score=0.715963 -   4.5s
[CV] algorithm=brute, n_neighbors=2 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=1, score=0.698238 -   4.1s
[CV] algorithm=brute, n_neighbors=2 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=2, score=0.672294 -   8.0s
[CV] algorithm=brute, n_neighbors=2 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=1, score=0.798088 -   8.0s
[CV] algorithm=brute, n_neighbors=2 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=2, score=0.654185 -   9.0s
[CV] algorithm=brute, n_neighbors=3 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=2, score=0.663239 -   5.1s
[CV] algorithm=brute, n_neighbors=3 ..................................


[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 16.0min
  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=2, score=0.692873 -  11.3s
[CV] algorithm=brute, n_neighbors=3 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=2, score=0.774549 -   8.7s
[CV] algorithm=brute, n_neighbors=3 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=3, score=0.669358 - 1.3min
[CV] ......... algorithm=brute, n_neighbors=3, score=0.662996 - 1.2min
[CV] algorithm=brute, n_neighbors=3 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=3, score=0.657363 -   9.8s
[CV] ......... algorithm=brute, n_neighbors=3, score=0.695077 -   9.1s
[CV] ......... algorithm=brute, n_neighbors=3, score=0.776388 -   9.7s


[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 25.4min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 25.4min finished


Best score: 0.727
Best parameters set:
	algorithm: 'auto'
	n_neighbors: 1


In [62]:
model = KNeighborsClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.783518639634


In [63]:
model = KNeighborsClassifier(n_jobs=-1, algorithm='auto', n_neighbors=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.817527795945


In [29]:
seed = 19
model = DecisionTreeClassifier(random_state=seed)
param_grid = {
    'max_depth': np.arange(10, 30, 2),
    }
tuner(model, param_grid, train_clean)

Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] max_depth=10 ....................................................
[CV] max_depth=10 ....................................................
[CV] max_depth=10 ....................................................
[CV] max_depth=10 ....................................................
[CV] ........................... max_depth=10, score=0.642489 -   0.0s
[CV] ........................... max_depth=10, score=0.698489 -   0.0s
[CV] ........................... max_depth=10, score=0.622010 -   0.0s
[CV] max_depth=10 ....................................................
[CV] ........................... max_depth=10, score=0.658195 -   0.0s
[CV] max_depth=12 ....................................................
[CV] max_depth=12 ....................................................
[CV] max_depth=12 ....................................................
[CV] ........................... max_depth=10, score=0.713021 -   0.0s
[CV] ........................... max_depth=12, score=0.660287 -   0.0s
[CV] .

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.9s


[CV] ........................... max_depth=12, score=0.703649 -   0.0s
[CV] ........................... max_depth=12, score=0.749170 -   0.0s
[CV] max_depth=14 ....................................................
[CV] ........................... max_depth=14, score=0.674273 -   0.0s
[CV] max_depth=14 ....................................................
[CV] max_depth=14 ....................................................
[CV] ........................... max_depth=14, score=0.671208 -   0.0s
[CV] max_depth=16 ....................................................


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    1.3s


[CV] ........................... max_depth=14, score=0.661142 -   0.0s
[CV] max_depth=16 ....................................................
[CV] ........................... max_depth=14, score=0.706598 -   0.0s
[CV] max_depth=16 ....................................................
[CV] ........................... max_depth=14, score=0.756916 -   0.0s
[CV] max_depth=16 ....................................................
[CV] ........................... max_depth=16, score=0.680162 -   0.0s
[CV] max_depth=16 ....................................................
[CV] ........................... max_depth=16, score=0.671208 -   0.0s
[CV] max_depth=18 ....................................................
[CV] ........................... max_depth=16, score=0.700332 -   0.0s
[CV] max_depth=18 ....................................................
[CV] ........................... max_depth=16, score=0.663352 -   0.0s
[CV] max_depth=18 ....................................................


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    2.1s


[CV] ........................... max_depth=16, score=0.763187 -   0.0s
[CV] max_depth=18 ....................................................
[CV] ........................... max_depth=18, score=0.666789 -   0.0s
[CV] ........................... max_depth=18, score=0.676481 -   0.0s
[CV] max_depth=18 ....................................................
[CV] max_depth=20 ....................................................
[CV] ........................... max_depth=18, score=0.653775 -   0.0s
[CV] max_depth=20 ....................................................
[CV] ........................... max_depth=18, score=0.696646 -   0.0s
[CV] max_depth=20 ....................................................


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    2.7s


[CV] ........................... max_depth=20, score=0.676849 -   0.0s
[CV] ........................... max_depth=18, score=0.761343 -   0.0s
[CV] max_depth=20 ....................................................
[CV] max_depth=20 ....................................................
[CV] ........................... max_depth=20, score=0.670103 -   0.0s
[CV] max_depth=22 ....................................................
[CV] ........................... max_depth=20, score=0.660405 -   0.0s
[CV] max_depth=22 ....................................................
[CV] ........................... max_depth=20, score=0.695171 -   0.0s
[CV] ........................... max_depth=20, score=0.762449 -   0.0s
[CV] max_depth=22 ....................................................
[CV] max_depth=22 ....................................................
[CV] ........................... max_depth=22, score=0.680530 -   0.0s
[CV] max_depth=22 ....................................................
[CV] .

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    3.7s


[CV] ........................... max_depth=24, score=0.690835 -   0.0s
[CV] max_depth=24 ....................................................
[CV] ........................... max_depth=24, score=0.663844 -   0.0s
[CV] max_depth=26 ....................................................
[CV] ........................... max_depth=24, score=0.698489 -   0.0s
[CV] ........................... max_depth=24, score=0.654512 -   0.0s
[CV] max_depth=26 ....................................................
[CV] max_depth=26 ....................................................
[CV] ........................... max_depth=24, score=0.763925 -   0.0s
[CV] max_depth=26 ....................................................
[CV] ........................... max_depth=26, score=0.691572 -   0.0s
[CV] max_depth=26 ....................................................
[CV] ........................... max_depth=26, score=0.657459 -   0.0s
[CV] max_depth=28 ....................................................
[CV] .

[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.7s


[CV] ........................... max_depth=26, score=0.696277 -   0.0s
[CV] max_depth=28 ....................................................
[CV] ........................... max_depth=26, score=0.769089 -   0.0s
[CV] max_depth=28 ....................................................
[CV] ........................... max_depth=28, score=0.686787 -   0.0s
[CV] max_depth=28 ....................................................
[CV] ........................... max_depth=28, score=0.668630 -   0.0s
[CV] ........................... max_depth=28, score=0.657459 -   0.0s
[CV] ........................... max_depth=28, score=0.694434 -   0.0s
[CV] ........................... max_depth=28, score=0.769458 -   0.0s


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    5.4s finished


Best score: 0.697
Best parameters set:
	max_depth: 26


In [64]:
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.752125572269


In [66]:
seed = 19
model = DecisionTreeClassifier(random_state=seed, max_depth=14)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.75408763898


In [36]:
from sklearn.ensemble import BaggingClassifier

base_estimator = DecisionTreeClassifier(random_state=seed,max_depth=14)
model = BaggingClassifier(n_jobs=-1,base_estimator=base_estimator, random_state=seed)
param_grid = {
    'n_estimators': np.arange(80, 100, 2),
    }
tuner(model, param_grid, train_clean)


Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] n_estimators=80 .................................................
[CV] n_estimators=80 .................................................
[CV] n_estimators=80 .................................................


  **self._backend_args)


[CV] n_estimators=80 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=80, score=0.708150 -   0.2s
[CV] n_estimators=80 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=80, score=0.750918 -   0.2s
[CV] n_estimators=82 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=80, score=0.723670 -   0.3s
[CV] n_estimators=82 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=80, score=0.697025 -   0.2s
[CV] n_estimators=82 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=80, score=0.795881 -   0.2s
[CV] n_estimators=82 .................................................
[CV] ........................ n_estimators=82, score=0.709985 -   0.2s


  **self._backend_args)


[CV] n_estimators=82 .................................................


  **self._backend_args)
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   28.6s


[CV] ........................ n_estimators=82, score=0.725872 -   0.2s
[CV] n_estimators=84 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=82, score=0.697760 -   0.2s
[CV] n_estimators=84 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=82, score=0.749816 -   0.2s
[CV] n_estimators=84 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=82, score=0.796249 -   0.2s
[CV] n_estimators=84 .................................................


  **self._backend_args)
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   44.6s


[CV] ........................ n_estimators=84, score=0.724771 -   0.3s
[CV] n_estimators=84 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=84, score=0.711087 -   0.2s
[CV] n_estimators=86 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=84, score=0.697025 -   0.2s
[CV] n_estimators=86 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=84, score=0.751286 -   0.2s
[CV] n_estimators=86 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=84, score=0.797720 -   0.2s
[CV] n_estimators=86 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=86, score=0.723303 -   0.2s
[CV] n_estimators=86 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=86, score=0.711454 -   0.2s
[CV] n_estimators=88 .................................................


  **self._backend_args)
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  1.2min


[CV] ........................ n_estimators=86, score=0.752388 -   0.2s
[CV] n_estimators=88 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=86, score=0.697025 -   0.2s
[CV] n_estimators=88 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=86, score=0.797352 -   0.3s
[CV] n_estimators=88 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=88, score=0.722936 -   0.2s
[CV] n_estimators=88 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=88, score=0.707783 -   0.2s
[CV] n_estimators=90 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=88, score=0.697393 -   0.2s
[CV] n_estimators=90 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=88, score=0.751286 -   0.2s
[CV] n_estimators=90 .................................................


  **self._backend_args)
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:  1.5min
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=88, score=0.796984 -   0.2s
[CV] n_estimators=90 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=90, score=0.725505 -   0.2s
[CV] n_estimators=90 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=90, score=0.709985 -   0.2s
[CV] n_estimators=92 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=90, score=0.699229 -   0.2s
[CV] n_estimators=92 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=90, score=0.752021 -   0.2s
[CV] n_estimators=92 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=90, score=0.799191 -   0.2s
[CV] n_estimators=92 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=92, score=0.725505 -   0.2s
[CV] n_estimators=92 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=92, score=0.710352 -   0.2s
[CV] n_estimators=94 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=92, score=0.697760 -   0.2s
[CV] n_estimators=94 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=92, score=0.752388 -   0.2s
[CV] n_estimators=94 .................................................


  **self._backend_args)
  **self._backend_args)
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  2.2min
  **self._backend_args)


[CV] ........................ n_estimators=92, score=0.799191 -   0.2s
[CV] n_estimators=94 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=94, score=0.725872 -   0.2s
[CV] n_estimators=94 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=94, score=0.707783 -   0.2s
[CV] n_estimators=96 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=94, score=0.699229 -   0.2s
[CV] n_estimators=96 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=94, score=0.752021 -   0.2s
[CV] n_estimators=96 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=94, score=0.798823 -   0.2s
[CV] n_estimators=96 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=96, score=0.725872 -   0.3s
[CV] n_estimators=96 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=96, score=0.708150 -   0.2s
[CV] n_estimators=98 .................................................


  **self._backend_args)
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  2.8min
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=96, score=0.752388 -   0.2s
[CV] n_estimators=98 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=96, score=0.700331 -   0.2s
[CV] n_estimators=98 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=96, score=0.797720 -   0.3s
[CV] n_estimators=98 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=98, score=0.727339 -   0.2s
[CV] n_estimators=98 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=98, score=0.710352 -   0.2s
[CV] ........................ n_estimators=98, score=0.699596 -   0.3s


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=98, score=0.751286 -   0.2s
[CV] ........................ n_estimators=98, score=0.798823 -   0.2s


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:  3.3min finished


Best score: 0.737
Best parameters set:
	n_estimators: 98


In [39]:
base_estimator = DecisionTreeClassifier(random_state=seed,max_depth=11)
model = BaggingClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.786175710594


In [41]:
base_estimator = DecisionTreeClassifier(random_state=seed,max_depth=11)
model = BaggingClassifier(n_jobs=-1,base_estimator=base_estimator, n_estimators=300, random_state=seed)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.821499668215


In [16]:
from sklearn.svm import SVC
model = SVC(random_state=seed)
param_grid = {
    #'kernel': [‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’ ],
    'C': np.arange(1, 11, 1),
    }
tuner(model, param_grid, train_clean)

Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] C=1 .............................................................
[CV] C=1 .............................................................
[CV] C=1 .............................................................
[CV] C=1 .............................................................
[CV] .................................... C=1, score=0.492477 -   3.1s
[CV] C=1 .............................................................
[CV] .................................... C=1, score=0.598972 -   3.2s
[CV] C=2 .............................................................
[CV] .................................... C=1, score=0.548458 -   3.2s
[CV] C=2 .............................................................
[CV] .................................... C=1, score=0.603600 -   3.2s
[CV] C=2 .............................................................
[CV] .................................... C=2, score=0.497615 -   2.8s
[CV] C=2 .............................................................


[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   57.5s


[CV] .................................... C=2, score=0.549559 -   2.8s
[CV] .................................... C=2, score=0.597136 -   2.9s
[CV] C=2 .............................................................
[CV] C=3 .............................................................
[CV] .................................... C=1, score=0.621184 -   3.0s
[CV] C=3 .............................................................
[CV] .................................... C=3, score=0.502752 -   2.7s
[CV] C=3 .............................................................
[CV] .................................... C=2, score=0.602131 -   2.9s
[CV] C=3 .............................................................


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  1.4min


[CV] .................................... C=2, score=0.630379 -   2.8s
[CV] C=3 .............................................................
[CV] .................................... C=3, score=0.548458 -   2.7s
[CV] C=4 .............................................................
[CV] .................................... C=3, score=0.596034 -   2.8s
[CV] C=4 .............................................................
[CV] .................................... C=3, score=0.610580 -   2.8s
[CV] C=4 .............................................................
[CV] .................................... C=4, score=0.506422 -   2.7s
[CV] C=4 .............................................................
[CV] .................................... C=3, score=0.637367 -   2.8s
[CV] C=4 .............................................................
[CV] .................................... C=4, score=0.548458 -   2.7s
[CV] C=5 .............................................................


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  2.2min


[CV] .................................... C=4, score=0.600073 -   2.8s
[CV] C=5 .............................................................
[CV] .................................... C=4, score=0.614622 -   2.8s
[CV] C=5 .............................................................
[CV] .................................... C=4, score=0.643987 -   2.8s
[CV] C=5 .............................................................
[CV] .................................... C=5, score=0.520000 -   2.7s
[CV] C=5 .............................................................
[CV] .................................... C=5, score=0.548091 -   2.7s
[CV] C=6 .............................................................
[CV] .................................... C=5, score=0.607051 -   2.8s
[CV] C=6 .............................................................
[CV] .................................... C=5, score=0.617928 -   2.8s
[CV] C=6 .............................................................


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:  2.6min


[CV] .................................... C=5, score=0.649136 -   5.9s
[CV] C=6 .............................................................
[CV] .................................... C=6, score=0.525138 -   5.6s
[CV] C=6 .............................................................
[CV] .................................... C=6, score=0.546623 -   5.5s
[CV] C=7 .............................................................
[CV] .................................... C=6, score=0.615498 -   5.9s
[CV] C=7 .............................................................
[CV] .................................... C=6, score=0.622704 -   7.1s
[CV] C=7 .............................................................
[CV] .................................... C=6, score=0.652078 -   5.8s
[CV] C=7 .............................................................
[CV] .................................... C=7, score=0.530642 -   4.5s
[CV] C=7 .............................................................
[CV] .

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  4.5min


[CV] .................................... C=7, score=0.624173 -   3.0s
[CV] C=8 .............................................................
[CV] .................................... C=7, score=0.657227 -   2.9s
[CV] C=8 .............................................................
[CV] .................................... C=8, score=0.533945 -   2.7s
[CV] C=8 .............................................................
[CV] .................................... C=8, score=0.548825 -   3.0s
[CV] C=9 .............................................................
[CV] .................................... C=8, score=0.626148 -   3.5s
[CV] C=9 .............................................................
[CV] .................................... C=8, score=0.627112 -   3.4s
[CV] C=9 .............................................................
[CV] .................................... C=8, score=0.661273 -   3.4s
[CV] C=9 .............................................................
[CV] .

[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  5.4min


[CV] .................................... C=9, score=0.630555 -   2.9s
[CV] C=10 ............................................................
[CV] .................................... C=9, score=0.628215 -   2.9s
[CV] C=10 ............................................................
[CV] .................................... C=9, score=0.664215 -   2.7s
[CV] C=10 ............................................................
[CV] ................................... C=10, score=0.540917 -   2.6s
[CV] C=10 ............................................................
[CV] ................................... C=10, score=0.552496 -   2.6s
[CV] ................................... C=10, score=0.633492 -   2.7s
[CV] ................................... C=10, score=0.626745 -   2.0s
[CV] ................................... C=10, score=0.669732 -   1.9s


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:  6.0min finished


Best score: 0.605
Best parameters set:
	C: 10


In [41]:
model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)


0.614341085271


In [17]:
model = SVC(random_state=seed, C=10)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)


0.646317186463


In [19]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_jobs=-1, random_state=seed)
param_grid = {
    'n_estimators': np.arange(10, 20, 2),
    }
tuner(model, param_grid, train_clean)

Fitting 5 folds for each of 5 candidates, totalling 25 fits




[CV] n_estimators=10 .................................................
[CV] n_estimators=10 .................................................
[CV] n_estimators=10 .................................................
[CV] n_estimators=10 .................................................
[CV] ........................ n_estimators=10, score=0.698349 -   0.1s
[CV] ........................ n_estimators=10, score=0.696035 -   0.1s
[CV] n_estimators=10 .................................................
[CV] ........................ n_estimators=10, score=0.732550 -   0.1s
[CV] ........................ n_estimators=10, score=0.708777 -   0.1s
[CV] n_estimators=12 .................................................
[CV] n_estimators=12 .................................................
[CV] n_estimators=12 .................................................
[CV] ........................ n_estimators=10, score=0.806547 -   0.2s
[CV] n_estimators=12 .................................................
[CV] .

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    1.7s


[CV] ........................ n_estimators=12, score=0.736591 -   0.2s
[CV] n_estimators=14 .................................................
[CV] ........................ n_estimators=14, score=0.705321 -   0.1s
[CV] ........................ n_estimators=12, score=0.808753 -   0.1s
[CV] ........................ n_estimators=14, score=0.699339 -   0.1s
[CV] n_estimators=14 .................................................
[CV] n_estimators=14 .................................................
[CV] n_estimators=16 .................................................


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    2.6s


[CV] ........................ n_estimators=14, score=0.730444 -   0.2s
[CV] n_estimators=16 .................................................
[CV] ........................ n_estimators=14, score=0.743938 -   0.1s
[CV] n_estimators=16 .................................................
[CV] ........................ n_estimators=14, score=0.815741 -   0.1s
[CV] n_estimators=16 .................................................
[CV] ........................ n_estimators=16, score=0.707523 -   0.1s
[CV] n_estimators=16 .................................................
[CV] ........................ n_estimators=16, score=0.701909 -   0.2s
[CV] n_estimators=18 .................................................
[CV] ........................ n_estimators=16, score=0.746510 -   0.1s
[CV] ........................ n_estimators=16, score=0.813902 -   0.1s
[CV] n_estimators=18 .................................................
[CV] ........................ n_estimators=16, score=0.727139 -   0.1s


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    4.4s


[CV] n_estimators=18 .................................................
[CV] n_estimators=18 .................................................
[CV] ........................ n_estimators=18, score=0.708257 -   0.2s
[CV] n_estimators=18 .................................................
[CV] ........................ n_estimators=18, score=0.708884 -   0.1s
[CV] ........................ n_estimators=18, score=0.730444 -   0.1s
[CV] ........................ n_estimators=18, score=0.754592 -   0.1s


[Parallel(n_jobs=-1)]: Done  21 out of  25 | elapsed:    5.5s remaining:    1.1s


[CV] ........................ n_estimators=18, score=0.816109 -   0.1s


[Parallel(n_jobs=-1)]: Done  25 out of  25 | elapsed:    6.2s finished


Best score: 0.744
Best parameters set:
	n_estimators: 18


In [42]:
model = RandomForestClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.782945736434


In [20]:
model = RandomForestClassifier(n_jobs=-1, n_estimators=18, random_state=seed)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.830789648308


In [23]:
from sklearn.ensemble import ExtraTreesClassifier

model = ExtraTreesClassifier(n_jobs=-1, random_state=seed)
param_grid = {
    'n_estimators': np.arange(10, 24, 2),
    }
tuner(model, param_grid, train_clean)

Fitting 5 folds for each of 7 candidates, totalling 35 fits




[CV] n_estimators=10 .................................................
[CV] n_estimators=10 .................................................
[CV] n_estimators=10 .................................................
[CV] n_estimators=10 .................................................
[CV] ........................ n_estimators=10, score=0.720367 -   0.2s
[CV] n_estimators=10 .................................................
[CV] ........................ n_estimators=10, score=0.704479 -   0.2s
[CV] n_estimators=12 .................................................
[CV] ........................ n_estimators=10, score=0.722732 -   0.1s
[CV] ........................ n_estimators=10, score=0.748714 -   0.1s
[CV] n_estimators=12 .................................................
[CV] n_estimators=12 .................................................
[CV] ........................ n_estimators=10, score=0.820890 -   0.2s
[CV] n_estimators=12 .................................................
[CV] .

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    1.3s


[CV] ........................ n_estimators=12, score=0.721835 -   0.1s
[CV] ........................ n_estimators=12, score=0.705580 -   0.1s
[CV] n_estimators=14 .................................................
[CV] n_estimators=14 .................................................
[CV] ........................ n_estimators=12, score=0.754225 -   0.1s
[CV] n_estimators=14 .................................................
[CV] ........................ n_estimators=12, score=0.822361 -   0.1s
[CV] n_estimators=14 .................................................
[CV] ........................ n_estimators=14, score=0.711454 -   0.1s


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    2.0s


[CV] ........................ n_estimators=14, score=0.720734 -   0.1s
[CV] n_estimators=14 .................................................
[CV] n_estimators=16 .................................................
[CV] ........................ n_estimators=14, score=0.728975 -   0.1s
[CV] ........................ n_estimators=14, score=0.761572 -   0.1s
[CV] n_estimators=16 .................................................
[CV] n_estimators=16 .................................................
[CV] ........................ n_estimators=16, score=0.723670 -   0.1s
[CV] n_estimators=16 .................................................
[CV] ........................ n_estimators=14, score=0.824568 -   0.1s
[CV] n_estimators=16 .................................................
[CV] ........................ n_estimators=16, score=0.715492 -   0.1s
[CV] n_estimators=18 .................................................
[CV] ........................ n_estimators=16, score=0.734851 -   0.1s
[CV] n

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    3.1s


[CV] ........................ n_estimators=16, score=0.764144 -   0.2s
[CV] n_estimators=18 .................................................
[CV] ........................ n_estimators=16, score=0.827510 -   0.1s
[CV] n_estimators=18 .................................................
[CV] ........................ n_estimators=18, score=0.732844 -   0.1s
[CV] n_estimators=18 .................................................
[CV] ........................ n_estimators=18, score=0.716960 -   0.1s
[CV] n_estimators=20 .................................................
[CV] ........................ n_estimators=18, score=0.738891 -   0.1s
[CV] n_estimators=20 .................................................
[CV] ........................ n_estimators=20, score=0.733211 -   0.1s
[CV] n_estimators=20 .................................................
[CV] ........................ n_estimators=18, score=0.830452 -   0.1s
[CV] ........................ n_estimators=18, score=0.766716 -   0.1s
[CV] n

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    4.9s


[CV] n_estimators=20 .................................................
[CV] ........................ n_estimators=20, score=0.721366 -   0.1s
[CV] n_estimators=22 .................................................
[CV] ........................ n_estimators=20, score=0.766716 -   0.2s
[CV] n_estimators=22 .................................................
[CV] ........................ n_estimators=20, score=0.734484 -   0.1s
[CV] ........................ n_estimators=20, score=0.828981 -   0.1s
[CV] n_estimators=22 .................................................
[CV] n_estimators=22 .................................................
[CV] ........................ n_estimators=22, score=0.733945 -   0.1s
[CV] n_estimators=22 .................................................
[CV] ........................ n_estimators=22, score=0.716960 -   0.2s


[Parallel(n_jobs=-1)]: Done  32 out of  35 | elapsed:    6.6s remaining:    0.6s


[CV] ........................ n_estimators=22, score=0.767083 -   0.1s
[CV] ........................ n_estimators=22, score=0.739625 -   0.1s
[CV] ........................ n_estimators=22, score=0.830085 -   0.1s


[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed:    7.0s finished


Best score: 0.758
Best parameters set:
	n_estimators: 22


In [43]:
model = ExtraTreesClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.826873385013


In [24]:
model = ExtraTreesClassifier(n_jobs=-1, n_estimators=22, random_state=seed)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.829462508295


In [25]:
model = SGDClassifier(loss='log',penalty='elasticnet', random_state=seed, shuffle=True)
     
param_grid = {
    #'loss':[ "hinge", "log", "modified_huber", "squared_hinge", "perceptron"],
    #'penalty': ["l2", "l1", "elasticnet"],
    'n_iter': [1,2,3,4,5,6,7,8,9,20]
    }
    
tuner(model, param_grid, train_clean)



Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] n_iter=1 ........................................................
[CV] n_iter=1 ........................................................
[CV] n_iter=1 ........................................................
[CV] n_iter=1 ........................................................
[CV] ............................... n_iter=1, score=0.429883 -   0.0s
[CV] ............................... n_iter=1, score=0.524771 -   0.0s
[CV] n_iter=1 ........................................................
[CV] ............................... n_iter=1, score=0.513960 -   0.0s
[CV] ............................... n_iter=1, score=0.560044 -   0.0s
[CV] n_iter=2 ........................................................
[CV] n_iter=2 ........................................................
[CV] n_iter=2 ........................................................
[CV] ............................... n_iter=1, score=0.551673 -   0.0s
[CV] n_iter=2 ..

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.6s


[CV] n_iter=3 ........................................................
[CV] ............................... n_iter=2, score=0.565760 -   0.0s
[CV] ............................... n_iter=2, score=0.575579 -   0.0s
[CV] n_iter=3 ........................................................
[CV] n_iter=3 ........................................................
[CV] ............................... n_iter=3, score=0.568440 -   0.0s
[CV] ............................... n_iter=3, score=0.546256 -   0.0s
[CV] n_iter=4 ........................................................
[CV] n_iter=3 ........................................................


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    1.0s


[CV] ............................... n_iter=3, score=0.567389 -   0.0s
[CV] ............................... n_iter=3, score=0.642175 -   0.0s
[CV] n_iter=4 ........................................................
[CV] n_iter=4 ........................................................
[CV] ............................... n_iter=3, score=0.615668 -   0.0s
[CV] n_iter=4 ........................................................
[CV] ............................... n_iter=4, score=0.524771 -   0.0s
[CV] n_iter=4 ........................................................
[CV] ............................... n_iter=4, score=0.582446 -   0.0s
[CV] n_iter=5 ........................................................
[CV] ............................... n_iter=4, score=0.532673 -   0.0s
[CV] n_iter=5 ........................................................
[CV] ............................... n_iter=4, score=0.562821 -   0.0s
[CV] n_iter=5 ........................................................


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.9s


[CV] ............................... n_iter=4, score=0.620081 -   0.0s
[CV] n_iter=5 ........................................................
[CV] ............................... n_iter=5, score=0.584587 -   0.0s
[CV] n_iter=5 ........................................................
[CV] ............................... n_iter=5, score=0.538913 -   0.0s
[CV] n_iter=6 ........................................................
[CV] ............................... n_iter=5, score=0.627847 -   0.0s
[CV] n_iter=6 ........................................................
[CV] ............................... n_iter=5, score=0.576203 -   0.0s
[CV] n_iter=6 ........................................................


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    2.6s


[CV] ............................... n_iter=5, score=0.607576 -   0.0s
[CV] n_iter=6 ........................................................
[CV] ............................... n_iter=6, score=0.539648 -   0.0s
[CV] ............................... n_iter=6, score=0.571743 -   0.0s
[CV] n_iter=6 ........................................................
[CV] n_iter=7 ........................................................
[CV] ............................... n_iter=6, score=0.594932 -   0.0s
[CV] n_iter=7 ........................................................
[CV] ............................... n_iter=6, score=0.634827 -   0.0s
[CV] n_iter=7 ........................................................
[CV] ............................... n_iter=6, score=0.638838 -   0.0s
[CV] ............................... n_iter=7, score=0.560573 -   0.0s
[CV] n_iter=7 ........................................................
[CV] n_iter=7 ........................................................
[CV] .

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    4.3s


[CV] n_iter=8 ........................................................
[CV] ............................... n_iter=7, score=0.648032 -   0.0s
[CV] n_iter=8 ........................................................
[CV] ............................... n_iter=8, score=0.568073 -   0.0s
[CV] n_iter=8 ........................................................
[CV] ............................... n_iter=8, score=0.560940 -   0.0s
[CV] n_iter=9 ........................................................
[CV] ............................... n_iter=8, score=0.595299 -   0.0s
[CV] n_iter=9 ........................................................
[CV] ............................... n_iter=8, score=0.602866 -   0.0s
[CV] n_iter=9 ........................................................
[CV] ............................... n_iter=8, score=0.672674 -   0.0s
[CV] n_iter=9 ........................................................
[CV] ............................... n_iter=9, score=0.584220 -   0.0s
[CV] n

[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    6.3s


[CV] ............................... n_iter=9, score=0.632256 -   0.0s
[CV] n_iter=20 .......................................................
[CV] ............................... n_iter=9, score=0.627437 -   0.0s
[CV] n_iter=20 .......................................................
[CV] .............................. n_iter=20, score=0.566239 -   0.0s
[CV] n_iter=20 .......................................................
[CV] .............................. n_iter=20, score=0.524963 -   0.0s
[CV] .............................. n_iter=20, score=0.599339 -   0.0s
[CV] .............................. n_iter=20, score=0.627112 -   0.0s
[CV] .............................. n_iter=20, score=0.659066 -   0.0s


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    9.1s finished


Best score: 0.600
Best parameters set:
	n_iter: 8


In [44]:
model = SGDClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.633720930233


In [26]:
model = SGDClassifier(loss='log',penalty='elasticnet', n_iter=8, random_state=seed, shuffle=True)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.654943596549


In [42]:
from sklearn.ensemble import GradientBoostingClassifier
model = GradientBoostingClassifier(random_state=seed)
param_grid = {
    'max_depth': np.arange(1, 10, 1),
    }
tuner(model, param_grid, train_clean)

Fitting 5 folds for each of 9 candidates, totalling 45 fits




[CV] max_depth=1 .....................................................
[CV] max_depth=1 .....................................................
[CV] max_depth=1 .....................................................
[CV] max_depth=1 .....................................................
[CV] ............................ max_depth=1, score=0.553028 -   0.1s
[CV] max_depth=1 .....................................................
[CV] ............................ max_depth=1, score=0.622842 -   0.0s
[CV] max_depth=2 .....................................................
[CV] ............................ max_depth=1, score=0.615639 -   0.0s
[CV] max_depth=2 .....................................................
[CV] ............................ max_depth=1, score=0.651727 -   0.0s
[CV] max_depth=2 .....................................................
[CV] ............................ max_depth=1, score=0.696947 -   0.0s
[CV] max_depth=2 .....................................................


[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   43.4s


[CV] ............................ max_depth=2, score=0.648807 -   0.0s
[CV] max_depth=2 .....................................................
[CV] ............................ max_depth=2, score=0.656388 -   0.0s
[CV] ............................ max_depth=2, score=0.674624 -   0.0s
[CV] max_depth=3 .....................................................
[CV] max_depth=3 .....................................................
[CV] ............................ max_depth=2, score=0.677076 -   0.0s
[CV] max_depth=3 .....................................................
[CV] ............................ max_depth=2, score=0.749540 -   0.0s
[CV] max_depth=3 .....................................................


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  1.6min


[CV] ............................ max_depth=3, score=0.675963 -   0.0s
[CV] max_depth=3 .....................................................
[CV] ............................ max_depth=3, score=0.686123 -   0.0s
[CV] max_depth=4 .....................................................
[CV] ............................ max_depth=3, score=0.697760 -   0.0s
[CV] max_depth=4 .....................................................
[CV] ............................ max_depth=3, score=0.709405 -   0.1s
[CV] max_depth=4 .....................................................
[CV] ............................ max_depth=3, score=0.778227 -   0.0s
[CV] max_depth=4 .....................................................
[CV] ............................ max_depth=4, score=0.710092 -   0.1s
[CV] max_depth=4 .....................................................
[CV] ............................ max_depth=4, score=0.698972 -   0.1s
[CV] max_depth=5 .....................................................


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  3.3min


[CV] ............................ max_depth=4, score=0.712082 -   0.1s
[CV] max_depth=5 .....................................................
[CV] ............................ max_depth=4, score=0.722263 -   0.1s
[CV] max_depth=5 .....................................................
[CV] ............................ max_depth=4, score=0.803604 -   0.1s
[CV] max_depth=5 .....................................................
[CV] ............................ max_depth=5, score=0.718899 -   0.1s
[CV] max_depth=5 .....................................................
[CV] ............................ max_depth=5, score=0.707783 -   0.1s
[CV] max_depth=6 .....................................................
[CV] ............................ max_depth=5, score=0.726405 -   0.1s
[CV] max_depth=6 .....................................................
[CV] ............................ max_depth=5, score=0.739530 -   0.1s
[CV] max_depth=6 .....................................................


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:  5.4min


[CV] ............................ max_depth=5, score=0.813167 -   0.1s
[CV] max_depth=6 .....................................................
[CV] ............................ max_depth=6, score=0.731743 -   0.1s
[CV] max_depth=6 .....................................................
[CV] ............................ max_depth=6, score=0.713289 -   0.1s
[CV] max_depth=7 .....................................................
[CV] ............................ max_depth=6, score=0.735219 -   0.1s
[CV] max_depth=7 .....................................................
[CV] ............................ max_depth=6, score=0.745775 -   0.1s
[CV] max_depth=7 .....................................................
[CV] ............................ max_depth=6, score=0.824936 -   0.1s
[CV] max_depth=7 .....................................................
[CV] ............................ max_depth=7, score=0.736514 -   0.1s
[CV] max_depth=7 .....................................................
[CV] .

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  9.6min


[CV] ............................ max_depth=7, score=0.754225 -   0.1s
[CV] max_depth=8 .....................................................
[CV] ............................ max_depth=7, score=0.830085 -   0.1s
[CV] max_depth=8 .....................................................
[CV] ............................ max_depth=8, score=0.743853 -   0.2s
[CV] max_depth=8 .....................................................
[CV] ............................ max_depth=8, score=0.719163 -   0.2s
[CV] max_depth=9 .....................................................
[CV] ............................ max_depth=8, score=0.743665 -   0.1s
[CV] max_depth=9 .....................................................
[CV] ............................ max_depth=8, score=0.761205 -   0.2s
[CV] max_depth=9 .....................................................
[CV] ............................ max_depth=8, score=0.840750 -   0.2s
[CV] max_depth=9 .....................................................
[CV] .

[Parallel(n_jobs=-1)]: Done  43 out of  45 | elapsed: 17.3min remaining:   48.4s


[CV] ............................ max_depth=9, score=0.760103 -   0.1s
[CV] ............................ max_depth=9, score=0.835234 -   0.1s


[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed: 18.3min finished


Best score: 0.762
Best parameters set:
	max_depth: 8


In [47]:
model = GradientBoostingClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.766795865633


In [45]:
model = GradientBoostingClassifier(max_depth=8, random_state=seed)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
result = score(y_test, y_pred)
print(result)

0.842733908427
