# Classify products to its cluster

In [1]:
# library
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.externals import joblib
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import sqlite3
# smote
from imblearn.over_sampling import SMOTE

In [2]:
# make connection to sqlite db
conn = sqlite3.connect('product.db')
c = conn.cursor()

# enable foreign keys
c.execute("PRAGMA foreign_keys = ON")
conn.commit()

In [3]:
# result reproducibility
np.random.seed(42)

In [4]:
# query
# get ranking, reviewcount, salescluster from table 'prodpage'
sqlc = 'SELECT ranking, reviewcount, salescluster FROM prodpage'
c.execute(sqlc)
conn.commit()
product = c.fetchall()
product = pd.DataFrame(product)
product.columns = ['ranking', 'reviewcount', 'salescluster']

In [5]:
product.head()

Unnamed: 0,ranking,reviewcount,salescluster
0,41580.0,3200,4.0
1,41233.5,3100,4.0
2,41571.0,2200,2.0
3,41052.5,2100,2.0
4,40821.5,2100,2.0


In [6]:
# exclude row 0 and 1
product = product.loc[2:,]

In [7]:
# reindex
product.index = range(0, len(product))

In [8]:
product['salescluster'] = product['salescluster'].astype('int')

In [9]:
# Total number of cluster 1.0
len(product[product['salescluster'] == 1])/len(product)

0.4758378612299851

In [10]:
# Total number of cluster 2.0
len(product[product['salescluster'] == 2])/len(product)

0.016709140741453094

In [11]:
# Total number of cluster 3.0
len(product[product['salescluster'] == 3])/len(product)

0.5074529980285618

In [12]:
len(product)

41594

In [13]:
X = product[['ranking', 'reviewcount']].values
y = product['salescluster'].values

In [14]:
# Feature Scaling
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [15]:
# train-test set split
# Use stratified sampling, because classes are skewed
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                   test_size=0.2,
                                                   random_state=42,
                                                   shuffle=True,
                                                   stratify=y)

In [16]:
len(y_train[y_train == 1.])/len(y_train), len(y_train[y_train == 2.])/len(y_train), len(y_train[y_train == 3.])/len(y_train)

(0.4758527422990233, 0.01670924117205109, 0.5074380165289256)

In [17]:
len(y_test[y_test == 1.])/len(y_test), len(y_test[y_test == 2.])/len(y_test), len(y_test[y_test == 3.])/len(y_test)

(0.47577833874263736, 0.01670873903113355, 0.5075129222262291)

In [18]:
# Skewed Classes
# https://florianhartl.com/thoughts-on-machine-learning-dealing-with-skewed-classes.html
# https://news.ycombinator.com/item?id=4440560

In [41]:
X_train, y_train = SMOTE().fit_sample(X_train, y_train)

## 0. Dumb Classifier

In [42]:
from sklearn.base import BaseEstimator

In [43]:
class DumbClassifier(BaseEstimator):
    def fit(self, X, y=None):
        pass
    def predict(self, X):
        return np.zeros((len(X), 1), dtype=bool)
dumb = DumbClassifier()
cross_val_score(dumb, X_train, y_train, cv=5, scoring='accuracy')

array([0., 0., 0., 0., 0.])

## 1. Logistic Regression

In [44]:
from sklearn.linear_model import LogisticRegression

In [45]:
logreg = LogisticRegression(random_state=42, multi_class='multinomial'
#                           class_weight={3: 3.,
#                                        2: 1.5,
#                                        1: 1.5}
                           )

In [46]:
# Grid Search
parameters = [{
    'C': [1.0, 10.0, 15.0, 20.0, 30.0, 40.0,
         50.0],
    'solver': ['newton-cg', 'saga',
              'lbfgs'],
    'penalty': ['l2']
}]

grid_search = GridSearchCV(estimator=logreg,
                          param_grid=parameters,
                          scoring=['accuracy', 'f1_macro',
                                  'f1_micro'],
                          cv=5,
                          verbose=2,
                          refit='f1_micro',
                          n_jobs=-1)

In [47]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 21 candidates, totalling 105 fits
[CV] C=1.0, penalty=l2, solver=newton-cg .............................
[CV] C=1.0, penalty=l2, solver=newton-cg .............................
[CV] C=1.0, penalty=l2, solver=newton-cg .............................
[CV] C=1.0, penalty=l2, solver=newton-cg .............................
[CV] .............. C=1.0, penalty=l2, solver=newton-cg, total=   1.5s
[CV] .............. C=1.0, penalty=l2, solver=newton-cg, total=   1.5s
[CV] C=1.0, penalty=l2, solver=newton-cg .............................
[CV] .............. C=1.0, penalty=l2, solver=newton-cg, total=   1.6s
[CV] C=1.0, penalty=l2, solver=saga ..................................
[CV] C=1.0, penalty=l2, solver=saga ..................................
[CV] .............. C=1.0, penalty=l2, solver=newton-cg, total=   1.9s
[CV] C=1.0, penalty=l2, solver=saga ..................................
[CV] .............. C=1.0, penalty=l2, solver=newton-cg, total=   1.9s
[CV] C=1.0, pen



[CV] ................... C=1.0, penalty=l2, solver=saga, total=   5.0s
[CV] C=1.0, penalty=l2, solver=saga ..................................




[CV] ................... C=1.0, penalty=l2, solver=saga, total=   4.8s
[CV] C=1.0, penalty=l2, solver=lbfgs .................................




[CV] ................... C=1.0, penalty=l2, solver=saga, total=   5.3s
[CV] C=1.0, penalty=l2, solver=lbfgs .................................
[CV] .................. C=1.0, penalty=l2, solver=lbfgs, total=   1.0s
[CV] C=1.0, penalty=l2, solver=lbfgs .................................
[CV] .................. C=1.0, penalty=l2, solver=lbfgs, total=   0.8s
[CV] C=1.0, penalty=l2, solver=lbfgs .................................




[CV] ................... C=1.0, penalty=l2, solver=saga, total=   4.4s
[CV] C=1.0, penalty=l2, solver=lbfgs .................................
[CV] .................. C=1.0, penalty=l2, solver=lbfgs, total=   0.8s
[CV] C=10.0, penalty=l2, solver=newton-cg ............................
[CV] .................. C=1.0, penalty=l2, solver=lbfgs, total=   1.0s
[CV] C=10.0, penalty=l2, solver=newton-cg ............................
[CV] .................. C=1.0, penalty=l2, solver=lbfgs, total=   1.0s
[CV] C=10.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=10.0, penalty=l2, solver=newton-cg, total=   2.2s
[CV] C=10.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=10.0, penalty=l2, solver=newton-cg, total=   2.0s
[CV] C=10.0, penalty=l2, solver=newton-cg ............................




[CV] ................... C=1.0, penalty=l2, solver=saga, total=   4.3s
[CV] C=10.0, penalty=l2, solver=saga .................................
[CV] ............. C=10.0, penalty=l2, solver=newton-cg, total=   2.1s
[CV] C=10.0, penalty=l2, solver=saga .................................
[CV] ............. C=10.0, penalty=l2, solver=newton-cg, total=   1.9s
[CV] C=10.0, penalty=l2, solver=saga .................................
[CV] ............. C=10.0, penalty=l2, solver=newton-cg, total=   2.0s
[CV] C=10.0, penalty=l2, solver=saga .................................




[CV] .................. C=10.0, penalty=l2, solver=saga, total=   4.2s
[CV] C=10.0, penalty=l2, solver=saga .................................




[CV] .................. C=10.0, penalty=l2, solver=saga, total=   4.6s
[CV] C=10.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=10.0, penalty=l2, solver=lbfgs, total=   1.0s
[CV] C=10.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=10.0, penalty=l2, solver=saga, total=   4.2s
[CV] C=10.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=10.0, penalty=l2, solver=saga, total=   4.3s
[CV] C=10.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=10.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=10.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=10.0, penalty=l2, solver=lbfgs, total=   1.0s
[CV] C=15.0, penalty=l2, solver=newton-cg ............................
[CV] ................. C=10.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=15.0, penalty=l2, solver=newton-cg ............................
[CV] ................. C=10.0, penalty=l2, solver=lbfgs, total=   1.3s
[CV] C=15.0, penalty=l2, solver=newton-cg ............................




[CV] .................. C=10.0, penalty=l2, solver=saga, total=   4.7s
[CV] C=15.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=15.0, penalty=l2, solver=newton-cg, total=   2.5s
[CV] C=15.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=15.0, penalty=l2, solver=newton-cg, total=   2.4s
[CV] C=15.0, penalty=l2, solver=saga .................................
[CV] ............. C=15.0, penalty=l2, solver=newton-cg, total=   2.1s
[CV] C=15.0, penalty=l2, solver=saga .................................


[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   21.4s


[CV] ............. C=15.0, penalty=l2, solver=newton-cg, total=   2.6s
[CV] C=15.0, penalty=l2, solver=saga .................................
[CV] ............. C=15.0, penalty=l2, solver=newton-cg, total=   2.2s
[CV] C=15.0, penalty=l2, solver=saga .................................




[CV] .................. C=15.0, penalty=l2, solver=saga, total=   3.8s
[CV] C=15.0, penalty=l2, solver=saga .................................




[CV] .................. C=15.0, penalty=l2, solver=saga, total=   4.2s
[CV] C=15.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=15.0, penalty=l2, solver=lbfgs, total=   0.9s
[CV] C=15.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=15.0, penalty=l2, solver=saga, total=   4.2s
[CV] C=15.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=15.0, penalty=l2, solver=saga, total=   4.1s
[CV] C=15.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=15.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=15.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=15.0, penalty=l2, solver=lbfgs, total=   1.2s
[CV] C=20.0, penalty=l2, solver=newton-cg ............................
[CV] ................. C=15.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=20.0, penalty=l2, solver=newton-cg ............................
[CV] ................. C=15.0, penalty=l2, solver=lbfgs, total=   1.0s
[CV] C=20.0, penalty=l2, solver=newton-cg ............................




[CV] .................. C=15.0, penalty=l2, solver=saga, total=   4.2s
[CV] C=20.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=20.0, penalty=l2, solver=newton-cg, total=   2.2s
[CV] C=20.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=20.0, penalty=l2, solver=newton-cg, total=   2.5s
[CV] C=20.0, penalty=l2, solver=saga .................................
[CV] ............. C=20.0, penalty=l2, solver=newton-cg, total=   2.3s
[CV] C=20.0, penalty=l2, solver=saga .................................
[CV] ............. C=20.0, penalty=l2, solver=newton-cg, total=   2.0s
[CV] C=20.0, penalty=l2, solver=saga .................................
[CV] ............. C=20.0, penalty=l2, solver=newton-cg, total=   2.2s
[CV] C=20.0, penalty=l2, solver=saga .................................




[CV] .................. C=20.0, penalty=l2, solver=saga, total=   4.0s
[CV] C=20.0, penalty=l2, solver=saga .................................




[CV] .................. C=20.0, penalty=l2, solver=saga, total=   4.2s
[CV] C=20.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=20.0, penalty=l2, solver=saga, total=   3.9s
[CV] C=20.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=20.0, penalty=l2, solver=lbfgs, total=   0.9s
[CV] C=20.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=20.0, penalty=l2, solver=lbfgs, total=   1.0s
[CV] C=20.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=20.0, penalty=l2, solver=saga, total=   4.0s
[CV] C=20.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=20.0, penalty=l2, solver=lbfgs, total=   0.9s
[CV] C=30.0, penalty=l2, solver=newton-cg ............................
[CV] ................. C=20.0, penalty=l2, solver=lbfgs, total=   1.0s
[CV] C=30.0, penalty=l2, solver=newton-cg ............................
[CV] ................. C=20.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=30.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=30.0, penalty=l2, solver=newton-cg, total=   1.9s
[CV] C=30.0, penalty=l2, solver=newton-cg ............................




[CV] .................. C=20.0, penalty=l2, solver=saga, total=   4.2s
[CV] C=30.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=30.0, penalty=l2, solver=newton-cg, total=   2.3s
[CV] C=30.0, penalty=l2, solver=saga .................................
[CV] ............. C=30.0, penalty=l2, solver=newton-cg, total=   2.1s
[CV] C=30.0, penalty=l2, solver=saga .................................
[CV] ............. C=30.0, penalty=l2, solver=newton-cg, total=   1.7s
[CV] C=30.0, penalty=l2, solver=saga .................................
[CV] ............. C=30.0, penalty=l2, solver=newton-cg, total=   2.1s
[CV] C=30.0, penalty=l2, solver=saga .................................




[CV] .................. C=30.0, penalty=l2, solver=saga, total=   3.9s
[CV] C=30.0, penalty=l2, solver=saga .................................




[CV] .................. C=30.0, penalty=l2, solver=saga, total=   4.0s
[CV] C=30.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=30.0, penalty=l2, solver=saga, total=   3.6s
[CV] C=30.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=30.0, penalty=l2, solver=saga, total=   3.8s
[CV] C=30.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=30.0, penalty=l2, solver=lbfgs, total=   1.0s
[CV] C=30.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=30.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=30.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=30.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=40.0, penalty=l2, solver=newton-cg ............................
[CV] ................. C=30.0, penalty=l2, solver=lbfgs, total=   1.2s
[CV] C=40.0, penalty=l2, solver=newton-cg ............................
[CV] ................. C=30.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=40.0, penalty=l2, solver=newton-cg ............................




[CV] .................. C=30.0, penalty=l2, solver=saga, total=   4.2s
[CV] C=40.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=40.0, penalty=l2, solver=newton-cg, total=   2.1s
[CV] C=40.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=40.0, penalty=l2, solver=newton-cg, total=   2.3s
[CV] C=40.0, penalty=l2, solver=saga .................................
[CV] ............. C=40.0, penalty=l2, solver=newton-cg, total=   2.2s
[CV] C=40.0, penalty=l2, solver=saga .................................
[CV] ............. C=40.0, penalty=l2, solver=newton-cg, total=   2.1s
[CV] C=40.0, penalty=l2, solver=saga .................................
[CV] ............. C=40.0, penalty=l2, solver=newton-cg, total=   2.1s
[CV] C=40.0, penalty=l2, solver=saga .................................




[CV] .................. C=40.0, penalty=l2, solver=saga, total=   3.9s
[CV] C=40.0, penalty=l2, solver=saga .................................




[CV] .................. C=40.0, penalty=l2, solver=saga, total=   3.9s
[CV] C=40.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=40.0, penalty=l2, solver=saga, total=   3.9s
[CV] C=40.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=40.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=40.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=40.0, penalty=l2, solver=saga, total=   3.9s
[CV] C=40.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=40.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=40.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=40.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=50.0, penalty=l2, solver=newton-cg ............................
[CV] ................. C=40.0, penalty=l2, solver=lbfgs, total=   0.9s
[CV] C=50.0, penalty=l2, solver=newton-cg ............................
[CV] ................. C=40.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=50.0, penalty=l2, solver=newton-cg ............................




[CV] .................. C=40.0, penalty=l2, solver=saga, total=   4.3s
[CV] C=50.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=50.0, penalty=l2, solver=newton-cg, total=   2.3s
[CV] C=50.0, penalty=l2, solver=newton-cg ............................
[CV] ............. C=50.0, penalty=l2, solver=newton-cg, total=   2.1s
[CV] C=50.0, penalty=l2, solver=saga .................................
[CV] ............. C=50.0, penalty=l2, solver=newton-cg, total=   2.5s
[CV] C=50.0, penalty=l2, solver=saga .................................
[CV] ............. C=50.0, penalty=l2, solver=newton-cg, total=   2.2s
[CV] C=50.0, penalty=l2, solver=saga .................................
[CV] ............. C=50.0, penalty=l2, solver=newton-cg, total=   2.1s
[CV] C=50.0, penalty=l2, solver=saga .................................




[CV] .................. C=50.0, penalty=l2, solver=saga, total=   3.9s
[CV] C=50.0, penalty=l2, solver=saga .................................




[CV] .................. C=50.0, penalty=l2, solver=saga, total=   3.9s
[CV] C=50.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=50.0, penalty=l2, solver=saga, total=   4.0s
[CV] C=50.0, penalty=l2, solver=lbfgs ................................




[CV] .................. C=50.0, penalty=l2, solver=saga, total=   3.9s
[CV] C=50.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=50.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] C=50.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=50.0, penalty=l2, solver=lbfgs, total=   1.2s
[CV] C=50.0, penalty=l2, solver=lbfgs ................................
[CV] ................. C=50.0, penalty=l2, solver=lbfgs, total=   1.2s
[CV] ................. C=50.0, penalty=l2, solver=lbfgs, total=   1.1s
[CV] ................. C=50.0, penalty=l2, solver=lbfgs, total=   0.9s




[CV] .................. C=50.0, penalty=l2, solver=saga, total=   4.0s


[Parallel(n_jobs=-1)]: Done 105 out of 105 | elapsed:  1.1min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=42, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'C': [1.0, 10.0, 15.0, 20.0, 30.0, 40.0, 50.0], 'solver': ['newton-cg', 'saga', 'lbfgs'], 'penalty': ['l2']}],
       pre_dispatch='2*n_jobs', refit='f1_micro',
       return_train_score='warn',
       scoring=['accuracy', 'f1_macro', 'f1_micro'], verbose=2)

In [48]:
best_parameters = grid_search.best_params_
best_estimator = grid_search.best_estimator_

In [49]:
best_parameters

{'C': 40.0, 'penalty': 'l2', 'solver': 'newton-cg'}

In [50]:
grid_search.best_score_

0.9997236205705261

In [51]:
# confusion matrix
y_train_pred = cross_val_predict(best_estimator, X_train, y_train, cv=5,
                                verbose=2, n_jobs=-1)

[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    3.5s finished


In [52]:
conf_mx = confusion_matrix(y_train, y_train_pred)
conf_mx

array([[16876,     9,     0],
       [    0, 16885,     0],
       [    5,     0, 16880]])

In [53]:
# confusion matrix
y_test_pred = cross_val_predict(best_estimator, X_test, y_test, cv=5,
                                verbose=2, n_jobs=-1)

[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.4s finished


In [54]:
conf_mx_test = confusion_matrix(y_test, y_test_pred)
conf_mx_test

array([[3955,    3,    0],
       [   2,  137,    0],
       [   7,    0, 4215]])

In [55]:
# f1 score
print(f1_score(y_train, y_train_pred, average='micro'))
print(f1_score(y_test, y_test_pred, average='micro'))

0.9997236205705261
0.9985575189325641


In [56]:
# precision
print(precision_score(y_train, y_train_pred, average='micro'))
print(precision_score(y_test, y_test_pred, average='micro'))

0.9997236205705261
0.9985575189325641


In [57]:
# recall
print(recall_score(y_train, y_train_pred, average='micro'))
print(recall_score(y_test, y_test_pred, average='micro'))

0.9997236205705261
0.9985575189325641


In [58]:
# Save logreg model
joblib.dump(best_estimator, './training/grupClass_logreg.pkl')

['./training/grupClass_logreg.pkl']

## 2. Decision Tree

In [59]:
from sklearn.tree import DecisionTreeClassifier

In [60]:
destree = DecisionTreeClassifier(random_state=42
#                                class_weight={3: 3.,
#                                              2: 1.5,
#                                              1: 1.5}
                                )

In [61]:
# Grid Search
parameters2 = [{
    'max_depth': [3, 4, 5, 7, 9, 10],
    'min_samples_leaf': [4, 5, 6],
    'min_samples_split': [5, 6, 7, 8, 9, 10, 11, 12],
    'max_features': ['auto', 'log2']
}]

grid_search2 = GridSearchCV(estimator=destree,
                          param_grid=parameters2,
                          scoring=['accuracy', 'f1_macro',
                                  'f1_micro'],
                          cv=5,
                          verbose=2,
                          refit='f1_micro',
                          n_jobs=-1)

In [62]:
grid_search2.fit(X_train, y_train)

Fitting 5 folds for each of 288 candidates, totalling 1440 fits
[CV] max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV] max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV] max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV] max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=5, total=   0.1s
[CV] max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=5, total=   0.0s
[CV]  max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=5, total=   0.1s
[CV] max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=6 
[CV] max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=6 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=4, min_samples_split=5, total=   0.1s
[CV] max_depth=3, 

[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=5, total=   0.1s
[CV] max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=7, total=   0.1s
[CV] max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.1s
[CV] max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=7, total=   0.0s
[CV] max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=7, total=   0.1s
[CV] max_depth=3, max_features=auto, min_samples_lea

[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed:    1.9s


[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=10, total=   0.0s
[CV] max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=10 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=11, total=   0.1s
[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=10, total=   0.0s
[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=10, total=   0.0s
[CV] max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=11 
[CV] max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=11 
[CV] max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=12 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=3, max_features=auto, min_

[CV] max_depth=3, max_features=auto, min_samples_leaf=6, min_samples_split=12 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV] max_depth=3, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV] max_depth=3, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV] max_depth=3, max_features=auto, min_samples_leaf=6, min_samples_split=12 
[CV]  max_depth=3, max_features=log2, min_samples_leaf=4, min_samples_split=5, total=   0.0s
[CV] max_depth=3, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=3, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV]  max_depth=3, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV] max_depth=3, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=3, max_features=log2, min_samples_leaf=4, min

[CV] max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=5, total=   0.0s
[CV]  max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV]  max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=3, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV]  max_depth=3, max_features=log2, min_samples_leaf=5, min_sampl

[CV]  max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=7, total=   0.0s
[CV] max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=7 
[CV] max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV] max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=7, total=   0.0s
[CV] max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=8, total=   0.0s
[CV]  max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=8, total=   0.0s
[CV] max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=7, total=   0.0s
[CV] max_depth=3, max_features=log2, min_samples_leaf=6, min_samples_split=9 
[CV]  max_depth=3, max_features=log2, min_samples_leaf=6, min_sampl

[CV]  max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=9, total=   0.0s
[CV]  max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=8, total=   0.1s
[CV] max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=9 
[CV] max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=10 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=9, total=   0.1s
[CV]  max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=9, total=   0.0s
[CV] max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=9 
[CV] max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=10 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=9, total=   0.0s
[CV] max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=10 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=4, min_samples_split=10, total=   0.1s
[CV]  max_depth=4, max_features=auto, min_sample

[CV]  max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=10, total=   0.0s


[Parallel(n_jobs=-1)]: Done 300 tasks      | elapsed:    8.0s


[CV]  max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=10, total=   0.0s
[CV] max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=10 
[CV] max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=12 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=4, max_features=auto, min_samples_leaf=5, min_samples_split=12 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=5

[CV] max_depth=4, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV]  max_depth=4, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV] max_depth=4, max_features=auto, min_samples_leaf=6, min_samples_split=12 
[CV] max_depth=4, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV]  max_depth=4, max_features=log2, min_samples_leaf=4, min_samples_split=5, total=   0.0s
[CV] max_depth=4, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV] max_depth=4, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=4, max_features=log2, min_samples_leaf=4, min_samples_split=5, total=   0.0s
[CV] max_depth=4, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=4, max_features=auto, min_samples_leaf=6, min_s

[CV]  max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=5, total=   0.1s
[CV] max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV]  max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=7, total=   0.0s
[CV] max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=4, max_features=log2, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=4, max_features=log2, min_samples_leaf=5, min_sampl

[CV] max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV] max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=7 
[CV]  max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=7, total=   0.1s
[CV] max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV] max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=8, total=   0.0s
[CV] max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=7, total=   0.0s
[CV] max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=9 
[CV]  max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=8, total=   0.1s
[CV] max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=9 
[CV]  max_depth=4, max_features=log2, min_samples_leaf=6, min_samples_split=8, tot

[CV] max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=9 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=9, total=   0.1s
[CV]  max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=9, total=   0.1s
[CV] max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=10 
[CV] max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=9 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=8, total=   0.1s
[CV]  max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=9, total=   0.1s
[CV] max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=10 
[CV] max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=10 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=9, total=   0.1s
[CV]  max_depth=5, max_features=auto, min_samples_leaf=4, min_samples_split=10, total=   0.1s
[CV] max_depth=5, max_features=auto, min_samples

[CV] max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=10, total=   0.1s
[CV] max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=11, total=   0.1s
[CV]  max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=10, total=   0.1s
[CV] max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=11, total=   0.1s
[CV] max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=11 
[CV] max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=12 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=5, max_features=auto, min_samples_leaf=5, min_samples_split=12 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=5

[CV] max_depth=5, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV] max_depth=5, max_features=auto, min_samples_leaf=6, min_samples_split=12 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV] max_depth=5, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=5, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV]  max_depth=5, max_features=auto, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV] max_depth=5, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=4, min_samples_split=5, total=   0.1s
[CV] max_depth=5, max_features=log2, min_samples_leaf=4, min_samples_split=5 
[CV] max_depth=5, max_features=log2, min_samples_leaf=4, min_samples_split=6 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=4, min_

[CV] max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=5, total=   0.1s
[CV]  max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.1s
[CV] max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.1s
[CV]  max_depth=5, max_features=log2, min_samples_leaf=5, min_samples_split=6, total=   0.1s
[CV] max_depth=5, max_features=log2, min_samples_lea

[CV] max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=7, total=   0.0s
[CV]  max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=7, total=   0.1s
[CV] max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV] max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=7, total=   0.1s
[CV]  max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=8, total=   0.0s
[CV] max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=8 
[CV] max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=9 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=8, total=   0.0s
[CV]  max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=8, total=   0.1s
[CV] max_depth=5, max_features=log2, min_samples_lea

[Parallel(n_jobs=-1)]: Done 706 tasks      | elapsed:   19.1s


[CV]  max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV] max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=12 
[CV]  max_depth=7, max_features=auto, min_samples_leaf=4, min_samples_split=5, total=   0.0s
[CV] max_depth=7, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV]  max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV] max_depth=7, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV] max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=12 
[CV]  max_depth=5, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV] max_depth=7, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=7, max_features=auto, min_samples_leaf=4, min_samples_split=5, total=   0.1s
[CV]  max_depth=5, max_features=log2, min_samp

[CV]  max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=5, total=   0.1s
[CV]  max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.1s
[CV] max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=5, total=   0.1s
[CV]  max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=7 
[CV] max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=7, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=7, max_features=auto, min_samples_lea

[CV]  max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=6, total=   0.0s
[CV]  max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=7, total=   0.0s
[CV] max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=8 
[CV] max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=7, total=   0.1s
[CV] max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=9 
[CV] max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=7, total=   0.1s
[CV]  max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=8, total=   0.0s
[CV] max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=7, max_features=auto, min_samples_leaf=6, min_samples_split=8, total=   0.1s
[CV] max_depth=7, max_features=auto, min_samples_lea

[CV] max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=9 
[CV] max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=9 
[CV]  max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=9, total=   0.0s
[CV] max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=9 
[CV]  max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=8, total=   0.1s
[CV] max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=10 
[CV]  max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=9, total=   0.1s
[CV]  max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=9, total=   0.0s
[CV]  max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=9, total=   0.1s
[CV] max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=10 
[CV] max_depth=7, max_features=log2, min_samples_leaf=4, min_samples_split=10 
[CV] max_depth=7, max_features=log2, min_samples_leaf=4, min_sam

[CV]  max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=10, total=   0.0s
[CV] max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=10 
[CV]  max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=10, total=   0.0s
[CV] max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=11, total=   0.1s
[CV] max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=12 
[CV]  max_depth=7, max_features=log2, min_samples_leaf=5, min_samples_split=10, total=   0.1s
[CV]  max_depth=7, max_features=log2, min

[CV] max_depth=9, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV] max_depth=9, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=7, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV]  max_depth=7, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV] max_depth=7, max_features=log2, min_samples_leaf=6, min_samples_split=12 
[CV] max_depth=7, max_features=log2, min_samples_leaf=6, min_samples_split=12 
[CV]  max_depth=9, max_features=auto, min_samples_leaf=4, min_samples_split=5, total=   0.0s
[CV] max_depth=9, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=9, max_features=auto, min_samples_leaf=4, min_samples_split=5, total=   0.0s
[CV] max_depth=9, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=7, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV]  max_depth=7, max_features=log2, min_samples_leaf=6, min_

[CV] max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=5, total=   0.1s
[CV] max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.1s
[CV] max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV]  max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=7, total=   0.0s
[CV] max_depth=9, max_features=auto, min_samples_leaf=5, min_samples_split=7 
[CV] max_depth=9, max_features=auto, min_samples_leaf=5, min_sample

[CV] max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=8 
[CV] max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=7 
[CV]  max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=7, total=   0.0s
[CV] max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=7, total=   0.0s
[CV] max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=9 
[CV]  max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=7, total=   0.1s
[CV]  max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=8, total=   0.1s
[CV] max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=8 
[CV] max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=8 
[CV]  max_depth=9, max_features=auto, min_samples_leaf=6, min_samples_split=8, total=   0.0s
[CV] max_depth=9, max_features=auto, min_samples_leaf=6, min_sample

[CV]  max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=9, total=   0.1s
[CV] max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=9 
[CV]  max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=8, total=   0.0s
[CV] max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=10 
[CV]  max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=9, total=   0.1s
[CV] max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=9 
[CV]  max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=9, total=   0.0s
[CV] max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=10 
[CV]  max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=9, total=   0.1s
[CV]  max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=10, total=   0.1s
[CV] max_depth=9, max_features=log2, min_samples_leaf=4, min_samples_split=10 
[CV]  max_depth=9, max_features=log2, min_sample

[CV]  max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=10, total=   0.0s
[CV] max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=10, total=   0.1s
[CV]  max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=10, total=   0.0s
[CV]  max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=11, total=   0.1s
[CV] max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=11 
[CV] max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=11 
[CV] max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=12 
[CV]  max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=11 
[CV]  max_depth=9, max_features=log2, min_samples_leaf=5, min_samples_split=11, total=   0.0s
[CV] max_depth=9, max_features=log2, min_

[CV]  max_depth=9, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV] max_depth=10, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=9, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV] max_depth=9, max_features=log2, min_samples_leaf=6, min_samples_split=12 
[CV]  max_depth=9, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.1s
[CV] max_depth=9, max_features=log2, min_samples_leaf=6, min_samples_split=12 
[CV]  max_depth=10, max_features=auto, min_samples_leaf=4, min_samples_split=5, total=   0.0s
[CV] max_depth=10, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV]  max_depth=9, max_features=log2, min_samples_leaf=6, min_samples_split=12, total=   0.0s
[CV]  max_depth=10, max_features=auto, min_samples_leaf=4, min_samples_split=5, total=   0.0s
[CV] max_depth=10, max_features=auto, min_samples_leaf=4, min_samples_split=5 
[CV] max_depth=10, max_features=auto, min

[CV] max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV]  max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=5, total=   0.1s
[CV] max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=5, total=   0.1s
[CV]  max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV] max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=7 
[CV]  max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=6, total=   0.0s
[CV] max_depth=10, max_features=auto, min_samples_leaf=5, min_samples_split=6 
[CV]  max_depth=10, max_features=auto, min_samples_leaf=

[Parallel(n_jobs=-1)]: Done 1272 tasks      | elapsed:   33.8s


[CV]  max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=5, total=   0.0s
[CV] max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=6 
[CV]  max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=5, total=   0.1s
[CV] max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=7 
[CV]  max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=6, total=   0.0s
[CV]  max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=6, total=   0.0s
[CV] max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=6 
[CV] max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=7 
[CV]  max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=7, total=   0.0s
[CV] max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=7 
[CV]  max_depth=10, max_features=auto, min_samples_leaf=6, min_samples_split=6, total=   0.1s
[CV]  max_depth=10, max_features=auto, mi

[CV]  max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=6, total=   0.0s
[CV] max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=7 
[CV] max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=8 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=7, total=   0.1s
[CV]  max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=7, total=   0.0s
[CV] max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=8 
[CV] max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=8 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=7, total=   0.1s
[CV] max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=9 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=8, total=   0.0s
[CV] max_depth=10, max_features=log2, min_samples_leaf=4, min_samples_split=8 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=

[CV]  max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=7, total=   0.1s
[CV] max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=9 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=9, total=   0.0s
[CV] max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=9 
[CV] max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=9 
[CV] max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=10 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=9, total=   0.0s
[CV] max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=9 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=9, total=   0.0s
[CV] max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=10 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=5, min_samples_split=9, total=   0.1s
[CV] max_depth=10, max_features=log2, min_samples_leaf

[CV]  max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=10, total=   0.1s
[CV] max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=11 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=11, total=   0.0s
[CV] max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=10 
[CV] max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=11 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=10, total=   0.1s
[CV] max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=10 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=11, total=   0.0s
[CV] max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=11 
[CV]  max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=11, total=   0.0s
[CV]  max_depth=10, max_features=log2, min_samples_leaf=6, min_samples_split=10, total=   0.0s
[CV] max_depth=10, max_feature

[Parallel(n_jobs=-1)]: Done 1440 out of 1440 | elapsed:   37.9s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=42,
            splitter='best'),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'max_depth': [3, 4, 5, 7, 9, 10], 'min_samples_leaf': [4, 5, 6], 'min_samples_split': [5, 6, 7, 8, 9, 10, 11, 12], 'max_features': ['auto', 'log2']}],
       pre_dispatch='2*n_jobs', refit='f1_micro',
       return_train_score='warn',
       scoring=['accuracy', 'f1_macro', 'f1_micro'], verbose=2)

In [65]:
best_parameters2 = grid_search2.best_params_
best_estimator2 = grid_search2.best_estimator_

In [66]:
best_parameters2

{'max_depth': 9,
 'max_features': 'auto',
 'min_samples_leaf': 4,
 'min_samples_split': 10}

In [67]:
grid_search2.best_score_

0.9985193959135327

In [68]:
# confusion matrix
y_train_pred2 = cross_val_predict(best_estimator2, X_train, y_train, cv=5,
                                verbose=2, n_jobs=-1)

conf_mx2 = confusion_matrix(y_train, y_train_pred2)
conf_mx2

[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s finished


array([[16826,     0,    59],
       [    2, 16883,     0],
       [   14,     0, 16871]])

In [69]:
# confusion matrix
y_test_pred2 = cross_val_predict(best_estimator2, X_test, y_test, cv=5,
                                verbose=2, n_jobs=-1)

conf_mx_test2 = confusion_matrix(y_test, y_test_pred2)
conf_mx_test2

[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.0s finished


array([[3947,    1,   10],
       [   1,  138,    0],
       [  11,    0, 4211]])

In [70]:
# f1 score
print(f1_score(y_train, y_train_pred2, average='micro'))
print(f1_score(y_test, y_test_pred2, average='micro'))

0.9985193959135327
0.9972352446207476


In [86]:
# precision
print(precision_score(y_train, y_train_pred2, average='micro'))
print(precision_score(y_test, y_test_pred2, average='micro'))

0.9985193959135327
0.9972352446207476


In [87]:
# recall
print(recall_score(y_train, y_train_pred2, average='micro'))
print(recall_score(y_test, y_test_pred2, average='micro'))

0.9985193959135327
0.9972352446207476


In [73]:
# Save destree model
joblib.dump(best_estimator2, './training/grupClass_destree.pkl')

['./training/grupClass_destree.pkl']

## 3. C-Support Vector Classification

In [74]:
from sklearn.svm import SVC

In [75]:
suvec = SVC(random_state=42, 
           decision_function_shape='ovo'
#           class_weight={3: 3.,
#                         2: 1.5,
#                         1: 1.5}
           )

In [76]:
suvec.get_params().keys()

dict_keys(['C', 'cache_size', 'class_weight', 'coef0', 'decision_function_shape', 'degree', 'gamma', 'kernel', 'max_iter', 'probability', 'random_state', 'shrinking', 'tol', 'verbose'])

In [77]:
# Grid Search
parameters3 = [{
    'C': [0.001, 0.01, 0.1, 1, 10],
    'gamma': [0.001, 0.01, 0.1, 1],
    'kernel': ['rbf', 'poly', 'sigmoid']
}]

grid_search3 = GridSearchCV(estimator=suvec,
                          param_grid=parameters3,
                          scoring=['accuracy', 'f1_macro',
                                  'f1_micro'],
                          cv=5,
                          verbose=2,
                          refit='f1_micro',
                          n_jobs=-1)

In [78]:
grid_search3.fit(X_train, y_train)

Fitting 5 folds for each of 60 candidates, totalling 300 fits
[CV] C=0.001, gamma=0.001, kernel=rbf ................................
[CV] C=0.001, gamma=0.001, kernel=rbf ................................
[CV] C=0.001, gamma=0.001, kernel=rbf ................................
[CV] C=0.001, gamma=0.001, kernel=rbf ................................
[CV] ................. C=0.001, gamma=0.001, kernel=rbf, total= 4.1min
[CV] C=0.001, gamma=0.001, kernel=rbf ................................
[CV] ................. C=0.001, gamma=0.001, kernel=rbf, total= 4.1min
[CV] C=0.001, gamma=0.001, kernel=poly ...............................
[CV] ................. C=0.001, gamma=0.001, kernel=rbf, total= 4.1min
[CV] C=0.001, gamma=0.001, kernel=poly ...............................
[CV] ................. C=0.001, gamma=0.001, kernel=rbf, total= 4.2min
[CV] C=0.001, gamma=0.001, kernel=poly ...............................
[CV] ................ C=0.001, gamma=0.001, kernel=poly, total= 1.9min
[CV] C=0.001, g

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 51.0min


[CV] ................... C=0.001, gamma=0.1, kernel=rbf, total= 1.3min
[CV] C=0.001, gamma=0.1, kernel=poly .................................
[CV] .................. C=0.001, gamma=0.1, kernel=poly, total=  45.4s
[CV] C=0.001, gamma=0.1, kernel=poly .................................
[CV] .................. C=0.001, gamma=0.1, kernel=poly, total=  52.7s
[CV] C=0.001, gamma=0.1, kernel=poly .................................
[CV] .................. C=0.001, gamma=0.1, kernel=poly, total=  50.8s
[CV] C=0.001, gamma=0.1, kernel=sigmoid ..............................
[CV] ................... C=0.001, gamma=0.1, kernel=rbf, total= 1.4min
[CV] C=0.001, gamma=0.1, kernel=sigmoid ..............................
[CV] .................. C=0.001, gamma=0.1, kernel=poly, total=  52.1s
[CV] C=0.001, gamma=0.1, kernel=sigmoid ..............................
[CV] .................. C=0.001, gamma=0.1, kernel=poly, total=  47.5s
[CV] C=0.001, gamma=0.1, kernel=sigmoid ..............................
[CV] .

[CV] .................... C=0.01, gamma=0.1, kernel=rbf, total=  26.8s
[CV] C=0.01, gamma=0.1, kernel=poly ..................................
[CV] .................... C=0.01, gamma=0.1, kernel=rbf, total=  27.0s
[CV] C=0.01, gamma=0.1, kernel=poly ..................................
[CV] .................... C=0.01, gamma=0.1, kernel=rbf, total=  27.2s
[CV] C=0.01, gamma=0.1, kernel=poly ..................................
[CV] ............... C=0.01, gamma=0.01, kernel=sigmoid, total= 1.4min
[CV] C=0.01, gamma=0.1, kernel=poly ..................................
[CV] ................... C=0.01, gamma=0.1, kernel=poly, total=  38.3s
[CV] C=0.01, gamma=0.1, kernel=poly ..................................
[CV] ................... C=0.01, gamma=0.1, kernel=poly, total=  42.4s
[CV] C=0.01, gamma=0.1, kernel=sigmoid ...............................
[CV] ................... C=0.01, gamma=0.1, kernel=poly, total=  40.1s
[CV] C=0.01, gamma=0.1, kernel=sigmoid ...............................
[CV] .

[CV] ..................... C=0.1, gamma=0.1, kernel=rbf, total=  12.1s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ..................... C=0.1, gamma=0.1, kernel=rbf, total=  11.5s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ................ C=0.1, gamma=0.01, kernel=sigmoid, total=  33.1s
[CV] C=0.1, gamma=0.1, kernel=poly ...................................
[CV] ................ C=0.1, gamma=0.01, kernel=sigmoid, total=  32.3s
[CV] C=0.1, gamma=0.1, kernel=poly ...................................
[CV] ..................... C=0.1, gamma=0.1, kernel=rbf, total=  11.6s
[CV] C=0.1, gamma=0.1, kernel=poly ...................................


[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed: 139.0min


[CV] ..................... C=0.1, gamma=0.1, kernel=rbf, total=  11.5s
[CV] C=0.1, gamma=0.1, kernel=poly ...................................
[CV] .................... C=0.1, gamma=0.1, kernel=poly, total=  26.8s
[CV] C=0.1, gamma=0.1, kernel=poly ...................................
[CV] .................... C=0.1, gamma=0.1, kernel=poly, total=  27.6s
[CV] C=0.1, gamma=0.1, kernel=sigmoid ................................
[CV] .................... C=0.1, gamma=0.1, kernel=poly, total=  27.6s
[CV] C=0.1, gamma=0.1, kernel=sigmoid ................................
[CV] .................... C=0.1, gamma=0.1, kernel=poly, total=  28.0s
[CV] C=0.1, gamma=0.1, kernel=sigmoid ................................
[CV] .................... C=0.1, gamma=0.1, kernel=poly, total=  26.8s
[CV] C=0.1, gamma=0.1, kernel=sigmoid ................................
[CV] ................. C=0.1, gamma=0.1, kernel=sigmoid, total=  38.9s
[CV] C=0.1, gamma=0.1, kernel=sigmoid ................................
[CV] .

[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   5.6s
[CV] C=1, gamma=0.1, kernel=poly .....................................
[CV] ..................... C=1, gamma=0.01, kernel=poly, total=  46.1s
[CV] C=1, gamma=0.1, kernel=poly .....................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   5.4s
[CV] C=1, gamma=0.1, kernel=poly .....................................
[CV] ...................... C=1, gamma=0.1, kernel=poly, total=  19.0s
[CV] C=1, gamma=0.1, kernel=poly .....................................
[CV] ...................... C=1, gamma=0.1, kernel=poly, total=  19.3s
[CV] C=1, gamma=0.1, kernel=sigmoid ..................................
[CV] ...................... C=1, gamma=0.1, kernel=poly, total=  19.3s
[CV] C=1, gamma=0.1, kernel=sigmoid ..................................
[CV] ...................... C=1, gamma=0.1, kernel=poly, total=  18.9s
[CV] C=1, gamma=0.1, kernel=sigmoid ..................................
[CV] .

[CV] ...................... C=10, gamma=0.1, kernel=rbf, total=   2.7s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ...................... C=10, gamma=0.1, kernel=rbf, total=   2.8s
[CV] ...................... C=10, gamma=0.1, kernel=rbf, total=   2.7s
[CV] C=10, gamma=0.1, kernel=poly ....................................
[CV] C=10, gamma=0.1, kernel=poly ....................................
[CV] ...................... C=10, gamma=0.1, kernel=rbf, total=   2.6s
[CV] C=10, gamma=0.1, kernel=poly ....................................
[CV] ..................... C=10, gamma=0.1, kernel=poly, total=  15.1s
[CV] C=10, gamma=0.1, kernel=poly ....................................
[CV] ..................... C=10, gamma=0.1, kernel=poly, total=  16.1s
[CV] C=10, gamma=0.1, kernel=poly ....................................
[CV] .................... C=10, gamma=0.01, kernel=poly, total=  36.1s
[CV] C=10, gamma=0.1, kernel=sigmoid .................................
[CV] .

[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed: 183.5min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=42, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'C': [0.001, 0.01, 0.1, 1, 10], 'gamma': [0.001, 0.01, 0.1, 1], 'kernel': ['rbf', 'poly', 'sigmoid']}],
       pre_dispatch='2*n_jobs', refit='f1_micro',
       return_train_score='warn',
       scoring=['accuracy', 'f1_macro', 'f1_micro'], verbose=2)

In [79]:
best_parameters3 = grid_search3.best_params_
best_estimator3 = grid_search3.best_estimator_

In [80]:
best_parameters3

{'C': 10, 'gamma': 1, 'kernel': 'rbf'}

In [81]:
grid_search3.best_score_

0.999822327509624

In [82]:
# confusion matrix
y_train_pred3 = cross_val_predict(best_estimator3, X_train, y_train, cv=5,
                                verbose=2, n_jobs=-1)


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    2.5s finished


In [83]:
conf_mx3 = confusion_matrix(y_train, y_train_pred3)
conf_mx3

array([[16881,     4,     0],
       [    0, 16885,     0],
       [    5,     0, 16880]])

In [88]:
# confusion matrix
y_test_pred3 = cross_val_predict(best_estimator3, X_test, y_test, cv=5,
                                verbose=2, n_jobs=-1)

[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.2s finished


In [89]:
conf_mx_test3 = confusion_matrix(y_test, y_test_pred3)
conf_mx_test3

array([[3955,    0,    3],
       [   1,  138,    0],
       [   9,    0, 4213]])

In [90]:
# f1 score
print(f1_score(y_train, y_train_pred3, average='micro'))
print(f1_score(y_test, y_test_pred3, average='micro'))

0.999822327509624
0.9984373121769443


In [91]:
# precision
print(precision_score(y_train, y_train_pred3, average='micro'))
print(precision_score(y_test, y_test_pred3, average='micro'))

0.999822327509624
0.9984373121769443


In [92]:
# recall
print(recall_score(y_train, y_train_pred3, average='micro'))
print(recall_score(y_test, y_test_pred3, average='micro'))

0.999822327509624
0.9984373121769443


In [93]:
# Save logreg model
joblib.dump(best_estimator3, './training/grupClass_svc.pkl')

['./training/grupClass_svc.pkl']

In [94]:
??GridSearchCV

In [37]:
estimator = joblib.load('./training/grupClass_svc.pkl')

In [38]:
# confusion matrix
y_train_pred = cross_val_predict(estimator, X_train, y_train, cv=5,
                                verbose=2, n_jobs=-1)

[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    1.5s finished


In [39]:
# confusion matrix
y_test_pred = cross_val_predict(estimator, X_test, y_test, cv=5,
                                verbose=2, n_jobs=-1)

[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.2s finished


In [40]:
# f1 score
print(f1_score(y_train, y_train_pred, average='macro'))
print(f1_score(y_test, y_test_pred, average='macro'))

0.9992774476331882
0.9977756537067718


In [41]:
# precision
print(precision_score(y_train, y_train_pred, average='macro'))
print(precision_score(y_test, y_test_pred, average='macro'))

0.9992754959335551
0.9989221189838666


In [42]:
# recall
print(recall_score(y_train, y_train_pred, average='macro'))
print(recall_score(y_test, y_test_pred, average='macro'))

0.9992794105746493
0.9966387018963708
