In [99]:
import numpy as np
from numpy.core.fromnumeric import _all_dispatcher
import pandas as pd
import joblib
np.random.seed(2021)
import warnings
warnings.filterwarnings('ignore')

In [70]:
##########################################################
######### training code (without any validation) #########

# load data
print('loading train.gz...')
# use only a subset of rows - you should use all rows eventually
df_train = pd.read_csv("train.gz", compression='gzip', nrows=20000, header='infer')
Y = df_train['click']
# discard some columns
unused_cols = ["id", 'site_id', 'app_id']
df_train.drop(unused_cols, axis=1, inplace=True)

loading train.gz...


In [71]:
# create a copy to prevent from modifying the original dataset
df_copy = df_train.copy()

In [72]:
# nunique() shows that the three columns are same across 
df_copy.drop(['hour', 'app_domain', 'app_category', 'click'], axis=1,inplace=True)

In [73]:
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
X_train, X_test, y_train, y_test = train_test_split(df_copy, Y, test_size=0.3)

In [None]:
X_train.hour

In [74]:
import category_encoders as ce
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline

# one_hot_features = ['C1', 'device_type', 'device_conn_type', 'C18']
one_hot_features = [0,7,8,13]
one_hot_transformer = OneHotEncoder(drop='first')

# target_features = ['banner_pos','site_domain','site_category','device_id','device_ip','device_model','C14',
#                    'C15','C16','C17','C19','C20','C21']
target_features = [1,2,3,4,5,6,9,10,11,12,14,15,16]
target_transformer = ce.JamesSteinEncoder()
        
preprocessor = ColumnTransformer(
    transformers=[
        ('one_hot', one_hot_transformer, one_hot_features),
        ('target', target_transformer, target_features)])

In [75]:
from catboost import CatBoostClassifier
from sklearn.metrics import log_loss
from sklearn.linear_model import LogisticRegression

clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', CatBoostClassifier(iterations=20,learning_rate=0.1,depth=7, eval_metric='Logloss'))])

clf.fit(X_train.values, y_train.values.reshape(-1,1))
y_pred = clf.predict_proba(X_test.values)[:, 1]
print("model logloss: %.3f" % log_loss(y_test, y_pred))

  elif pd.api.types.is_categorical(cols):


0:	learn: 0.6180901	total: 32.1ms	remaining: 611ms
1:	learn: 0.5544265	total: 36.6ms	remaining: 329ms
2:	learn: 0.5013944	total: 41.4ms	remaining: 235ms
3:	learn: 0.4562998	total: 45.3ms	remaining: 181ms
4:	learn: 0.4178803	total: 47.7ms	remaining: 143ms
5:	learn: 0.3851783	total: 51.1ms	remaining: 119ms
6:	learn: 0.3563879	total: 59.4ms	remaining: 110ms
7:	learn: 0.3316121	total: 62.8ms	remaining: 94.1ms
8:	learn: 0.3101531	total: 67.7ms	remaining: 82.7ms
9:	learn: 0.2907200	total: 72ms	remaining: 72ms
10:	learn: 0.2750967	total: 76.1ms	remaining: 62.3ms
11:	learn: 0.2602556	total: 80.4ms	remaining: 53.6ms
12:	learn: 0.2482299	total: 86.3ms	remaining: 46.5ms
13:	learn: 0.2367818	total: 90.8ms	remaining: 38.9ms
14:	learn: 0.2264693	total: 94.3ms	remaining: 31.4ms
15:	learn: 0.2171684	total: 100ms	remaining: 25ms
16:	learn: 0.2083740	total: 105ms	remaining: 18.5ms
17:	learn: 0.2015494	total: 109ms	remaining: 12.2ms
18:	learn: 0.1947537	total: 113ms	remaining: 5.95ms
19:	learn: 0.1883837

#### First baseline model has a very high logloss at 0.525.

In [76]:
categorical_f = ['C1','device_type','device_conn_type','C18','banner_pos','site_domain','site_category',
                 'device_id','device_ip','device_model','C14','C15','C16','C17','C19','C20','C21']
cat = CatBoostClassifier(iterations=20,learning_rate=0.1,depth=7, eval_metric='Logloss')
cat.fit(X_train, y_train,cat_features=categorical_f)
y_pred_cat = cat.predict_proba(X_test.values)[:, 1]
print("model logloss: %.3f" % log_loss(y_test, y_pred_cat))

0:	learn: 0.6548731	total: 22.6ms	remaining: 429ms
1:	learn: 0.6253654	total: 30.9ms	remaining: 278ms
2:	learn: 0.5983279	total: 40.4ms	remaining: 229ms
3:	learn: 0.5760107	total: 54ms	remaining: 216ms
4:	learn: 0.5575109	total: 62.1ms	remaining: 186ms
5:	learn: 0.5417719	total: 70.8ms	remaining: 165ms
6:	learn: 0.5286705	total: 80.2ms	remaining: 149ms
7:	learn: 0.5174565	total: 89.1ms	remaining: 134ms
8:	learn: 0.5079688	total: 94.1ms	remaining: 115ms
9:	learn: 0.5000841	total: 103ms	remaining: 103ms
10:	learn: 0.4931135	total: 112ms	remaining: 91.3ms
11:	learn: 0.4868609	total: 123ms	remaining: 81.7ms
12:	learn: 0.4825855	total: 125ms	remaining: 67.5ms
13:	learn: 0.4782327	total: 133ms	remaining: 57.2ms
14:	learn: 0.4742199	total: 143ms	remaining: 47.5ms
15:	learn: 0.4709195	total: 147ms	remaining: 36.8ms
16:	learn: 0.4679939	total: 157ms	remaining: 27.7ms
17:	learn: 0.4656034	total: 160ms	remaining: 17.8ms
18:	learn: 0.4635120	total: 168ms	remaining: 8.86ms
19:	learn: 0.4620704	tota

#### using the default catboost encoder seems to get much better result

### Bayesian Search CV for hyperparameter tuning

In [45]:
# need to balance the data
from imblearn.over_sampling import SMOTE, BorderlineSMOTE, RandomOverSampler
# smt = SMOTE()
ros = RandomOverSampler(random_state=0)
X_train_re, y_train_re = ros.fit_resample(X_train, y_train)

In [46]:
from skopt import BayesSearchCV
# parameter ranges are specified by one of below
from skopt.space import Real, Categorical, Integer
from sklearn.metrics import make_scorer

cat1 = CatBoostClassifier(iterations=20,learning_rate=0.1,depth=7,loss_function='Logloss', 
                          cat_features=categorical_f,verbose=False)

param = {
    'iterations': Integer(10, 1000),
    'depth': Integer(1, 8),
    'learning_rate': Real(0.01, 1.0, 'log-uniform'),
    'random_strength': Real(1e-9, 10, 'log-uniform'),
    'bagging_temperature': Real(0.0, 1.0),
    'border_count': Integer(1, 255),
    'l2_leaf_reg': Integer(2, 30),
    'scale_pos_weight':Real(0.01, 1.0, 'uniform')
}

LogLoss = make_scorer(log_loss, greater_is_better=False, needs_proba=True)

# log-uniform: understand as search over p = exp(x) by varying x
opt = BayesSearchCV(
    cat1,
    param,
    scoring = LogLoss,
    n_iter=64,
    cv=5,
    random_state=42
)

# executes bayesian optimization
opt.fit(X_train_re, y_train_re)

BayesSearchCV(cv=5,
              estimator=<catboost.core.CatBoostClassifier object at 0x7fa17b3d6130>,
              n_iter=64, random_state=42,
              scoring=make_scorer(log_loss, greater_is_better=False, needs_proba=True),
              search_spaces={'bagging_temperature': Real(low=0.0, high=1.0, prior='uniform', transform='identity'),
                             'border_count': Integer(low=1, high=255, prior='uniform', transform='ident...
                             'iterations': Integer(low=10, high=1000, prior='uniform', transform='identity'),
                             'l2_leaf_reg': Integer(low=2, high=30, prior='uniform', transform='identity'),
                             'learning_rate': Real(low=0.01, high=1.0, prior='log-uniform', transform='identity'),
                             'random_strength': Real(low=1e-09, high=10, prior='log-uniform', transform='identity'),
                             'scale_pos_weight': Real(low=0.01, high=1.0, prior='uniform', t

In [48]:
opt.best_score_

-0.1523188638805765

In [100]:
df_xgb = df_copy.copy()
def convert_obj_to_int(self):
    
    object_list_columns = self.columns
    object_list_dtypes = self.dtypes
    new_col_suffix = '_int'
    for index in range(0,len(object_list_columns)):
        if object_list_dtypes[index] == object :
            self[object_list_columns[index]+new_col_suffix] = self[object_list_columns[index]].map( lambda  x: hash(x))
            self.drop([object_list_columns[index]],inplace=True,axis=1)
    return self
df_xgb = convert_obj_to_int(df_xgb)

In [101]:
X_train_xgb, X_test_xgb, y_train_xgb, y_test_xgb = train_test_split(df_xgb, Y, test_size=0.3)

In [102]:
from xgboost import XGBClassifier
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_val_score
pbounds = {
    'learning_rate': (0.01, 1.0),
    'n_estimators': (100, 1000),
    'max_depth': (3,7),
    'subsample': (1.0, 1.0),  # Change for big datasets
    'colsample': (1.0, 1.0),  # Change for datasets with lots of features
    'gamma': (0, 10)}

LogLoss = make_scorer(log_loss, greater_is_better=False, needs_proba=True)

def xgboost_hyper_param(learning_rate,n_estimators,max_depth,subsample,colsample,gamma):
    max_depth = int(max_depth)
    n_estimators = int(n_estimators)
    clf = XGBClassifier(max_depth=max_depth,learning_rate=learning_rate,
                        n_estimators=n_estimators,gamma=gamma)
    
    return np.mean(cross_val_score(clf, X_train_xgb, y_train_xgb, cv=5, scoring=LogLoss))

optimizer = BayesianOptimization(f=xgboost_hyper_param, pbounds=pbounds)

In [103]:
optimizer.maximize(init_points=10, n_iter=50)

|   iter    |  target   | colsample |   gamma   | learni... | max_depth | n_esti... | subsample |
-------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m-0.4533  [0m | [0m 1.0     [0m | [0m 7.748   [0m | [0m 0.7445  [0m | [0m 6.968   [0m | [0m 986.1   [0m | [0m 1.0     [0m |
| [95m 2       [0m | [95m-0.4527  [0m | [95m 1.0     [0m | [95m 1.431   [0m | [95m 0.3249  [0m | [95m 3.068   [0m | [95m 457.8   [0m | [95m 1.0     [0m |
| [0m 3       [0m | [0m-0.5153  [0m | [0m 1.0     [0m | [0m 1.643   [0m | [0m 0.9103  [0m | [0m 6.73    [0m | [0m 847.8   [0m | [0m 1.0     [0m |
| [95m 4       [0m | [95m-0.4512  [0m | [95m 1.0     [0m | [95m 5.385   [0m | [95m 0.525   [0m | [95m 6.988   [0m | [95m 937.5   [0m | [95m 1.0     [0m |
| [0m 5       [0m | [0m-0.4562  [0m | [0m 1.0     [0m | [0m 5.462   [0m | [0m 0.8011  [0m | [0m 5.34    [0m | [0m 662.0   [0m 

| [0m 6       [0m | [0m-0.4514  [0m | [0m 1.0     [0m | [0m 8.76    [0m | [0m 0.2778  [0m | [0m 6.078   [0m | [0m 689.5   [0m | [0m 1.0     [0m |
| [95m 7       [0m | [95m-0.4505  [0m | [95m 1.0     [0m | [95m 4.404   [0m | [95m 0.3955  [0m | [95m 5.521   [0m | [95m 142.0   [0m | [95m 1.0     [0m |
| [0m 8       [0m | [0m-0.4621  [0m | [0m 1.0     [0m | [0m 3.644   [0m | [0m 0.6652  [0m | [0m 6.925   [0m | [0m 434.3   [0m | [0m 1.0     [0m |
| [0m 9       [0m | [0m-0.4512  [0m | [0m 1.0     [0m | [0m 3.88    [0m | [0m 0.07737 [0m | [0m 4.668   [0m | [0m 868.2   [0m | [0m 1.0     [0m |
| [0m 10      [0m | [0m-0.4684  [0m | [0m 1.0     [0m | [0m 0.2537  [0m | [0m 0.404   [0m | [0m 3.364   [0m | [0m 423.5   [0m | [0m 1.0     [0m |


| [0m 11      [0m | [0m-0.4515  [0m | [0m 1.0     [0m | [0m 4.488   [0m | [0m 0.1349  [0m | [0m 4.726   [0m | [0m 140.4   [0m | [0m 1.0     [0m |
| [0m 12      [0m | [0m-0.4542  [0m | [0m 1.0     [0m | [0m 5.029   [0m | [0m 0.8134  [0m | [0m 4.159   [0m | [0m 883.7   [0m | [0m 1.0     [0m |
| [0m 13      [0m | [0m-0.4542  [0m | [0m 1.0     [0m | [0m 7.08    [0m | [0m 0.9955  [0m | [0m 5.099   [0m | [0m 958.4   [0m | [0m 1.0     [0m |
| [0m 14      [0m | [0m-0.4527  [0m | [0m 1.0     [0m | [0m 9.404   [0m | [0m 0.4247  [0m | [0m 4.486   [0m | [0m 162.3   [0m | [0m 1.0     [0m |
| [0m 15      [0m | [0m-0.4543  [0m | [0m 1.0     [0m | [0m 2.205   [0m | [0m 0.6103  [0m | [0m 3.114   [0m | [0m 709.1   [0m | [0m 1.0     [0m |


| [0m 16      [0m | [0m-0.4507  [0m | [0m 1.0     [0m | [0m 0.213   [0m | [0m 0.02439 [0m | [0m 4.078   [0m | [0m 917.3   [0m | [0m 1.0     [0m |
| [0m 17      [0m | [0m-0.4506  [0m | [0m 1.0     [0m | [0m 5.503   [0m | [0m 0.6011  [0m | [0m 5.993   [0m | [0m 480.0   [0m | [0m 1.0     [0m |
| [0m 18      [0m | [0m-0.4898  [0m | [0m 1.0     [0m | [0m 0.7581  [0m | [0m 0.8805  [0m | [0m 3.793   [0m | [0m 500.4   [0m | [0m 1.0     [0m |
| [0m 19      [0m | [0m-0.4544  [0m | [0m 1.0     [0m | [0m 0.03033 [0m | [0m 0.1744  [0m | [0m 4.95    [0m | [0m 181.5   [0m | [0m 1.0     [0m |
| [0m 20      [0m | [0m-0.4525  [0m | [0m 1.0     [0m | [0m 9.816   [0m | [0m 0.1972  [0m | [0m 5.033   [0m | [0m 904.5   [0m | [0m 1.0     [0m |
| [0m 21      [0m | [0m-0.4524  [0m | [0m 1.0     [0m | [0m 9.857   [0m | [0m 0.5109  [0m | [0m 4.808   [0m | [0m 203.3   [0m | [0m 1.0     [0m |


| [0m 22      [0m | [0m-0.4592  [0m | [0m 1.0     [0m | [0m 0.8249  [0m | [0m 0.438   [0m | [0m 4.446   [0m | [0m 222.4   [0m | [0m 1.0     [0m |
| [0m 23      [0m | [0m-0.4526  [0m | [0m 1.0     [0m | [0m 9.923   [0m | [0m 0.08167 [0m | [0m 5.403   [0m | [0m 732.7   [0m | [0m 1.0     [0m |
| [0m 24      [0m | [0m-0.4552  [0m | [0m 1.0     [0m | [0m 1.07    [0m | [0m 0.4877  [0m | [0m 3.6     [0m | [0m 751.6   [0m | [0m 1.0     [0m |
| [0m 25      [0m | [0m-0.4534  [0m | [0m 1.0     [0m | [0m 9.897   [0m | [0m 0.571   [0m | [0m 4.358   [0m | [0m 775.0   [0m | [0m 1.0     [0m |
| [0m 26      [0m | [0m-0.4514  [0m | [0m 1.0     [0m | [0m 9.94    [0m | [0m 0.5594  [0m | [0m 6.6     [0m | [0m 468.8   [0m | [0m 1.0     [0m |


| [0m 27      [0m | [0m-0.5299  [0m | [0m 1.0     [0m | [0m 0.543   [0m | [0m 0.763   [0m | [0m 5.134   [0m | [0m 796.6   [0m | [0m 1.0     [0m |
| [0m 28      [0m | [0m-0.4527  [0m | [0m 1.0     [0m | [0m 9.945   [0m | [0m 0.3388  [0m | [0m 4.544   [0m | [0m 624.7   [0m | [0m 1.0     [0m |
| [0m 29      [0m | [0m-0.8089  [0m | [0m 1.0     [0m | [0m 0.03914 [0m | [0m 0.8161  [0m | [0m 6.232   [0m | [0m 598.0   [0m | [0m 1.0     [0m |
| [0m 30      [0m | [0m-0.4536  [0m | [0m 1.0     [0m | [0m 9.954   [0m | [0m 0.7148  [0m | [0m 3.671   [0m | [0m 293.2   [0m | [0m 1.0     [0m |
| [0m 31      [0m | [0m-0.5194  [0m | [0m 1.0     [0m | [0m 0.4657  [0m | [0m 0.5851  [0m | [0m 5.764   [0m | [0m 344.8   [0m | [0m 1.0     [0m |


| [0m 32      [0m | [0m-0.4524  [0m | [0m 1.0     [0m | [0m 9.847   [0m | [0m 0.6932  [0m | [0m 6.75    [0m | [0m 100.1   [0m | [0m 1.0     [0m |
| [0m 33      [0m | [0m-0.451   [0m | [0m 1.0     [0m | [0m 8.392   [0m | [0m 0.4552  [0m | [0m 6.992   [0m | [0m 259.4   [0m | [0m 1.0     [0m |
| [0m 34      [0m | [0m-0.4538  [0m | [0m 1.0     [0m | [0m 9.801   [0m | [0m 0.3632  [0m | [0m 3.008   [0m | [0m 643.0   [0m | [0m 1.0     [0m |
| [0m 35      [0m | [0m-0.4526  [0m | [0m 1.0     [0m | [0m 9.728   [0m | [0m 0.09533 [0m | [0m 6.456   [0m | [0m 389.7   [0m | [0m 1.0     [0m |
| [0m 36      [0m | [0m-0.4535  [0m | [0m 1.0     [0m | [0m 9.85    [0m | [0m 0.2501  [0m | [0m 4.184   [0m | [0m 535.9   [0m | [0m 1.0     [0m |
| [0m 37      [0m | [0m-0.6578  [0m | [0m 1.0     [0m | [0m 0.07147 [0m | [0m 0.6241  [0m | [0m 5.312   [0m | [0m 999.9   [0m | [0m 1.0     [0m |


| [0m 38      [0m | [0m-0.4544  [0m | [0m 1.0     [0m | [0m 0.5552  [0m | [0m 0.2627  [0m | [0m 3.915   [0m | [0m 312.3   [0m | [0m 1.0     [0m |
| [0m 39      [0m | [0m-0.4672  [0m | [0m 1.0     [0m | [0m 0.4448  [0m | [0m 0.2458  [0m | [0m 6.359   [0m | [0m 116.4   [0m | [0m 1.0     [0m |
| [0m 40      [0m | [0m-0.4973  [0m | [0m 1.0     [0m | [0m 0.3907  [0m | [0m 0.3591  [0m | [0m 6.934   [0m | [0m 276.9   [0m | [0m 1.0     [0m |
| [0m 41      [0m | [0m-0.4534  [0m | [0m 1.0     [0m | [0m 8.862   [0m | [0m 0.9055  [0m | [0m 3.049   [0m | [0m 241.1   [0m | [0m 1.0     [0m |
| [0m 42      [0m | [0m-0.4539  [0m | [0m 1.0     [0m | [0m 9.566   [0m | [0m 0.7019  [0m | [0m 3.572   [0m | [0m 975.2   [0m | [0m 1.0     [0m |


| [0m 43      [0m | [0m-0.5783  [0m | [0m 1.0     [0m | [0m 0.2911  [0m | [0m 0.6635  [0m | [0m 6.545   [0m | [0m 553.0   [0m | [0m 1.0     [0m |
| [0m 44      [0m | [0m-0.5674  [0m | [0m 1.0     [0m | [0m 0.1072  [0m | [0m 0.6946  [0m | [0m 4.674   [0m | [0m 371.9   [0m | [0m 1.0     [0m |
| [0m 45      [0m | [0m-0.4519  [0m | [0m 1.0     [0m | [0m 9.891   [0m | [0m 0.3139  [0m | [0m 5.889   [0m | [0m 405.4   [0m | [0m 1.0     [0m |
| [0m 46      [0m | [0m-0.4539  [0m | [0m 1.0     [0m | [0m 9.987   [0m | [0m 0.4431  [0m | [0m 3.394   [0m | [0m 520.3   [0m | [0m 1.0     [0m |
| [0m 47      [0m | [0m-0.4719  [0m | [0m 1.0     [0m | [0m 0.07443 [0m | [0m 0.2133  [0m | [0m 3.122   [0m | [0m 676.1   [0m | [0m 1.0     [0m |


| [0m 48      [0m | [0m-0.4538  [0m | [0m 1.0     [0m | [0m 9.578   [0m | [0m 0.3843  [0m | [0m 3.097   [0m | [0m 324.6   [0m | [0m 1.0     [0m |
| [0m 49      [0m | [0m-0.5977  [0m | [0m 1.0     [0m | [0m 0.1403  [0m | [0m 0.9603  [0m | [0m 3.299   [0m | [0m 633.8   [0m | [0m 1.0     [0m |
| [0m 50      [0m | [0m-0.64    [0m | [0m 1.0     [0m | [0m 0.08424 [0m | [0m 0.6028  [0m | [0m 6.284   [0m | [0m 724.4   [0m | [0m 1.0     [0m |
| [0m 51      [0m | [0m-0.5369  [0m | [0m 1.0     [0m | [0m 0.3856  [0m | [0m 0.9862  [0m | [0m 3.817   [0m | [0m 469.9   [0m | [0m 1.0     [0m |
| [0m 52      [0m | [0m-0.4532  [0m | [0m 1.0     [0m | [0m 8.073   [0m | [0m 0.2848  [0m | [0m 3.063   [0m | [0m 913.4   [0m | [0m 1.0     [0m |
| [0m 53      [0m | [0m-0.5244  [0m | [0m 1.0     [0m | [0m 0.5817  [0m | [0m 0.7472  [0m | [0m 5.982   [0m | [0m 907.5   [0m | [0m 1.0     [0m |


| [0m 54      [0m | [0m-0.4509  [0m | [0m 1.0     [0m | [0m 6.17    [0m | [0m 0.3339  [0m | [0m 5.102   [0m | [0m 924.1   [0m | [0m 1.0     [0m |
| [0m 55      [0m | [0m-0.4532  [0m | [0m 1.0     [0m | [0m 4.731   [0m | [0m 0.6157  [0m | [0m 6.863   [0m | [0m 699.2   [0m | [0m 1.0     [0m |
| [0m 56      [0m | [0m-0.4539  [0m | [0m 1.0     [0m | [0m 9.851   [0m | [0m 0.1876  [0m | [0m 3.929   [0m | [0m 743.9   [0m | [0m 1.0     [0m |
| [0m 57      [0m | [0m-0.4518  [0m | [0m 1.0     [0m | [0m 9.919   [0m | [0m 0.1994  [0m | [0m 5.272   [0m | [0m 450.1   [0m | [0m 1.0     [0m |
| [0m 58      [0m | [0m-0.4507  [0m | [0m 1.0     [0m | [0m 9.017   [0m | [0m 0.4788  [0m | [0m 6.768   [0m | [0m 761.5   [0m | [0m 1.0     [0m |


| [0m 59      [0m | [0m-0.4854  [0m | [0m 1.0     [0m | [0m 0.1283  [0m | [0m 0.6028  [0m | [0m 3.934   [0m | [0m 250.4   [0m | [0m 1.0     [0m |
| [0m 60      [0m | [0m-0.4543  [0m | [0m 1.0     [0m | [0m 0.05016 [0m | [0m 0.2218  [0m | [0m 3.605   [0m | [0m 194.9   [0m | [0m 1.0     [0m |


In [105]:
optimizer.max

{'target': -0.45048454820444545,
 'params': {'colsample': 1.0,
  'gamma': 4.4036150622160655,
  'learning_rate': 0.39547097261826947,
  'max_depth': 5.521033605627301,
  'n_estimators': 141.9601037125904,
  'subsample': 1.0}}

In [54]:
from sklearn.metrics import make_scorer
LogLoss = make_scorer(log_loss, greater_is_better=False, needs_proba=True)

best_params={'bagging_temperature': 0.0,
 'border_count': 48,
 'depth': 8,
 'iterations': 759,
 'l2_leaf_reg': 3,
 'learning_rate': 0.01,
 'random_strength': 1e-09,
 'scale_pos_weight': 1.0}

# best_params['iterations'] = 1000

In [50]:
opt_clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', CatBoostClassifier(**best_params,od_type='Iter', loss_function='Logloss'))])

from sklearn.model_selection import cross_val_score
log_ = cross_val_score(estimator = opt_clf, X = X_train_re.values, y = y_train_re.values.reshape(-1,1), 
                             cv = 5, scoring=LogLoss)

  elif pd.api.types.is_categorical(cols):


0:	learn: 0.1952359	total: 6.73ms	remaining: 6.72s
1:	learn: 0.1895334	total: 14.2ms	remaining: 7.11s
2:	learn: 0.1887291	total: 20.4ms	remaining: 6.76s
3:	learn: 0.1882983	total: 26.5ms	remaining: 6.61s
4:	learn: 0.1700176	total: 32.8ms	remaining: 6.52s
5:	learn: 0.1693073	total: 38.7ms	remaining: 6.42s
6:	learn: 0.1678611	total: 44.9ms	remaining: 6.36s
7:	learn: 0.1672434	total: 51.2ms	remaining: 6.34s
8:	learn: 0.1649212	total: 58.4ms	remaining: 6.43s
9:	learn: 0.1626958	total: 68.5ms	remaining: 6.78s
10:	learn: 0.1617119	total: 75ms	remaining: 6.75s
11:	learn: 0.1604154	total: 81.3ms	remaining: 6.69s
12:	learn: 0.1583953	total: 87.5ms	remaining: 6.64s
13:	learn: 0.1533844	total: 93.5ms	remaining: 6.58s
14:	learn: 0.1516842	total: 99.1ms	remaining: 6.5s
15:	learn: 0.1507243	total: 105ms	remaining: 6.47s
16:	learn: 0.1495447	total: 111ms	remaining: 6.43s
17:	learn: 0.1485590	total: 117ms	remaining: 6.4s
18:	learn: 0.1479176	total: 124ms	remaining: 6.38s
19:	learn: 0.1473076	total: 12

162:	learn: 0.1049265	total: 1.2s	remaining: 6.17s
163:	learn: 0.1048240	total: 1.22s	remaining: 6.21s
164:	learn: 0.1044531	total: 1.24s	remaining: 6.26s
165:	learn: 0.1043234	total: 1.25s	remaining: 6.3s
166:	learn: 0.1042118	total: 1.27s	remaining: 6.34s
167:	learn: 0.1040699	total: 1.28s	remaining: 6.36s
168:	learn: 0.1038910	total: 1.29s	remaining: 6.35s
169:	learn: 0.1036909	total: 1.3s	remaining: 6.34s
170:	learn: 0.1034522	total: 1.3s	remaining: 6.32s
171:	learn: 0.1032589	total: 1.31s	remaining: 6.31s
172:	learn: 0.1028909	total: 1.32s	remaining: 6.29s
173:	learn: 0.1026151	total: 1.32s	remaining: 6.28s
174:	learn: 0.1023135	total: 1.33s	remaining: 6.27s
175:	learn: 0.1021617	total: 1.33s	remaining: 6.25s
176:	learn: 0.1020234	total: 1.34s	remaining: 6.24s
177:	learn: 0.1019950	total: 1.35s	remaining: 6.22s
178:	learn: 0.1018942	total: 1.35s	remaining: 6.21s
179:	learn: 0.1017929	total: 1.36s	remaining: 6.2s
180:	learn: 0.1015954	total: 1.37s	remaining: 6.19s
181:	learn: 0.101

347:	learn: 0.0847334	total: 2.58s	remaining: 4.84s
348:	learn: 0.0847333	total: 2.59s	remaining: 4.83s
349:	learn: 0.0847333	total: 2.59s	remaining: 4.81s
350:	learn: 0.0847333	total: 2.6s	remaining: 4.8s
351:	learn: 0.0847332	total: 2.6s	remaining: 4.79s
352:	learn: 0.0847331	total: 2.6s	remaining: 4.77s
353:	learn: 0.0847331	total: 2.61s	remaining: 4.76s
354:	learn: 0.0847331	total: 2.61s	remaining: 4.74s
355:	learn: 0.0847330	total: 2.61s	remaining: 4.73s
356:	learn: 0.0847330	total: 2.62s	remaining: 4.71s
357:	learn: 0.0847329	total: 2.62s	remaining: 4.7s
358:	learn: 0.0847327	total: 2.62s	remaining: 4.68s
359:	learn: 0.0847327	total: 2.63s	remaining: 4.67s
360:	learn: 0.0847326	total: 2.63s	remaining: 4.66s
361:	learn: 0.0847326	total: 2.63s	remaining: 4.64s
362:	learn: 0.0847325	total: 2.64s	remaining: 4.63s
363:	learn: 0.0847324	total: 2.65s	remaining: 4.63s
364:	learn: 0.0847323	total: 2.66s	remaining: 4.63s
365:	learn: 0.0847323	total: 2.67s	remaining: 4.62s
366:	learn: 0.084

522:	learn: 0.0798890	total: 3.56s	remaining: 3.25s
523:	learn: 0.0798014	total: 3.57s	remaining: 3.25s
524:	learn: 0.0798010	total: 3.58s	remaining: 3.24s
525:	learn: 0.0797398	total: 3.58s	remaining: 3.23s
526:	learn: 0.0797188	total: 3.59s	remaining: 3.22s
527:	learn: 0.0796957	total: 3.6s	remaining: 3.22s
528:	learn: 0.0796957	total: 3.6s	remaining: 3.21s
529:	learn: 0.0796953	total: 3.61s	remaining: 3.2s
530:	learn: 0.0796549	total: 3.61s	remaining: 3.19s
531:	learn: 0.0796127	total: 3.62s	remaining: 3.18s
532:	learn: 0.0794914	total: 3.62s	remaining: 3.17s
533:	learn: 0.0794164	total: 3.63s	remaining: 3.17s
534:	learn: 0.0794145	total: 3.63s	remaining: 3.16s
535:	learn: 0.0794008	total: 3.64s	remaining: 3.15s
536:	learn: 0.0793911	total: 3.65s	remaining: 3.14s
537:	learn: 0.0793859	total: 3.65s	remaining: 3.14s
538:	learn: 0.0792851	total: 3.66s	remaining: 3.13s
539:	learn: 0.0792188	total: 3.67s	remaining: 3.12s
540:	learn: 0.0792158	total: 3.67s	remaining: 3.11s
541:	learn: 0.0

725:	learn: 0.0769915	total: 4.53s	remaining: 1.71s
726:	learn: 0.0769915	total: 4.53s	remaining: 1.7s
727:	learn: 0.0769914	total: 4.54s	remaining: 1.7s
728:	learn: 0.0769914	total: 4.54s	remaining: 1.69s
729:	learn: 0.0769914	total: 4.56s	remaining: 1.69s
730:	learn: 0.0769914	total: 4.57s	remaining: 1.68s
731:	learn: 0.0769912	total: 4.57s	remaining: 1.67s
732:	learn: 0.0769912	total: 4.58s	remaining: 1.67s
733:	learn: 0.0769912	total: 4.58s	remaining: 1.66s
734:	learn: 0.0769911	total: 4.58s	remaining: 1.65s
735:	learn: 0.0769911	total: 4.59s	remaining: 1.65s
736:	learn: 0.0769911	total: 4.59s	remaining: 1.64s
737:	learn: 0.0769911	total: 4.59s	remaining: 1.63s
738:	learn: 0.0769910	total: 4.6s	remaining: 1.62s
739:	learn: 0.0769634	total: 4.61s	remaining: 1.62s
740:	learn: 0.0767671	total: 4.61s	remaining: 1.61s
741:	learn: 0.0767502	total: 4.62s	remaining: 1.61s
742:	learn: 0.0767136	total: 4.63s	remaining: 1.6s
743:	learn: 0.0766611	total: 4.63s	remaining: 1.59s
744:	learn: 0.07

907:	learn: 0.0712400	total: 5.5s	remaining: 557ms
908:	learn: 0.0712400	total: 5.5s	remaining: 551ms
909:	learn: 0.0712399	total: 5.51s	remaining: 546ms
910:	learn: 0.0712398	total: 5.52s	remaining: 540ms
911:	learn: 0.0712397	total: 5.53s	remaining: 533ms
912:	learn: 0.0712397	total: 5.53s	remaining: 527ms
913:	learn: 0.0712396	total: 5.54s	remaining: 521ms
914:	learn: 0.0711962	total: 5.55s	remaining: 515ms
915:	learn: 0.0711400	total: 5.55s	remaining: 509ms
916:	learn: 0.0711123	total: 5.56s	remaining: 503ms
917:	learn: 0.0711123	total: 5.57s	remaining: 497ms
918:	learn: 0.0711123	total: 5.57s	remaining: 491ms
919:	learn: 0.0711122	total: 5.57s	remaining: 485ms
920:	learn: 0.0709982	total: 5.58s	remaining: 478ms
921:	learn: 0.0708254	total: 5.58s	remaining: 472ms
922:	learn: 0.0708252	total: 5.59s	remaining: 466ms
923:	learn: 0.0708249	total: 5.59s	remaining: 460ms
924:	learn: 0.0708249	total: 5.59s	remaining: 454ms
925:	learn: 0.0708247	total: 5.6s	remaining: 447ms
926:	learn: 0.0

  elif pd.api.types.is_categorical(cols):


0:	learn: 0.3314638	total: 11.3ms	remaining: 11.3s
1:	learn: 0.2135021	total: 18.9ms	remaining: 9.43s
2:	learn: 0.2102456	total: 29.9ms	remaining: 9.94s
3:	learn: 0.2079882	total: 39.3ms	remaining: 9.78s
4:	learn: 0.2008222	total: 47.8ms	remaining: 9.51s
5:	learn: 0.1996551	total: 57.9ms	remaining: 9.59s
6:	learn: 0.1978723	total: 66ms	remaining: 9.36s
7:	learn: 0.1965072	total: 77ms	remaining: 9.54s
8:	learn: 0.1950340	total: 84.4ms	remaining: 9.29s
9:	learn: 0.1739783	total: 93.1ms	remaining: 9.22s
10:	learn: 0.1716597	total: 102ms	remaining: 9.13s
11:	learn: 0.1705896	total: 110ms	remaining: 9.04s
12:	learn: 0.1639664	total: 121ms	remaining: 9.15s
13:	learn: 0.1607966	total: 129ms	remaining: 9.12s
14:	learn: 0.1591456	total: 143ms	remaining: 9.36s
15:	learn: 0.1580142	total: 152ms	remaining: 9.32s
16:	learn: 0.1571165	total: 159ms	remaining: 9.2s
17:	learn: 0.1563830	total: 168ms	remaining: 9.14s
18:	learn: 0.1551947	total: 175ms	remaining: 9.05s
19:	learn: 0.1544958	total: 184ms	re

175:	learn: 0.1071155	total: 1.4s	remaining: 6.57s
176:	learn: 0.1070542	total: 1.41s	remaining: 6.57s
177:	learn: 0.1069120	total: 1.42s	remaining: 6.57s
178:	learn: 0.1067948	total: 1.43s	remaining: 6.57s
179:	learn: 0.1066094	total: 1.44s	remaining: 6.56s
180:	learn: 0.1065702	total: 1.45s	remaining: 6.55s
181:	learn: 0.1063923	total: 1.46s	remaining: 6.54s
182:	learn: 0.1062078	total: 1.46s	remaining: 6.54s
183:	learn: 0.1061200	total: 1.47s	remaining: 6.53s
184:	learn: 0.1059226	total: 1.48s	remaining: 6.52s
185:	learn: 0.1057913	total: 1.49s	remaining: 6.52s
186:	learn: 0.1056998	total: 1.5s	remaining: 6.51s
187:	learn: 0.1055627	total: 1.5s	remaining: 6.5s
188:	learn: 0.1055040	total: 1.51s	remaining: 6.49s
189:	learn: 0.1054522	total: 1.52s	remaining: 6.49s
190:	learn: 0.1053727	total: 1.53s	remaining: 6.48s
191:	learn: 0.1053113	total: 1.54s	remaining: 6.47s
192:	learn: 0.1053109	total: 1.54s	remaining: 6.45s
193:	learn: 0.1053107	total: 1.54s	remaining: 6.42s
194:	learn: 0.10

377:	learn: 0.1034806	total: 2.37s	remaining: 3.91s
378:	learn: 0.1034805	total: 2.38s	remaining: 3.9s
379:	learn: 0.1034804	total: 2.38s	remaining: 3.89s
380:	learn: 0.1034803	total: 2.39s	remaining: 3.88s
381:	learn: 0.1034795	total: 2.4s	remaining: 3.88s
382:	learn: 0.1034789	total: 2.4s	remaining: 3.87s
383:	learn: 0.1034788	total: 2.4s	remaining: 3.86s
384:	learn: 0.1034787	total: 2.41s	remaining: 3.85s
385:	learn: 0.1034785	total: 2.41s	remaining: 3.84s
386:	learn: 0.1034784	total: 2.42s	remaining: 3.83s
387:	learn: 0.1034783	total: 2.42s	remaining: 3.82s
388:	learn: 0.1034781	total: 2.42s	remaining: 3.81s
389:	learn: 0.1034780	total: 2.43s	remaining: 3.8s
390:	learn: 0.1034780	total: 2.43s	remaining: 3.79s
391:	learn: 0.1034779	total: 2.44s	remaining: 3.78s
392:	learn: 0.1034779	total: 2.44s	remaining: 3.77s
393:	learn: 0.1034779	total: 2.44s	remaining: 3.76s
394:	learn: 0.1034779	total: 2.44s	remaining: 3.75s
395:	learn: 0.1034778	total: 2.45s	remaining: 3.74s
396:	learn: 0.103

587:	learn: 0.1024826	total: 3.13s	remaining: 2.2s
588:	learn: 0.1024826	total: 3.14s	remaining: 2.19s
589:	learn: 0.1024826	total: 3.15s	remaining: 2.19s
590:	learn: 0.1024826	total: 3.15s	remaining: 2.18s
591:	learn: 0.1024826	total: 3.16s	remaining: 2.17s
592:	learn: 0.1024826	total: 3.16s	remaining: 2.17s
593:	learn: 0.1024826	total: 3.16s	remaining: 2.16s
594:	learn: 0.1024826	total: 3.17s	remaining: 2.16s
595:	learn: 0.1024826	total: 3.17s	remaining: 2.15s
596:	learn: 0.1022360	total: 3.18s	remaining: 2.15s
597:	learn: 0.1021136	total: 3.19s	remaining: 2.14s
598:	learn: 0.1020467	total: 3.2s	remaining: 2.14s
599:	learn: 0.1020466	total: 3.2s	remaining: 2.13s
600:	learn: 0.1020399	total: 3.21s	remaining: 2.13s
601:	learn: 0.1020287	total: 3.21s	remaining: 2.12s
602:	learn: 0.1020286	total: 3.22s	remaining: 2.12s
603:	learn: 0.1020210	total: 3.23s	remaining: 2.12s
604:	learn: 0.1020207	total: 3.23s	remaining: 2.11s
605:	learn: 0.1020204	total: 3.23s	remaining: 2.1s
606:	learn: 0.10

752:	learn: 0.1011266	total: 3.9s	remaining: 1.28s
753:	learn: 0.1011265	total: 3.91s	remaining: 1.27s
754:	learn: 0.1011264	total: 3.91s	remaining: 1.27s
755:	learn: 0.1011263	total: 3.92s	remaining: 1.26s
756:	learn: 0.1011263	total: 3.92s	remaining: 1.26s
757:	learn: 0.1011262	total: 3.92s	remaining: 1.25s
758:	learn: 0.1011261	total: 3.93s	remaining: 1.25s
759:	learn: 0.1011261	total: 3.93s	remaining: 1.24s
760:	learn: 0.1011260	total: 3.94s	remaining: 1.24s
761:	learn: 0.1011260	total: 3.94s	remaining: 1.23s
762:	learn: 0.1011259	total: 3.94s	remaining: 1.22s
763:	learn: 0.1011259	total: 3.94s	remaining: 1.22s
764:	learn: 0.1011258	total: 3.95s	remaining: 1.21s
765:	learn: 0.1011255	total: 3.95s	remaining: 1.21s
766:	learn: 0.1011255	total: 3.96s	remaining: 1.2s
767:	learn: 0.1011252	total: 3.96s	remaining: 1.2s
768:	learn: 0.1011251	total: 3.96s	remaining: 1.19s
769:	learn: 0.1011251	total: 3.96s	remaining: 1.18s
770:	learn: 0.1011250	total: 3.97s	remaining: 1.18s
771:	learn: 0.1

913:	learn: 0.1011106	total: 4.48s	remaining: 421ms
914:	learn: 0.1011105	total: 4.48s	remaining: 416ms
915:	learn: 0.1011105	total: 4.48s	remaining: 411ms
916:	learn: 0.1011105	total: 4.49s	remaining: 406ms
917:	learn: 0.1011104	total: 4.49s	remaining: 401ms
918:	learn: 0.1011104	total: 4.49s	remaining: 396ms
919:	learn: 0.1011100	total: 4.5s	remaining: 391ms
920:	learn: 0.1011100	total: 4.5s	remaining: 386ms
921:	learn: 0.1011099	total: 4.5s	remaining: 381ms
922:	learn: 0.1011099	total: 4.51s	remaining: 376ms
923:	learn: 0.1011098	total: 4.51s	remaining: 371ms
924:	learn: 0.1011095	total: 4.51s	remaining: 366ms
925:	learn: 0.1011094	total: 4.52s	remaining: 361ms
926:	learn: 0.1011088	total: 4.52s	remaining: 356ms
927:	learn: 0.1011088	total: 4.52s	remaining: 351ms
928:	learn: 0.1011087	total: 4.53s	remaining: 346ms
929:	learn: 0.1011086	total: 4.53s	remaining: 341ms
930:	learn: 0.1011086	total: 4.53s	remaining: 336ms
931:	learn: 0.1011081	total: 4.54s	remaining: 331ms
932:	learn: 0.1

  elif pd.api.types.is_categorical(cols):


0:	learn: 0.2009748	total: 11.5ms	remaining: 11.5s
1:	learn: 0.1868242	total: 22ms	remaining: 11s
2:	learn: 0.1858569	total: 30.7ms	remaining: 10.2s
3:	learn: 0.1844987	total: 37.4ms	remaining: 9.32s
4:	learn: 0.1813436	total: 44ms	remaining: 8.75s
5:	learn: 0.1800176	total: 50.1ms	remaining: 8.3s
6:	learn: 0.1793629	total: 57.2ms	remaining: 8.12s
7:	learn: 0.1788460	total: 64.6ms	remaining: 8.01s
8:	learn: 0.1771825	total: 72ms	remaining: 7.92s
9:	learn: 0.1703957	total: 78.8ms	remaining: 7.8s
10:	learn: 0.1693439	total: 86.4ms	remaining: 7.77s
11:	learn: 0.1682304	total: 93.2ms	remaining: 7.67s
12:	learn: 0.1620443	total: 99.5ms	remaining: 7.55s
13:	learn: 0.1551035	total: 107ms	remaining: 7.55s
14:	learn: 0.1541630	total: 114ms	remaining: 7.47s
15:	learn: 0.1532937	total: 121ms	remaining: 7.42s
16:	learn: 0.1524826	total: 127ms	remaining: 7.35s
17:	learn: 0.1514100	total: 133ms	remaining: 7.26s
18:	learn: 0.1506449	total: 139ms	remaining: 7.18s
19:	learn: 0.1495541	total: 146ms	rema

164:	learn: 0.1079166	total: 1.2s	remaining: 6.05s
165:	learn: 0.1078499	total: 1.2s	remaining: 6.05s
166:	learn: 0.1078498	total: 1.21s	remaining: 6.03s
167:	learn: 0.1078292	total: 1.21s	remaining: 6.01s
168:	learn: 0.1078199	total: 1.22s	remaining: 5.99s
169:	learn: 0.1076399	total: 1.23s	remaining: 5.98s
170:	learn: 0.1075495	total: 1.23s	remaining: 5.97s
171:	learn: 0.1073572	total: 1.24s	remaining: 5.96s
172:	learn: 0.1070726	total: 1.24s	remaining: 5.95s
173:	learn: 0.1069215	total: 1.25s	remaining: 5.94s
174:	learn: 0.1069214	total: 1.25s	remaining: 5.91s
175:	learn: 0.1069214	total: 1.26s	remaining: 5.88s
176:	learn: 0.1069213	total: 1.26s	remaining: 5.86s
177:	learn: 0.1069213	total: 1.26s	remaining: 5.83s
178:	learn: 0.1069213	total: 1.27s	remaining: 5.81s
179:	learn: 0.1069213	total: 1.27s	remaining: 5.78s
180:	learn: 0.1069212	total: 1.27s	remaining: 5.76s
181:	learn: 0.1069212	total: 1.27s	remaining: 5.73s
182:	learn: 0.1069212	total: 1.28s	remaining: 5.71s
183:	learn: 0.

342:	learn: 0.0999218	total: 1.97s	remaining: 3.77s
343:	learn: 0.0999216	total: 1.98s	remaining: 3.77s
344:	learn: 0.0999214	total: 1.98s	remaining: 3.76s
345:	learn: 0.0999212	total: 1.98s	remaining: 3.75s
346:	learn: 0.0999212	total: 1.99s	remaining: 3.74s
347:	learn: 0.0999211	total: 1.99s	remaining: 3.73s
348:	learn: 0.0999210	total: 1.99s	remaining: 3.72s
349:	learn: 0.0999203	total: 2s	remaining: 3.71s
350:	learn: 0.0999202	total: 2s	remaining: 3.7s
351:	learn: 0.0999201	total: 2s	remaining: 3.69s
352:	learn: 0.0999200	total: 2.01s	remaining: 3.68s
353:	learn: 0.0999199	total: 2.01s	remaining: 3.67s
354:	learn: 0.0999198	total: 2.01s	remaining: 3.66s
355:	learn: 0.0999187	total: 2.02s	remaining: 3.65s
356:	learn: 0.0999184	total: 2.02s	remaining: 3.64s
357:	learn: 0.0999182	total: 2.02s	remaining: 3.63s
358:	learn: 0.0999180	total: 2.02s	remaining: 3.62s
359:	learn: 0.0999170	total: 2.03s	remaining: 3.61s
360:	learn: 0.0999167	total: 2.03s	remaining: 3.6s
361:	learn: 0.0999166	t

514:	learn: 0.0920563	total: 2.74s	remaining: 2.58s
515:	learn: 0.0919813	total: 2.75s	remaining: 2.58s
516:	learn: 0.0918828	total: 2.76s	remaining: 2.58s
517:	learn: 0.0918002	total: 2.76s	remaining: 2.57s
518:	learn: 0.0916923	total: 2.77s	remaining: 2.57s
519:	learn: 0.0916427	total: 2.77s	remaining: 2.56s
520:	learn: 0.0915573	total: 2.78s	remaining: 2.56s
521:	learn: 0.0914751	total: 2.79s	remaining: 2.55s
522:	learn: 0.0914440	total: 2.79s	remaining: 2.55s
523:	learn: 0.0913304	total: 2.8s	remaining: 2.54s
524:	learn: 0.0912519	total: 2.81s	remaining: 2.54s
525:	learn: 0.0910202	total: 2.81s	remaining: 2.54s
526:	learn: 0.0909296	total: 2.82s	remaining: 2.53s
527:	learn: 0.0908612	total: 2.83s	remaining: 2.53s
528:	learn: 0.0908605	total: 2.83s	remaining: 2.52s
529:	learn: 0.0908598	total: 2.83s	remaining: 2.51s
530:	learn: 0.0908591	total: 2.83s	remaining: 2.5s
531:	learn: 0.0908588	total: 2.84s	remaining: 2.5s
532:	learn: 0.0905505	total: 2.84s	remaining: 2.49s
533:	learn: 0.0

699:	learn: 0.0818682	total: 3.73s	remaining: 1.6s
700:	learn: 0.0817859	total: 3.73s	remaining: 1.59s
701:	learn: 0.0817409	total: 3.74s	remaining: 1.59s
702:	learn: 0.0816954	total: 3.75s	remaining: 1.58s
703:	learn: 0.0815687	total: 3.75s	remaining: 1.58s
704:	learn: 0.0815110	total: 3.76s	remaining: 1.57s
705:	learn: 0.0814715	total: 3.76s	remaining: 1.57s
706:	learn: 0.0814491	total: 3.77s	remaining: 1.56s
707:	learn: 0.0814488	total: 3.77s	remaining: 1.55s
708:	learn: 0.0814487	total: 3.77s	remaining: 1.55s
709:	learn: 0.0814480	total: 3.78s	remaining: 1.54s
710:	learn: 0.0814475	total: 3.78s	remaining: 1.54s
711:	learn: 0.0814469	total: 3.79s	remaining: 1.53s
712:	learn: 0.0814469	total: 3.79s	remaining: 1.52s
713:	learn: 0.0814469	total: 3.79s	remaining: 1.52s
714:	learn: 0.0814469	total: 3.8s	remaining: 1.51s
715:	learn: 0.0814468	total: 3.8s	remaining: 1.51s
716:	learn: 0.0814466	total: 3.8s	remaining: 1.5s
717:	learn: 0.0814465	total: 3.81s	remaining: 1.49s
718:	learn: 0.081

868:	learn: 0.0814270	total: 4.3s	remaining: 648ms
869:	learn: 0.0814269	total: 4.3s	remaining: 643ms
870:	learn: 0.0814269	total: 4.31s	remaining: 638ms
871:	learn: 0.0814266	total: 4.31s	remaining: 633ms
872:	learn: 0.0814266	total: 4.31s	remaining: 628ms
873:	learn: 0.0814266	total: 4.32s	remaining: 622ms
874:	learn: 0.0814265	total: 4.32s	remaining: 617ms
875:	learn: 0.0814263	total: 4.33s	remaining: 612ms
876:	learn: 0.0814259	total: 4.33s	remaining: 607ms
877:	learn: 0.0814258	total: 4.33s	remaining: 602ms
878:	learn: 0.0814257	total: 4.33s	remaining: 597ms
879:	learn: 0.0814257	total: 4.34s	remaining: 592ms
880:	learn: 0.0814256	total: 4.34s	remaining: 586ms
881:	learn: 0.0814256	total: 4.34s	remaining: 581ms
882:	learn: 0.0814251	total: 4.35s	remaining: 576ms
883:	learn: 0.0814248	total: 4.35s	remaining: 571ms
884:	learn: 0.0814247	total: 4.35s	remaining: 566ms
885:	learn: 0.0814246	total: 4.36s	remaining: 561ms
886:	learn: 0.0814246	total: 4.36s	remaining: 555ms
887:	learn: 0.

  elif pd.api.types.is_categorical(cols):


0:	learn: 0.2864407	total: 6.68ms	remaining: 6.67s
1:	learn: 0.2116597	total: 14ms	remaining: 6.97s
2:	learn: 0.2094066	total: 19.8ms	remaining: 6.58s
3:	learn: 0.2078325	total: 26.2ms	remaining: 6.53s
4:	learn: 0.2054525	total: 32.8ms	remaining: 6.53s
5:	learn: 0.2021154	total: 38.8ms	remaining: 6.42s
6:	learn: 0.2011759	total: 45.2ms	remaining: 6.41s
7:	learn: 0.2006515	total: 51.6ms	remaining: 6.39s
8:	learn: 0.1983430	total: 57.3ms	remaining: 6.31s
9:	learn: 0.1916652	total: 63.7ms	remaining: 6.3s
10:	learn: 0.1900505	total: 69.7ms	remaining: 6.26s
11:	learn: 0.1880597	total: 74.8ms	remaining: 6.16s
12:	learn: 0.1779090	total: 81ms	remaining: 6.15s
13:	learn: 0.1600696	total: 87.1ms	remaining: 6.13s
14:	learn: 0.1585713	total: 93.3ms	remaining: 6.13s
15:	learn: 0.1572005	total: 99ms	remaining: 6.09s
16:	learn: 0.1552994	total: 105ms	remaining: 6.08s
17:	learn: 0.1544127	total: 111ms	remaining: 6.08s
18:	learn: 0.1539448	total: 118ms	remaining: 6.1s
19:	learn: 0.1528303	total: 125ms

185:	learn: 0.1145531	total: 2.04s	remaining: 8.93s
186:	learn: 0.1145531	total: 2.05s	remaining: 8.91s
187:	learn: 0.1145531	total: 2.05s	remaining: 8.87s
188:	learn: 0.1145531	total: 2.06s	remaining: 8.84s
189:	learn: 0.1145530	total: 2.06s	remaining: 8.8s
190:	learn: 0.1145530	total: 2.07s	remaining: 8.77s
191:	learn: 0.1145527	total: 2.08s	remaining: 8.73s
192:	learn: 0.1145527	total: 2.08s	remaining: 8.7s
193:	learn: 0.1145527	total: 2.08s	remaining: 8.67s
194:	learn: 0.1145527	total: 2.09s	remaining: 8.63s
195:	learn: 0.1145527	total: 2.1s	remaining: 8.6s
196:	learn: 0.1145527	total: 2.1s	remaining: 8.56s
197:	learn: 0.1145527	total: 2.11s	remaining: 8.53s
198:	learn: 0.1145526	total: 2.11s	remaining: 8.5s
199:	learn: 0.1145524	total: 2.11s	remaining: 8.46s
200:	learn: 0.1145524	total: 2.12s	remaining: 8.42s
201:	learn: 0.1145524	total: 2.12s	remaining: 8.39s
202:	learn: 0.1145521	total: 2.13s	remaining: 8.36s
203:	learn: 0.1145518	total: 2.13s	remaining: 8.32s
204:	learn: 0.1145

401:	learn: 0.1130865	total: 2.81s	remaining: 4.17s
402:	learn: 0.1130864	total: 2.81s	remaining: 4.16s
403:	learn: 0.1130864	total: 2.81s	remaining: 4.15s
404:	learn: 0.1130864	total: 2.82s	remaining: 4.14s
405:	learn: 0.1130864	total: 2.82s	remaining: 4.13s
406:	learn: 0.1130863	total: 2.83s	remaining: 4.12s
407:	learn: 0.1130863	total: 2.83s	remaining: 4.11s
408:	learn: 0.1130863	total: 2.83s	remaining: 4.09s
409:	learn: 0.1130863	total: 2.83s	remaining: 4.08s
410:	learn: 0.1130863	total: 2.84s	remaining: 4.07s
411:	learn: 0.1130861	total: 2.84s	remaining: 4.05s
412:	learn: 0.1130859	total: 2.84s	remaining: 4.04s
413:	learn: 0.1130859	total: 2.85s	remaining: 4.03s
414:	learn: 0.1130857	total: 2.85s	remaining: 4.02s
415:	learn: 0.1130856	total: 2.86s	remaining: 4.01s
416:	learn: 0.1130856	total: 2.86s	remaining: 4s
417:	learn: 0.1130856	total: 2.86s	remaining: 3.99s
418:	learn: 0.1130856	total: 2.87s	remaining: 3.98s
419:	learn: 0.1130855	total: 2.87s	remaining: 3.96s
420:	learn: 0.1

567:	learn: 0.1123311	total: 3.37s	remaining: 2.57s
568:	learn: 0.1123311	total: 3.38s	remaining: 2.56s
569:	learn: 0.1123310	total: 3.38s	remaining: 2.55s
570:	learn: 0.1123310	total: 3.39s	remaining: 2.54s
571:	learn: 0.1123310	total: 3.39s	remaining: 2.54s
572:	learn: 0.1123310	total: 3.39s	remaining: 2.53s
573:	learn: 0.1123310	total: 3.4s	remaining: 2.52s
574:	learn: 0.1123310	total: 3.4s	remaining: 2.51s
575:	learn: 0.1123310	total: 3.4s	remaining: 2.51s
576:	learn: 0.1123310	total: 3.41s	remaining: 2.5s
577:	learn: 0.1123307	total: 3.41s	remaining: 2.49s
578:	learn: 0.1123307	total: 3.41s	remaining: 2.48s
579:	learn: 0.1123307	total: 3.42s	remaining: 2.48s
580:	learn: 0.1123306	total: 3.42s	remaining: 2.47s
581:	learn: 0.1123306	total: 3.43s	remaining: 2.46s
582:	learn: 0.1123306	total: 3.43s	remaining: 2.45s
583:	learn: 0.1123306	total: 3.43s	remaining: 2.45s
584:	learn: 0.1123305	total: 3.44s	remaining: 2.44s
585:	learn: 0.1123305	total: 3.44s	remaining: 2.43s
586:	learn: 0.11

742:	learn: 0.1122036	total: 3.95s	remaining: 1.37s
743:	learn: 0.1119357	total: 3.96s	remaining: 1.36s
744:	learn: 0.1118193	total: 3.97s	remaining: 1.36s
745:	learn: 0.1116214	total: 3.97s	remaining: 1.35s
746:	learn: 0.1115434	total: 3.98s	remaining: 1.35s
747:	learn: 0.1115433	total: 3.98s	remaining: 1.34s
748:	learn: 0.1115433	total: 3.99s	remaining: 1.33s
749:	learn: 0.1115433	total: 3.99s	remaining: 1.33s
750:	learn: 0.1115432	total: 3.99s	remaining: 1.32s
751:	learn: 0.1115431	total: 4s	remaining: 1.32s
752:	learn: 0.1115431	total: 4s	remaining: 1.31s
753:	learn: 0.1115431	total: 4s	remaining: 1.3s
754:	learn: 0.1115430	total: 4s	remaining: 1.3s
755:	learn: 0.1115429	total: 4.01s	remaining: 1.29s
756:	learn: 0.1115428	total: 4.01s	remaining: 1.29s
757:	learn: 0.1115428	total: 4.01s	remaining: 1.28s
758:	learn: 0.1115427	total: 4.02s	remaining: 1.27s
759:	learn: 0.1115426	total: 4.02s	remaining: 1.27s
760:	learn: 0.1115424	total: 4.02s	remaining: 1.26s
761:	learn: 0.1115423	tota

906:	learn: 0.1025616	total: 4.73s	remaining: 485ms
907:	learn: 0.1023683	total: 4.74s	remaining: 480ms
908:	learn: 0.1021847	total: 4.74s	remaining: 475ms
909:	learn: 0.1021098	total: 4.75s	remaining: 470ms
910:	learn: 0.1020688	total: 4.76s	remaining: 465ms
911:	learn: 0.1020676	total: 4.76s	remaining: 459ms
912:	learn: 0.1020676	total: 4.76s	remaining: 454ms
913:	learn: 0.1020676	total: 4.77s	remaining: 449ms
914:	learn: 0.1020675	total: 4.77s	remaining: 443ms
915:	learn: 0.1020674	total: 4.77s	remaining: 438ms
916:	learn: 0.1020673	total: 4.78s	remaining: 432ms
917:	learn: 0.1020672	total: 4.78s	remaining: 427ms
918:	learn: 0.1020672	total: 4.78s	remaining: 422ms
919:	learn: 0.1020671	total: 4.79s	remaining: 416ms
920:	learn: 0.1020670	total: 4.79s	remaining: 411ms
921:	learn: 0.1020668	total: 4.79s	remaining: 405ms
922:	learn: 0.1020657	total: 4.79s	remaining: 400ms
923:	learn: 0.1020655	total: 4.8s	remaining: 395ms
924:	learn: 0.1020654	total: 4.8s	remaining: 389ms
925:	learn: 0.

  elif pd.api.types.is_categorical(cols):


0:	learn: 0.2830581	total: 7.63ms	remaining: 7.62s
1:	learn: 0.2006858	total: 14.3ms	remaining: 7.14s
2:	learn: 0.1985698	total: 20.3ms	remaining: 6.75s
3:	learn: 0.1964717	total: 26.4ms	remaining: 6.57s
4:	learn: 0.1876025	total: 32.4ms	remaining: 6.45s
5:	learn: 0.1854953	total: 38.8ms	remaining: 6.43s
6:	learn: 0.1838850	total: 44.7ms	remaining: 6.34s
7:	learn: 0.1835831	total: 50.7ms	remaining: 6.29s
8:	learn: 0.1823372	total: 57ms	remaining: 6.28s
9:	learn: 0.1807868	total: 64.6ms	remaining: 6.4s
10:	learn: 0.1799244	total: 70.8ms	remaining: 6.37s
11:	learn: 0.1747592	total: 78.6ms	remaining: 6.47s
12:	learn: 0.1616256	total: 84.4ms	remaining: 6.41s
13:	learn: 0.1599099	total: 90.9ms	remaining: 6.4s
14:	learn: 0.1580021	total: 97.1ms	remaining: 6.38s
15:	learn: 0.1563925	total: 103ms	remaining: 6.35s
16:	learn: 0.1552623	total: 110ms	remaining: 6.35s
17:	learn: 0.1536669	total: 116ms	remaining: 6.32s
18:	learn: 0.1524845	total: 121ms	remaining: 6.27s
19:	learn: 0.1516077	total: 12

192:	learn: 0.1069110	total: 1.2s	remaining: 5.01s
193:	learn: 0.1065335	total: 1.21s	remaining: 5.01s
194:	learn: 0.1063550	total: 1.21s	remaining: 5s
195:	learn: 0.1060765	total: 1.22s	remaining: 5s
196:	learn: 0.1060760	total: 1.22s	remaining: 4.98s
197:	learn: 0.1060752	total: 1.23s	remaining: 4.96s
198:	learn: 0.1060639	total: 1.23s	remaining: 4.96s
199:	learn: 0.1058902	total: 1.24s	remaining: 4.95s
200:	learn: 0.1058311	total: 1.24s	remaining: 4.94s
201:	learn: 0.1056607	total: 1.25s	remaining: 4.94s
202:	learn: 0.1055032	total: 1.26s	remaining: 4.93s
203:	learn: 0.1052486	total: 1.26s	remaining: 4.92s
204:	learn: 0.1052484	total: 1.26s	remaining: 4.91s
205:	learn: 0.1051766	total: 1.27s	remaining: 4.9s
206:	learn: 0.1049892	total: 1.28s	remaining: 4.89s
207:	learn: 0.1048998	total: 1.28s	remaining: 4.89s
208:	learn: 0.1047101	total: 1.29s	remaining: 4.88s
209:	learn: 0.1044432	total: 1.3s	remaining: 4.88s
210:	learn: 0.1042533	total: 1.3s	remaining: 4.87s
211:	learn: 0.1040346	

383:	learn: 0.1001716	total: 1.97s	remaining: 3.16s
384:	learn: 0.1001715	total: 1.97s	remaining: 3.15s
385:	learn: 0.1001715	total: 1.98s	remaining: 3.15s
386:	learn: 0.1001715	total: 1.98s	remaining: 3.14s
387:	learn: 0.1001714	total: 1.99s	remaining: 3.13s
388:	learn: 0.1001711	total: 1.99s	remaining: 3.12s
389:	learn: 0.1001710	total: 1.99s	remaining: 3.12s
390:	learn: 0.1001710	total: 2s	remaining: 3.11s
391:	learn: 0.1001709	total: 2s	remaining: 3.1s
392:	learn: 0.1001709	total: 2s	remaining: 3.09s
393:	learn: 0.1001707	total: 2s	remaining: 3.08s
394:	learn: 0.1001706	total: 2.01s	remaining: 3.07s
395:	learn: 0.1001704	total: 2.01s	remaining: 3.06s
396:	learn: 0.1001704	total: 2.01s	remaining: 3.06s
397:	learn: 0.1001704	total: 2.02s	remaining: 3.05s
398:	learn: 0.1001704	total: 2.02s	remaining: 3.04s
399:	learn: 0.1001703	total: 2.02s	remaining: 3.03s
400:	learn: 0.1001703	total: 2.02s	remaining: 3.02s
401:	learn: 0.1001703	total: 2.03s	remaining: 3.02s
402:	learn: 0.1001702	tot

566:	learn: 0.0933694	total: 2.75s	remaining: 2.1s
567:	learn: 0.0933694	total: 2.75s	remaining: 2.09s
568:	learn: 0.0933694	total: 2.76s	remaining: 2.09s
569:	learn: 0.0933694	total: 2.76s	remaining: 2.08s
570:	learn: 0.0933694	total: 2.76s	remaining: 2.08s
571:	learn: 0.0933694	total: 2.77s	remaining: 2.07s
572:	learn: 0.0933694	total: 2.77s	remaining: 2.06s
573:	learn: 0.0933694	total: 2.77s	remaining: 2.06s
574:	learn: 0.0933694	total: 2.77s	remaining: 2.05s
575:	learn: 0.0933694	total: 2.78s	remaining: 2.04s
576:	learn: 0.0933694	total: 2.78s	remaining: 2.04s
577:	learn: 0.0933693	total: 2.78s	remaining: 2.03s
578:	learn: 0.0933693	total: 2.79s	remaining: 2.03s
579:	learn: 0.0933693	total: 2.79s	remaining: 2.02s
580:	learn: 0.0933693	total: 2.79s	remaining: 2.01s
581:	learn: 0.0933693	total: 2.8s	remaining: 2.01s
582:	learn: 0.0933693	total: 2.8s	remaining: 2s
583:	learn: 0.0933693	total: 2.8s	remaining: 2s
584:	learn: 0.0933693	total: 2.81s	remaining: 1.99s
585:	learn: 0.0933693	

746:	learn: 0.0875016	total: 3.52s	remaining: 1.19s
747:	learn: 0.0875016	total: 3.53s	remaining: 1.19s
748:	learn: 0.0875014	total: 3.53s	remaining: 1.18s
749:	learn: 0.0875012	total: 3.54s	remaining: 1.18s
750:	learn: 0.0875012	total: 3.54s	remaining: 1.17s
751:	learn: 0.0875011	total: 3.54s	remaining: 1.17s
752:	learn: 0.0875010	total: 3.54s	remaining: 1.16s
753:	learn: 0.0875008	total: 3.55s	remaining: 1.16s
754:	learn: 0.0875008	total: 3.55s	remaining: 1.15s
755:	learn: 0.0875008	total: 3.55s	remaining: 1.15s
756:	learn: 0.0875008	total: 3.56s	remaining: 1.14s
757:	learn: 0.0875008	total: 3.56s	remaining: 1.14s
758:	learn: 0.0875004	total: 3.56s	remaining: 1.13s
759:	learn: 0.0875002	total: 3.57s	remaining: 1.13s
760:	learn: 0.0875002	total: 3.57s	remaining: 1.12s
761:	learn: 0.0875001	total: 3.57s	remaining: 1.12s
762:	learn: 0.0875000	total: 3.58s	remaining: 1.11s
763:	learn: 0.0874999	total: 3.58s	remaining: 1.1s
764:	learn: 0.0874998	total: 3.58s	remaining: 1.1s
765:	learn: 0.

911:	learn: 0.0869999	total: 4.09s	remaining: 395ms
912:	learn: 0.0869998	total: 4.1s	remaining: 391ms
913:	learn: 0.0869996	total: 4.1s	remaining: 386ms
914:	learn: 0.0869995	total: 4.11s	remaining: 382ms
915:	learn: 0.0869995	total: 4.11s	remaining: 377ms
916:	learn: 0.0869733	total: 4.12s	remaining: 373ms
917:	learn: 0.0869247	total: 4.13s	remaining: 368ms
918:	learn: 0.0867725	total: 4.13s	remaining: 364ms
919:	learn: 0.0866178	total: 4.14s	remaining: 360ms
920:	learn: 0.0865844	total: 4.14s	remaining: 355ms
921:	learn: 0.0864924	total: 4.15s	remaining: 351ms
922:	learn: 0.0864420	total: 4.16s	remaining: 347ms
923:	learn: 0.0864208	total: 4.16s	remaining: 342ms
924:	learn: 0.0864208	total: 4.17s	remaining: 338ms
925:	learn: 0.0864207	total: 4.17s	remaining: 333ms
926:	learn: 0.0864207	total: 4.17s	remaining: 329ms
927:	learn: 0.0864207	total: 4.18s	remaining: 324ms
928:	learn: 0.0864207	total: 4.18s	remaining: 320ms
929:	learn: 0.0864207	total: 4.18s	remaining: 315ms
930:	learn: 0.

In [51]:
print("Log loss: {:.3f}".format(-log_.mean()))

Log loss: 0.437


In [55]:
opt_clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', CatBoostClassifier(**best_params,od_type='Iter', eval_metric='Logloss'))])

opt_clf.fit(X_train.values, y_train.values.reshape(-1,1))
y_pred = opt_clf.predict_proba(X_test.values)[:, 1]
print("model logloss: %.3f" % log_loss(y_test, y_pred))

  elif pd.api.types.is_categorical(cols):


0:	learn: 0.6697220	total: 12.1ms	remaining: 9.16s
1:	learn: 0.6461048	total: 21ms	remaining: 7.96s
2:	learn: 0.6253606	total: 30.3ms	remaining: 7.63s
3:	learn: 0.6026194	total: 39.4ms	remaining: 7.44s
4:	learn: 0.5823713	total: 47.6ms	remaining: 7.18s
5:	learn: 0.5646853	total: 56.3ms	remaining: 7.06s
6:	learn: 0.5427890	total: 64.4ms	remaining: 6.92s
7:	learn: 0.5263117	total: 74.8ms	remaining: 7.02s
8:	learn: 0.5050964	total: 83.6ms	remaining: 6.97s
9:	learn: 0.4910259	total: 92.7ms	remaining: 6.94s
10:	learn: 0.4739767	total: 101ms	remaining: 6.9s
11:	learn: 0.4613058	total: 111ms	remaining: 6.93s
12:	learn: 0.4496900	total: 119ms	remaining: 6.82s
13:	learn: 0.4379222	total: 127ms	remaining: 6.78s
14:	learn: 0.4200535	total: 137ms	remaining: 6.79s
15:	learn: 0.4075263	total: 147ms	remaining: 6.82s
16:	learn: 0.3957930	total: 156ms	remaining: 6.82s
17:	learn: 0.3844064	total: 165ms	remaining: 6.81s
18:	learn: 0.3731632	total: 175ms	remaining: 6.83s
19:	learn: 0.3631718	total: 184ms	

161:	learn: 0.1276554	total: 1.64s	remaining: 6.03s
162:	learn: 0.1275573	total: 1.65s	remaining: 6.02s
163:	learn: 0.1274133	total: 1.66s	remaining: 6.02s
164:	learn: 0.1273094	total: 1.67s	remaining: 6.01s
165:	learn: 0.1272235	total: 1.68s	remaining: 5.99s
166:	learn: 0.1271426	total: 1.69s	remaining: 5.98s
167:	learn: 0.1269909	total: 1.7s	remaining: 5.97s
168:	learn: 0.1268280	total: 1.7s	remaining: 5.95s
169:	learn: 0.1266857	total: 1.71s	remaining: 5.94s
170:	learn: 0.1265945	total: 1.73s	remaining: 5.93s
171:	learn: 0.1264581	total: 1.73s	remaining: 5.92s
172:	learn: 0.1263351	total: 1.75s	remaining: 5.91s
173:	learn: 0.1262526	total: 1.76s	remaining: 5.91s
174:	learn: 0.1261801	total: 1.76s	remaining: 5.89s
175:	learn: 0.1260545	total: 1.77s	remaining: 5.88s
176:	learn: 0.1259311	total: 1.79s	remaining: 5.88s
177:	learn: 0.1258206	total: 1.8s	remaining: 5.87s
178:	learn: 0.1257029	total: 1.81s	remaining: 5.86s
179:	learn: 0.1256298	total: 1.82s	remaining: 5.85s
180:	learn: 0.1

340:	learn: 0.1175038	total: 3.47s	remaining: 4.26s
341:	learn: 0.1174815	total: 3.48s	remaining: 4.25s
342:	learn: 0.1174488	total: 3.49s	remaining: 4.24s
343:	learn: 0.1174256	total: 3.5s	remaining: 4.23s
344:	learn: 0.1173906	total: 3.51s	remaining: 4.21s
345:	learn: 0.1173666	total: 3.52s	remaining: 4.2s
346:	learn: 0.1173308	total: 3.53s	remaining: 4.19s
347:	learn: 0.1172887	total: 3.54s	remaining: 4.18s
348:	learn: 0.1172738	total: 3.55s	remaining: 4.17s
349:	learn: 0.1172455	total: 3.56s	remaining: 4.16s
350:	learn: 0.1172042	total: 3.57s	remaining: 4.15s
351:	learn: 0.1171765	total: 3.58s	remaining: 4.13s
352:	learn: 0.1171601	total: 3.59s	remaining: 4.13s
353:	learn: 0.1171297	total: 3.61s	remaining: 4.13s
354:	learn: 0.1171151	total: 3.62s	remaining: 4.12s
355:	learn: 0.1170924	total: 3.64s	remaining: 4.12s
356:	learn: 0.1170646	total: 3.65s	remaining: 4.11s
357:	learn: 0.1170378	total: 3.66s	remaining: 4.1s
358:	learn: 0.1170192	total: 3.67s	remaining: 4.09s
359:	learn: 0.1

504:	learn: 0.1141477	total: 5.31s	remaining: 2.67s
505:	learn: 0.1141243	total: 5.32s	remaining: 2.66s
506:	learn: 0.1141019	total: 5.33s	remaining: 2.65s
507:	learn: 0.1140764	total: 5.34s	remaining: 2.64s
508:	learn: 0.1140572	total: 5.34s	remaining: 2.63s
509:	learn: 0.1140424	total: 5.35s	remaining: 2.61s
510:	learn: 0.1140280	total: 5.36s	remaining: 2.6s
511:	learn: 0.1140006	total: 5.37s	remaining: 2.59s
512:	learn: 0.1139734	total: 5.38s	remaining: 2.58s
513:	learn: 0.1139557	total: 5.4s	remaining: 2.57s
514:	learn: 0.1139331	total: 5.42s	remaining: 2.57s
515:	learn: 0.1139063	total: 5.43s	remaining: 2.56s
516:	learn: 0.1138838	total: 5.45s	remaining: 2.55s
517:	learn: 0.1138556	total: 5.46s	remaining: 2.54s
518:	learn: 0.1138414	total: 5.48s	remaining: 2.53s
519:	learn: 0.1138099	total: 5.49s	remaining: 2.52s
520:	learn: 0.1137878	total: 5.51s	remaining: 2.52s
521:	learn: 0.1137657	total: 5.52s	remaining: 2.51s
522:	learn: 0.1137566	total: 5.54s	remaining: 2.5s
523:	learn: 0.1

681:	learn: 0.1106971	total: 7.12s	remaining: 804ms
682:	learn: 0.1106705	total: 7.13s	remaining: 794ms
683:	learn: 0.1106401	total: 7.15s	remaining: 784ms
684:	learn: 0.1106275	total: 7.16s	remaining: 774ms
685:	learn: 0.1106119	total: 7.18s	remaining: 764ms
686:	learn: 0.1106032	total: 7.19s	remaining: 753ms
687:	learn: 0.1105781	total: 7.2s	remaining: 743ms
688:	learn: 0.1105638	total: 7.21s	remaining: 732ms
689:	learn: 0.1105493	total: 7.21s	remaining: 721ms
690:	learn: 0.1105369	total: 7.22s	remaining: 711ms
691:	learn: 0.1105068	total: 7.23s	remaining: 700ms
692:	learn: 0.1104911	total: 7.24s	remaining: 690ms
693:	learn: 0.1104771	total: 7.25s	remaining: 679ms
694:	learn: 0.1104650	total: 7.26s	remaining: 669ms
695:	learn: 0.1104485	total: 7.27s	remaining: 658ms
696:	learn: 0.1104188	total: 7.28s	remaining: 648ms
697:	learn: 0.1104145	total: 7.29s	remaining: 637ms
698:	learn: 0.1103983	total: 7.3s	remaining: 626ms
699:	learn: 0.1103805	total: 7.31s	remaining: 616ms
700:	learn: 0.

In [23]:
opt_cat = CatBoostClassifier(**best_params,od_type='Iter', loss_function='Logloss', cat_features=categorical_f)
# opt_cat.fit(X_train_re, y_train_re, cat_features=categorical_f)

log_2 = cross_val_score(estimator = opt_cat, X = X_train_re, y = y_train_re, 
                             cv = 5, scoring=LogLoss)

0:	learn: 0.6865870	total: 41ms	remaining: 40.9s
1:	learn: 0.6803749	total: 79.4ms	remaining: 39.6s
2:	learn: 0.6742512	total: 119ms	remaining: 39.5s
3:	learn: 0.6683201	total: 156ms	remaining: 38.9s
4:	learn: 0.6624911	total: 190ms	remaining: 37.8s
5:	learn: 0.6568084	total: 231ms	remaining: 38.4s
6:	learn: 0.6514418	total: 255ms	remaining: 36.2s
7:	learn: 0.6460545	total: 278ms	remaining: 34.5s
8:	learn: 0.6408656	total: 316ms	remaining: 34.8s
9:	learn: 0.6357314	total: 370ms	remaining: 36.6s
10:	learn: 0.6306905	total: 433ms	remaining: 39s
11:	learn: 0.6256938	total: 461ms	remaining: 38s
12:	learn: 0.6209260	total: 500ms	remaining: 38s
13:	learn: 0.6163016	total: 541ms	remaining: 38.1s
14:	learn: 0.6118400	total: 578ms	remaining: 37.9s
15:	learn: 0.6074414	total: 614ms	remaining: 37.8s
16:	learn: 0.6031800	total: 652ms	remaining: 37.7s
17:	learn: 0.5990325	total: 693ms	remaining: 37.8s
18:	learn: 0.5948505	total: 734ms	remaining: 37.9s
19:	learn: 0.5843899	total: 778ms	remaining: 38

164:	learn: 0.1762573	total: 8s	remaining: 40.5s
165:	learn: 0.1756429	total: 8.05s	remaining: 40.5s
166:	learn: 0.1750420	total: 8.1s	remaining: 40.4s
167:	learn: 0.1744571	total: 8.15s	remaining: 40.4s
168:	learn: 0.1739060	total: 8.2s	remaining: 40.3s
169:	learn: 0.1733683	total: 8.25s	remaining: 40.3s
170:	learn: 0.1728081	total: 8.3s	remaining: 40.3s
171:	learn: 0.1722671	total: 8.36s	remaining: 40.2s
172:	learn: 0.1717442	total: 8.4s	remaining: 40.2s
173:	learn: 0.1712161	total: 8.45s	remaining: 40.1s
174:	learn: 0.1707147	total: 8.5s	remaining: 40.1s
175:	learn: 0.1701831	total: 8.56s	remaining: 40.1s
176:	learn: 0.1696047	total: 8.61s	remaining: 40s
177:	learn: 0.1691396	total: 8.67s	remaining: 40s
178:	learn: 0.1685858	total: 8.72s	remaining: 40s
179:	learn: 0.1681419	total: 8.78s	remaining: 40s
180:	learn: 0.1676683	total: 8.84s	remaining: 40s
181:	learn: 0.1671478	total: 8.89s	remaining: 40s
182:	learn: 0.1666591	total: 8.95s	remaining: 39.9s
183:	learn: 0.1662493	total: 8.9

323:	learn: 0.1428015	total: 15.7s	remaining: 32.8s
324:	learn: 0.1427936	total: 15.8s	remaining: 32.7s
325:	learn: 0.1427905	total: 15.8s	remaining: 32.7s
326:	learn: 0.1426750	total: 15.8s	remaining: 32.6s
327:	learn: 0.1426411	total: 15.9s	remaining: 32.5s
328:	learn: 0.1425512	total: 15.9s	remaining: 32.5s
329:	learn: 0.1425420	total: 16s	remaining: 32.4s
330:	learn: 0.1424841	total: 16s	remaining: 32.4s
331:	learn: 0.1424803	total: 16.1s	remaining: 32.3s
332:	learn: 0.1424710	total: 16.1s	remaining: 32.3s
333:	learn: 0.1424632	total: 16.2s	remaining: 32.2s
334:	learn: 0.1424530	total: 16.2s	remaining: 32.2s
335:	learn: 0.1424395	total: 16.2s	remaining: 32.1s
336:	learn: 0.1423285	total: 16.3s	remaining: 32s
337:	learn: 0.1423239	total: 16.3s	remaining: 32s
338:	learn: 0.1423152	total: 16.4s	remaining: 31.9s
339:	learn: 0.1423096	total: 16.4s	remaining: 31.8s
340:	learn: 0.1423010	total: 16.4s	remaining: 31.8s
341:	learn: 0.1422925	total: 16.5s	remaining: 31.7s
342:	learn: 0.142283

482:	learn: 0.1392404	total: 22.4s	remaining: 24s
483:	learn: 0.1392377	total: 22.5s	remaining: 24s
484:	learn: 0.1392259	total: 22.5s	remaining: 23.9s
485:	learn: 0.1392233	total: 22.6s	remaining: 23.9s
486:	learn: 0.1392208	total: 22.6s	remaining: 23.8s
487:	learn: 0.1392042	total: 22.6s	remaining: 23.8s
488:	learn: 0.1391829	total: 22.7s	remaining: 23.7s
489:	learn: 0.1391802	total: 22.7s	remaining: 23.7s
490:	learn: 0.1391771	total: 22.8s	remaining: 23.6s
491:	learn: 0.1391562	total: 22.8s	remaining: 23.6s
492:	learn: 0.1391541	total: 22.9s	remaining: 23.5s
493:	learn: 0.1391490	total: 22.9s	remaining: 23.5s
494:	learn: 0.1391380	total: 22.9s	remaining: 23.4s
495:	learn: 0.1391249	total: 23s	remaining: 23.4s
496:	learn: 0.1391218	total: 23s	remaining: 23.3s
497:	learn: 0.1391208	total: 23.1s	remaining: 23.3s
498:	learn: 0.1391182	total: 23.1s	remaining: 23.2s
499:	learn: 0.1391156	total: 23.1s	remaining: 23.1s
500:	learn: 0.1391124	total: 23.2s	remaining: 23.1s
501:	learn: 0.139109

642:	learn: 0.1357009	total: 29.7s	remaining: 16.5s
643:	learn: 0.1356629	total: 29.8s	remaining: 16.5s
644:	learn: 0.1356487	total: 29.8s	remaining: 16.4s
645:	learn: 0.1356071	total: 29.9s	remaining: 16.4s
646:	learn: 0.1355975	total: 29.9s	remaining: 16.3s
647:	learn: 0.1355902	total: 30s	remaining: 16.3s
648:	learn: 0.1355721	total: 30s	remaining: 16.2s
649:	learn: 0.1355465	total: 30.1s	remaining: 16.2s
650:	learn: 0.1355230	total: 30.1s	remaining: 16.1s
651:	learn: 0.1355143	total: 30.2s	remaining: 16.1s
652:	learn: 0.1354926	total: 30.2s	remaining: 16s
653:	learn: 0.1354661	total: 30.2s	remaining: 16s
654:	learn: 0.1354260	total: 30.3s	remaining: 16s
655:	learn: 0.1354073	total: 30.3s	remaining: 15.9s
656:	learn: 0.1354033	total: 30.4s	remaining: 15.9s
657:	learn: 0.1353662	total: 30.4s	remaining: 15.8s
658:	learn: 0.1353599	total: 30.5s	remaining: 15.8s
659:	learn: 0.1353351	total: 30.5s	remaining: 15.7s
660:	learn: 0.1353000	total: 30.6s	remaining: 15.7s
661:	learn: 0.1352886	

804:	learn: 0.1315346	total: 37.7s	remaining: 9.12s
805:	learn: 0.1315161	total: 37.7s	remaining: 9.07s
806:	learn: 0.1314885	total: 37.7s	remaining: 9.03s
807:	learn: 0.1314241	total: 37.8s	remaining: 8.98s
808:	learn: 0.1314016	total: 37.9s	remaining: 8.94s
809:	learn: 0.1313842	total: 37.9s	remaining: 8.89s
810:	learn: 0.1313058	total: 38s	remaining: 8.85s
811:	learn: 0.1313000	total: 38s	remaining: 8.8s
812:	learn: 0.1312792	total: 38.1s	remaining: 8.75s
813:	learn: 0.1312716	total: 38.1s	remaining: 8.7s
814:	learn: 0.1311949	total: 38.2s	remaining: 8.66s
815:	learn: 0.1311901	total: 38.2s	remaining: 8.61s
816:	learn: 0.1311823	total: 38.2s	remaining: 8.56s
817:	learn: 0.1311313	total: 38.3s	remaining: 8.52s
818:	learn: 0.1311196	total: 38.3s	remaining: 8.47s
819:	learn: 0.1311006	total: 38.4s	remaining: 8.43s
820:	learn: 0.1310878	total: 38.4s	remaining: 8.38s
821:	learn: 0.1310729	total: 38.5s	remaining: 8.33s
822:	learn: 0.1310385	total: 38.5s	remaining: 8.29s
823:	learn: 0.1310

963:	learn: 0.1279396	total: 45.3s	remaining: 1.69s
964:	learn: 0.1279052	total: 45.3s	remaining: 1.64s
965:	learn: 0.1278635	total: 45.4s	remaining: 1.6s
966:	learn: 0.1278574	total: 45.4s	remaining: 1.55s
967:	learn: 0.1278520	total: 45.5s	remaining: 1.5s
968:	learn: 0.1278457	total: 45.5s	remaining: 1.46s
969:	learn: 0.1278099	total: 45.6s	remaining: 1.41s
970:	learn: 0.1278081	total: 45.6s	remaining: 1.36s
971:	learn: 0.1277674	total: 45.7s	remaining: 1.32s
972:	learn: 0.1277465	total: 45.7s	remaining: 1.27s
973:	learn: 0.1277351	total: 45.8s	remaining: 1.22s
974:	learn: 0.1277140	total: 45.8s	remaining: 1.18s
975:	learn: 0.1276929	total: 45.9s	remaining: 1.13s
976:	learn: 0.1276761	total: 45.9s	remaining: 1.08s
977:	learn: 0.1276711	total: 46s	remaining: 1.03s
978:	learn: 0.1276533	total: 46s	remaining: 987ms
979:	learn: 0.1276326	total: 46.1s	remaining: 940ms
980:	learn: 0.1276051	total: 46.1s	remaining: 893ms
981:	learn: 0.1275996	total: 46.1s	remaining: 846ms
982:	learn: 0.1275

126:	learn: 0.1997572	total: 6.12s	remaining: 42.1s
127:	learn: 0.1988203	total: 6.18s	remaining: 42.1s
128:	learn: 0.1979160	total: 6.24s	remaining: 42.1s
129:	learn: 0.1970211	total: 6.3s	remaining: 42.1s
130:	learn: 0.1961524	total: 6.36s	remaining: 42.2s
131:	learn: 0.1953050	total: 6.41s	remaining: 42.2s
132:	learn: 0.1944822	total: 6.47s	remaining: 42.2s
133:	learn: 0.1936792	total: 6.52s	remaining: 42.2s
134:	learn: 0.1928995	total: 6.58s	remaining: 42.2s
135:	learn: 0.1921381	total: 6.63s	remaining: 42.2s
136:	learn: 0.1914106	total: 6.69s	remaining: 42.1s
137:	learn: 0.1906991	total: 6.75s	remaining: 42.1s
138:	learn: 0.1896779	total: 6.8s	remaining: 42.1s
139:	learn: 0.1886832	total: 6.85s	remaining: 42.1s
140:	learn: 0.1877045	total: 6.9s	remaining: 42s
141:	learn: 0.1867558	total: 6.95s	remaining: 42s
142:	learn: 0.1858576	total: 7s	remaining: 42s
143:	learn: 0.1849610	total: 7.05s	remaining: 41.9s
144:	learn: 0.1840875	total: 7.1s	remaining: 41.9s
145:	learn: 0.1833075	tot

289:	learn: 0.1334956	total: 14.7s	remaining: 36s
290:	learn: 0.1334189	total: 14.8s	remaining: 35.9s
291:	learn: 0.1332927	total: 14.8s	remaining: 35.9s
292:	learn: 0.1331461	total: 14.9s	remaining: 35.8s
293:	learn: 0.1330946	total: 14.9s	remaining: 35.8s
294:	learn: 0.1330169	total: 14.9s	remaining: 35.7s
295:	learn: 0.1329619	total: 15s	remaining: 35.6s
296:	learn: 0.1328870	total: 15s	remaining: 35.6s
297:	learn: 0.1327960	total: 15.1s	remaining: 35.5s
298:	learn: 0.1326555	total: 15.1s	remaining: 35.5s
299:	learn: 0.1325471	total: 15.2s	remaining: 35.4s
300:	learn: 0.1324264	total: 15.2s	remaining: 35.4s
301:	learn: 0.1323613	total: 15.3s	remaining: 35.3s
302:	learn: 0.1322921	total: 15.3s	remaining: 35.3s
303:	learn: 0.1322524	total: 15.4s	remaining: 35.2s
304:	learn: 0.1321992	total: 15.4s	remaining: 35.2s
305:	learn: 0.1321046	total: 15.5s	remaining: 35.1s
306:	learn: 0.1320630	total: 15.5s	remaining: 35s
307:	learn: 0.1320220	total: 15.6s	remaining: 35s
308:	learn: 0.1319838	

449:	learn: 0.1261294	total: 22.6s	remaining: 27.7s
450:	learn: 0.1261009	total: 22.7s	remaining: 27.6s
451:	learn: 0.1260853	total: 22.8s	remaining: 27.6s
452:	learn: 0.1260762	total: 22.8s	remaining: 27.5s
453:	learn: 0.1260714	total: 22.9s	remaining: 27.5s
454:	learn: 0.1260169	total: 22.9s	remaining: 27.4s
455:	learn: 0.1260004	total: 23s	remaining: 27.4s
456:	learn: 0.1259901	total: 23s	remaining: 27.4s
457:	learn: 0.1259866	total: 23.1s	remaining: 27.3s
458:	learn: 0.1259641	total: 23.1s	remaining: 27.3s
459:	learn: 0.1259538	total: 23.2s	remaining: 27.2s
460:	learn: 0.1259482	total: 23.3s	remaining: 27.2s
461:	learn: 0.1259286	total: 23.3s	remaining: 27.2s
462:	learn: 0.1259184	total: 23.4s	remaining: 27.1s
463:	learn: 0.1259148	total: 23.4s	remaining: 27.1s
464:	learn: 0.1259057	total: 23.5s	remaining: 27s
465:	learn: 0.1258506	total: 23.5s	remaining: 27s
466:	learn: 0.1258468	total: 23.6s	remaining: 26.9s
467:	learn: 0.1258345	total: 23.6s	remaining: 26.9s
468:	learn: 0.125780

609:	learn: 0.1228536	total: 31.2s	remaining: 19.9s
610:	learn: 0.1228466	total: 31.2s	remaining: 19.9s
611:	learn: 0.1228339	total: 31.3s	remaining: 19.8s
612:	learn: 0.1228120	total: 31.3s	remaining: 19.8s
613:	learn: 0.1228029	total: 31.4s	remaining: 19.7s
614:	learn: 0.1227676	total: 31.4s	remaining: 19.7s
615:	learn: 0.1227613	total: 31.5s	remaining: 19.6s
616:	learn: 0.1227542	total: 31.5s	remaining: 19.6s
617:	learn: 0.1227387	total: 31.5s	remaining: 19.5s
618:	learn: 0.1227359	total: 31.6s	remaining: 19.4s
619:	learn: 0.1227287	total: 31.6s	remaining: 19.4s
620:	learn: 0.1227194	total: 31.7s	remaining: 19.3s
621:	learn: 0.1226922	total: 31.7s	remaining: 19.3s
622:	learn: 0.1226521	total: 31.8s	remaining: 19.2s
623:	learn: 0.1226078	total: 31.8s	remaining: 19.2s
624:	learn: 0.1225772	total: 31.9s	remaining: 19.1s
625:	learn: 0.1225701	total: 31.9s	remaining: 19.1s
626:	learn: 0.1225498	total: 32s	remaining: 19s
627:	learn: 0.1225387	total: 32s	remaining: 19s
628:	learn: 0.122488

768:	learn: 0.1191950	total: 38.9s	remaining: 11.7s
769:	learn: 0.1191896	total: 39s	remaining: 11.6s
770:	learn: 0.1191577	total: 39s	remaining: 11.6s
771:	learn: 0.1191091	total: 39.1s	remaining: 11.5s
772:	learn: 0.1190899	total: 39.1s	remaining: 11.5s
773:	learn: 0.1190352	total: 39.2s	remaining: 11.4s
774:	learn: 0.1190135	total: 39.3s	remaining: 11.4s
775:	learn: 0.1189806	total: 39.3s	remaining: 11.3s
776:	learn: 0.1189630	total: 39.4s	remaining: 11.3s
777:	learn: 0.1189347	total: 39.4s	remaining: 11.3s
778:	learn: 0.1189006	total: 39.5s	remaining: 11.2s
779:	learn: 0.1188879	total: 39.5s	remaining: 11.1s
780:	learn: 0.1188529	total: 39.6s	remaining: 11.1s
781:	learn: 0.1188305	total: 39.6s	remaining: 11s
782:	learn: 0.1188123	total: 39.7s	remaining: 11s
783:	learn: 0.1188085	total: 39.7s	remaining: 10.9s
784:	learn: 0.1187881	total: 39.8s	remaining: 10.9s
785:	learn: 0.1187834	total: 39.8s	remaining: 10.8s
786:	learn: 0.1187731	total: 39.8s	remaining: 10.8s
787:	learn: 0.118756

930:	learn: 0.1163756	total: 46.6s	remaining: 3.45s
931:	learn: 0.1163503	total: 46.6s	remaining: 3.4s
932:	learn: 0.1163205	total: 46.7s	remaining: 3.35s
933:	learn: 0.1163196	total: 46.8s	remaining: 3.3s
934:	learn: 0.1163164	total: 46.8s	remaining: 3.25s
935:	learn: 0.1162979	total: 46.9s	remaining: 3.2s
936:	learn: 0.1162845	total: 46.9s	remaining: 3.15s
937:	learn: 0.1162769	total: 46.9s	remaining: 3.1s
938:	learn: 0.1162474	total: 47s	remaining: 3.05s
939:	learn: 0.1162417	total: 47s	remaining: 3s
940:	learn: 0.1162148	total: 47.1s	remaining: 2.95s
941:	learn: 0.1161947	total: 47.1s	remaining: 2.9s
942:	learn: 0.1161772	total: 47.2s	remaining: 2.85s
943:	learn: 0.1161521	total: 47.3s	remaining: 2.8s
944:	learn: 0.1161031	total: 47.3s	remaining: 2.75s
945:	learn: 0.1160964	total: 47.4s	remaining: 2.7s
946:	learn: 0.1160781	total: 47.4s	remaining: 2.65s
947:	learn: 0.1160516	total: 47.5s	remaining: 2.6s
948:	learn: 0.1160323	total: 47.5s	remaining: 2.55s
949:	learn: 0.1160259	total

92:	learn: 0.2440589	total: 3.91s	remaining: 38.1s
93:	learn: 0.2420782	total: 3.96s	remaining: 38.2s
94:	learn: 0.2401551	total: 4.02s	remaining: 38.3s
95:	learn: 0.2382695	total: 4.07s	remaining: 38.3s
96:	learn: 0.2364519	total: 4.12s	remaining: 38.4s
97:	learn: 0.2346487	total: 4.18s	remaining: 38.4s
98:	learn: 0.2329143	total: 4.23s	remaining: 38.5s
99:	learn: 0.2312276	total: 4.28s	remaining: 38.5s
100:	learn: 0.2295518	total: 4.34s	remaining: 38.6s
101:	learn: 0.2279510	total: 4.39s	remaining: 38.7s
102:	learn: 0.2264868	total: 4.45s	remaining: 38.7s
103:	learn: 0.2249767	total: 4.5s	remaining: 38.7s
104:	learn: 0.2235911	total: 4.55s	remaining: 38.8s
105:	learn: 0.2221565	total: 4.6s	remaining: 38.8s
106:	learn: 0.2208459	total: 4.65s	remaining: 38.8s
107:	learn: 0.2195356	total: 4.71s	remaining: 38.9s
108:	learn: 0.2182240	total: 4.77s	remaining: 39s
109:	learn: 0.2169715	total: 4.82s	remaining: 39s
110:	learn: 0.2157634	total: 4.88s	remaining: 39.1s
111:	learn: 0.2145370	tota

253:	learn: 0.1441184	total: 13s	remaining: 38.1s
254:	learn: 0.1441178	total: 13s	remaining: 38s
255:	learn: 0.1441145	total: 13s	remaining: 37.9s
256:	learn: 0.1439184	total: 13.1s	remaining: 37.9s
257:	learn: 0.1437269	total: 13.2s	remaining: 37.9s
258:	learn: 0.1437217	total: 13.2s	remaining: 37.8s
259:	learn: 0.1437166	total: 13.2s	remaining: 37.7s
260:	learn: 0.1435484	total: 13.3s	remaining: 37.6s
261:	learn: 0.1435479	total: 13.3s	remaining: 37.6s
262:	learn: 0.1435470	total: 13.4s	remaining: 37.5s
263:	learn: 0.1435464	total: 13.4s	remaining: 37.4s
264:	learn: 0.1433822	total: 13.5s	remaining: 37.4s
265:	learn: 0.1432018	total: 13.5s	remaining: 37.3s
266:	learn: 0.1432006	total: 13.6s	remaining: 37.2s
267:	learn: 0.1430262	total: 13.6s	remaining: 37.2s
268:	learn: 0.1430262	total: 13.7s	remaining: 37.1s
269:	learn: 0.1428603	total: 13.7s	remaining: 37.1s
270:	learn: 0.1428598	total: 13.8s	remaining: 37s
271:	learn: 0.1427079	total: 13.8s	remaining: 37s
272:	learn: 0.1427055	to

416:	learn: 0.1364093	total: 20.6s	remaining: 28.8s
417:	learn: 0.1364091	total: 20.6s	remaining: 28.7s
418:	learn: 0.1363384	total: 20.7s	remaining: 28.7s
419:	learn: 0.1363376	total: 20.7s	remaining: 28.6s
420:	learn: 0.1363046	total: 20.8s	remaining: 28.6s
421:	learn: 0.1362721	total: 20.8s	remaining: 28.5s
422:	learn: 0.1362710	total: 20.9s	remaining: 28.4s
423:	learn: 0.1362574	total: 20.9s	remaining: 28.4s
424:	learn: 0.1362566	total: 20.9s	remaining: 28.3s
425:	learn: 0.1362010	total: 21s	remaining: 28.3s
426:	learn: 0.1361450	total: 21.1s	remaining: 28.3s
427:	learn: 0.1360763	total: 21.1s	remaining: 28.2s
428:	learn: 0.1360629	total: 21.1s	remaining: 28.1s
429:	learn: 0.1360490	total: 21.2s	remaining: 28.1s
430:	learn: 0.1359794	total: 21.3s	remaining: 28.1s
431:	learn: 0.1359204	total: 21.3s	remaining: 28s
432:	learn: 0.1358828	total: 21.4s	remaining: 28s
433:	learn: 0.1358699	total: 21.4s	remaining: 27.9s
434:	learn: 0.1358607	total: 21.4s	remaining: 27.8s
435:	learn: 0.1358

575:	learn: 0.1307040	total: 28.3s	remaining: 20.8s
576:	learn: 0.1306670	total: 28.4s	remaining: 20.8s
577:	learn: 0.1306369	total: 28.4s	remaining: 20.7s
578:	learn: 0.1306090	total: 28.5s	remaining: 20.7s
579:	learn: 0.1305536	total: 28.5s	remaining: 20.6s
580:	learn: 0.1305139	total: 28.6s	remaining: 20.6s
581:	learn: 0.1305068	total: 28.6s	remaining: 20.5s
582:	learn: 0.1304500	total: 28.7s	remaining: 20.5s
583:	learn: 0.1304416	total: 28.7s	remaining: 20.4s
584:	learn: 0.1304386	total: 28.7s	remaining: 20.4s
585:	learn: 0.1304317	total: 28.8s	remaining: 20.3s
586:	learn: 0.1303809	total: 28.8s	remaining: 20.3s
587:	learn: 0.1303436	total: 28.9s	remaining: 20.2s
588:	learn: 0.1303137	total: 28.9s	remaining: 20.2s
589:	learn: 0.1303025	total: 29s	remaining: 20.1s
590:	learn: 0.1302470	total: 29s	remaining: 20.1s
591:	learn: 0.1301928	total: 29.1s	remaining: 20.1s
592:	learn: 0.1301518	total: 29.1s	remaining: 20s
593:	learn: 0.1300965	total: 29.2s	remaining: 20s
594:	learn: 0.130056

734:	learn: 0.1268247	total: 36.1s	remaining: 13s
735:	learn: 0.1268051	total: 36.1s	remaining: 13s
736:	learn: 0.1267712	total: 36.2s	remaining: 12.9s
737:	learn: 0.1267526	total: 36.2s	remaining: 12.9s
738:	learn: 0.1267462	total: 36.3s	remaining: 12.8s
739:	learn: 0.1266939	total: 36.3s	remaining: 12.8s
740:	learn: 0.1266605	total: 36.4s	remaining: 12.7s
741:	learn: 0.1266514	total: 36.4s	remaining: 12.7s
742:	learn: 0.1266447	total: 36.5s	remaining: 12.6s
743:	learn: 0.1266112	total: 36.5s	remaining: 12.6s
744:	learn: 0.1265838	total: 36.6s	remaining: 12.5s
745:	learn: 0.1265692	total: 36.6s	remaining: 12.5s
746:	learn: 0.1265180	total: 36.7s	remaining: 12.4s
747:	learn: 0.1264925	total: 36.7s	remaining: 12.4s
748:	learn: 0.1264848	total: 36.8s	remaining: 12.3s
749:	learn: 0.1264690	total: 36.8s	remaining: 12.3s
750:	learn: 0.1264453	total: 36.9s	remaining: 12.2s
751:	learn: 0.1264391	total: 36.9s	remaining: 12.2s
752:	learn: 0.1264341	total: 37s	remaining: 12.1s
753:	learn: 0.1264

895:	learn: 0.1211928	total: 44.2s	remaining: 5.13s
896:	learn: 0.1211620	total: 44.3s	remaining: 5.08s
897:	learn: 0.1211410	total: 44.3s	remaining: 5.04s
898:	learn: 0.1211327	total: 44.4s	remaining: 4.99s
899:	learn: 0.1211067	total: 44.4s	remaining: 4.93s
900:	learn: 0.1210923	total: 44.5s	remaining: 4.88s
901:	learn: 0.1210866	total: 44.5s	remaining: 4.83s
902:	learn: 0.1210661	total: 44.6s	remaining: 4.79s
903:	learn: 0.1210580	total: 44.6s	remaining: 4.74s
904:	learn: 0.1210306	total: 44.7s	remaining: 4.69s
905:	learn: 0.1210138	total: 44.7s	remaining: 4.64s
906:	learn: 0.1209831	total: 44.8s	remaining: 4.59s
907:	learn: 0.1209582	total: 44.8s	remaining: 4.54s
908:	learn: 0.1209411	total: 44.9s	remaining: 4.49s
909:	learn: 0.1209228	total: 44.9s	remaining: 4.44s
910:	learn: 0.1209059	total: 45s	remaining: 4.39s
911:	learn: 0.1208737	total: 45s	remaining: 4.34s
912:	learn: 0.1208514	total: 45s	remaining: 4.29s
913:	learn: 0.1208223	total: 45.1s	remaining: 4.24s
914:	learn: 0.1208

57:	learn: 0.3874497	total: 2.48s	remaining: 40.3s
58:	learn: 0.3809277	total: 2.54s	remaining: 40.5s
59:	learn: 0.3744116	total: 2.59s	remaining: 40.6s
60:	learn: 0.3681642	total: 2.64s	remaining: 40.7s
61:	learn: 0.3621495	total: 2.7s	remaining: 40.8s
62:	learn: 0.3563817	total: 2.75s	remaining: 40.9s
63:	learn: 0.3507462	total: 2.8s	remaining: 41s
64:	learn: 0.3453627	total: 2.85s	remaining: 41.1s
65:	learn: 0.3400347	total: 2.9s	remaining: 41.1s
66:	learn: 0.3348835	total: 2.96s	remaining: 41.3s
67:	learn: 0.3298467	total: 3.02s	remaining: 41.4s
68:	learn: 0.3249463	total: 3.07s	remaining: 41.4s
69:	learn: 0.3202483	total: 3.12s	remaining: 41.5s
70:	learn: 0.3156314	total: 3.17s	remaining: 41.5s
71:	learn: 0.3112212	total: 3.22s	remaining: 41.5s
72:	learn: 0.3068840	total: 3.27s	remaining: 41.6s
73:	learn: 0.3027977	total: 3.33s	remaining: 41.7s
74:	learn: 0.2987759	total: 3.38s	remaining: 41.7s
75:	learn: 0.2947427	total: 3.44s	remaining: 41.8s
76:	learn: 0.2910733	total: 3.5s	rem

218:	learn: 0.1444835	total: 11.3s	remaining: 40.2s
219:	learn: 0.1442023	total: 11.3s	remaining: 40.1s
220:	learn: 0.1439617	total: 11.4s	remaining: 40.1s
221:	learn: 0.1437266	total: 11.4s	remaining: 40.1s
222:	learn: 0.1434352	total: 11.5s	remaining: 40.1s
223:	learn: 0.1432874	total: 11.5s	remaining: 40s
224:	learn: 0.1430623	total: 11.6s	remaining: 40s
225:	learn: 0.1428422	total: 11.7s	remaining: 39.9s
226:	learn: 0.1425642	total: 11.7s	remaining: 39.9s
227:	learn: 0.1423232	total: 11.8s	remaining: 39.9s
228:	learn: 0.1420477	total: 11.8s	remaining: 39.9s
229:	learn: 0.1418309	total: 11.9s	remaining: 39.8s
230:	learn: 0.1415857	total: 12s	remaining: 39.8s
231:	learn: 0.1415438	total: 12s	remaining: 39.7s
232:	learn: 0.1412529	total: 12.1s	remaining: 39.7s
233:	learn: 0.1409976	total: 12.1s	remaining: 39.7s
234:	learn: 0.1407847	total: 12.2s	remaining: 39.7s
235:	learn: 0.1405331	total: 12.2s	remaining: 39.6s
236:	learn: 0.1402911	total: 12.3s	remaining: 39.6s
237:	learn: 0.140078

380:	learn: 0.1260299	total: 20s	remaining: 32.4s
381:	learn: 0.1260277	total: 20s	remaining: 32.4s
382:	learn: 0.1259786	total: 20.1s	remaining: 32.3s
383:	learn: 0.1259111	total: 20.1s	remaining: 32.3s
384:	learn: 0.1258711	total: 20.2s	remaining: 32.2s
385:	learn: 0.1258452	total: 20.2s	remaining: 32.2s
386:	learn: 0.1258195	total: 20.3s	remaining: 32.1s
387:	learn: 0.1257524	total: 20.3s	remaining: 32s
388:	learn: 0.1257187	total: 20.4s	remaining: 32s
389:	learn: 0.1256828	total: 20.4s	remaining: 31.9s
390:	learn: 0.1256005	total: 20.5s	remaining: 31.9s
391:	learn: 0.1255986	total: 20.5s	remaining: 31.8s
392:	learn: 0.1255756	total: 20.5s	remaining: 31.7s
393:	learn: 0.1255492	total: 20.6s	remaining: 31.7s
394:	learn: 0.1254626	total: 20.6s	remaining: 31.6s
395:	learn: 0.1254505	total: 20.7s	remaining: 31.6s
396:	learn: 0.1254071	total: 20.7s	remaining: 31.5s
397:	learn: 0.1253659	total: 20.8s	remaining: 31.4s
398:	learn: 0.1253367	total: 20.8s	remaining: 31.4s
399:	learn: 0.125325

542:	learn: 0.1212591	total: 27.4s	remaining: 23.1s
543:	learn: 0.1212577	total: 27.4s	remaining: 23s
544:	learn: 0.1212332	total: 27.5s	remaining: 23s
545:	learn: 0.1212066	total: 27.5s	remaining: 22.9s
546:	learn: 0.1212056	total: 27.6s	remaining: 22.8s
547:	learn: 0.1211452	total: 27.6s	remaining: 22.8s
548:	learn: 0.1210918	total: 27.7s	remaining: 22.7s
549:	learn: 0.1210897	total: 27.7s	remaining: 22.7s
550:	learn: 0.1210674	total: 27.8s	remaining: 22.6s
551:	learn: 0.1210186	total: 27.8s	remaining: 22.6s
552:	learn: 0.1209709	total: 27.9s	remaining: 22.5s
553:	learn: 0.1209699	total: 27.9s	remaining: 22.5s
554:	learn: 0.1209646	total: 28s	remaining: 22.4s
555:	learn: 0.1209576	total: 28s	remaining: 22.4s
556:	learn: 0.1209536	total: 28s	remaining: 22.3s
557:	learn: 0.1208961	total: 28.1s	remaining: 22.3s
558:	learn: 0.1208939	total: 28.1s	remaining: 22.2s
559:	learn: 0.1208857	total: 28.2s	remaining: 22.1s
560:	learn: 0.1208342	total: 28.2s	remaining: 22.1s
561:	learn: 0.1208332	

705:	learn: 0.1176406	total: 35.2s	remaining: 14.7s
706:	learn: 0.1176284	total: 35.2s	remaining: 14.6s
707:	learn: 0.1176041	total: 35.3s	remaining: 14.6s
708:	learn: 0.1175917	total: 35.3s	remaining: 14.5s
709:	learn: 0.1175764	total: 35.4s	remaining: 14.5s
710:	learn: 0.1175679	total: 35.4s	remaining: 14.4s
711:	learn: 0.1175455	total: 35.5s	remaining: 14.4s
712:	learn: 0.1175055	total: 35.5s	remaining: 14.3s
713:	learn: 0.1175001	total: 35.6s	remaining: 14.3s
714:	learn: 0.1174715	total: 35.6s	remaining: 14.2s
715:	learn: 0.1174669	total: 35.7s	remaining: 14.2s
716:	learn: 0.1174614	total: 35.7s	remaining: 14.1s
717:	learn: 0.1174479	total: 35.8s	remaining: 14.1s
718:	learn: 0.1174435	total: 35.8s	remaining: 14s
719:	learn: 0.1174199	total: 35.9s	remaining: 14s
720:	learn: 0.1174136	total: 36s	remaining: 13.9s
721:	learn: 0.1174053	total: 36s	remaining: 13.9s
722:	learn: 0.1174046	total: 36s	remaining: 13.8s
723:	learn: 0.1173685	total: 36.1s	remaining: 13.8s
724:	learn: 0.1173209	

869:	learn: 0.1148339	total: 43.2s	remaining: 6.46s
870:	learn: 0.1148238	total: 43.3s	remaining: 6.41s
871:	learn: 0.1148198	total: 43.3s	remaining: 6.36s
872:	learn: 0.1148145	total: 43.4s	remaining: 6.31s
873:	learn: 0.1147659	total: 43.4s	remaining: 6.26s
874:	learn: 0.1147406	total: 43.5s	remaining: 6.21s
875:	learn: 0.1147324	total: 43.5s	remaining: 6.16s
876:	learn: 0.1147262	total: 43.6s	remaining: 6.11s
877:	learn: 0.1147123	total: 43.6s	remaining: 6.06s
878:	learn: 0.1147049	total: 43.7s	remaining: 6.01s
879:	learn: 0.1147039	total: 43.7s	remaining: 5.96s
880:	learn: 0.1146911	total: 43.7s	remaining: 5.91s
881:	learn: 0.1146792	total: 43.8s	remaining: 5.86s
882:	learn: 0.1146522	total: 43.8s	remaining: 5.81s
883:	learn: 0.1146200	total: 43.9s	remaining: 5.76s
884:	learn: 0.1146099	total: 43.9s	remaining: 5.71s
885:	learn: 0.1146025	total: 44s	remaining: 5.66s
886:	learn: 0.1145946	total: 44s	remaining: 5.61s
887:	learn: 0.1145424	total: 44.1s	remaining: 5.56s
888:	learn: 0.11

28:	learn: 0.4839999	total: 1.15s	remaining: 38.6s
29:	learn: 0.4767364	total: 1.21s	remaining: 39.3s
30:	learn: 0.4697472	total: 1.3s	remaining: 40.6s
31:	learn: 0.4629143	total: 1.35s	remaining: 40.8s
32:	learn: 0.4564858	total: 1.4s	remaining: 41s
33:	learn: 0.4500752	total: 1.45s	remaining: 41.1s
34:	learn: 0.4438507	total: 1.49s	remaining: 41.2s
35:	learn: 0.4378406	total: 1.53s	remaining: 41.1s
36:	learn: 0.4318616	total: 1.58s	remaining: 41.2s
37:	learn: 0.4261538	total: 1.63s	remaining: 41.3s
38:	learn: 0.4205056	total: 1.68s	remaining: 41.5s
39:	learn: 0.4151407	total: 1.73s	remaining: 41.5s
40:	learn: 0.4099442	total: 1.78s	remaining: 41.6s
41:	learn: 0.4048679	total: 1.82s	remaining: 41.6s
42:	learn: 0.3997803	total: 1.87s	remaining: 41.7s
43:	learn: 0.3950376	total: 1.92s	remaining: 41.7s
44:	learn: 0.3901931	total: 1.97s	remaining: 41.8s
45:	learn: 0.3857323	total: 2s	remaining: 41.6s
46:	learn: 0.3813777	total: 2.05s	remaining: 41.7s
47:	learn: 0.3769232	total: 2.1s	remai

190:	learn: 0.1560203	total: 9.67s	remaining: 41s
191:	learn: 0.1556285	total: 9.73s	remaining: 41s
192:	learn: 0.1552465	total: 9.79s	remaining: 41s
193:	learn: 0.1548696	total: 9.85s	remaining: 40.9s
194:	learn: 0.1545316	total: 9.91s	remaining: 40.9s
195:	learn: 0.1542252	total: 9.97s	remaining: 40.9s
196:	learn: 0.1538852	total: 10s	remaining: 40.9s
197:	learn: 0.1535367	total: 10.1s	remaining: 40.9s
198:	learn: 0.1531732	total: 10.2s	remaining: 40.9s
199:	learn: 0.1528726	total: 10.2s	remaining: 40.8s
200:	learn: 0.1525667	total: 10.3s	remaining: 40.8s
201:	learn: 0.1522182	total: 10.3s	remaining: 40.8s
202:	learn: 0.1519331	total: 10.4s	remaining: 40.7s
203:	learn: 0.1515740	total: 10.4s	remaining: 40.7s
204:	learn: 0.1512236	total: 10.5s	remaining: 40.7s
205:	learn: 0.1509553	total: 10.6s	remaining: 40.7s
206:	learn: 0.1506595	total: 10.6s	remaining: 40.7s
207:	learn: 0.1503861	total: 10.7s	remaining: 40.6s
208:	learn: 0.1500489	total: 10.7s	remaining: 40.6s
209:	learn: 0.149728

352:	learn: 0.1338843	total: 18.3s	remaining: 33.5s
353:	learn: 0.1338353	total: 18.3s	remaining: 33.5s
354:	learn: 0.1337854	total: 18.4s	remaining: 33.4s
355:	learn: 0.1337400	total: 18.4s	remaining: 33.4s
356:	learn: 0.1336773	total: 18.5s	remaining: 33.3s
357:	learn: 0.1336449	total: 18.6s	remaining: 33.3s
358:	learn: 0.1336303	total: 18.6s	remaining: 33.2s
359:	learn: 0.1335857	total: 18.7s	remaining: 33.2s
360:	learn: 0.1335732	total: 18.7s	remaining: 33.1s
361:	learn: 0.1335591	total: 18.8s	remaining: 33.1s
362:	learn: 0.1334814	total: 18.8s	remaining: 33s
363:	learn: 0.1334224	total: 18.9s	remaining: 33s
364:	learn: 0.1334006	total: 18.9s	remaining: 32.9s
365:	learn: 0.1333859	total: 19s	remaining: 32.9s
366:	learn: 0.1333438	total: 19s	remaining: 32.8s
367:	learn: 0.1333265	total: 19.1s	remaining: 32.7s
368:	learn: 0.1333071	total: 19.1s	remaining: 32.7s
369:	learn: 0.1332653	total: 19.2s	remaining: 32.6s
370:	learn: 0.1332505	total: 19.2s	remaining: 32.6s
371:	learn: 0.133245

512:	learn: 0.1298283	total: 26.1s	remaining: 24.8s
513:	learn: 0.1298066	total: 26.1s	remaining: 24.7s
514:	learn: 0.1297847	total: 26.2s	remaining: 24.6s
515:	learn: 0.1297602	total: 26.2s	remaining: 24.6s
516:	learn: 0.1297388	total: 26.3s	remaining: 24.5s
517:	learn: 0.1297353	total: 26.3s	remaining: 24.5s
518:	learn: 0.1297285	total: 26.3s	remaining: 24.4s
519:	learn: 0.1297240	total: 26.4s	remaining: 24.4s
520:	learn: 0.1296521	total: 26.4s	remaining: 24.3s
521:	learn: 0.1296359	total: 26.5s	remaining: 24.3s
522:	learn: 0.1295836	total: 26.6s	remaining: 24.2s
523:	learn: 0.1295784	total: 26.6s	remaining: 24.2s
524:	learn: 0.1295582	total: 26.6s	remaining: 24.1s
525:	learn: 0.1295553	total: 26.7s	remaining: 24.1s
526:	learn: 0.1295473	total: 26.7s	remaining: 24s
527:	learn: 0.1295395	total: 26.8s	remaining: 23.9s
528:	learn: 0.1295214	total: 26.8s	remaining: 23.9s
529:	learn: 0.1295036	total: 26.9s	remaining: 23.8s
530:	learn: 0.1294977	total: 26.9s	remaining: 23.8s
531:	learn: 0.

671:	learn: 0.1266665	total: 33.6s	remaining: 16.4s
672:	learn: 0.1266623	total: 33.6s	remaining: 16.3s
673:	learn: 0.1266260	total: 33.7s	remaining: 16.3s
674:	learn: 0.1266026	total: 33.7s	remaining: 16.2s
675:	learn: 0.1265978	total: 33.8s	remaining: 16.2s
676:	learn: 0.1265687	total: 33.8s	remaining: 16.1s
677:	learn: 0.1265492	total: 33.9s	remaining: 16.1s
678:	learn: 0.1265432	total: 33.9s	remaining: 16s
679:	learn: 0.1265392	total: 34s	remaining: 16s
680:	learn: 0.1265353	total: 34s	remaining: 15.9s
681:	learn: 0.1264996	total: 34.1s	remaining: 15.9s
682:	learn: 0.1264686	total: 34.1s	remaining: 15.8s
683:	learn: 0.1264479	total: 34.2s	remaining: 15.8s
684:	learn: 0.1264283	total: 34.2s	remaining: 15.7s
685:	learn: 0.1264224	total: 34.3s	remaining: 15.7s
686:	learn: 0.1264062	total: 34.3s	remaining: 15.6s
687:	learn: 0.1263918	total: 34.3s	remaining: 15.6s
688:	learn: 0.1263856	total: 34.4s	remaining: 15.5s
689:	learn: 0.1263819	total: 34.4s	remaining: 15.5s
690:	learn: 0.126351

831:	learn: 0.1233389	total: 41.5s	remaining: 8.37s
832:	learn: 0.1233312	total: 41.5s	remaining: 8.32s
833:	learn: 0.1233267	total: 41.6s	remaining: 8.27s
834:	learn: 0.1233019	total: 41.6s	remaining: 8.22s
835:	learn: 0.1232607	total: 41.7s	remaining: 8.18s
836:	learn: 0.1232551	total: 41.7s	remaining: 8.13s
837:	learn: 0.1232184	total: 41.8s	remaining: 8.08s
838:	learn: 0.1232141	total: 41.8s	remaining: 8.03s
839:	learn: 0.1232108	total: 41.9s	remaining: 7.97s
840:	learn: 0.1231577	total: 41.9s	remaining: 7.93s
841:	learn: 0.1231450	total: 42s	remaining: 7.88s
842:	learn: 0.1231412	total: 42s	remaining: 7.83s
843:	learn: 0.1231118	total: 42.1s	remaining: 7.77s
844:	learn: 0.1230682	total: 42.1s	remaining: 7.72s
845:	learn: 0.1230361	total: 42.2s	remaining: 7.68s
846:	learn: 0.1229931	total: 42.2s	remaining: 7.63s
847:	learn: 0.1229916	total: 42.3s	remaining: 7.58s
848:	learn: 0.1229563	total: 42.3s	remaining: 7.53s
849:	learn: 0.1229517	total: 42.4s	remaining: 7.48s
850:	learn: 0.12

990:	learn: 0.1202613	total: 49.5s	remaining: 449ms
991:	learn: 0.1202158	total: 49.5s	remaining: 399ms
992:	learn: 0.1201917	total: 49.6s	remaining: 350ms
993:	learn: 0.1201823	total: 49.6s	remaining: 300ms
994:	learn: 0.1201734	total: 49.7s	remaining: 250ms
995:	learn: 0.1201687	total: 49.8s	remaining: 200ms
996:	learn: 0.1201664	total: 49.8s	remaining: 150ms
997:	learn: 0.1201365	total: 49.8s	remaining: 99.9ms
998:	learn: 0.1201269	total: 49.9s	remaining: 49.9ms
999:	learn: 0.1201181	total: 49.9s	remaining: 0us


In [24]:
print("Log loss: {:.3f}".format(log_2.mean()))

Log loss: -0.189


In [69]:
opt_cat = CatBoostClassifier(**best_params,od_type='Iter', loss_function='Logloss', cat_features=categorical_f)
opt_cat.fit(X_train, y_train)
y_pred_cat = opt_cat.predict_proba(X_test.values)[:, 1]
print("model logloss: %.3f" % log_loss(y_test, y_pred_cat))

0:	learn: 0.6872590	total: 2.98s	remaining: 37m 35s
1:	learn: 0.6815207	total: 5.03s	remaining: 31m 44s
2:	learn: 0.6759145	total: 6.83s	remaining: 28m 42s
3:	learn: 0.6705470	total: 8.7s	remaining: 27m 21s
4:	learn: 0.6652111	total: 10.5s	remaining: 26m 27s
5:	learn: 0.6601074	total: 12.4s	remaining: 25m 50s
6:	learn: 0.6551261	total: 14.2s	remaining: 25m 25s
7:	learn: 0.6502594	total: 16.1s	remaining: 25m 10s
8:	learn: 0.6455273	total: 18s	remaining: 25m 1s
9:	learn: 0.6409089	total: 20.2s	remaining: 25m 13s
10:	learn: 0.6364029	total: 22.1s	remaining: 25m 2s
11:	learn: 0.6319297	total: 23.7s	remaining: 24m 34s
12:	learn: 0.6276322	total: 25.8s	remaining: 24m 37s
13:	learn: 0.6233885	total: 27.5s	remaining: 24m 23s
14:	learn: 0.6192511	total: 29.1s	remaining: 24m 2s
15:	learn: 0.6152651	total: 31.1s	remaining: 24m 5s
16:	learn: 0.6113927	total: 33.2s	remaining: 24m 7s
17:	learn: 0.6076081	total: 35.2s	remaining: 24m 10s
18:	learn: 0.6039616	total: 37.1s	remaining: 24m 5s
19:	learn: 0

153:	learn: 0.4542701	total: 5m 42s	remaining: 22m 24s
154:	learn: 0.4540435	total: 5m 44s	remaining: 22m 22s
155:	learn: 0.4538059	total: 5m 46s	remaining: 22m 18s
156:	learn: 0.4535863	total: 5m 47s	remaining: 22m 10s
157:	learn: 0.4533752	total: 5m 48s	remaining: 22m 7s
158:	learn: 0.4531693	total: 5m 50s	remaining: 22m 3s
159:	learn: 0.4527284	total: 5m 53s	remaining: 22m 5s
160:	learn: 0.4522976	total: 5m 56s	remaining: 22m 4s
161:	learn: 0.4518767	total: 5m 59s	remaining: 22m 4s
162:	learn: 0.4514611	total: 6m 2s	remaining: 22m 4s
163:	learn: 0.4510466	total: 6m 6s	remaining: 22m 11s
164:	learn: 0.4506314	total: 6m 11s	remaining: 22m 16s
165:	learn: 0.4502258	total: 6m 15s	remaining: 22m 20s
166:	learn: 0.4498586	total: 6m 19s	remaining: 22m 24s
167:	learn: 0.4494706	total: 6m 23s	remaining: 22m 28s
168:	learn: 0.4490998	total: 6m 27s	remaining: 22m 33s
169:	learn: 0.4487373	total: 6m 31s	remaining: 22m 37s
170:	learn: 0.4483832	total: 6m 35s	remaining: 22m 41s
171:	learn: 0.4480

303:	learn: 0.4313955	total: 16m 17s	remaining: 24m 23s
304:	learn: 0.4313479	total: 16m 22s	remaining: 24m 21s
305:	learn: 0.4313150	total: 16m 26s	remaining: 24m 20s
306:	learn: 0.4312795	total: 16m 30s	remaining: 24m 18s
307:	learn: 0.4312312	total: 16m 34s	remaining: 24m 16s
308:	learn: 0.4311954	total: 16m 39s	remaining: 24m 16s
309:	learn: 0.4311518	total: 16m 44s	remaining: 24m 14s
310:	learn: 0.4311191	total: 16m 48s	remaining: 24m 12s
311:	learn: 0.4310785	total: 16m 53s	remaining: 24m 11s
312:	learn: 0.4310495	total: 16m 58s	remaining: 24m 10s
313:	learn: 0.4310096	total: 17m 3s	remaining: 24m 10s
314:	learn: 0.4309788	total: 17m 7s	remaining: 24m 8s
315:	learn: 0.4309433	total: 17m 12s	remaining: 24m 7s
316:	learn: 0.4309043	total: 17m 16s	remaining: 24m 5s
317:	learn: 0.4308675	total: 17m 22s	remaining: 24m 5s
318:	learn: 0.4308354	total: 17m 26s	remaining: 24m 3s
319:	learn: 0.4307969	total: 17m 31s	remaining: 24m 2s
320:	learn: 0.4307621	total: 17m 36s	remaining: 24m 1s
3

451:	learn: 0.4280462	total: 26m 30s	remaining: 18m
452:	learn: 0.4280334	total: 26m 33s	remaining: 17m 56s
453:	learn: 0.4280158	total: 26m 37s	remaining: 17m 53s
454:	learn: 0.4280030	total: 26m 41s	remaining: 17m 49s
455:	learn: 0.4279908	total: 26m 43s	remaining: 17m 45s
456:	learn: 0.4279809	total: 26m 47s	remaining: 17m 42s
457:	learn: 0.4279712	total: 26m 51s	remaining: 17m 39s
458:	learn: 0.4279617	total: 26m 55s	remaining: 17m 35s
459:	learn: 0.4279521	total: 26m 59s	remaining: 17m 32s
460:	learn: 0.4279364	total: 27m 3s	remaining: 17m 29s
461:	learn: 0.4279247	total: 27m 7s	remaining: 17m 26s
462:	learn: 0.4279083	total: 27m 11s	remaining: 17m 22s
463:	learn: 0.4278993	total: 27m 15s	remaining: 17m 19s
464:	learn: 0.4278901	total: 27m 19s	remaining: 17m 16s
465:	learn: 0.4278788	total: 27m 21s	remaining: 17m 12s
466:	learn: 0.4278699	total: 27m 25s	remaining: 17m 8s
467:	learn: 0.4278569	total: 27m 28s	remaining: 17m 4s
468:	learn: 0.4278459	total: 27m 30s	remaining: 17m
469:

599:	learn: 0.4266669	total: 35m 28s	remaining: 9m 24s
600:	learn: 0.4266605	total: 35m 32s	remaining: 9m 20s
601:	learn: 0.4266543	total: 35m 37s	remaining: 9m 17s
602:	learn: 0.4266481	total: 35m 39s	remaining: 9m 13s
603:	learn: 0.4266421	total: 35m 44s	remaining: 9m 10s
604:	learn: 0.4266365	total: 35m 47s	remaining: 9m 6s
605:	learn: 0.4266278	total: 35m 50s	remaining: 9m 3s
606:	learn: 0.4266203	total: 35m 53s	remaining: 8m 59s
607:	learn: 0.4266157	total: 35m 56s	remaining: 8m 55s
608:	learn: 0.4266080	total: 36m	remaining: 8m 52s
609:	learn: 0.4266035	total: 36m 3s	remaining: 8m 48s
610:	learn: 0.4265966	total: 36m 7s	remaining: 8m 44s
611:	learn: 0.4265895	total: 36m 9s	remaining: 8m 41s
612:	learn: 0.4265849	total: 36m 12s	remaining: 8m 37s
613:	learn: 0.4265800	total: 36m 13s	remaining: 8m 33s
614:	learn: 0.4265752	total: 36m 16s	remaining: 8m 29s
615:	learn: 0.4265669	total: 36m 21s	remaining: 8m 26s
616:	learn: 0.4265622	total: 36m 24s	remaining: 8m 22s
617:	learn: 0.42655

750:	learn: 0.4258830	total: 44m 35s	remaining: 28.5s
751:	learn: 0.4258793	total: 44m 37s	remaining: 24.9s
752:	learn: 0.4258756	total: 44m 41s	remaining: 21.4s
753:	learn: 0.4258651	total: 44m 47s	remaining: 17.8s
754:	learn: 0.4258562	total: 44m 51s	remaining: 14.3s
755:	learn: 0.4258460	total: 44m 56s	remaining: 10.7s
756:	learn: 0.4258362	total: 45m	remaining: 7.13s
757:	learn: 0.4258286	total: 45m 5s	remaining: 3.57s
758:	learn: 0.4258188	total: 45m 10s	remaining: 0us
model logloss: 0.417


#### XGBoost Training

In [59]:
df_train

Unnamed: 0,click,hour,C1,banner_pos,site_domain,site_category,app_domain,app_category,device_id,device_ip,...,device_type,device_conn_type,C14,C15,C16,C17,C18,C19,C20,C21
0,0,14102100,5,0,dd641cc7,8fd0aea4,2021c8a9,07d7df22,a99f214a,ddd2926e,...,1,2,15706,201,31,1300,0,3,-1,67
1,0,14102100,5,0,dd641cc7,8fd0aea4,2021c8a9,07d7df22,a99f214a,96809ac8,...,1,0,15704,201,31,1300,0,3,85,67
2,0,14102100,5,0,dd641cc7,8fd0aea4,2021c8a9,07d7df22,a99f214a,b3cf8def,...,1,0,15704,201,31,1300,0,3,85,67
3,0,14102100,5,0,dd641cc7,8fd0aea4,2021c8a9,07d7df22,a99f214a,e8275b8f,...,1,0,15706,201,31,1300,0,3,85,67
4,0,14102100,5,1,cbee4b41,72722551,2021c8a9,07d7df22,a99f214a,9644d0bf,...,1,0,18993,201,31,1739,0,3,-1,145
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,0,14102100,5,0,dd641cc7,8fd0aea4,2021c8a9,07d7df22,a99f214a,fd4045ce,...,1,0,15702,201,31,1300,0,3,-1,67
19996,0,14102100,5,0,dd641cc7,8fd0aea4,2021c8a9,07d7df22,a99f214a,f5fe9cc8,...,1,0,15708,201,31,1300,0,3,85,67
19997,0,14102100,5,0,dd641cc7,8fd0aea4,2021c8a9,07d7df22,a99f214a,6bcbcc53,...,1,2,15708,201,31,1300,0,3,-1,67
19998,0,14102100,5,0,78590e4e,74073276,2021c8a9,07d7df22,a99f214a,e5025233,...,1,0,15699,201,31,1300,0,3,-1,67


### lightgbm

In [120]:
df_lgb = df_copy.copy()
def convert_obj_to_int(self):
    
    object_list_columns = self.columns
    object_list_dtypes = self.dtypes
    new_col_suffix = '_int'
    for index in range(0,len(object_list_columns)):
        if object_list_dtypes[index] == object :
            self[object_list_columns[index]+new_col_suffix] = self[object_list_columns[index]].map( lambda  x: hash(x))
            self.drop([object_list_columns[index]],inplace=True,axis=1)
    return self
df_lgb = convert_obj_to_int(df_lgb)

X_train_lgb, X_test_lgb, y_train_lgb, y_test_lgb = train_test_split(df_lgb, Y, test_size=0.3)

In [125]:
import lightgbm as lgb
msk = np.random.rand(len(X_train)) < 0.8
lgb_train = lgb.Dataset(X_train_lgb[msk], y_train_lgb[msk])
lgb_eval = lgb.Dataset(X_train_lgb[~msk], y_train_lgb[~msk], reference=lgb_train)

In [126]:
params_lgb = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': {'binary_logloss'},
    'max_depth': 6,
    'learning_rate': 0.06,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'max_bin': 255,
    'verbose': 30
}

In [128]:
categorical_f = ['C1','device_type','device_conn_type','C18','banner_pos','site_domain','site_category',
                 'device_id','device_ip','device_model','C14','C15','C16','C17','C19','C20','C21']

lgb_model = lgb.LGBMClassifier()
gbm = lgb.train(params_lgb,
                lgb_train,
                num_boost_round=120)

[LightGBM] [Info] Number of positive: 2117, number of negative: 9003
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.911331
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.428862
[LightGBM] [Debug] init for col-wise cost 0.001513 seconds, init for row-wise cost 0.006227 seconds
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Debug] Using Sparse Multi-Val Bin
[LightGBM] [Info] Total Bins 1227
[LightGBM] [Info] Number of data points in the train set: 11120, number of used features: 17
[LightGBM] [Debug] Use subset for bagging
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.190378 -> initscore=-1.447558
[LightGBM] [Info] Start training from score -1.447558
[LightGBM] [Debug] Re-bagging, using 8912 data to train
[LightGBM] [Debug] Trained a tree with leaves = 31 and max_depth = 6
[LightGBM] [Debug] Trained a tree with leaves = 31 and max_depth = 6
[LightGBM

### Bayesian Search CV for lightgbm

In [134]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.metrics import make_scorer, auc, log_loss, roc_auc_score

import lightgbm as lgb

# categorical_f = [ca for ca in X_train.columns if X_train[ca].dtype == 'object']

param = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': {'binary_logloss'},
    'max_depth': Integer(3, 7),
    'learning_rate': Real(0.01, 1.0, 'log-uniform'),
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'max_bin': Integer(20, 255, 'uniform'),
    'n_estimators': Integer(100, 1000, 'uniform'),
    'num_leaves': Integer(24, 80, 'uniform')
}

# lgb_model = lgb.LGBMClassifier()

# log-uniform: understand as search over p = exp(x) by varying x
opt_xgb = BayesSearchCV(
    lgb,
    param,
    scoring = LogLoss,
    n_iter=32,
    cv=5,
    random_state=42
)

# executes bayesian optimization
opt_xgb.fit(lgb_train)

ValueError: Dimension has to be a list or tuple.

In [None]:

def bayes_parameter_opt_lgb(X, y, init_round=15, opt_round=25, n_folds=3, random_seed=6,n_estimators=10000, output_process=False):
    # prepare data
    train_data = lgb.Dataset(data=X, label=y, free_raw_data=False)
    # parameters
    def lgb_eval(learning_rate,num_leaves, feature_fraction, bagging_fraction, max_depth, max_bin, min_data_in_leaf,min_sum_hessian_in_leaf,subsample):
        params = {'application':'binary', 'metric':'auc'}
        params['learning_rate'] = max(min(learning_rate, 1), 0)
        params["num_leaves"] = int(round(num_leaves))
        params['feature_fraction'] = max(min(feature_fraction, 1), 0)
        params['bagging_fraction'] = max(min(bagging_fraction, 1), 0)
        params['max_depth'] = int(round(max_depth))
        params['max_bin'] = int(round(max_depth))
        params['min_data_in_leaf'] = int(round(min_data_in_leaf))
        params['min_sum_hessian_in_leaf'] = min_sum_hessian_in_leaf
        params['subsample'] = max(min(subsample, 1), 0)

In [None]:
def random_search(param_grid, out_file, max_evals = MAX_EVALS):
    """Random search for hyperparameter optimization. 
       Writes result of search to csv file every search iteration."""
    
    
    # Dataframe for results
    results = pd.DataFrame(columns = ['score', 'params', 'iteration'],
                                  index = list(range(MAX_EVALS)))
    for i in range(MAX_EVALS):
        
        # Choose random hyperparameters
        random_params = {k: random.sample(v, 1)[0] for k, v in param_grid.items()}
        random_params['subsample'] = 1.0 if random_params['boosting_type'] == 'goss' else random_params['subsample']

        # Evaluate randomly selected hyperparameters
        eval_results = objective(random_params, i)
        results.loc[i, :] = eval_results

        # open connection (append option) and write results
        of_connection = open(out_file, 'a')
        writer = csv.writer(of_connection)
        writer.writerow(eval_results)
        
        # make sure to close connection
        of_connection.close()
        
    # Sort with best score on top
    results.sort_values('score', ascending = False, inplace = True)
    results.reset_index(inplace = True)

    return results 

### Output_1

In [33]:
opt_cat = CatBoostClassifier(**best_params,od_type='Iter', loss_function='Logloss', cat_features=categorical_f)
opt_cat.fit(df_copy, Y)

0:	learn: 0.6876016	total: 37.8ms	remaining: 37.8s
1:	learn: 0.6821458	total: 77.9ms	remaining: 38.9s
2:	learn: 0.6768222	total: 114ms	remaining: 38s
3:	learn: 0.6716256	total: 150ms	remaining: 37.4s
4:	learn: 0.6665317	total: 186ms	remaining: 37s
5:	learn: 0.6615421	total: 224ms	remaining: 37.1s
6:	learn: 0.6567268	total: 270ms	remaining: 38.3s
7:	learn: 0.6520342	total: 309ms	remaining: 38.4s
8:	learn: 0.6474125	total: 345ms	remaining: 38s
9:	learn: 0.6428989	total: 381ms	remaining: 37.7s
10:	learn: 0.6384977	total: 413ms	remaining: 37.1s
11:	learn: 0.6341903	total: 446ms	remaining: 36.8s
12:	learn: 0.6299779	total: 482ms	remaining: 36.6s
13:	learn: 0.6258756	total: 523ms	remaining: 36.8s
14:	learn: 0.6218983	total: 560ms	remaining: 36.8s
15:	learn: 0.6179753	total: 599ms	remaining: 36.8s
16:	learn: 0.6142200	total: 633ms	remaining: 36.6s
17:	learn: 0.6104715	total: 666ms	remaining: 36.3s
18:	learn: 0.6068361	total: 707ms	remaining: 36.5s
19:	learn: 0.6032959	total: 754ms	remaining: 

163:	learn: 0.4467688	total: 7.01s	remaining: 35.7s
164:	learn: 0.4465398	total: 7.06s	remaining: 35.7s
165:	learn: 0.4463409	total: 7.11s	remaining: 35.7s
166:	learn: 0.4461307	total: 7.16s	remaining: 35.7s
167:	learn: 0.4459211	total: 7.2s	remaining: 35.7s
168:	learn: 0.4457363	total: 7.25s	remaining: 35.7s
169:	learn: 0.4455335	total: 7.31s	remaining: 35.7s
170:	learn: 0.4453196	total: 7.36s	remaining: 35.7s
171:	learn: 0.4451401	total: 7.41s	remaining: 35.7s
172:	learn: 0.4449336	total: 7.46s	remaining: 35.7s
173:	learn: 0.4447397	total: 7.51s	remaining: 35.6s
174:	learn: 0.4446155	total: 7.55s	remaining: 35.6s
175:	learn: 0.4444641	total: 7.6s	remaining: 35.6s
176:	learn: 0.4443120	total: 7.65s	remaining: 35.6s
177:	learn: 0.4441577	total: 7.72s	remaining: 35.7s
178:	learn: 0.4440145	total: 7.79s	remaining: 35.7s
179:	learn: 0.4438734	total: 7.85s	remaining: 35.8s
180:	learn: 0.4436916	total: 7.9s	remaining: 35.8s
181:	learn: 0.4435609	total: 7.95s	remaining: 35.7s
182:	learn: 0.4

325:	learn: 0.4251704	total: 15.4s	remaining: 31.8s
326:	learn: 0.4250799	total: 15.4s	remaining: 31.8s
327:	learn: 0.4249846	total: 15.5s	remaining: 31.7s
328:	learn: 0.4249004	total: 15.6s	remaining: 31.7s
329:	learn: 0.4248181	total: 15.6s	remaining: 31.7s
330:	learn: 0.4247516	total: 15.7s	remaining: 31.7s
331:	learn: 0.4246609	total: 15.8s	remaining: 31.7s
332:	learn: 0.4245710	total: 15.8s	remaining: 31.7s
333:	learn: 0.4245002	total: 15.9s	remaining: 31.7s
334:	learn: 0.4243986	total: 15.9s	remaining: 31.6s
335:	learn: 0.4242974	total: 16s	remaining: 31.6s
336:	learn: 0.4242170	total: 16.1s	remaining: 31.6s
337:	learn: 0.4241757	total: 16.1s	remaining: 31.6s
338:	learn: 0.4240839	total: 16.2s	remaining: 31.6s
339:	learn: 0.4240379	total: 16.2s	remaining: 31.5s
340:	learn: 0.4239635	total: 16.3s	remaining: 31.5s
341:	learn: 0.4239126	total: 16.4s	remaining: 31.5s
342:	learn: 0.4238606	total: 16.4s	remaining: 31.4s
343:	learn: 0.4237977	total: 16.5s	remaining: 31.4s
344:	learn: 0.

484:	learn: 0.4183183	total: 24.6s	remaining: 26.1s
485:	learn: 0.4182884	total: 24.7s	remaining: 26.1s
486:	learn: 0.4182569	total: 24.7s	remaining: 26s
487:	learn: 0.4182362	total: 24.8s	remaining: 26s
488:	learn: 0.4182184	total: 24.8s	remaining: 25.9s
489:	learn: 0.4181925	total: 24.9s	remaining: 25.9s
490:	learn: 0.4181637	total: 24.9s	remaining: 25.8s
491:	learn: 0.4181346	total: 25s	remaining: 25.8s
492:	learn: 0.4181060	total: 25s	remaining: 25.7s
493:	learn: 0.4180996	total: 25.1s	remaining: 25.7s
494:	learn: 0.4180321	total: 25.1s	remaining: 25.6s
495:	learn: 0.4180217	total: 25.2s	remaining: 25.6s
496:	learn: 0.4179978	total: 25.2s	remaining: 25.5s
497:	learn: 0.4179654	total: 25.3s	remaining: 25.5s
498:	learn: 0.4179464	total: 25.3s	remaining: 25.4s
499:	learn: 0.4178968	total: 25.4s	remaining: 25.4s
500:	learn: 0.4178813	total: 25.4s	remaining: 25.3s
501:	learn: 0.4178577	total: 25.5s	remaining: 25.3s
502:	learn: 0.4178389	total: 25.5s	remaining: 25.2s
503:	learn: 0.417817

643:	learn: 0.4143940	total: 33.2s	remaining: 18.3s
644:	learn: 0.4143369	total: 33.2s	remaining: 18.3s
645:	learn: 0.4143307	total: 33.3s	remaining: 18.2s
646:	learn: 0.4142956	total: 33.3s	remaining: 18.2s
647:	learn: 0.4142874	total: 33.4s	remaining: 18.1s
648:	learn: 0.4142300	total: 33.4s	remaining: 18.1s
649:	learn: 0.4142098	total: 33.5s	remaining: 18s
650:	learn: 0.4142018	total: 33.5s	remaining: 18s
651:	learn: 0.4141751	total: 33.6s	remaining: 17.9s
652:	learn: 0.4141712	total: 33.6s	remaining: 17.9s
653:	learn: 0.4141331	total: 33.7s	remaining: 17.8s
654:	learn: 0.4140896	total: 33.7s	remaining: 17.8s
655:	learn: 0.4140503	total: 33.8s	remaining: 17.7s
656:	learn: 0.4140323	total: 33.8s	remaining: 17.7s
657:	learn: 0.4140286	total: 33.9s	remaining: 17.6s
658:	learn: 0.4140165	total: 34s	remaining: 17.6s
659:	learn: 0.4140035	total: 34s	remaining: 17.5s
660:	learn: 0.4139874	total: 34.1s	remaining: 17.5s
661:	learn: 0.4139668	total: 34.1s	remaining: 17.4s
662:	learn: 0.413924

802:	learn: 0.4103689	total: 41.7s	remaining: 10.2s
803:	learn: 0.4103639	total: 41.7s	remaining: 10.2s
804:	learn: 0.4103431	total: 41.8s	remaining: 10.1s
805:	learn: 0.4102792	total: 41.8s	remaining: 10.1s
806:	learn: 0.4102618	total: 41.9s	remaining: 10s
807:	learn: 0.4102476	total: 41.9s	remaining: 9.96s
808:	learn: 0.4101754	total: 42s	remaining: 9.92s
809:	learn: 0.4101689	total: 42.1s	remaining: 9.86s
810:	learn: 0.4101160	total: 42.1s	remaining: 9.81s
811:	learn: 0.4100946	total: 42.2s	remaining: 9.76s
812:	learn: 0.4100718	total: 42.2s	remaining: 9.71s
813:	learn: 0.4100307	total: 42.3s	remaining: 9.66s
814:	learn: 0.4099833	total: 42.3s	remaining: 9.61s
815:	learn: 0.4099638	total: 42.4s	remaining: 9.56s
816:	learn: 0.4099486	total: 42.4s	remaining: 9.51s
817:	learn: 0.4099373	total: 42.5s	remaining: 9.45s
818:	learn: 0.4099139	total: 42.5s	remaining: 9.4s
819:	learn: 0.4098590	total: 42.6s	remaining: 9.35s
820:	learn: 0.4098335	total: 42.7s	remaining: 9.3s
821:	learn: 0.4098

961:	learn: 0.4061496	total: 51.9s	remaining: 2.05s
962:	learn: 0.4061288	total: 52s	remaining: 2s
963:	learn: 0.4060975	total: 52s	remaining: 1.94s
964:	learn: 0.4060600	total: 52.1s	remaining: 1.89s
965:	learn: 0.4060170	total: 52.2s	remaining: 1.84s
966:	learn: 0.4059948	total: 52.2s	remaining: 1.78s
967:	learn: 0.4059736	total: 52.3s	remaining: 1.73s
968:	learn: 0.4059436	total: 52.3s	remaining: 1.67s
969:	learn: 0.4059073	total: 52.4s	remaining: 1.62s
970:	learn: 0.4058730	total: 52.5s	remaining: 1.57s
971:	learn: 0.4058516	total: 52.6s	remaining: 1.51s
972:	learn: 0.4058463	total: 52.6s	remaining: 1.46s
973:	learn: 0.4058428	total: 52.7s	remaining: 1.41s
974:	learn: 0.4058229	total: 52.8s	remaining: 1.35s
975:	learn: 0.4058096	total: 52.8s	remaining: 1.3s
976:	learn: 0.4057693	total: 52.9s	remaining: 1.25s
977:	learn: 0.4057553	total: 52.9s	remaining: 1.19s
978:	learn: 0.4057544	total: 53s	remaining: 1.14s
979:	learn: 0.4057393	total: 53.1s	remaining: 1.08s
980:	learn: 0.4057099	

<catboost.core.CatBoostClassifier at 0x7fa17b3d6670>

In [35]:
df_test = pd.read_csv("test.gz", compression='gzip', header='infer')
df_test_copy = df_test.copy()
unused_cols = ['id','site_id', 'app_id']
df_test_copy.drop(unused_cols, axis=1, inplace=True)
df_test_copy.drop(['hour', 'app_domain', 'app_category'], axis=1,inplace=True)

ctr_pred = opt_cat.predict_proba(df_test_copy)[:, 1]
all_id = df_test['id']
df_out = pd.DataFrame({'id': all_id, 'ctr': ctr_pred})
df_out.to_csv('Submission.csv', index=False)

### Logistic Regression (provided)

In [16]:
# embedding (all features are categorical)
print('embedding...')
from sklearn.feature_extraction import DictVectorizer
import pickle

try:
    with open('X_train_dict.pkl', 'rb') as ff:
        X_train_dict = pickle.load(ff)
    vectorizer = joblib.load('vectorizer.joblib')
    X_train = vectorizer.transform(X_train_dict)
    print('saved vectorizer loaded & applied to training set')
except:
    X_train_dict = list(df_copy.drop('click', axis=1).T.to_dict().values())
    with open('X_train_dict.pkl', 'wb') as ff:
        pickle.dump(X_train_dict, ff)
    vectorizer = DictVectorizer(sparse=True)
    X_train = vectorizer.fit_transform(X_train_dict) # can only see training dataset
    joblib.dump(vectorizer, 'vectorizer.joblib')
    print('imported data & built a vectorizer on the training set')

n, d = X_train.shape
print("n = {}, d = {}".format(n, d))

embedding...
imported data & built a vectorizer on the training set
n = 20000, d = 14076


In [17]:
X_train

<20000x14076 sparse matrix of type '<class 'numpy.float64'>'
	with 340000 stored elements in Compressed Sparse Row format>

In [18]:
# train
from sklearn.linear_model import LogisticRegression
print('fit a simple logistic regression with l1 regularization...')
clf = LogisticRegression(max_iter=20000, penalty='l1', solver='liblinear', C=1)
clf.fit(X_train, y_train)
print('...done training')

fit a simple logistic regression with l1 regularization...
...done training


In [20]:
##########################################################
######### testing code ###################################

# transform test data as well
print('loading and transforming test data...')
df_test = pd.read_csv("test.gz", compression='gzip', header='infer')
# df_test.set_index('id', inplace=True)
unused_cols = ['id', 'site_id', 'app_id']
df_test.drop(unused_cols, axis=1, inplace=True)

try:
    with open('X_test_dict.pkl', 'rb') as ff:
        X_test_dict = pickle.load('ff')
except:
    X_test_dict = list(df_test.T.to_dict().values())
    with open('X_test_dict.pkl', 'wb') as ff:
        pickle.dump(X_test_dict, ff)

X_test = vectorizer.transform(X_test_dict)

loading and transforming test data...


In [21]:
print('predicting and output to csv...')
ctr_pred = clf.predict_proba(X_test)[:, 1]
# save output: every line is (id, ctr_pred)
all_id = df_test['id']
df_out = pd.DataFrame({'id': all_id, 'ctr': ctr_pred})
df_out.to_csv('Submission.csv', index=False)

print('...done')

predicting and output to csv...
...done
