# Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats

from sklearn.model_selection import cross_validate, cross_val_score, cross_val_predict, learning_curve,\
train_test_split, GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, OneHotEncoder, FunctionTransformer, LabelEncoder, OrdinalEncoder
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.inspection import permutation_importance
from sklearn.metrics import plot_confusion_matrix, classification_report, precision_recall_curve
from sklearn.linear_model import LinearRegression, LogisticRegression, SGDRegressor, SGDClassifier, Ridge, RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC
from sklearn.compose import make_column_selector

## pipeline stuff

from sklearn.pipeline import Pipeline, make_pipeline, make_union
from sklearn.compose import ColumnTransformer, make_column_transformer, make_column_selector
from sklearn import set_config; set_config(display='diagram')

import warnings 
warnings.filterwarnings('ignore')

from sklearn.metrics import make_scorer
from tqdm.auto import tqdm


# Preprocessing

In [2]:
# def_df = pd.read_csv("/home/slawa/code/code-rep0/projects/data/defaulter_data_13364.csv", index_col=[0])
# pay_df = pd.read_csv("/home/slawa/code/code-rep0/projects/data/payer_data_41940.csv", index_col=[0])

def_df = pd.read_parquet("/Users/sjoerddewit/Desktop/Programming/6 Le Wagon Data Science/final_project/defaulter_data_6k_ids_compress.parquet")
# def_df = pd.read_parquet("/Users/sjoerddewit/Desktop/Programming/6 Le Wagon Data Science/final_project/defaulter_data_20k_ids_compress.parquet")
pay_df = pd.read_parquet("/Users/sjoerddewit/Desktop/Programming/6 Le Wagon Data Science/final_project/payer_data_20k_ids.parquet")

def_df['default'] = 1
pay_df['default'] = 0

df = pd.concat([def_df, pay_df])

df.reset_index(inplace=True)

y = df['default'].reset_index(drop=True)

X = df.drop(columns=['default']).reset_index(drop=True)

cat_vars = ['B_30', 
            'B_38', 
            'D_114', 
            'D_116', 
            'D_117', 
            'D_120', 
            'D_126', 
            'D_63', 
            'D_64', 
            'D_66', 
            'D_68']

X_corr = X.corr()

X_corr = X_corr.unstack().reset_index() # Unstack correlation matrix 
X_corr.columns = ['feature_1','feature_2', 'correlation_all'] # rename columns
X_corr.sort_values(by="correlation_all",ascending=False, inplace=True) # sort by correlation
X_corr = X_corr[X_corr['feature_1'] != X_corr['feature_2']] # Remove self correlation
X_corr = X_corr.drop_duplicates(subset='correlation_all')

red_features = list(X_corr[abs(X_corr['correlation_all'])>=.95]['feature_1']) ## abs so we also consider the negative corrs

X_red = X.drop(columns=red_features) ## dropping the highly correlated columns

## checking whether the high correlations are gone
X_red_corr = X_red.corr()
X_red_corr = X_red_corr.unstack().reset_index() # Unstack correlation matrix 
X_red_corr.columns = ['feature_1','feature_2', 'correlation_all'] # rename columns
X_red_corr.sort_values(by="correlation_all",ascending=False, inplace=True) # sort by correlation
X_red_corr = X_red_corr[X_red_corr['feature_1'] != X_red_corr['feature_2']] # Remove self correlation
X_red_corr = X_red_corr.drop_duplicates(subset='correlation_all')


## drop columns with nans if in both groups > 80% nans

nan_threshold= 0.8 ## adjust the hardcoded values
def_nans = def_df.isna().sum()/len(def_df) 
def_nans_80 = def_nans[def_nans >= nan_threshold].index
pay_nans = pay_df.isna().sum()/len(pay_df)
pay_nans_80 = pay_nans[pay_nans>= nan_threshold].index
nans_80 = [feature for feature in pay_nans_80 if feature in def_nans_80]

## check whether features were already removed
red_features_nan = [feature for feature in nans_80 if feature not in red_features] 
X_red = X_red.drop(columns=red_features_nan)
dropped_columns = red_features + red_features_nan

## Builsing the pipeline
num_vars = [feature for feature in X_red.columns[2:] if feature not in cat_vars] ## exclude dates and IDs (first two columns)
str_vars = [feature for feature in X_red.columns[2:] if not pd.api.types.is_numeric_dtype(X_red[feature])] ## columns that are not numeric at all 
#red_cat_vars = [feature for feature in cat_vars if feature not in dropped_columns + str_vars] ## remaining categorical variables that have no string values
red_cat_vars = [feature for feature in cat_vars if feature not in dropped_columns + str_vars] ## remaining categorical variables 


# impute mean/most frequent value for other nans (specific to group?)
# robustscale all numerical values

num_imputer = SimpleImputer(strategy='mean')
num_scaler = RobustScaler()

#num_imputer = KNNImputer(n_neighbors=2) ## KNNIMputer is computationally demanding
## should come AFTER SCALING

num_pipe = make_pipeline(num_imputer, num_scaler)

#str_trans = OrdinalEncoder() # is only needed if one wants to do knnimputer

#nan_trans = FunctionTransformer(nan_imp)

#nan_trans = FunctionTransformer(lambda X: X.applymap(lambda x: np.nan if x in [-1,-1.0, "-1.0", "-1"] else x))

cat_imputer = SimpleImputer(strategy="most_frequent") ## replace with KNNimputer on one neighbour, after transforming to numericals
#cat_imputer = KNNImputer(n_neighbors=1) # introducing it did not improve performance, but is computationally demanding
cat_encoder = OneHotEncoder(sparse=False, handle_unknown='ignore') ## what happens to the old columns?
cat_pipe = make_pipeline(cat_imputer, cat_encoder)
#str_pipe = make_pipeline(cat_imputer, str_trans, cat_encoder)
#str_pipe = make_pipeline(cat_imputer, cat_encoder)
# impute mean/most frequent value for other nans (specific to group?)
# robustscale all numerical values

num_imputer = SimpleImputer(strategy='mean')
num_scaler = RobustScaler()

#num_imputer = KNNImputer(n_neighbors=2) ## KNNIMputer is computationally demanding
## should come AFTER SCALING

num_pipe = make_pipeline(num_imputer, num_scaler)

#str_trans = OrdinalEncoder() # is only needed if one wants to do knnimputer

#nan_trans = FunctionTransformer(nan_imp)

#nan_trans = FunctionTransformer(lambda X: X.applymap(lambda x: np.nan if x in [-1,-1.0, "-1.0", "-1"] else x))

cat_imputer = SimpleImputer(strategy="most_frequent") ## replace with KNNimputer on one neighbour, after transforming to numericals
#cat_imputer = KNNImputer(n_neighbors=1) # introducing it did not improve performance, but is computationally demanding
cat_encoder = OneHotEncoder(sparse=False, handle_unknown='ignore') ## what happens to the old columns?
cat_pipe = make_pipeline(cat_imputer, cat_encoder)
#str_pipe = make_pipeline(cat_imputer, str_trans, cat_encoder)
#str_pipe = make_pipeline(cat_imputer, cat_encoder)

print('done ✅')


done ✅


In [3]:
print(def_df.shape , pay_df.shape)

(76747, 190) (243953, 190)


In [None]:
# 6k defaulters = (76747, 190) 20k payer = (243953, 190)  
# 20k defaulters = (116027, 190) 20k payer = (243953, 190)  


# A self-contained alternative Nans imputer

In [4]:
def alt_nan_imp(X):
    
    cat_vars = ['B_30', 
            'B_38', 
            'D_114', 
            'D_116', 
            'D_117', 
            'D_120', 
            'D_126', 
            'D_63', 
            'D_64', 
            'D_66', 
            'D_68']
    
    alt_nan_list = [-1,-1.0, "-1.0", "-1"]
    
    cat_columns = [column for column in X.columns if column in cat_vars]
    
    X[cat_columns] = X[cat_columns].applymap(lambda x: np.nan if x in alt_nan_list else x)

alt_nan_imp(X_red)

preprocessor = ColumnTransformer([
    ('num_pip', num_pipe, num_vars),
    ('cat_pip', cat_pipe, red_cat_vars)],
    remainder='drop' ## all columns not in num_vars and red_cat_vars are dropped.
)

alt_nan_imp(X)
preprocessor.fit(X)

X_pp = pd.DataFrame(preprocessor.fit_transform(X_red))
X_pp['customer_ID'] = X_red['customer_ID']
X_pp_avg = X_pp.groupby('customer_ID').mean()
y_ID = pd.DataFrame(y)
y_ID['customer_ID'] = X_red['customer_ID']
y_unique = y_ID.groupby('customer_ID').mean().astype(int) ## actually, this data is just in train_labels
X_pp_avg_train, X_pp_avg_val, y_unique_train, y_unique_val = train_test_split(X_pp_avg, y_unique, test_size=0.3) 


if 'customer_ID' in X_pp.columns:
    X_pp.drop(columns='customer_ID', inplace=True)
else:
    pass

# # Train_test_split needs to be on preprocessed data
# X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)


# pp_pred_pipe = make_pipeline(preprocessor, mod)
# pp_pred_pipe
# pp_pred_pipe.fit(X, y)

print('done ✅')


done ✅


# Creating custom amex scoring metric

In [5]:
def amex_metric(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
    
    ## TWEAK
    y_true = pd.DataFrame(y_true.reset_index(drop=True))
    y_pred = pd.DataFrame(y_pred)
    
    y_true = y_true.rename(columns={y_true.columns[0]:'target'})
    y_pred = y_pred.rename(columns={y_pred.columns[0]:'prediction'})
    ##
    
    def top_four_percent_captured(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
        df = (pd.concat([y_true, y_pred], axis='columns')
              .sort_values('prediction', ascending=False))
        df['weight'] = df['target'].apply(lambda x: 20 if x==0 else 1)
        four_pct_cutoff = int(0.04 * df['weight'].sum())
        df['weight_cumsum'] = df['weight'].cumsum()
        df_cutoff = df.loc[df['weight_cumsum'] <= four_pct_cutoff]
        return (df_cutoff['target'] == 1).sum() / (df['target'] == 1).sum()
    def weighted_gini(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
        df = (pd.concat([y_true, y_pred], axis='columns')
              .sort_values('prediction', ascending=False))
        df['weight'] = df['target'].apply(lambda x: 20 if x==0 else 1)
        df['random'] = (df['weight'] / df['weight'].sum()).cumsum()
        total_pos = (df['target'] * df['weight']).sum()
        df['cum_pos_found'] = (df['target'] * df['weight']).cumsum()
        df['lorentz'] = df['cum_pos_found'] / total_pos
        df['gini'] = (df['lorentz'] - df['random']) * df['weight']
        return df['gini'].sum()

    def normalized_weighted_gini(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
        y_true_pred = y_true.rename(columns={'target': 'prediction'})
        return weighted_gini(y_true, y_pred) / weighted_gini(y_true, y_true_pred)

    g = normalized_weighted_gini(y_true, y_pred)
    d = top_four_percent_captured(y_true, y_pred)

    return 0.5 * (g + d)

print('done ✅')


done ✅


In [6]:
amex_metric_scorer = make_scorer(amex_metric)

## dict of scoring metrics one might want to pass into cross validation
scorings = {'recall':'recall',
            'f1':'f1',
           'amex': amex_metric_scorer}

print('done ✅')

done ✅


# Averaging preprocessed X and y

In [7]:
X_pp['customer_ID'] = X_red['customer_ID']
# X_avg_pp = X_pp.groupby('customer_ID').mean()
X_avg_pp = X_pp.groupby('customer_ID').agg(['mean', 
                                            'last', 
                                            'first',
                                           'max',
                                           'min'
                                           ])


y_ID = pd.DataFrame(y)
y_ID['customer_ID'] = X_red['customer_ID']
## actually, this data is just in train_labels
y_unique = y_ID.groupby('customer_ID').mean().astype(int)
# y_unique = y_ID.groupby('customer_ID').agg(['mean', 'last', 'first']).astype(int)


print('done ✅')

print(X_avg_pp.shape , y_unique.shape)


done ✅
(26670, 910) (26670, 1)


# XG Boost & Grid Search

In [8]:
from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import GridSearchCV, KFold 
import xgboost as xgb
print('XGB Version',xgb.__version__)

print('done ✅')


XGB Version 2.0.0-dev
done ✅


In [None]:
%%time

xgb_class = XGBClassifier()

# Hyperparameter Grid
grid = {'max_depth': [3, 5, 7], 
        'n_estimators': [80, 90, 100],
        'learning_rate': [0.08, 0.1, 0.12]
         }

# Instanciate Grid Search
search = GridSearchCV(xgb_class, 
                      grid, 
                      scoring = amex_metric_scorer,
                      cv = 5,
                      n_jobs=-1 # paralellize computation
                     ) 


search.fit(X_pp_avg_train, y_unique_train,
    # evaluate loss at each iteration
    eval_set=[(X_pp_avg_train,y_unique_train), (X_pp_avg_val, y_unique_val)],  
    # stop iterating when eval loss increases 5 times in a row
    early_stopping_rounds=2
)

print('done ✅')

In [None]:
search.best_score_


In [None]:
search.best_params_


In [None]:
search.best_estimator_


In [9]:
xgb_best_reg = XGBClassifier(max_depth= 5,
                             n_estimators= 100, 
                             learning_rate= 0.08,
#                             tree_method='gpu_hist',
#                              predictor= 'gpu_predictor',
#                              subsample = 0.8,
#                              objective = 'binary.logistic',
#                              random_state = 42
                            )

xgb_best_reg.fit(X_pp_avg_train, y_unique_train,
    # evaluate loss at each iteration
    eval_set=[(X_pp_avg_train,y_unique_train), 
              (X_pp_avg_val, y_unique_val)
             ],  
    # stop iterating when eval loss increases 'x' times in a row
    early_stopping_rounds=100
)




[0]	validation_0-logloss:0.64409	validation_1-logloss:0.64491
[1]	validation_0-logloss:0.60178	validation_1-logloss:0.60368
[2]	validation_0-logloss:0.56526	validation_1-logloss:0.56792
[3]	validation_0-logloss:0.53349	validation_1-logloss:0.53710
[4]	validation_0-logloss:0.50524	validation_1-logloss:0.51016
[5]	validation_0-logloss:0.48032	validation_1-logloss:0.48609
[6]	validation_0-logloss:0.45819	validation_1-logloss:0.46499
[7]	validation_0-logloss:0.43836	validation_1-logloss:0.44632
[8]	validation_0-logloss:0.42053	validation_1-logloss:0.42927
[9]	validation_0-logloss:0.40433	validation_1-logloss:0.41401
[10]	validation_0-logloss:0.38960	validation_1-logloss:0.40028
[11]	validation_0-logloss:0.37647	validation_1-logloss:0.38788
[12]	validation_0-logloss:0.36433	validation_1-logloss:0.37675
[13]	validation_0-logloss:0.35315	validation_1-logloss:0.36661
[14]	validation_0-logloss:0.34324	validation_1-logloss:0.35762
[15]	validation_0-logloss:0.33416	validation_1-logloss:0.34938
[1

In [10]:
y_pred = xgb_best_reg.predict_proba(X_pp_avg_val)[:,1]


In [11]:
amex_metric(y_unique_val, y_pred)

0.7397451358794905

### Best until now

xgb_best_reg = XGBClassifier(max_depth= 5,
                             n_estimators= 100, 
                             learning_rate= 0.08)
                             
early_stopping_rounds=100


Best = 0.7397

# CatBoost

In [12]:
import catboost as ctb


In [None]:
%%time

model_CBC = ctb.CatBoostClassifier()

# Hyperparameter Grid
grid_cat = {'max_depth': [4, 5, 6, 7], 
        'iterations': [3000, 3500, 4000, 4500],
#         'learning_rate': [0.1, 0.11, 0.12, 0.13],
            'random_state' : [22, 32, 42]
         }

# Instanciate Grid Search
search = GridSearchCV(model_CBC, 
                      grid_cat, 
                      scoring = amex_metric_scorer,
                      cv = 5,
                      n_jobs=-1 # paralellize computation
                     ) 


search.fit(X_pp_avg_train, y_unique_train,
    # evaluate loss at each iteration
    eval_set=[(X_pp_avg_train,y_unique_train), (X_pp_avg_val, y_unique_val)],  
    # stop iterating when eval loss increases 5 times in a row
    early_stopping_rounds=3
)

print('done ✅')

In [None]:
search.best_score_


In [None]:
search.best_params_


In [None]:
search.best_estimator_


In [13]:
model_CBC = ctb.CatBoostClassifier(
    iterations=3000, 
       random_state=32,
       max_depth= 6,
#          n_estimators= 100, 
         learning_rate= 0.11 ,
#        task_type = 'GPU',
                                  )


model_CBC.fit(X_pp_avg_train, y_unique_train,
    # evaluate loss at each iteration
    eval_set=[(X_pp_avg_train,y_unique_train), (X_pp_avg_val, y_unique_val)],  
    # stop iterating when eval loss increases 5 times in a row
    early_stopping_rounds=100
)




0:	learn: 0.5764926	test: 0.5765355	test1: 0.5764009	best: 0.5764009 (0)	total: 81.1ms	remaining: 4m 3s
1:	learn: 0.4958155	test: 0.4958414	test1: 0.4964290	best: 0.4964290 (1)	total: 90.4ms	remaining: 2m 15s
2:	learn: 0.4396043	test: 0.4396310	test1: 0.4407757	best: 0.4407757 (2)	total: 99.8ms	remaining: 1m 39s
3:	learn: 0.4027818	test: 0.4028171	test1: 0.4054745	best: 0.4054745 (3)	total: 110ms	remaining: 1m 22s
4:	learn: 0.3746333	test: 0.3746783	test1: 0.3775449	best: 0.3775449 (4)	total: 120ms	remaining: 1m 12s
5:	learn: 0.3507438	test: 0.3507969	test1: 0.3540847	best: 0.3540847 (5)	total: 131ms	remaining: 1m 5s
6:	learn: 0.3336018	test: 0.3336569	test1: 0.3372620	best: 0.3372620 (6)	total: 142ms	remaining: 1m
7:	learn: 0.3206406	test: 0.3206964	test1: 0.3249507	best: 0.3249507 (7)	total: 151ms	remaining: 56.5s
8:	learn: 0.3107500	test: 0.3108047	test1: 0.3151200	best: 0.3151200 (8)	total: 160ms	remaining: 53.3s
9:	learn: 0.3022885	test: 0.3023417	test1: 0.3070891	best: 0.3070891 

80:	learn: 0.2134118	test: 0.2134391	test1: 0.2506885	best: 0.2506885 (80)	total: 890ms	remaining: 32.1s
81:	learn: 0.2128479	test: 0.2128753	test1: 0.2505293	best: 0.2505293 (81)	total: 902ms	remaining: 32.1s
82:	learn: 0.2123203	test: 0.2123475	test1: 0.2504694	best: 0.2504694 (82)	total: 912ms	remaining: 32.1s
83:	learn: 0.2116747	test: 0.2117019	test1: 0.2504055	best: 0.2504055 (83)	total: 924ms	remaining: 32.1s
84:	learn: 0.2110855	test: 0.2111128	test1: 0.2502178	best: 0.2502178 (84)	total: 935ms	remaining: 32.1s
85:	learn: 0.2103035	test: 0.2103308	test1: 0.2503788	best: 0.2502178 (84)	total: 945ms	remaining: 32s
86:	learn: 0.2098922	test: 0.2099195	test1: 0.2504533	best: 0.2502178 (84)	total: 957ms	remaining: 32s
87:	learn: 0.2093716	test: 0.2093988	test1: 0.2504240	best: 0.2502178 (84)	total: 967ms	remaining: 32s
88:	learn: 0.2089418	test: 0.2089689	test1: 0.2503580	best: 0.2502178 (84)	total: 979ms	remaining: 32s
89:	learn: 0.2083409	test: 0.2083679	test1: 0.2504200	best: 0.2

169:	learn: 0.1667564	test: 0.1667815	test1: 0.2481304	best: 0.2481304 (169)	total: 1.9s	remaining: 31.6s
170:	learn: 0.1666242	test: 0.1666494	test1: 0.2481204	best: 0.2481204 (170)	total: 1.94s	remaining: 32.2s
171:	learn: 0.1662846	test: 0.1663096	test1: 0.2479980	best: 0.2479980 (171)	total: 1.98s	remaining: 32.5s
172:	learn: 0.1657809	test: 0.1658059	test1: 0.2478974	best: 0.2478974 (172)	total: 2.01s	remaining: 32.9s
173:	learn: 0.1654140	test: 0.1654391	test1: 0.2477994	best: 0.2477994 (173)	total: 2.03s	remaining: 33s
174:	learn: 0.1651192	test: 0.1651443	test1: 0.2477932	best: 0.2477932 (174)	total: 2.04s	remaining: 33s
175:	learn: 0.1647642	test: 0.1647893	test1: 0.2477691	best: 0.2477691 (175)	total: 2.06s	remaining: 33s
176:	learn: 0.1644987	test: 0.1645237	test1: 0.2477014	best: 0.2477014 (176)	total: 2.07s	remaining: 33s
177:	learn: 0.1640243	test: 0.1640494	test1: 0.2476288	best: 0.2476288 (177)	total: 2.08s	remaining: 32.9s
178:	learn: 0.1636821	test: 0.1637072	test1: 0

259:	learn: 0.1336119	test: 0.1336361	test1: 0.2480564	best: 0.2469765 (222)	total: 2.92s	remaining: 30.8s
260:	learn: 0.1333215	test: 0.1333457	test1: 0.2480915	best: 0.2469765 (222)	total: 2.93s	remaining: 30.8s
261:	learn: 0.1329746	test: 0.1329988	test1: 0.2481651	best: 0.2469765 (222)	total: 2.94s	remaining: 30.8s
262:	learn: 0.1326804	test: 0.1327046	test1: 0.2481490	best: 0.2469765 (222)	total: 2.96s	remaining: 30.8s
263:	learn: 0.1323755	test: 0.1323996	test1: 0.2480514	best: 0.2469765 (222)	total: 2.97s	remaining: 30.8s
264:	learn: 0.1320779	test: 0.1321020	test1: 0.2480924	best: 0.2469765 (222)	total: 2.98s	remaining: 30.8s
265:	learn: 0.1319053	test: 0.1319294	test1: 0.2481840	best: 0.2469765 (222)	total: 2.99s	remaining: 30.8s
266:	learn: 0.1317365	test: 0.1317606	test1: 0.2481144	best: 0.2469765 (222)	total: 3.01s	remaining: 30.8s
267:	learn: 0.1314595	test: 0.1314836	test1: 0.2481520	best: 0.2469765 (222)	total: 3.02s	remaining: 30.8s
268:	learn: 0.1311322	test: 0.1311563

<catboost.core.CatBoostClassifier at 0x150e86190>

In [14]:
y_pred = model_CBC.predict_proba(X_pp_avg_val)[:,1]


In [15]:
amex_metric(y_unique_val, y_pred)

0.7400690509839548

In [51]:
model_CBC = ctb.CatBoostClassifier(iterations=5000, 
                                   random_state=22, 
#                                    task_type = 'GPU'
                                  )

model_CBC.fit(X_pp_avg_train, y_unique_train,
    # evaluate loss at each iteration
    eval_set=[(X_pp_avg_train,y_unique_train), (X_pp_avg_val, y_unique_val)],  
    # stop iterating when eval loss increases 100 times in a row
    early_stopping_rounds=100
)




Learning rate set to 0.032421
0:	learn: 0.6590648	test: 0.6591101	test1: 0.6588794	best: 0.6588794 (0)	total: 21.7ms	remaining: 1m 48s
1:	learn: 0.6259540	test: 0.6260125	test1: 0.6259032	best: 0.6259032 (1)	total: 36.2ms	remaining: 1m 30s
2:	learn: 0.5981965	test: 0.5982511	test1: 0.5982448	best: 0.5982448 (2)	total: 48.6ms	remaining: 1m 20s
3:	learn: 0.5712616	test: 0.5713062	test1: 0.5713872	best: 0.5713872 (3)	total: 64.5ms	remaining: 1m 20s
4:	learn: 0.5453963	test: 0.5454303	test1: 0.5453502	best: 0.5453502 (4)	total: 77.6ms	remaining: 1m 17s
5:	learn: 0.5207451	test: 0.5207726	test1: 0.5207040	best: 0.5207040 (5)	total: 97.4ms	remaining: 1m 21s
6:	learn: 0.5028288	test: 0.5028534	test1: 0.5029181	best: 0.5029181 (6)	total: 114ms	remaining: 1m 21s
7:	learn: 0.4856552	test: 0.4856776	test1: 0.4858263	best: 0.4858263 (7)	total: 126ms	remaining: 1m 18s
8:	learn: 0.4691526	test: 0.4691750	test1: 0.4693740	best: 0.4693740 (8)	total: 141ms	remaining: 1m 18s
9:	learn: 0.4534770	test: 0.

86:	learn: 0.2519770	test: 0.2520141	test1: 0.2646227	best: 0.2646227 (86)	total: 1.46s	remaining: 1m 22s
87:	learn: 0.2515772	test: 0.2516141	test1: 0.2643454	best: 0.2643454 (87)	total: 1.48s	remaining: 1m 22s
88:	learn: 0.2511891	test: 0.2512258	test1: 0.2641950	best: 0.2641950 (88)	total: 1.49s	remaining: 1m 22s
89:	learn: 0.2508225	test: 0.2508592	test1: 0.2639164	best: 0.2639164 (89)	total: 1.5s	remaining: 1m 22s
90:	learn: 0.2505398	test: 0.2505763	test1: 0.2638135	best: 0.2638135 (90)	total: 1.52s	remaining: 1m 21s
91:	learn: 0.2502325	test: 0.2502689	test1: 0.2636518	best: 0.2636518 (91)	total: 1.53s	remaining: 1m 21s
92:	learn: 0.2498942	test: 0.2499305	test1: 0.2634757	best: 0.2634757 (92)	total: 1.54s	remaining: 1m 21s
93:	learn: 0.2495164	test: 0.2495526	test1: 0.2633039	best: 0.2633039 (93)	total: 1.55s	remaining: 1m 21s
94:	learn: 0.2490281	test: 0.2490640	test1: 0.2629648	best: 0.2629648 (94)	total: 1.57s	remaining: 1m 20s
95:	learn: 0.2487270	test: 0.2487628	test1: 0.2

164:	learn: 0.2309287	test: 0.2309589	test1: 0.2548429	best: 0.2548429 (164)	total: 2.74s	remaining: 1m 20s
165:	learn: 0.2307533	test: 0.2307836	test1: 0.2547468	best: 0.2547468 (165)	total: 2.75s	remaining: 1m 20s
166:	learn: 0.2305951	test: 0.2306254	test1: 0.2547052	best: 0.2547052 (166)	total: 2.78s	remaining: 1m 20s
167:	learn: 0.2303728	test: 0.2304031	test1: 0.2546582	best: 0.2546582 (167)	total: 2.8s	remaining: 1m 20s
168:	learn: 0.2302112	test: 0.2302416	test1: 0.2546327	best: 0.2546327 (168)	total: 2.81s	remaining: 1m 20s
169:	learn: 0.2300397	test: 0.2300700	test1: 0.2545783	best: 0.2545783 (169)	total: 2.83s	remaining: 1m 20s
170:	learn: 0.2297470	test: 0.2297773	test1: 0.2544948	best: 0.2544948 (170)	total: 2.84s	remaining: 1m 20s
171:	learn: 0.2295861	test: 0.2296163	test1: 0.2544524	best: 0.2544524 (171)	total: 2.85s	remaining: 1m 20s
172:	learn: 0.2294116	test: 0.2294418	test1: 0.2544117	best: 0.2544117 (172)	total: 2.87s	remaining: 1m 19s
173:	learn: 0.2292359	test: 0

240:	learn: 0.2176122	test: 0.2176406	test1: 0.2514892	best: 0.2514375 (238)	total: 3.97s	remaining: 1m 18s
241:	learn: 0.2174716	test: 0.2175000	test1: 0.2514166	best: 0.2514166 (241)	total: 3.99s	remaining: 1m 18s
242:	learn: 0.2172780	test: 0.2173063	test1: 0.2513329	best: 0.2513329 (242)	total: 4.01s	remaining: 1m 18s
243:	learn: 0.2171004	test: 0.2171287	test1: 0.2512883	best: 0.2512883 (243)	total: 4.03s	remaining: 1m 18s
244:	learn: 0.2169328	test: 0.2169610	test1: 0.2511981	best: 0.2511981 (244)	total: 4.04s	remaining: 1m 18s
245:	learn: 0.2168211	test: 0.2168493	test1: 0.2511828	best: 0.2511828 (245)	total: 4.06s	remaining: 1m 18s
246:	learn: 0.2166762	test: 0.2167044	test1: 0.2511240	best: 0.2511240 (246)	total: 4.07s	remaining: 1m 18s
247:	learn: 0.2165066	test: 0.2165348	test1: 0.2511083	best: 0.2511083 (247)	total: 4.09s	remaining: 1m 18s
248:	learn: 0.2162974	test: 0.2163256	test1: 0.2510735	best: 0.2510735 (248)	total: 4.1s	remaining: 1m 18s
249:	learn: 0.2161459	test: 0

325:	learn: 0.2037657	test: 0.2037930	test1: 0.2489538	best: 0.2489538 (325)	total: 5.44s	remaining: 1m 18s
326:	learn: 0.2035602	test: 0.2035875	test1: 0.2489524	best: 0.2489524 (326)	total: 5.46s	remaining: 1m 17s
327:	learn: 0.2033865	test: 0.2034138	test1: 0.2489355	best: 0.2489355 (327)	total: 5.47s	remaining: 1m 17s
328:	learn: 0.2032013	test: 0.2032286	test1: 0.2488874	best: 0.2488874 (328)	total: 5.49s	remaining: 1m 17s
329:	learn: 0.2030462	test: 0.2030734	test1: 0.2488699	best: 0.2488699 (329)	total: 5.54s	remaining: 1m 18s
330:	learn: 0.2028679	test: 0.2028951	test1: 0.2488350	best: 0.2488350 (330)	total: 5.55s	remaining: 1m 18s
331:	learn: 0.2026774	test: 0.2027045	test1: 0.2488242	best: 0.2488242 (331)	total: 5.57s	remaining: 1m 18s
332:	learn: 0.2025301	test: 0.2025573	test1: 0.2488080	best: 0.2488080 (332)	total: 5.58s	remaining: 1m 18s
333:	learn: 0.2023099	test: 0.2023370	test1: 0.2487980	best: 0.2487980 (333)	total: 5.6s	remaining: 1m 18s
334:	learn: 0.2021145	test: 0

404:	learn: 0.1898439	test: 0.1898704	test1: 0.2475470	best: 0.2475470 (404)	total: 6.68s	remaining: 1m 15s
405:	learn: 0.1896685	test: 0.1896949	test1: 0.2474737	best: 0.2474737 (405)	total: 6.7s	remaining: 1m 15s
406:	learn: 0.1894605	test: 0.1894870	test1: 0.2474438	best: 0.2474438 (406)	total: 6.71s	remaining: 1m 15s
407:	learn: 0.1892742	test: 0.1893006	test1: 0.2474365	best: 0.2474365 (407)	total: 6.73s	remaining: 1m 15s
408:	learn: 0.1891223	test: 0.1891487	test1: 0.2474246	best: 0.2474246 (408)	total: 6.74s	remaining: 1m 15s
409:	learn: 0.1889876	test: 0.1890140	test1: 0.2474278	best: 0.2474246 (408)	total: 6.76s	remaining: 1m 15s
410:	learn: 0.1887936	test: 0.1888199	test1: 0.2473918	best: 0.2473918 (410)	total: 6.78s	remaining: 1m 15s
411:	learn: 0.1886546	test: 0.1886809	test1: 0.2473913	best: 0.2473913 (411)	total: 6.8s	remaining: 1m 15s
412:	learn: 0.1885271	test: 0.1885534	test1: 0.2473896	best: 0.2473896 (412)	total: 6.82s	remaining: 1m 15s
413:	learn: 0.1883739	test: 0.

482:	learn: 0.1781253	test: 0.1781511	test1: 0.2468213	best: 0.2467805 (467)	total: 7.95s	remaining: 1m 14s
483:	learn: 0.1779797	test: 0.1780055	test1: 0.2468150	best: 0.2467805 (467)	total: 7.97s	remaining: 1m 14s
484:	learn: 0.1778655	test: 0.1778912	test1: 0.2468183	best: 0.2467805 (467)	total: 7.98s	remaining: 1m 14s
485:	learn: 0.1777780	test: 0.1778038	test1: 0.2468257	best: 0.2467805 (467)	total: 7.99s	remaining: 1m 14s
486:	learn: 0.1776644	test: 0.1776902	test1: 0.2468394	best: 0.2467805 (467)	total: 8.01s	remaining: 1m 14s
487:	learn: 0.1775230	test: 0.1775488	test1: 0.2468348	best: 0.2467805 (467)	total: 8.02s	remaining: 1m 14s
488:	learn: 0.1773827	test: 0.1774085	test1: 0.2468014	best: 0.2467805 (467)	total: 8.04s	remaining: 1m 14s
489:	learn: 0.1772337	test: 0.1772595	test1: 0.2468007	best: 0.2467805 (467)	total: 8.05s	remaining: 1m 14s
490:	learn: 0.1770793	test: 0.1771050	test1: 0.2468134	best: 0.2467805 (467)	total: 8.06s	remaining: 1m 14s
491:	learn: 0.1769596	test: 

559:	learn: 0.1677681	test: 0.1677936	test1: 0.2463686	best: 0.2463449 (557)	total: 9.18s	remaining: 1m 12s
560:	learn: 0.1676810	test: 0.1677065	test1: 0.2463414	best: 0.2463414 (560)	total: 9.19s	remaining: 1m 12s
561:	learn: 0.1675235	test: 0.1675491	test1: 0.2463242	best: 0.2463242 (561)	total: 9.21s	remaining: 1m 12s
562:	learn: 0.1673855	test: 0.1674110	test1: 0.2463339	best: 0.2463242 (561)	total: 9.23s	remaining: 1m 12s
563:	learn: 0.1672849	test: 0.1673104	test1: 0.2463103	best: 0.2463103 (563)	total: 9.25s	remaining: 1m 12s
564:	learn: 0.1671323	test: 0.1671577	test1: 0.2462747	best: 0.2462747 (564)	total: 9.27s	remaining: 1m 12s
565:	learn: 0.1670065	test: 0.1670320	test1: 0.2462643	best: 0.2462643 (565)	total: 9.28s	remaining: 1m 12s
566:	learn: 0.1669414	test: 0.1669669	test1: 0.2462532	best: 0.2462532 (566)	total: 9.29s	remaining: 1m 12s
567:	learn: 0.1668286	test: 0.1668540	test1: 0.2462778	best: 0.2462532 (566)	total: 9.3s	remaining: 1m 12s
568:	learn: 0.1666921	test: 0

640:	learn: 0.1576716	test: 0.1576970	test1: 0.2459474	best: 0.2459094 (601)	total: 10.4s	remaining: 1m 11s
641:	learn: 0.1575120	test: 0.1575374	test1: 0.2459005	best: 0.2459005 (641)	total: 10.5s	remaining: 1m 10s
642:	learn: 0.1573733	test: 0.1573987	test1: 0.2458696	best: 0.2458696 (642)	total: 10.5s	remaining: 1m 11s
643:	learn: 0.1572531	test: 0.1572785	test1: 0.2458266	best: 0.2458266 (643)	total: 10.5s	remaining: 1m 10s
644:	learn: 0.1571327	test: 0.1571581	test1: 0.2458407	best: 0.2458266 (643)	total: 10.5s	remaining: 1m 10s
645:	learn: 0.1570326	test: 0.1570580	test1: 0.2458357	best: 0.2458266 (643)	total: 10.5s	remaining: 1m 10s
646:	learn: 0.1569040	test: 0.1569294	test1: 0.2458165	best: 0.2458165 (646)	total: 10.5s	remaining: 1m 10s
647:	learn: 0.1568947	test: 0.1569200	test1: 0.2458173	best: 0.2458165 (646)	total: 10.6s	remaining: 1m 10s
648:	learn: 0.1567520	test: 0.1567773	test1: 0.2458398	best: 0.2458165 (646)	total: 10.6s	remaining: 1m 10s
649:	learn: 0.1565972	test: 

728:	learn: 0.1475318	test: 0.1475565	test1: 0.2457884	best: 0.2456644 (680)	total: 11.7s	remaining: 1m 8s
729:	learn: 0.1474253	test: 0.1474500	test1: 0.2457617	best: 0.2456644 (680)	total: 11.7s	remaining: 1m 8s
730:	learn: 0.1473063	test: 0.1473310	test1: 0.2457486	best: 0.2456644 (680)	total: 11.8s	remaining: 1m 8s
731:	learn: 0.1472478	test: 0.1472725	test1: 0.2457679	best: 0.2456644 (680)	total: 11.8s	remaining: 1m 8s
732:	learn: 0.1471554	test: 0.1471801	test1: 0.2458222	best: 0.2456644 (680)	total: 11.8s	remaining: 1m 8s
733:	learn: 0.1470523	test: 0.1470770	test1: 0.2458024	best: 0.2456644 (680)	total: 11.8s	remaining: 1m 8s
734:	learn: 0.1469332	test: 0.1469578	test1: 0.2457824	best: 0.2456644 (680)	total: 11.8s	remaining: 1m 8s
735:	learn: 0.1468485	test: 0.1468732	test1: 0.2457868	best: 0.2456644 (680)	total: 11.8s	remaining: 1m 8s
736:	learn: 0.1467265	test: 0.1467512	test1: 0.2457885	best: 0.2456644 (680)	total: 11.8s	remaining: 1m 8s
737:	learn: 0.1466089	test: 0.1466335

818:	learn: 0.1380816	test: 0.1381059	test1: 0.2454287	best: 0.2453534 (798)	total: 13s	remaining: 1m 6s
819:	learn: 0.1379985	test: 0.1380227	test1: 0.2454429	best: 0.2453534 (798)	total: 13s	remaining: 1m 6s
820:	learn: 0.1378924	test: 0.1379167	test1: 0.2454390	best: 0.2453534 (798)	total: 13s	remaining: 1m 6s
821:	learn: 0.1377738	test: 0.1377982	test1: 0.2454068	best: 0.2453534 (798)	total: 13s	remaining: 1m 6s
822:	learn: 0.1376680	test: 0.1376923	test1: 0.2454151	best: 0.2453534 (798)	total: 13s	remaining: 1m 6s
823:	learn: 0.1375702	test: 0.1375945	test1: 0.2454063	best: 0.2453534 (798)	total: 13.1s	remaining: 1m 6s
824:	learn: 0.1374750	test: 0.1374993	test1: 0.2453846	best: 0.2453534 (798)	total: 13.1s	remaining: 1m 6s
825:	learn: 0.1373982	test: 0.1374225	test1: 0.2453734	best: 0.2453534 (798)	total: 13.1s	remaining: 1m 6s
826:	learn: 0.1372845	test: 0.1373088	test1: 0.2454024	best: 0.2453534 (798)	total: 13.1s	remaining: 1m 6s
827:	learn: 0.1371986	test: 0.1372230	test1: 0.

903:	learn: 0.1298134	test: 0.1298374	test1: 0.2450927	best: 0.2450795 (898)	total: 14.4s	remaining: 1m 5s
904:	learn: 0.1297164	test: 0.1297404	test1: 0.2450829	best: 0.2450795 (898)	total: 14.4s	remaining: 1m 5s
905:	learn: 0.1296094	test: 0.1296333	test1: 0.2451363	best: 0.2450795 (898)	total: 14.5s	remaining: 1m 5s
906:	learn: 0.1295299	test: 0.1295538	test1: 0.2451465	best: 0.2450795 (898)	total: 14.5s	remaining: 1m 5s
907:	learn: 0.1294446	test: 0.1294685	test1: 0.2451446	best: 0.2450795 (898)	total: 14.5s	remaining: 1m 5s
908:	learn: 0.1293565	test: 0.1293804	test1: 0.2451421	best: 0.2450795 (898)	total: 14.5s	remaining: 1m 5s
909:	learn: 0.1292289	test: 0.1292528	test1: 0.2451701	best: 0.2450795 (898)	total: 14.5s	remaining: 1m 5s
910:	learn: 0.1291321	test: 0.1291561	test1: 0.2451443	best: 0.2450795 (898)	total: 14.5s	remaining: 1m 5s
911:	learn: 0.1290167	test: 0.1290407	test1: 0.2451219	best: 0.2450795 (898)	total: 14.6s	remaining: 1m 5s
912:	learn: 0.1289263	test: 0.1289503

989:	learn: 0.1219669	test: 0.1219904	test1: 0.2449155	best: 0.2448894 (986)	total: 15.9s	remaining: 1m 4s
990:	learn: 0.1219236	test: 0.1219471	test1: 0.2449063	best: 0.2448894 (986)	total: 15.9s	remaining: 1m 4s
991:	learn: 0.1218078	test: 0.1218313	test1: 0.2448900	best: 0.2448894 (986)	total: 15.9s	remaining: 1m 4s
992:	learn: 0.1217303	test: 0.1217538	test1: 0.2449151	best: 0.2448894 (986)	total: 15.9s	remaining: 1m 4s
993:	learn: 0.1216134	test: 0.1216369	test1: 0.2449197	best: 0.2448894 (986)	total: 16s	remaining: 1m 4s
994:	learn: 0.1215241	test: 0.1215477	test1: 0.2449268	best: 0.2448894 (986)	total: 16s	remaining: 1m 4s
995:	learn: 0.1214362	test: 0.1214597	test1: 0.2448935	best: 0.2448894 (986)	total: 16s	remaining: 1m 4s
996:	learn: 0.1213176	test: 0.1213411	test1: 0.2449042	best: 0.2448894 (986)	total: 16s	remaining: 1m 4s
997:	learn: 0.1212300	test: 0.1212534	test1: 0.2449286	best: 0.2448894 (986)	total: 16.1s	remaining: 1m 4s
998:	learn: 0.1211103	test: 0.1211337	test1: 

1078:	learn: 0.1146309	test: 0.1146541	test1: 0.2450606	best: 0.2448730 (1014)	total: 17.3s	remaining: 1m 2s
1079:	learn: 0.1145729	test: 0.1145960	test1: 0.2450477	best: 0.2448730 (1014)	total: 17.3s	remaining: 1m 2s
1080:	learn: 0.1145083	test: 0.1145315	test1: 0.2450502	best: 0.2448730 (1014)	total: 17.4s	remaining: 1m 2s
1081:	learn: 0.1144245	test: 0.1144477	test1: 0.2450381	best: 0.2448730 (1014)	total: 17.4s	remaining: 1m 2s
1082:	learn: 0.1143462	test: 0.1143694	test1: 0.2450183	best: 0.2448730 (1014)	total: 17.4s	remaining: 1m 2s
1083:	learn: 0.1142721	test: 0.1142953	test1: 0.2450011	best: 0.2448730 (1014)	total: 17.4s	remaining: 1m 2s
1084:	learn: 0.1142068	test: 0.1142299	test1: 0.2449876	best: 0.2448730 (1014)	total: 17.4s	remaining: 1m 2s
1085:	learn: 0.1141463	test: 0.1141694	test1: 0.2449624	best: 0.2448730 (1014)	total: 17.4s	remaining: 1m 2s
1086:	learn: 0.1140384	test: 0.1140615	test1: 0.2448909	best: 0.2448730 (1014)	total: 17.4s	remaining: 1m 2s
1087:	learn: 0.1139

1159:	learn: 0.1083150	test: 0.1083381	test1: 0.2448734	best: 0.2447495 (1116)	total: 18.6s	remaining: 1m 1s
1160:	learn: 0.1082242	test: 0.1082473	test1: 0.2448349	best: 0.2447495 (1116)	total: 18.6s	remaining: 1m 1s
1161:	learn: 0.1081384	test: 0.1081615	test1: 0.2448661	best: 0.2447495 (1116)	total: 18.6s	remaining: 1m 1s
1162:	learn: 0.1080841	test: 0.1081072	test1: 0.2448715	best: 0.2447495 (1116)	total: 18.6s	remaining: 1m 1s
1163:	learn: 0.1080013	test: 0.1080243	test1: 0.2448975	best: 0.2447495 (1116)	total: 18.6s	remaining: 1m 1s
1164:	learn: 0.1079001	test: 0.1079231	test1: 0.2449070	best: 0.2447495 (1116)	total: 18.6s	remaining: 1m 1s
1165:	learn: 0.1078472	test: 0.1078702	test1: 0.2449030	best: 0.2447495 (1116)	total: 18.7s	remaining: 1m 1s
1166:	learn: 0.1077874	test: 0.1078103	test1: 0.2448893	best: 0.2447495 (1116)	total: 18.7s	remaining: 1m 1s
1167:	learn: 0.1077274	test: 0.1077504	test1: 0.2448576	best: 0.2447495 (1116)	total: 18.7s	remaining: 1m 1s
1168:	learn: 0.1076

<catboost.core.CatBoostClassifier at 0x153d1d760>

In [52]:
y_pred = model_CBC.predict_proba(X_pp_avg_val)[:,1]


In [53]:
amex_metric(y_unique_val, y_pred)

0.7468562780979094

In [39]:
print (X_pp_avg_train.shape, y_unique_train.shape)

(18669, 182) (18669, 1)


In [43]:
import pickle

In [44]:
pickle.dump(model_CBC, open('../pickles/cat_boost_075', 'wb'))

### Best until now

model_CBC = ctb.CatBoostClassifier(iterations=5000, 
                                   random_state=22
                                  )
                             
early_stopping_rounds=100


Best = 0.7539

# Cat-Boost & Optuna

https://optuna.org/

In [19]:
import optuna
from sklearn.metrics import accuracy_score


In [20]:
def objective(trial):
    train_x, valid_x, train_y, valid_y = train_test_split(X_pp_avg,y_unique, test_size=0.3)

    param = {
        "objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1),
        "depth": trial.suggest_int("depth", 1, 12),
        "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
        "bootstrap_type": trial.suggest_categorical(
            "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
        ),
        "used_ram_limit": "3gb",
    }

    if param["bootstrap_type"] == "Bayesian":
        param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
    elif param["bootstrap_type"] == "Bernoulli":
        param["subsample"] = trial.suggest_float("subsample", 0.1, 1)

    gbm = ctb.CatBoostClassifier(**param)
    gbm = ctb.CatBoostClassifier(**param)

    gbm.fit(train_x, train_y, eval_set=[(valid_x, valid_y)], verbose=0, early_stopping_rounds=100)

    preds = gbm.predict(valid_x)
    pred_labels = np.rint(preds)
    accuracy = accuracy_score(valid_y, pred_labels)
    return accuracy

In [21]:
%%time
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50, timeout=600)

[32m[I 2022-09-06 14:46:48,390][0m A new study created in memory with name: no-name-9563455e-8766-4b66-a32f-8720fb1bc1d6[0m
[32m[I 2022-09-06 14:47:01,916][0m Trial 0 finished with value: 0.89501312335958 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.08693493234319488, 'depth': 5, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 0.2172473720864343}. Best is trial 0 with value: 0.89501312335958.[0m
[32m[I 2022-09-06 14:47:47,910][0m Trial 1 finished with value: 0.889763779527559 and parameters: {'objective': 'CrossEntropy', 'colsample_bylevel': 0.04910626041261207, 'depth': 10, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 0.322360252204148}. Best is trial 0 with value: 0.89501312335958.[0m
[32m[I 2022-09-06 14:48:00,242][0m Trial 2 finished with value: 0.8926384201974753 and parameters: {'objective': 'CrossEntropy', 'colsample_bylevel': 0.012125567445736842, 'depth': 9, 'boosting_type': 'Ord

[32m[I 2022-09-06 14:51:47,728][0m Trial 26 finished with value: 0.8888888888888888 and parameters: {'objective': 'CrossEntropy', 'colsample_bylevel': 0.08489845427320424, 'depth': 3, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'subsample': 0.506966197388123}. Best is trial 0 with value: 0.89501312335958.[0m
[32m[I 2022-09-06 14:51:59,932][0m Trial 27 finished with value: 0.8932633420822397 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.032668606824735595, 'depth': 7, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 1.9596807774139768}. Best is trial 0 with value: 0.89501312335958.[0m
[32m[I 2022-09-06 14:52:12,784][0m Trial 28 finished with value: 0.8880139982502188 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.03314568977026316, 'depth': 5, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 2.3123116292387182}. Best is trial 0 with value: 0.89501312335958.[0m
[32m[I

CPU times: user 29min 25s, sys: 10min 36s, total: 40min 2s
Wall time: 10min 55s


In [22]:
print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 43
Best trial:
  Value: 0.8956380452443444
  Params: 
    objective: Logloss
    colsample_bylevel: 0.0705495263963352
    depth: 10
    boosting_type: Ordered
    bootstrap_type: MVS


In [23]:
param_dist = { }
for key, value in trial.params.items():
    param_dist[key]=value
param_dist


{'objective': 'Logloss',
 'colsample_bylevel': 0.0705495263963352,
 'depth': 10,
 'boosting_type': 'Ordered',
 'bootstrap_type': 'MVS'}

In [48]:
gbm = ctb.CatBoostClassifier(**param_dist,iterations=5000)

model_CBC.fit(X_pp_avg_train, y_unique_train,
    # evaluate loss at each iteration
    eval_set=[(X_pp_avg_train,y_unique_train), (X_pp_avg_val, y_unique_val)],  
    # stop iterating when eval loss increases 5 times in a row
    early_stopping_rounds=100
)


Learning rate set to 0.008218
0:	learn: 0.6843013	total: 20ms	remaining: 1m 39s
1:	learn: 0.6750722	total: 33.2ms	remaining: 1m 22s
2:	learn: 0.6667166	total: 44.6ms	remaining: 1m 14s
3:	learn: 0.6580421	total: 57.2ms	remaining: 1m 11s
4:	learn: 0.6491690	total: 75.8ms	remaining: 1m 15s
5:	learn: 0.6402522	total: 87.1ms	remaining: 1m 12s
6:	learn: 0.6329898	total: 98.5ms	remaining: 1m 10s
7:	learn: 0.6254789	total: 114ms	remaining: 1m 10s
8:	learn: 0.6181434	total: 124ms	remaining: 1m 9s
9:	learn: 0.6096090	total: 137ms	remaining: 1m 8s
10:	learn: 0.6021988	total: 154ms	remaining: 1m 9s
11:	learn: 0.5947460	total: 166ms	remaining: 1m 8s
12:	learn: 0.5877849	total: 177ms	remaining: 1m 7s
13:	learn: 0.5812210	total: 191ms	remaining: 1m 7s
14:	learn: 0.5750302	total: 202ms	remaining: 1m 7s
15:	learn: 0.5679367	total: 219ms	remaining: 1m 8s
16:	learn: 0.5617033	total: 233ms	remaining: 1m 8s
17:	learn: 0.5552729	total: 251ms	remaining: 1m 9s
18:	learn: 0.5495914	total: 263ms	remaining: 1m 9

166:	learn: 0.2872311	total: 2.49s	remaining: 1m 11s
167:	learn: 0.2867497	total: 2.5s	remaining: 1m 11s
168:	learn: 0.2863147	total: 2.51s	remaining: 1m 11s
169:	learn: 0.2859577	total: 2.53s	remaining: 1m 11s
170:	learn: 0.2855244	total: 2.55s	remaining: 1m 12s
171:	learn: 0.2852151	total: 2.57s	remaining: 1m 12s
172:	learn: 0.2848114	total: 2.58s	remaining: 1m 12s
173:	learn: 0.2843309	total: 2.6s	remaining: 1m 12s
174:	learn: 0.2839323	total: 2.61s	remaining: 1m 12s
175:	learn: 0.2835323	total: 2.63s	remaining: 1m 12s
176:	learn: 0.2831755	total: 2.65s	remaining: 1m 12s
177:	learn: 0.2828571	total: 2.66s	remaining: 1m 12s
178:	learn: 0.2824505	total: 2.67s	remaining: 1m 12s
179:	learn: 0.2820267	total: 2.7s	remaining: 1m 12s
180:	learn: 0.2817240	total: 2.71s	remaining: 1m 12s
181:	learn: 0.2813641	total: 2.72s	remaining: 1m 12s
182:	learn: 0.2810688	total: 2.74s	remaining: 1m 12s
183:	learn: 0.2807952	total: 2.75s	remaining: 1m 11s
184:	learn: 0.2804001	total: 2.76s	remaining: 1m 

333:	learn: 0.2522909	total: 4.98s	remaining: 1m 9s
334:	learn: 0.2521909	total: 4.99s	remaining: 1m 9s
335:	learn: 0.2521128	total: 5.01s	remaining: 1m 9s
336:	learn: 0.2519897	total: 5.03s	remaining: 1m 9s
337:	learn: 0.2518905	total: 5.04s	remaining: 1m 9s
338:	learn: 0.2517976	total: 5.06s	remaining: 1m 9s
339:	learn: 0.2517135	total: 5.07s	remaining: 1m 9s
340:	learn: 0.2516319	total: 5.09s	remaining: 1m 9s
341:	learn: 0.2515457	total: 5.1s	remaining: 1m 9s
342:	learn: 0.2514218	total: 5.12s	remaining: 1m 9s
343:	learn: 0.2513208	total: 5.18s	remaining: 1m 10s
344:	learn: 0.2512274	total: 5.2s	remaining: 1m 10s
345:	learn: 0.2511119	total: 5.21s	remaining: 1m 10s
346:	learn: 0.2510349	total: 5.23s	remaining: 1m 10s
347:	learn: 0.2509436	total: 5.25s	remaining: 1m 10s
348:	learn: 0.2508281	total: 5.27s	remaining: 1m 10s
349:	learn: 0.2507518	total: 5.28s	remaining: 1m 10s
350:	learn: 0.2506525	total: 5.29s	remaining: 1m 10s
351:	learn: 0.2505534	total: 5.31s	remaining: 1m 10s
352:	

493:	learn: 0.2392317	total: 7.27s	remaining: 1m 6s
494:	learn: 0.2391680	total: 7.29s	remaining: 1m 6s
495:	learn: 0.2391219	total: 7.3s	remaining: 1m 6s
496:	learn: 0.2390674	total: 7.32s	remaining: 1m 6s
497:	learn: 0.2390087	total: 7.33s	remaining: 1m 6s
498:	learn: 0.2389532	total: 7.35s	remaining: 1m 6s
499:	learn: 0.2388826	total: 7.36s	remaining: 1m 6s
500:	learn: 0.2388020	total: 7.38s	remaining: 1m 6s
501:	learn: 0.2387459	total: 7.39s	remaining: 1m 6s
502:	learn: 0.2386870	total: 7.4s	remaining: 1m 6s
503:	learn: 0.2386325	total: 7.41s	remaining: 1m 6s
504:	learn: 0.2385742	total: 7.43s	remaining: 1m 6s
505:	learn: 0.2385145	total: 7.44s	remaining: 1m 6s
506:	learn: 0.2384418	total: 7.45s	remaining: 1m 6s
507:	learn: 0.2383800	total: 7.46s	remaining: 1m 5s
508:	learn: 0.2383191	total: 7.48s	remaining: 1m 6s
509:	learn: 0.2382680	total: 7.5s	remaining: 1m 6s
510:	learn: 0.2381902	total: 7.51s	remaining: 1m 5s
511:	learn: 0.2381254	total: 7.53s	remaining: 1m 6s
512:	learn: 0.2

662:	learn: 0.2297962	total: 9.54s	remaining: 1m 2s
663:	learn: 0.2297519	total: 9.55s	remaining: 1m 2s
664:	learn: 0.2297049	total: 9.56s	remaining: 1m 2s
665:	learn: 0.2296568	total: 9.57s	remaining: 1m 2s
666:	learn: 0.2296052	total: 9.59s	remaining: 1m 2s
667:	learn: 0.2295611	total: 9.6s	remaining: 1m 2s
668:	learn: 0.2295105	total: 9.62s	remaining: 1m 2s
669:	learn: 0.2294666	total: 9.63s	remaining: 1m 2s
670:	learn: 0.2294288	total: 9.64s	remaining: 1m 2s
671:	learn: 0.2293861	total: 9.66s	remaining: 1m 2s
672:	learn: 0.2293327	total: 9.67s	remaining: 1m 2s
673:	learn: 0.2292845	total: 9.68s	remaining: 1m 2s
674:	learn: 0.2292276	total: 9.7s	remaining: 1m 2s
675:	learn: 0.2291839	total: 9.71s	remaining: 1m 2s
676:	learn: 0.2291373	total: 9.72s	remaining: 1m 2s
677:	learn: 0.2291013	total: 9.74s	remaining: 1m 2s
678:	learn: 0.2290491	total: 9.75s	remaining: 1m 2s
679:	learn: 0.2290055	total: 9.77s	remaining: 1m 2s
680:	learn: 0.2289495	total: 9.79s	remaining: 1m 2s
681:	learn: 0.

835:	learn: 0.2218351	total: 11.8s	remaining: 58.8s
836:	learn: 0.2217875	total: 11.8s	remaining: 58.8s
837:	learn: 0.2217422	total: 11.8s	remaining: 58.8s
838:	learn: 0.2216821	total: 11.9s	remaining: 58.8s
839:	learn: 0.2216332	total: 11.9s	remaining: 58.8s
840:	learn: 0.2216006	total: 11.9s	remaining: 58.8s
841:	learn: 0.2215505	total: 11.9s	remaining: 58.7s
842:	learn: 0.2215104	total: 11.9s	remaining: 58.7s
843:	learn: 0.2214751	total: 11.9s	remaining: 58.7s
844:	learn: 0.2214313	total: 11.9s	remaining: 58.7s
845:	learn: 0.2213957	total: 11.9s	remaining: 58.7s
846:	learn: 0.2213515	total: 12s	remaining: 58.6s
847:	learn: 0.2213030	total: 12s	remaining: 58.6s
848:	learn: 0.2212584	total: 12s	remaining: 58.6s
849:	learn: 0.2212074	total: 12s	remaining: 58.6s
850:	learn: 0.2211679	total: 12s	remaining: 58.6s
851:	learn: 0.2211291	total: 12s	remaining: 58.6s
852:	learn: 0.2210761	total: 12.1s	remaining: 58.6s
853:	learn: 0.2210391	total: 12.1s	remaining: 58.6s
854:	learn: 0.2209834	to

1004:	learn: 0.2146341	total: 14.1s	remaining: 56.1s
1005:	learn: 0.2145693	total: 14.1s	remaining: 56.1s
1006:	learn: 0.2145120	total: 14.1s	remaining: 56.1s
1007:	learn: 0.2144693	total: 14.2s	remaining: 56.1s
1008:	learn: 0.2144350	total: 14.2s	remaining: 56.1s
1009:	learn: 0.2143922	total: 14.2s	remaining: 56.1s
1010:	learn: 0.2143422	total: 14.2s	remaining: 56s
1011:	learn: 0.2143019	total: 14.2s	remaining: 56s
1012:	learn: 0.2142683	total: 14.2s	remaining: 56s
1013:	learn: 0.2142184	total: 14.2s	remaining: 56s
1014:	learn: 0.2141739	total: 14.3s	remaining: 56s
1015:	learn: 0.2141330	total: 14.3s	remaining: 56s
1016:	learn: 0.2140943	total: 14.3s	remaining: 55.9s
1017:	learn: 0.2140519	total: 14.3s	remaining: 55.9s
1018:	learn: 0.2140109	total: 14.3s	remaining: 55.9s
1019:	learn: 0.2139750	total: 14.3s	remaining: 55.9s
1020:	learn: 0.2139397	total: 14.3s	remaining: 55.9s
1021:	learn: 0.2139035	total: 14.4s	remaining: 55.9s
1022:	learn: 0.2138551	total: 14.4s	remaining: 55.8s
1023:

1171:	learn: 0.2077744	total: 16.4s	remaining: 53.5s
1172:	learn: 0.2077460	total: 16.4s	remaining: 53.5s
1173:	learn: 0.2077021	total: 16.4s	remaining: 53.5s
1174:	learn: 0.2076481	total: 16.4s	remaining: 53.5s
1175:	learn: 0.2076010	total: 16.5s	remaining: 53.5s
1176:	learn: 0.2075605	total: 16.5s	remaining: 53.5s
1177:	learn: 0.2075138	total: 16.5s	remaining: 53.6s
1178:	learn: 0.2074634	total: 16.5s	remaining: 53.5s
1179:	learn: 0.2074144	total: 16.5s	remaining: 53.6s
1180:	learn: 0.2073702	total: 16.6s	remaining: 53.5s
1181:	learn: 0.2073285	total: 16.6s	remaining: 53.5s
1182:	learn: 0.2072819	total: 16.6s	remaining: 53.5s
1183:	learn: 0.2072384	total: 16.6s	remaining: 53.5s
1184:	learn: 0.2071737	total: 16.6s	remaining: 53.5s
1185:	learn: 0.2071337	total: 16.6s	remaining: 53.5s
1186:	learn: 0.2070846	total: 16.6s	remaining: 53.5s
1187:	learn: 0.2070470	total: 16.6s	remaining: 53.4s
1188:	learn: 0.2069981	total: 16.7s	remaining: 53.4s
1189:	learn: 0.2069536	total: 16.7s	remaining:

1339:	learn: 0.2003863	total: 18.9s	remaining: 51.6s
1340:	learn: 0.2003386	total: 18.9s	remaining: 51.6s
1341:	learn: 0.2002963	total: 18.9s	remaining: 51.6s
1342:	learn: 0.2002445	total: 18.9s	remaining: 51.6s
1343:	learn: 0.2001908	total: 18.9s	remaining: 51.5s
1344:	learn: 0.2001507	total: 19s	remaining: 51.5s
1345:	learn: 0.2000995	total: 19s	remaining: 51.5s
1346:	learn: 0.2000612	total: 19s	remaining: 51.5s
1347:	learn: 0.2000132	total: 19s	remaining: 51.5s
1348:	learn: 0.1999631	total: 19s	remaining: 51.5s
1349:	learn: 0.1999252	total: 19s	remaining: 51.5s
1350:	learn: 0.1998633	total: 19.1s	remaining: 51.5s
1351:	learn: 0.1998109	total: 19.1s	remaining: 51.5s
1352:	learn: 0.1997605	total: 19.1s	remaining: 51.4s
1353:	learn: 0.1997227	total: 19.1s	remaining: 51.4s
1354:	learn: 0.1996730	total: 19.1s	remaining: 51.4s
1355:	learn: 0.1996216	total: 19.1s	remaining: 51.4s
1356:	learn: 0.1995696	total: 19.1s	remaining: 51.4s
1357:	learn: 0.1995310	total: 19.2s	remaining: 51.4s
1358:

1509:	learn: 0.1926061	total: 21.6s	remaining: 49.8s
1510:	learn: 0.1925569	total: 21.6s	remaining: 49.8s
1511:	learn: 0.1925216	total: 21.6s	remaining: 49.8s
1512:	learn: 0.1924844	total: 21.6s	remaining: 49.8s
1513:	learn: 0.1924458	total: 21.6s	remaining: 49.8s
1514:	learn: 0.1923989	total: 21.6s	remaining: 49.8s
1515:	learn: 0.1923636	total: 21.7s	remaining: 49.8s
1516:	learn: 0.1923083	total: 21.7s	remaining: 49.7s
1517:	learn: 0.1922663	total: 21.7s	remaining: 49.7s
1518:	learn: 0.1922212	total: 21.7s	remaining: 49.7s
1519:	learn: 0.1921835	total: 21.7s	remaining: 49.7s
1520:	learn: 0.1921332	total: 21.7s	remaining: 49.7s
1521:	learn: 0.1920912	total: 21.7s	remaining: 49.7s
1522:	learn: 0.1920534	total: 21.8s	remaining: 49.7s
1523:	learn: 0.1920263	total: 21.8s	remaining: 49.6s
1524:	learn: 0.1919843	total: 21.8s	remaining: 49.6s
1525:	learn: 0.1919430	total: 21.8s	remaining: 49.6s
1526:	learn: 0.1918951	total: 21.8s	remaining: 49.6s
1527:	learn: 0.1918510	total: 21.8s	remaining:

1680:	learn: 0.1854658	total: 24.1s	remaining: 47.5s
1681:	learn: 0.1854321	total: 24.1s	remaining: 47.5s
1682:	learn: 0.1853905	total: 24.1s	remaining: 47.5s
1683:	learn: 0.1853505	total: 24.1s	remaining: 47.5s
1684:	learn: 0.1853095	total: 24.1s	remaining: 47.5s
1685:	learn: 0.1852754	total: 24.2s	remaining: 47.5s
1686:	learn: 0.1852298	total: 24.2s	remaining: 47.4s
1687:	learn: 0.1851865	total: 24.2s	remaining: 47.4s
1688:	learn: 0.1851497	total: 24.2s	remaining: 47.4s
1689:	learn: 0.1851013	total: 24.2s	remaining: 47.4s
1690:	learn: 0.1850663	total: 24.2s	remaining: 47.4s
1691:	learn: 0.1850290	total: 24.2s	remaining: 47.4s
1692:	learn: 0.1849967	total: 24.2s	remaining: 47.3s
1693:	learn: 0.1849650	total: 24.2s	remaining: 47.3s
1694:	learn: 0.1849258	total: 24.3s	remaining: 47.3s
1695:	learn: 0.1848880	total: 24.3s	remaining: 47.3s
1696:	learn: 0.1848439	total: 24.3s	remaining: 47.3s
1697:	learn: 0.1847990	total: 24.3s	remaining: 47.3s
1698:	learn: 0.1847706	total: 24.3s	remaining:

1842:	learn: 0.1794357	total: 26.4s	remaining: 45.2s
1843:	learn: 0.1794164	total: 26.4s	remaining: 45.2s
1844:	learn: 0.1793665	total: 26.4s	remaining: 45.1s
1845:	learn: 0.1793249	total: 26.4s	remaining: 45.1s
1846:	learn: 0.1792816	total: 26.4s	remaining: 45.1s
1847:	learn: 0.1792534	total: 26.4s	remaining: 45.1s
1848:	learn: 0.1792200	total: 26.5s	remaining: 45.1s
1849:	learn: 0.1791753	total: 26.5s	remaining: 45.1s
1850:	learn: 0.1791270	total: 26.5s	remaining: 45s
1851:	learn: 0.1790879	total: 26.5s	remaining: 45s
1852:	learn: 0.1790527	total: 26.5s	remaining: 45s
1853:	learn: 0.1790270	total: 26.5s	remaining: 45s
1854:	learn: 0.1789998	total: 26.5s	remaining: 45s
1855:	learn: 0.1789618	total: 26.5s	remaining: 45s
1856:	learn: 0.1789232	total: 26.5s	remaining: 44.9s
1857:	learn: 0.1788995	total: 26.6s	remaining: 44.9s
1858:	learn: 0.1788662	total: 26.6s	remaining: 44.9s
1859:	learn: 0.1788253	total: 26.6s	remaining: 44.9s
1860:	learn: 0.1787939	total: 26.6s	remaining: 44.9s
1861:

1999:	learn: 0.1740319	total: 28.5s	remaining: 42.7s
2000:	learn: 0.1739984	total: 28.5s	remaining: 42.7s
2001:	learn: 0.1739621	total: 28.5s	remaining: 42.7s
2002:	learn: 0.1739274	total: 28.5s	remaining: 42.6s
2003:	learn: 0.1738982	total: 28.5s	remaining: 42.6s
2004:	learn: 0.1738566	total: 28.5s	remaining: 42.6s
2005:	learn: 0.1738214	total: 28.5s	remaining: 42.6s
2006:	learn: 0.1737792	total: 28.6s	remaining: 42.6s
2007:	learn: 0.1737602	total: 28.6s	remaining: 42.6s
2008:	learn: 0.1737300	total: 28.6s	remaining: 42.6s
2009:	learn: 0.1737042	total: 28.6s	remaining: 42.5s
2010:	learn: 0.1736704	total: 28.6s	remaining: 42.5s
2011:	learn: 0.1736353	total: 28.6s	remaining: 42.5s
2012:	learn: 0.1735967	total: 28.6s	remaining: 42.5s
2013:	learn: 0.1735784	total: 28.6s	remaining: 42.5s
2014:	learn: 0.1735500	total: 28.7s	remaining: 42.4s
2015:	learn: 0.1735086	total: 28.7s	remaining: 42.4s
2016:	learn: 0.1734666	total: 28.7s	remaining: 42.4s
2017:	learn: 0.1734413	total: 28.7s	remaining:

2157:	learn: 0.1686175	total: 30.8s	remaining: 40.5s
2158:	learn: 0.1685807	total: 30.8s	remaining: 40.5s
2159:	learn: 0.1685430	total: 30.8s	remaining: 40.5s
2160:	learn: 0.1685219	total: 30.8s	remaining: 40.5s
2161:	learn: 0.1684820	total: 30.8s	remaining: 40.5s
2162:	learn: 0.1684701	total: 30.8s	remaining: 40.5s
2163:	learn: 0.1684309	total: 30.9s	remaining: 40.4s
2164:	learn: 0.1684016	total: 30.9s	remaining: 40.4s
2165:	learn: 0.1683559	total: 30.9s	remaining: 40.4s
2166:	learn: 0.1683232	total: 30.9s	remaining: 40.4s
2167:	learn: 0.1682905	total: 30.9s	remaining: 40.4s
2168:	learn: 0.1682561	total: 30.9s	remaining: 40.4s
2169:	learn: 0.1682222	total: 31s	remaining: 40.4s
2170:	learn: 0.1681841	total: 31s	remaining: 40.4s
2171:	learn: 0.1681799	total: 31s	remaining: 40.3s
2172:	learn: 0.1681359	total: 31s	remaining: 40.3s
2173:	learn: 0.1680967	total: 31s	remaining: 40.3s
2174:	learn: 0.1680681	total: 31s	remaining: 40.3s
2175:	learn: 0.1680392	total: 31s	remaining: 40.3s
2176:	l

2313:	learn: 0.1636013	total: 33s	remaining: 38.3s
2314:	learn: 0.1635925	total: 33s	remaining: 38.3s
2315:	learn: 0.1635560	total: 33.1s	remaining: 38.3s
2316:	learn: 0.1635058	total: 33.1s	remaining: 38.3s
2317:	learn: 0.1634789	total: 33.1s	remaining: 38.3s
2318:	learn: 0.1634488	total: 33.1s	remaining: 38.3s
2319:	learn: 0.1634214	total: 33.1s	remaining: 38.3s
2320:	learn: 0.1633797	total: 33.1s	remaining: 38.2s
2321:	learn: 0.1633485	total: 33.1s	remaining: 38.2s
2322:	learn: 0.1633110	total: 33.2s	remaining: 38.2s
2323:	learn: 0.1632877	total: 33.2s	remaining: 38.2s
2324:	learn: 0.1632554	total: 33.2s	remaining: 38.2s
2325:	learn: 0.1632275	total: 33.2s	remaining: 38.2s
2326:	learn: 0.1631969	total: 33.2s	remaining: 38.2s
2327:	learn: 0.1631652	total: 33.2s	remaining: 38.1s
2328:	learn: 0.1631337	total: 33.2s	remaining: 38.1s
2329:	learn: 0.1630869	total: 33.3s	remaining: 38.1s
2330:	learn: 0.1630580	total: 33.3s	remaining: 38.1s
2331:	learn: 0.1630162	total: 33.3s	remaining: 38.

2475:	learn: 0.1586453	total: 35.5s	remaining: 36.2s
2476:	learn: 0.1586164	total: 35.6s	remaining: 36.2s
2477:	learn: 0.1585773	total: 35.6s	remaining: 36.2s
2478:	learn: 0.1585505	total: 35.6s	remaining: 36.2s
2479:	learn: 0.1585208	total: 35.6s	remaining: 36.2s
2480:	learn: 0.1584805	total: 35.6s	remaining: 36.2s
2481:	learn: 0.1584564	total: 35.6s	remaining: 36.1s
2482:	learn: 0.1584091	total: 35.6s	remaining: 36.1s
2483:	learn: 0.1583767	total: 35.7s	remaining: 36.1s
2484:	learn: 0.1583355	total: 35.7s	remaining: 36.1s
2485:	learn: 0.1582995	total: 35.7s	remaining: 36.1s
2486:	learn: 0.1582680	total: 35.7s	remaining: 36.1s
2487:	learn: 0.1582376	total: 35.7s	remaining: 36.1s
2488:	learn: 0.1582062	total: 35.7s	remaining: 36s
2489:	learn: 0.1581800	total: 35.8s	remaining: 36s
2490:	learn: 0.1581408	total: 35.8s	remaining: 36s
2491:	learn: 0.1581122	total: 35.8s	remaining: 36s
2492:	learn: 0.1580804	total: 35.8s	remaining: 36s
2493:	learn: 0.1580577	total: 35.8s	remaining: 36s
2494:

2640:	learn: 0.1536520	total: 38.3s	remaining: 34.2s
2641:	learn: 0.1536271	total: 38.3s	remaining: 34.2s
2642:	learn: 0.1536029	total: 38.3s	remaining: 34.2s
2643:	learn: 0.1535767	total: 38.3s	remaining: 34.1s
2644:	learn: 0.1535535	total: 38.3s	remaining: 34.1s
2645:	learn: 0.1535179	total: 38.3s	remaining: 34.1s
2646:	learn: 0.1534931	total: 38.4s	remaining: 34.1s
2647:	learn: 0.1534675	total: 38.4s	remaining: 34.1s
2648:	learn: 0.1534372	total: 38.4s	remaining: 34.1s
2649:	learn: 0.1534009	total: 38.4s	remaining: 34.1s
2650:	learn: 0.1533649	total: 38.4s	remaining: 34s
2651:	learn: 0.1533304	total: 38.4s	remaining: 34s
2652:	learn: 0.1533018	total: 38.4s	remaining: 34s
2653:	learn: 0.1532771	total: 38.4s	remaining: 34s
2654:	learn: 0.1532625	total: 38.5s	remaining: 34s
2655:	learn: 0.1532299	total: 38.5s	remaining: 34s
2656:	learn: 0.1532087	total: 38.5s	remaining: 33.9s
2657:	learn: 0.1531842	total: 38.5s	remaining: 33.9s
2658:	learn: 0.1531574	total: 38.5s	remaining: 33.9s
2659:

2807:	learn: 0.1490782	total: 40.6s	remaining: 31.7s
2808:	learn: 0.1490458	total: 40.6s	remaining: 31.7s
2809:	learn: 0.1490172	total: 40.6s	remaining: 31.7s
2810:	learn: 0.1489890	total: 40.6s	remaining: 31.7s
2811:	learn: 0.1489608	total: 40.7s	remaining: 31.6s
2812:	learn: 0.1489328	total: 40.7s	remaining: 31.6s
2813:	learn: 0.1489151	total: 40.7s	remaining: 31.6s
2814:	learn: 0.1488943	total: 40.7s	remaining: 31.6s
2815:	learn: 0.1488671	total: 40.7s	remaining: 31.6s
2816:	learn: 0.1488323	total: 40.7s	remaining: 31.6s
2817:	learn: 0.1488035	total: 40.7s	remaining: 31.6s
2818:	learn: 0.1487691	total: 40.8s	remaining: 31.5s
2819:	learn: 0.1487355	total: 40.8s	remaining: 31.5s
2820:	learn: 0.1487128	total: 40.8s	remaining: 31.5s
2821:	learn: 0.1486868	total: 40.8s	remaining: 31.5s
2822:	learn: 0.1486592	total: 40.8s	remaining: 31.5s
2823:	learn: 0.1486299	total: 40.8s	remaining: 31.5s
2824:	learn: 0.1485938	total: 40.9s	remaining: 31.5s
2825:	learn: 0.1485615	total: 40.9s	remaining:

2964:	learn: 0.1448619	total: 43.1s	remaining: 29.6s
2965:	learn: 0.1448340	total: 43.1s	remaining: 29.6s
2966:	learn: 0.1448006	total: 43.1s	remaining: 29.6s
2967:	learn: 0.1447732	total: 43.2s	remaining: 29.5s
2968:	learn: 0.1447431	total: 43.2s	remaining: 29.5s
2969:	learn: 0.1447113	total: 43.2s	remaining: 29.5s
2970:	learn: 0.1446873	total: 43.2s	remaining: 29.5s
2971:	learn: 0.1446611	total: 43.2s	remaining: 29.5s
2972:	learn: 0.1446317	total: 43.2s	remaining: 29.5s
2973:	learn: 0.1445999	total: 43.2s	remaining: 29.4s
2974:	learn: 0.1445763	total: 43.2s	remaining: 29.4s
2975:	learn: 0.1445560	total: 43.3s	remaining: 29.4s
2976:	learn: 0.1445352	total: 43.3s	remaining: 29.4s
2977:	learn: 0.1445043	total: 43.3s	remaining: 29.4s
2978:	learn: 0.1444832	total: 43.3s	remaining: 29.4s
2979:	learn: 0.1444541	total: 43.3s	remaining: 29.4s
2980:	learn: 0.1444315	total: 43.3s	remaining: 29.3s
2981:	learn: 0.1444019	total: 43.3s	remaining: 29.3s
2982:	learn: 0.1443704	total: 43.3s	remaining:

3131:	learn: 0.1403946	total: 45.6s	remaining: 27.2s
3132:	learn: 0.1403749	total: 45.6s	remaining: 27.2s
3133:	learn: 0.1403463	total: 45.6s	remaining: 27.2s
3134:	learn: 0.1403213	total: 45.6s	remaining: 27.2s
3135:	learn: 0.1402896	total: 45.7s	remaining: 27.1s
3136:	learn: 0.1402635	total: 45.7s	remaining: 27.1s
3137:	learn: 0.1402337	total: 45.7s	remaining: 27.1s
3138:	learn: 0.1402132	total: 45.7s	remaining: 27.1s
3139:	learn: 0.1401853	total: 45.7s	remaining: 27.1s
3140:	learn: 0.1401539	total: 45.7s	remaining: 27.1s
3141:	learn: 0.1401257	total: 45.8s	remaining: 27.1s
3142:	learn: 0.1400974	total: 45.8s	remaining: 27s
3143:	learn: 0.1400699	total: 45.8s	remaining: 27s
3144:	learn: 0.1400389	total: 45.8s	remaining: 27s
3145:	learn: 0.1400124	total: 45.8s	remaining: 27s
3146:	learn: 0.1399796	total: 45.8s	remaining: 27s
3147:	learn: 0.1399582	total: 45.9s	remaining: 27s
3148:	learn: 0.1399230	total: 45.9s	remaining: 27s
3149:	learn: 0.1398922	total: 45.9s	remaining: 26.9s
3150:	l

3302:	learn: 0.1359478	total: 48.3s	remaining: 24.8s
3303:	learn: 0.1359223	total: 48.3s	remaining: 24.8s
3304:	learn: 0.1358988	total: 48.3s	remaining: 24.8s
3305:	learn: 0.1358786	total: 48.3s	remaining: 24.8s
3306:	learn: 0.1358584	total: 48.3s	remaining: 24.7s
3307:	learn: 0.1358356	total: 48.3s	remaining: 24.7s
3308:	learn: 0.1358189	total: 48.4s	remaining: 24.7s
3309:	learn: 0.1357893	total: 48.4s	remaining: 24.7s
3310:	learn: 0.1357625	total: 48.4s	remaining: 24.7s
3311:	learn: 0.1357439	total: 48.4s	remaining: 24.7s
3312:	learn: 0.1357274	total: 48.4s	remaining: 24.6s
3313:	learn: 0.1357066	total: 48.4s	remaining: 24.6s
3314:	learn: 0.1356818	total: 48.4s	remaining: 24.6s
3315:	learn: 0.1356510	total: 48.4s	remaining: 24.6s
3316:	learn: 0.1356289	total: 48.4s	remaining: 24.6s
3317:	learn: 0.1356032	total: 48.5s	remaining: 24.6s
3318:	learn: 0.1355774	total: 48.5s	remaining: 24.5s
3319:	learn: 0.1355582	total: 48.5s	remaining: 24.5s
3320:	learn: 0.1355397	total: 48.5s	remaining:

3464:	learn: 0.1319691	total: 50.8s	remaining: 22.5s
3465:	learn: 0.1319358	total: 50.8s	remaining: 22.5s
3466:	learn: 0.1319056	total: 50.8s	remaining: 22.5s
3467:	learn: 0.1318827	total: 50.8s	remaining: 22.5s
3468:	learn: 0.1318656	total: 50.9s	remaining: 22.4s
3469:	learn: 0.1318332	total: 50.9s	remaining: 22.4s
3470:	learn: 0.1318196	total: 50.9s	remaining: 22.4s
3471:	learn: 0.1317923	total: 50.9s	remaining: 22.4s
3472:	learn: 0.1317704	total: 50.9s	remaining: 22.4s
3473:	learn: 0.1317433	total: 50.9s	remaining: 22.4s
3474:	learn: 0.1317197	total: 50.9s	remaining: 22.4s
3475:	learn: 0.1316935	total: 51s	remaining: 22.3s
3476:	learn: 0.1316634	total: 51s	remaining: 22.3s
3477:	learn: 0.1316406	total: 51s	remaining: 22.3s
3478:	learn: 0.1316177	total: 51s	remaining: 22.3s
3479:	learn: 0.1315944	total: 51s	remaining: 22.3s
3480:	learn: 0.1315737	total: 51s	remaining: 22.3s
3481:	learn: 0.1315492	total: 51.1s	remaining: 22.3s
3482:	learn: 0.1315275	total: 51.1s	remaining: 22.2s
3483:

3626:	learn: 0.1281114	total: 53.3s	remaining: 20.2s
3627:	learn: 0.1280880	total: 53.3s	remaining: 20.2s
3628:	learn: 0.1280639	total: 53.4s	remaining: 20.2s
3629:	learn: 0.1280338	total: 53.4s	remaining: 20.1s
3630:	learn: 0.1280054	total: 53.4s	remaining: 20.1s
3631:	learn: 0.1279737	total: 53.4s	remaining: 20.1s
3632:	learn: 0.1279536	total: 53.4s	remaining: 20.1s
3633:	learn: 0.1279255	total: 53.4s	remaining: 20.1s
3634:	learn: 0.1278970	total: 53.4s	remaining: 20.1s
3635:	learn: 0.1278730	total: 53.5s	remaining: 20.1s
3636:	learn: 0.1278450	total: 53.5s	remaining: 20s
3637:	learn: 0.1278260	total: 53.5s	remaining: 20s
3638:	learn: 0.1277966	total: 53.5s	remaining: 20s
3639:	learn: 0.1277694	total: 53.5s	remaining: 20s
3640:	learn: 0.1277434	total: 53.5s	remaining: 20s
3641:	learn: 0.1277150	total: 53.5s	remaining: 20s
3642:	learn: 0.1276890	total: 53.6s	remaining: 19.9s
3643:	learn: 0.1276745	total: 53.6s	remaining: 19.9s
3644:	learn: 0.1276554	total: 53.6s	remaining: 19.9s
3645:

3782:	learn: 0.1245418	total: 55.8s	remaining: 17.9s
3783:	learn: 0.1245160	total: 55.8s	remaining: 17.9s
3784:	learn: 0.1244865	total: 55.8s	remaining: 17.9s
3785:	learn: 0.1244594	total: 55.8s	remaining: 17.9s
3786:	learn: 0.1244377	total: 55.8s	remaining: 17.9s
3787:	learn: 0.1244184	total: 55.9s	remaining: 17.9s
3788:	learn: 0.1243989	total: 55.9s	remaining: 17.9s
3789:	learn: 0.1243777	total: 55.9s	remaining: 17.8s
3790:	learn: 0.1243434	total: 55.9s	remaining: 17.8s
3791:	learn: 0.1243179	total: 55.9s	remaining: 17.8s
3792:	learn: 0.1242917	total: 55.9s	remaining: 17.8s
3793:	learn: 0.1242635	total: 56s	remaining: 17.8s
3794:	learn: 0.1242325	total: 56s	remaining: 17.8s
3795:	learn: 0.1242160	total: 56s	remaining: 17.8s
3796:	learn: 0.1241859	total: 56s	remaining: 17.7s
3797:	learn: 0.1241639	total: 56s	remaining: 17.7s
3798:	learn: 0.1241385	total: 56s	remaining: 17.7s
3799:	learn: 0.1241076	total: 56.1s	remaining: 17.7s
3800:	learn: 0.1240836	total: 56.1s	remaining: 17.7s
3801:

3953:	learn: 0.1207057	total: 58.5s	remaining: 15.5s
3954:	learn: 0.1206841	total: 58.5s	remaining: 15.5s
3955:	learn: 0.1206657	total: 58.5s	remaining: 15.4s
3956:	learn: 0.1206330	total: 58.5s	remaining: 15.4s
3957:	learn: 0.1206071	total: 58.5s	remaining: 15.4s
3958:	learn: 0.1205749	total: 58.6s	remaining: 15.4s
3959:	learn: 0.1205595	total: 58.6s	remaining: 15.4s
3960:	learn: 0.1205335	total: 58.6s	remaining: 15.4s
3961:	learn: 0.1205155	total: 58.6s	remaining: 15.4s
3962:	learn: 0.1204800	total: 58.6s	remaining: 15.3s
3963:	learn: 0.1204593	total: 58.6s	remaining: 15.3s
3964:	learn: 0.1204300	total: 58.6s	remaining: 15.3s
3965:	learn: 0.1204094	total: 58.6s	remaining: 15.3s
3966:	learn: 0.1203789	total: 58.7s	remaining: 15.3s
3967:	learn: 0.1203544	total: 58.7s	remaining: 15.3s
3968:	learn: 0.1203318	total: 58.7s	remaining: 15.2s
3969:	learn: 0.1203049	total: 58.7s	remaining: 15.2s
3970:	learn: 0.1202897	total: 58.7s	remaining: 15.2s
3971:	learn: 0.1202634	total: 58.7s	remaining:

4121:	learn: 0.1170363	total: 1m	remaining: 13s
4122:	learn: 0.1170114	total: 1m	remaining: 13s
4123:	learn: 0.1169808	total: 1m 1s	remaining: 13s
4124:	learn: 0.1169545	total: 1m 1s	remaining: 12.9s
4125:	learn: 0.1169381	total: 1m 1s	remaining: 12.9s
4126:	learn: 0.1169193	total: 1m 1s	remaining: 12.9s
4127:	learn: 0.1168998	total: 1m 1s	remaining: 12.9s
4128:	learn: 0.1168785	total: 1m 1s	remaining: 12.9s
4129:	learn: 0.1168534	total: 1m 1s	remaining: 12.9s
4130:	learn: 0.1168485	total: 1m 1s	remaining: 12.9s
4131:	learn: 0.1168237	total: 1m 1s	remaining: 12.8s
4132:	learn: 0.1167960	total: 1m 1s	remaining: 12.8s
4133:	learn: 0.1167811	total: 1m 1s	remaining: 12.8s
4134:	learn: 0.1167671	total: 1m 1s	remaining: 12.8s
4135:	learn: 0.1167499	total: 1m 1s	remaining: 12.8s
4136:	learn: 0.1167277	total: 1m 1s	remaining: 12.8s
4137:	learn: 0.1167003	total: 1m 1s	remaining: 12.8s
4138:	learn: 0.1166661	total: 1m 1s	remaining: 12.7s
4139:	learn: 0.1166460	total: 1m 1s	remaining: 12.7s
4140:

4289:	learn: 0.1135925	total: 1m 3s	remaining: 10.5s
4290:	learn: 0.1135672	total: 1m 3s	remaining: 10.5s
4291:	learn: 0.1135497	total: 1m 3s	remaining: 10.5s
4292:	learn: 0.1135316	total: 1m 3s	remaining: 10.5s
4293:	learn: 0.1135134	total: 1m 3s	remaining: 10.4s
4294:	learn: 0.1135012	total: 1m 3s	remaining: 10.4s
4295:	learn: 0.1134756	total: 1m 3s	remaining: 10.4s
4296:	learn: 0.1134557	total: 1m 3s	remaining: 10.4s
4297:	learn: 0.1134288	total: 1m 3s	remaining: 10.4s
4298:	learn: 0.1134087	total: 1m 3s	remaining: 10.4s
4299:	learn: 0.1133859	total: 1m 3s	remaining: 10.4s
4300:	learn: 0.1133660	total: 1m 3s	remaining: 10.3s
4301:	learn: 0.1133495	total: 1m 3s	remaining: 10.3s
4302:	learn: 0.1133330	total: 1m 3s	remaining: 10.3s
4303:	learn: 0.1133174	total: 1m 3s	remaining: 10.3s
4304:	learn: 0.1132942	total: 1m 3s	remaining: 10.3s
4305:	learn: 0.1132752	total: 1m 3s	remaining: 10.3s
4306:	learn: 0.1132577	total: 1m 3s	remaining: 10.3s
4307:	learn: 0.1132351	total: 1m 3s	remaining:

4451:	learn: 0.1104289	total: 1m 5s	remaining: 8.12s
4452:	learn: 0.1104149	total: 1m 5s	remaining: 8.1s
4453:	learn: 0.1103995	total: 1m 5s	remaining: 8.09s
4454:	learn: 0.1103708	total: 1m 5s	remaining: 8.07s
4455:	learn: 0.1103501	total: 1m 5s	remaining: 8.05s
4456:	learn: 0.1103244	total: 1m 5s	remaining: 8.04s
4457:	learn: 0.1103130	total: 1m 6s	remaining: 8.03s
4458:	learn: 0.1102996	total: 1m 6s	remaining: 8.01s
4459:	learn: 0.1102859	total: 1m 6s	remaining: 8s
4460:	learn: 0.1102619	total: 1m 6s	remaining: 7.98s
4461:	learn: 0.1102382	total: 1m 6s	remaining: 7.96s
4462:	learn: 0.1102148	total: 1m 6s	remaining: 7.95s
4463:	learn: 0.1102036	total: 1m 6s	remaining: 7.93s
4464:	learn: 0.1101845	total: 1m 6s	remaining: 7.92s
4465:	learn: 0.1101606	total: 1m 6s	remaining: 7.9s
4466:	learn: 0.1101390	total: 1m 6s	remaining: 7.89s
4467:	learn: 0.1101179	total: 1m 6s	remaining: 7.87s
4468:	learn: 0.1100913	total: 1m 6s	remaining: 7.86s
4469:	learn: 0.1100740	total: 1m 6s	remaining: 7.84

4610:	learn: 0.1074039	total: 1m 8s	remaining: 5.76s
4611:	learn: 0.1073793	total: 1m 8s	remaining: 5.75s
4612:	learn: 0.1073622	total: 1m 8s	remaining: 5.73s
4613:	learn: 0.1073464	total: 1m 8s	remaining: 5.72s
4614:	learn: 0.1073197	total: 1m 8s	remaining: 5.7s
4615:	learn: 0.1073003	total: 1m 8s	remaining: 5.69s
4616:	learn: 0.1072804	total: 1m 8s	remaining: 5.67s
4617:	learn: 0.1072580	total: 1m 8s	remaining: 5.66s
4618:	learn: 0.1072384	total: 1m 8s	remaining: 5.64s
4619:	learn: 0.1072286	total: 1m 8s	remaining: 5.63s
4620:	learn: 0.1072138	total: 1m 8s	remaining: 5.61s
4621:	learn: 0.1072049	total: 1m 8s	remaining: 5.6s
4622:	learn: 0.1071770	total: 1m 8s	remaining: 5.58s
4623:	learn: 0.1071683	total: 1m 8s	remaining: 5.57s
4624:	learn: 0.1071514	total: 1m 8s	remaining: 5.55s
4625:	learn: 0.1071293	total: 1m 8s	remaining: 5.54s
4626:	learn: 0.1070997	total: 1m 8s	remaining: 5.52s
4627:	learn: 0.1070779	total: 1m 8s	remaining: 5.51s
4628:	learn: 0.1070638	total: 1m 8s	remaining: 5

4766:	learn: 0.1047382	total: 1m 10s	remaining: 3.45s
4767:	learn: 0.1047139	total: 1m 10s	remaining: 3.43s
4768:	learn: 0.1046988	total: 1m 10s	remaining: 3.42s
4769:	learn: 0.1046813	total: 1m 10s	remaining: 3.4s
4770:	learn: 0.1046578	total: 1m 10s	remaining: 3.39s
4771:	learn: 0.1046343	total: 1m 10s	remaining: 3.37s
4772:	learn: 0.1046207	total: 1m 10s	remaining: 3.36s
4773:	learn: 0.1045947	total: 1m 10s	remaining: 3.35s
4774:	learn: 0.1045712	total: 1m 10s	remaining: 3.33s
4775:	learn: 0.1045595	total: 1m 10s	remaining: 3.32s
4776:	learn: 0.1045462	total: 1m 10s	remaining: 3.3s
4777:	learn: 0.1045306	total: 1m 10s	remaining: 3.29s
4778:	learn: 0.1045098	total: 1m 10s	remaining: 3.27s
4779:	learn: 0.1044893	total: 1m 10s	remaining: 3.26s
4780:	learn: 0.1044715	total: 1m 10s	remaining: 3.24s
4781:	learn: 0.1044584	total: 1m 10s	remaining: 3.23s
4782:	learn: 0.1044332	total: 1m 10s	remaining: 3.21s
4783:	learn: 0.1044150	total: 1m 10s	remaining: 3.2s
4784:	learn: 0.1043917	total: 1

4921:	learn: 0.1019201	total: 1m 13s	remaining: 1.16s
4922:	learn: 0.1019056	total: 1m 13s	remaining: 1.14s
4923:	learn: 0.1018866	total: 1m 13s	remaining: 1.13s
4924:	learn: 0.1018721	total: 1m 13s	remaining: 1.11s
4925:	learn: 0.1018620	total: 1m 13s	remaining: 1.1s
4926:	learn: 0.1018418	total: 1m 13s	remaining: 1.08s
4927:	learn: 0.1018282	total: 1m 13s	remaining: 1.07s
4928:	learn: 0.1018103	total: 1m 13s	remaining: 1.05s
4929:	learn: 0.1017898	total: 1m 13s	remaining: 1.04s
4930:	learn: 0.1017751	total: 1m 13s	remaining: 1.02s
4931:	learn: 0.1017562	total: 1m 13s	remaining: 1.01s
4932:	learn: 0.1017413	total: 1m 13s	remaining: 994ms
4933:	learn: 0.1017186	total: 1m 13s	remaining: 980ms
4934:	learn: 0.1017016	total: 1m 13s	remaining: 965ms
4935:	learn: 0.1016853	total: 1m 13s	remaining: 950ms
4936:	learn: 0.1016671	total: 1m 13s	remaining: 935ms
4937:	learn: 0.1016487	total: 1m 13s	remaining: 921ms
4938:	learn: 0.1016302	total: 1m 13s	remaining: 906ms
4939:	learn: 0.1016174	total:

<catboost.core.CatBoostClassifier at 0x153d15910>

In [49]:
y_pred = model_CBC.predict_proba(X_pp_avg_val)[:,1]


In [50]:
amex_metric(y_unique_val, y_pred)

0.7539681163513423