# Catboost Optuna Optimizer

## For Kaggle AMEX default prediction competition data

https://www.kaggle.com/competitions/amex-default-prediction/data

This competition had some random looking data where no information on columns was given. The notebook is mainly to allow me to copy it for use in Catboost optimization in other tabular data cases.

This assumes Catboost and various other Python libraries are installed. And GPU configured. I used a Docker image and Pipfile with all these installed.


In [1]:
import pandas as pd
import numpy as np

import optuna
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import catboost as cb
from tqdm import tqdm
import gc
from sklearn import preprocessing

tqdm.pandas()

import time

N_FOLDS = 10

In [2]:
# https://www.kaggle.com/kyakovlev
# https://www.kaggle.com/competitions/amex-default-prediction/discussion/327534
def amex_metric_mod(y_true, y_pred):

    labels     = np.transpose(np.array([y_true, y_pred]))
    labels     = labels[labels[:, 1].argsort()[::-1]]
    weights    = np.where(labels[:,0]==0, 20, 1)
    cut_vals   = labels[np.cumsum(weights) <= int(0.04 * np.sum(weights))]
    top_four   = np.sum(cut_vals[:,0]) / np.sum(labels[:,0])

    gini = [0,0]
    for i in [1,0]:
        labels         = np.transpose(np.array([y_true, y_pred]))
        labels         = labels[labels[:, i].argsort()[::-1]]
        weight         = np.where(labels[:,0]==0, 20, 1)
        weight_random  = np.cumsum(weight / np.sum(weight))
        total_pos      = np.sum(labels[:, 0] *  weight)
        cum_pos_found  = np.cumsum(labels[:, 0] * weight)
        lorentz        = cum_pos_found / total_pos
        gini[i]        = np.sum((lorentz - weight_random) * weight)

    return 0.5 * (gini[1]/gini[0] + top_four)




In [3]:
cat_cols = ['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68'] 


In [4]:
def find_new_cat_cols(df_from):
    new_cat_cols = []
    for cat_col in cat_cols:
        new_cat_cols.extend([col for col in df_from.columns if col.startswith(cat_col)])
    return new_cat_cols


In [5]:
MODEL_IDENTIFIER = "lagged"
PREDICTIONS_FILE = f"predictions_cat_{MODEL_IDENTIFIER}.csv"
SUBMISSIONS_FILE = f"submission_cat_{MODEL_IDENTIFIER}.csv"
OPTIMIZER_PARAMS_FILE = f"optimizer_results_cat_{MODEL_IDENTIFIER}.csv"

In [6]:
def format_time(seconds):
    seconds = int(seconds)
    minutes = seconds // 60
    hours = minutes // 60
    minutes = minutes % 60
    seconds = seconds % 60
    if hours > 0:
        return f"{hours}h, {minutes}m, {seconds}s"
    if minutes > 0:
        return f"{minutes}m, {seconds}s"
    return f"{seconds}s"
        

In [7]:
df_train = pd.read_parquet("train_lagged.parquet", engine="pyarrow")
df_train.head()

Unnamed: 0,customer_ID,S_2_first,S_2_mean,S_2_std,S_2_min,S_2_max,S_2_last,P_2_first,P_2_mean,P_2_std,...,D_66_count,D_66_first,D_66_last,D_66_nunique,D_68_count,D_68_first,D_68_last,D_68_nunique,target,fake_splitter
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,0,0.0,0.0,0,0,0,0.94,0.933846,0.024337,...,13,0,0,1,13,7,7,1,0,True
1,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,0,0.0,0.0,0,0,0,0.93,0.899231,0.022899,...,13,0,0,1,13,7,7,1,0,True
2,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,0,0.0,0.0,0,0,0,0.88,0.877692,0.027735,...,13,0,0,1,13,7,7,1,0,True
3,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,0,0.0,0.0,0,0,0,0.57,0.597692,0.018777,...,13,0,0,1,13,3,4,3,0,True
4,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,0,0.0,0.0,0,0,0,0.94,0.893077,0.042502,...,13,2,2,1,13,7,7,1,0,True


In [8]:
df_test = pd.read_parquet("test_lagged.parquet", engine="pyarrow")
df_test.head()

Unnamed: 0,customer_ID,S_2_first,S_2_mean,S_2_std,S_2_min,S_2_max,S_2_last,P_2_first,P_2_mean,P_2_std,...,D_64_last,D_64_nunique,D_66_count,D_66_first,D_66_last,D_66_nunique,D_68_count,D_68_first,D_68_last,D_68_nunique
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0,0.0,0.0,0,0,0,0.63,0.602222,0.019861,...,4,2,9,0,0,1,9,0,7,2
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0,0.0,0.0,0,0,0,0.89,0.860769,0.030947,...,1,1,13,0,0,1,13,7,7,1
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0,0.0,0.0,0,0,0,0.77,0.748462,0.061893,...,4,2,13,2,2,1,13,7,5,2
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0,0.0,0.0,0,0,0,0.51,0.474615,0.026962,...,3,1,13,0,0,1,13,6,6,1
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0,0.0,0.0,0,0,0,0.29,0.323846,0.050751,...,3,2,13,0,0,1,13,7,6,2


In [9]:
strat = df_train["fake_splitter"]
df_train = df_train.drop("fake_splitter", axis=1)

In [10]:
X = df_train.drop(["customer_ID", "target"], axis=1)
y = df_train["target"]

In [11]:
stratzip = zip(strat, y)
#stratified k-fold only supports single binary value, tuple stratification breaks it
stratzip = [f"{a}-{b}" for (a,b) in stratzip]
#stratzip = list(stratzip)

# Catboost Optuna Optimizer

Defines the optimization value space for hyperparameters.
Runs the Optuna optimizer trials to find good parameter values.

In [12]:
#these variables are just used to collect statistics over all the Optuna trials for myself
#they are updated in the code I put in the optuna_objective() function
optuna_eval_results = {}
optuna_iteration_index = 0
optuna_losses = []
optuna_accuracies = []
optuna_amexes = []
optuna_parameters = []
optuna_seconds = []

def optuna_objective(trial, X, y, strat, verbosity=-1):
    global optuna_iteration_index
    
    optuna_iteration_index += 1
    #https://catboost.ai/en/docs/references/training-parameters/
    param_grid = {
        'max_depth': trial.suggest_discrete_uniform('depth', 2, 10, 1),
        'learning_rate': trial.suggest_loguniform("learning_rate", 0.01, 0.3),
        'border_count': trial.suggest_discrete_uniform('border_count', 32, 255, 1),
        'l2_leaf_reg': trial.suggest_discrete_uniform('l2_leaf_reg', 0, 5, 1),
        "leaf_estimation_method": trial.suggest_categorical("leaf_estimation_method", ["Newton", "Gradient"]),
        "use_best_model" : True,
        'early_stopping_rounds': 10,
        'iterations': 1500,
        'feature_border_type': trial.suggest_categorical('feature_border_type',
                                         ['Median', 'Uniform', 'UniformAndQuantiles', 'MaxLogSum', 'MinEntropy', 'GreedyLogSum']),
    }

    #use_gpu is just a means to easily run on CPU/GPU depending on what I have available
    #CPU/GPU supported types vary: https://catboost.ai/en/docs/concepts/algorithm-main-stages_bootstrap-options
    use_gpu = True
    if use_gpu:
        param_grid['task_type'] = "GPU"
        param_grid['bootstrap_type'] = trial.suggest_categorical('bootstrap_type', ['Bayesian', 'Bernoulli', 'Poisson', 'No'])
    else:
        param_grid['task_type'] = "CPU"
        param_grid['rsm'] = trial.suggest_discrete_uniform('rsm', 0.5, 1)
        param_grid['bootstrap_type'] = trial.suggest_categorical('bootstrap_type', ['Bayesian', 'Bernoulli', 'MVS', 'No'])

    #following parameters are only relevant for specific boostrap types. 
    #better not to add them for other boosters as sometims they stop or give warning due to unknown parameter        
    if param_grid["bootstrap_type"] == "Bayesian":
        param_grid["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
    elif param_grid["bootstrap_type"] == "Bernoulli":
        param_grid["subsample"] = trial.suggest_float("subsample", 0.1, 1)

    n_classes = 1
    #objective: https://catboost.ai/en/docs/references/training-parameters/common#loss_function
    if n_classes > 2:
        #https://catboost.ai/en/docs/concepts/loss-functions-multilabel-classification
        param_grid['objective'] = "multiclass"
        param_grid["eval_metric"] = "multi_logloss"
    else:
        param_grid['objective'] = "Logloss"
        
    scale_pos_weight = None
    if scale_pos_weight is not None:
        #https://catboost.ai/en/docs/references/training-parameters/common#scale_pos_weight
        param_grid["scale_pos_weight"] = self.scale_pos_weight

    print(f"STARTING OPTIMIZATION iteration {optuna_iteration_index}, data size: {X.shape}, params={param_grid}")
    #N_FOLDS_RUN can be used as smaller than N_FOLDS to look for initial results faster
    #(for comparison with different features etc)
    N_FOLDS_RUN = N_FOLDS
    cv = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=1911)

    cv_scores = np.empty(N_FOLDS_RUN)
    cv_accuracies = np.empty(N_FOLDS_RUN)
    cv_amexes = np.empty(N_FOLDS_RUN)
    
    time_start_all_folds = time.time()
    acc_score_total = 0
    n_classes = 1
    train_preds = np.zeros((X.shape[0], n_classes))
    amex_total = 0
    for idx, (train_idx, test_idx) in enumerate(cv.split(X, strat)):
        if idx >= N_FOLDS_RUN:
            break
        time_start_this_fold = time.time()
        print(f"=== STARTING FOLD {idx+1}/{N_FOLDS_RUN} ({N_FOLDS}) ===")
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
        evalset = [(X_test,y_test)]
        
        print("creating classifier")
        model = cb.CatBoostClassifier(**param_grid)
        print("fitting")
                
        model.fit(
            X_train,
            y_train,
            #https://catboost.ai/en/docs/concepts/python-reference_catboost_fit#verbose 
            #(verbose 100=print stats every 100 iterations)
            verbose=100,
            eval_set=evalset,
        )

        print("predicting")
        preds = model.predict_proba(X_test)

        #amex is just the name of the competition this was used in
        preds_true_amex = preds[:, 1]
        preds_true = preds[:, 1].flatten()
        preds_true.shape = (y_test.shape[0], 1)
        amex_y_test = np.array(y_test)
        amex_y_test.shape = (y_test.shape[0], 1)
        np.add.at(train_preds, test_idx, preds_true)
        print(f"count zerO: {np.count_nonzero(train_preds)}")

        preds_this_round = (preds >= 0.5)[:,1]

        acc_score = accuracy_score(y_test, preds_this_round)
        acc_score_total += acc_score

        amex_score = amex_metric_mod(y_test, preds_true_amex)
        amex_total += amex_score

        log_loss_fold = log_loss(y_test, preds)
        cv_scores[idx] = log_loss_fold
        cv_accuracies[idx] = acc_score
        cv_amexes[idx] = amex_score
        print(f"amex: {amex_score}")

        print(f"=== FINISHED FOLD {idx+1}/{N_FOLDS}, log loss={log_loss_fold}, accuracy={acc_score}, amex={amex_score} ===")
        time_total_this_fold = time.time() - time_start_this_fold
        print(f"time to run this fold: {format_time(time_total_this_fold)}")        

        #custom condition based on earlier trials, to skip poor parameters faster by exiting the fold loop
        if amex_score < 0.78:
            print("low score on amex, dropping loop")
            return amex_score

   
    time_total_all_folds = time.time() - time_start_all_folds        
    print(f"time to run folds: {format_time(time_total_all_folds)}")
    
    iteration_log_loss = np.mean(cv_scores)
    iteration_accuracy = np.mean(cv_accuracies)
    iteration_amex = np.mean(cv_amexes)
    optuna_losses.append(iteration_log_loss)
    optuna_amexes.append(iteration_amex)
    optuna_accuracies.append(iteration_accuracy)
    optuna_parameters.append(param_grid)
    optuna_seconds.append(int(time_total_all_folds))
    print(f"iteration logloss: {iteration_log_loss}, iteration acc: {iteration_accuracy}, Iteration amex: {iteration_amex}")
    return iteration_amex #iteration_log_loss


In [13]:
optuna_eval_results = {}
optuna_iteration_index = 0
optuna_losses = []
optuna_accuracies = []
optuna_amexes = []
optuna_parameters = []
optuna_seconds = []

#study = optuna.create_study(direction="minimize", study_name="XGB Classifier")
study = optuna.create_study(direction="maximize", study_name="Catboost Classifier")
func = lambda trial: optuna_objective(trial, X, y, stratzip)
study.optimize(func, n_trials=10)

[32m[I 2022-08-28 00:20:27,668][0m A new study created in memory with name: Catboost Classifier[0m


STARTING OPTIMIZATION iteration 1, data size: (458913, 1580), params={'max_depth': 7.0, 'learning_rate': 0.0431673514792702, 'border_count': 128.0, 'l2_leaf_reg': 3.0, 'leaf_estimation_method': 'Gradient', 'use_best_model': True, 'early_stopping_rounds': 10, 'iterations': 1500, 'feature_border_type': 'MinEntropy', 'task_type': 'GPU', 'bootstrap_type': 'No', 'objective': 'Logloss'}
=== STARTING FOLD 1/10 (10) ===
creating classifier
fitting
0:	learn: 0.6488464	test: 0.6488040	best: 0.6488040 (0)	total: 31.5ms	remaining: 47.2s
100:	learn: 0.2326587	test: 0.2367472	best: 0.2367472 (100)	total: 2.45s	remaining: 34s
200:	learn: 0.2228638	test: 0.2287966	best: 0.2287966 (200)	total: 4.74s	remaining: 30.7s
300:	learn: 0.2178964	test: 0.2254416	best: 0.2254416 (300)	total: 6.98s	remaining: 27.8s
400:	learn: 0.2140033	test: 0.2235935	best: 0.2235935 (400)	total: 9.21s	remaining: 25.2s
500:	learn: 0.2108131	test: 0.2225876	best: 0.2225876 (500)	total: 11.4s	remaining: 22.7s
600:	learn: 0.2080526

400:	learn: 0.2138741	test: 0.2242118	best: 0.2242118 (400)	total: 9.27s	remaining: 25.4s
500:	learn: 0.2106779	test: 0.2232447	best: 0.2232447 (500)	total: 11.5s	remaining: 23s
600:	learn: 0.2078706	test: 0.2225810	best: 0.2225740 (598)	total: 13.7s	remaining: 20.5s
700:	learn: 0.2052414	test: 0.2221044	best: 0.2221039 (699)	total: 15.9s	remaining: 18.1s
800:	learn: 0.2028410	test: 0.2216575	best: 0.2216534 (797)	total: 18.1s	remaining: 15.8s
900:	learn: 0.2005378	test: 0.2213109	best: 0.2213107 (899)	total: 20.3s	remaining: 13.5s
1000:	learn: 0.1983768	test: 0.2210818	best: 0.2210660 (994)	total: 22.4s	remaining: 11.2s
bestTest = 0.2210660136
bestIteration = 994
Shrink model to first 995 iterations.
predicting
count zerO: 321240
amex: 0.7900836223026566
=== FINISHED FOLD 7/10, log loss=0.22106597398844385, accuracy=0.9028131877710226, amex=0.7900836223026566 ===
time to run this fold: 26s
=== STARTING FOLD 8/10 (10) ===
creating classifier
fitting
0:	learn: 0.6485928	test: 0.6483622	

[32m[I 2022-08-28 00:24:38,535][0m Trial 0 finished with value: 0.7921560538737504 and parameters: {'depth': 7.0, 'learning_rate': 0.0431673514792702, 'border_count': 128.0, 'l2_leaf_reg': 3.0, 'leaf_estimation_method': 'Gradient', 'feature_border_type': 'MinEntropy', 'bootstrap_type': 'No'}. Best is trial 0 with value: 0.7921560538737504.[0m


bestTest = 0.2169414091
bestIteration = 1053
Shrink model to first 1054 iterations.
predicting
count zerO: 458913
amex: 0.7936787062549382
=== FINISHED FOLD 10/10, log loss=0.21694136837483538, accuracy=0.9036412368438256, amex=0.7936787062549382 ===
time to run this fold: 27s
time to run folds: 4m, 10s
iteration logloss: 0.21842958993457792, iteration acc: 0.9041016494234061, Iteration amex: 0.7921560538737504
STARTING OPTIMIZATION iteration 2, data size: (458913, 1580), params={'max_depth': 8.0, 'learning_rate': 0.01314407695144808, 'border_count': 165.0, 'l2_leaf_reg': 3.0, 'leaf_estimation_method': 'Newton', 'use_best_model': True, 'early_stopping_rounds': 10, 'iterations': 1500, 'feature_border_type': 'MaxLogSum', 'task_type': 'GPU', 'bootstrap_type': 'No', 'objective': 'Logloss'}
=== STARTING FOLD 1/10 (10) ===
creating classifier
fitting
0:	learn: 0.6762452	test: 0.6762274	best: 0.6762274 (0)	total: 37.5ms	remaining: 56.2s
100:	learn: 0.2602434	test: 0.2626536	best: 0.2626536 (1

400:	learn: 0.2242696	test: 0.2295766	best: 0.2295766 (400)	total: 14.1s	remaining: 38.7s
500:	learn: 0.2214800	test: 0.2276586	best: 0.2276586 (500)	total: 17.5s	remaining: 34.9s
600:	learn: 0.2192239	test: 0.2262833	best: 0.2262833 (600)	total: 20.8s	remaining: 31.2s
700:	learn: 0.2173694	test: 0.2252464	best: 0.2252464 (700)	total: 24.2s	remaining: 27.5s
800:	learn: 0.2156939	test: 0.2244298	best: 0.2244298 (800)	total: 27.5s	remaining: 24s
900:	learn: 0.2141549	test: 0.2237469	best: 0.2237469 (900)	total: 30.8s	remaining: 20.5s
1000:	learn: 0.2126357	test: 0.2231689	best: 0.2231689 (1000)	total: 34.2s	remaining: 17s
1100:	learn: 0.2111737	test: 0.2226822	best: 0.2226822 (1100)	total: 37.5s	remaining: 13.6s
1200:	learn: 0.2097824	test: 0.2223061	best: 0.2223061 (1200)	total: 40.9s	remaining: 10.2s
1300:	learn: 0.2084677	test: 0.2219720	best: 0.2219720 (1300)	total: 44.2s	remaining: 6.76s
1400:	learn: 0.2072255	test: 0.2216429	best: 0.2216429 (1400)	total: 47.5s	remaining: 3.36s
1499

creating classifier
fitting
0:	learn: 0.6748331	test: 0.6748445	best: 0.6748445 (0)	total: 38.1ms	remaining: 57.1s
100:	learn: 0.2606651	test: 0.2610352	best: 0.2610352 (100)	total: 3.64s	remaining: 50.5s
200:	learn: 0.2356568	test: 0.2369013	best: 0.2369013 (200)	total: 7.23s	remaining: 46.7s
300:	learn: 0.2285907	test: 0.2307033	best: 0.2307033 (300)	total: 10.8s	remaining: 42.9s
400:	learn: 0.2245490	test: 0.2275015	best: 0.2275015 (400)	total: 14.2s	remaining: 38.9s
500:	learn: 0.2217269	test: 0.2254996	best: 0.2254996 (500)	total: 17.6s	remaining: 35.1s
600:	learn: 0.2194872	test: 0.2240021	best: 0.2240021 (600)	total: 21s	remaining: 31.4s
700:	learn: 0.2176519	test: 0.2229477	best: 0.2229477 (700)	total: 24.3s	remaining: 27.7s
800:	learn: 0.2160082	test: 0.2220909	best: 0.2220909 (800)	total: 27.6s	remaining: 24.1s
900:	learn: 0.2144838	test: 0.2213905	best: 0.2213905 (900)	total: 31s	remaining: 20.6s
1000:	learn: 0.2129875	test: 0.2207578	best: 0.2207578 (1000)	total: 34.3s	rema

[32m[I 2022-08-28 00:33:48,520][0m Trial 1 finished with value: 0.7900670463691375 and parameters: {'depth': 8.0, 'learning_rate': 0.01314407695144808, 'border_count': 165.0, 'l2_leaf_reg': 3.0, 'leaf_estimation_method': 'Newton', 'feature_border_type': 'MaxLogSum', 'bootstrap_type': 'No'}. Best is trial 0 with value: 0.7921560538737504.[0m


1499:	learn: 0.2064671	test: 0.2188652	best: 0.2188652 (1499)	total: 50.9s	remaining: 0us
bestTest = 0.2188652314
bestIteration = 1499
predicting
count zerO: 458913
amex: 0.7895830547107299
=== FINISHED FOLD 10/10, log loss=0.2188652340605675, accuracy=0.9024427447647687, amex=0.7895830547107299 ===
time to run this fold: 55s
time to run folds: 9m, 9s
iteration logloss: 0.21967978671333807, iteration acc: 0.9034806145655949, Iteration amex: 0.7900670463691375
STARTING OPTIMIZATION iteration 3, data size: (458913, 1580), params={'max_depth': 2.0, 'learning_rate': 0.04846537736298918, 'border_count': 208.0, 'l2_leaf_reg': 1.0, 'leaf_estimation_method': 'Gradient', 'use_best_model': True, 'early_stopping_rounds': 10, 'iterations': 1500, 'feature_border_type': 'GreedyLogSum', 'task_type': 'GPU', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 4.583080493456993, 'objective': 'Logloss'}
=== STARTING FOLD 1/10 (10) ===
creating classifier
fitting
0:	learn: 0.6512018	test: 0.6511628	best:

[32m[I 2022-08-28 00:34:04,662][0m Trial 2 finished with value: 0.7658556612428753 and parameters: {'depth': 2.0, 'learning_rate': 0.04846537736298918, 'border_count': 208.0, 'l2_leaf_reg': 1.0, 'leaf_estimation_method': 'Gradient', 'feature_border_type': 'GreedyLogSum', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 4.583080493456993}. Best is trial 0 with value: 0.7921560538737504.[0m


1499:	learn: 0.2367319	test: 0.2398778	best: 0.2398778 (1499)	total: 13.9s	remaining: 0us
bestTest = 0.2398777931
bestIteration = 1499
predicting
count zerO: 45892
amex: 0.7658556612428753
=== FINISHED FOLD 1/10, log loss=0.23987777457861334, accuracy=0.8949708010110695, amex=0.7658556612428753 ===
time to run this fold: 16s
low score on amex, dropping loop
STARTING OPTIMIZATION iteration 4, data size: (458913, 1580), params={'max_depth': 10.0, 'learning_rate': 0.06137972592081762, 'border_count': 71.0, 'l2_leaf_reg': 0.0, 'leaf_estimation_method': 'Gradient', 'use_best_model': True, 'early_stopping_rounds': 10, 'iterations': 1500, 'feature_border_type': 'MinEntropy', 'task_type': 'GPU', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 9.827293791945827, 'objective': 'Logloss'}
=== STARTING FOLD 1/10 (10) ===
creating classifier
fitting
0:	learn: 0.6367002	test: 0.6369312	best: 0.6369312 (0)	total: 68.4ms	remaining: 1m 42s
100:	learn: 0.2343389	test: 0.2468967	best: 0.2468967 (100)

[32m[I 2022-08-28 00:35:04,114][0m Trial 3 finished with value: 0.7738623216433101 and parameters: {'depth': 10.0, 'learning_rate': 0.06137972592081762, 'border_count': 71.0, 'l2_leaf_reg': 0.0, 'leaf_estimation_method': 'Gradient', 'feature_border_type': 'MinEntropy', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 9.827293791945827}. Best is trial 0 with value: 0.7921560538737504.[0m


bestTest = 0.2287684477
bestIteration = 887
Shrink model to first 888 iterations.
predicting
count zerO: 45892
amex: 0.7738623216433101
=== FINISHED FOLD 1/10, log loss=0.22876844916702457, accuracy=0.8986751503530027, amex=0.7738623216433101 ===
time to run this fold: 59s
low score on amex, dropping loop
STARTING OPTIMIZATION iteration 5, data size: (458913, 1580), params={'max_depth': 7.0, 'learning_rate': 0.012050135586794328, 'border_count': 98.0, 'l2_leaf_reg': 0.0, 'leaf_estimation_method': 'Newton', 'use_best_model': True, 'early_stopping_rounds': 10, 'iterations': 1500, 'feature_border_type': 'GreedyLogSum', 'task_type': 'GPU', 'bootstrap_type': 'No', 'objective': 'Logloss'}
=== STARTING FOLD 1/10 (10) ===
creating classifier
fitting
0:	learn: 0.6769426	test: 0.6769263	best: 0.6769263 (0)	total: 20.6ms	remaining: 30.8s
100:	learn: 0.2701210	test: 0.2720666	best: 0.2720666 (100)	total: 2.06s	remaining: 28.5s
200:	learn: 0.2398805	test: 0.2431215	best: 0.2431215 (200)	total: 4.12

500:	learn: 0.2249528	test: 0.2295839	best: 0.2295839 (500)	total: 9.91s	remaining: 19.8s
600:	learn: 0.2227916	test: 0.2280897	best: 0.2280897 (600)	total: 11.8s	remaining: 17.6s
700:	learn: 0.2209238	test: 0.2268804	best: 0.2268804 (700)	total: 13.7s	remaining: 15.6s
800:	learn: 0.2193209	test: 0.2258912	best: 0.2258912 (800)	total: 15.5s	remaining: 13.6s
900:	learn: 0.2178845	test: 0.2251078	best: 0.2251078 (900)	total: 17.4s	remaining: 11.6s
1000:	learn: 0.2165672	test: 0.2244413	best: 0.2244413 (1000)	total: 19.3s	remaining: 9.6s
1100:	learn: 0.2152951	test: 0.2238685	best: 0.2238685 (1100)	total: 21.1s	remaining: 7.64s
1200:	learn: 0.2140181	test: 0.2233951	best: 0.2233951 (1200)	total: 23s	remaining: 5.72s
1300:	learn: 0.2127929	test: 0.2230335	best: 0.2230335 (1300)	total: 24.9s	remaining: 3.8s
1400:	learn: 0.2116138	test: 0.2226742	best: 0.2226742 (1400)	total: 26.7s	remaining: 1.89s
1499:	learn: 0.2105288	test: 0.2224172	best: 0.2224164 (1494)	total: 28.5s	remaining: 0us
best

creating classifier
fitting
0:	learn: 0.6768771	test: 0.6768702	best: 0.6768702 (0)	total: 21ms	remaining: 31.5s
100:	learn: 0.2704915	test: 0.2705334	best: 0.2705334 (100)	total: 2.06s	remaining: 28.5s
200:	learn: 0.2403368	test: 0.2408918	best: 0.2408918 (200)	total: 4.11s	remaining: 26.5s
300:	learn: 0.2323083	test: 0.2334606	best: 0.2334606 (300)	total: 6.11s	remaining: 24.3s
400:	learn: 0.2281561	test: 0.2299066	best: 0.2299066 (400)	total: 8.05s	remaining: 22.1s
500:	learn: 0.2252605	test: 0.2275925	best: 0.2275925 (500)	total: 9.97s	remaining: 19.9s
600:	learn: 0.2231429	test: 0.2260389	best: 0.2260389 (600)	total: 11.8s	remaining: 17.7s
700:	learn: 0.2212780	test: 0.2247828	best: 0.2247828 (700)	total: 13.7s	remaining: 15.6s
800:	learn: 0.2196838	test: 0.2238076	best: 0.2238076 (800)	total: 15.6s	remaining: 13.6s
900:	learn: 0.2182480	test: 0.2230422	best: 0.2230422 (900)	total: 17.4s	remaining: 11.6s
1000:	learn: 0.2169441	test: 0.2223383	best: 0.2223383 (1000)	total: 19.3s	re

[32m[I 2022-08-28 00:40:10,554][0m Trial 4 finished with value: 0.7892209490411524 and parameters: {'depth': 7.0, 'learning_rate': 0.012050135586794328, 'border_count': 98.0, 'l2_leaf_reg': 0.0, 'leaf_estimation_method': 'Newton', 'feature_border_type': 'GreedyLogSum', 'bootstrap_type': 'No'}. Best is trial 0 with value: 0.7921560538737504.[0m


1499:	learn: 0.2108864	test: 0.2200907	best: 0.2200907 (1499)	total: 28.6s	remaining: 0us
bestTest = 0.2200906641
bestIteration = 1499
predicting
count zerO: 458913
amex: 0.7894119496242195
=== FINISHED FOLD 10/10, log loss=0.2200906657639202, accuracy=0.9015275326316707, amex=0.7894119496242195 ===
time to run this fold: 30s
time to run folds: 5m, 6s
iteration logloss: 0.22069915625748865, iteration acc: 0.9030905627672199, Iteration amex: 0.7892209490411524
STARTING OPTIMIZATION iteration 6, data size: (458913, 1580), params={'max_depth': 7.0, 'learning_rate': 0.01596012003813907, 'border_count': 35.0, 'l2_leaf_reg': 1.0, 'leaf_estimation_method': 'Gradient', 'use_best_model': True, 'early_stopping_rounds': 10, 'iterations': 1500, 'feature_border_type': 'MinEntropy', 'task_type': 'GPU', 'bootstrap_type': 'Poisson', 'objective': 'Logloss'}
=== STARTING FOLD 1/10 (10) ===
creating classifier
fitting
0:	learn: 0.6765409	test: 0.6765274	best: 0.6765274 (0)	total: 16.5ms	remaining: 24.7s


400:	learn: 0.2274473	test: 0.2314177	best: 0.2314177 (400)	total: 6.11s	remaining: 16.8s
500:	learn: 0.2245686	test: 0.2291545	best: 0.2291545 (500)	total: 7.58s	remaining: 15.1s
600:	learn: 0.2224285	test: 0.2276324	best: 0.2276324 (600)	total: 9.01s	remaining: 13.5s
700:	learn: 0.2206268	test: 0.2264684	best: 0.2264684 (700)	total: 10.5s	remaining: 11.9s
800:	learn: 0.2190412	test: 0.2255089	best: 0.2255089 (800)	total: 11.9s	remaining: 10.4s
900:	learn: 0.2176416	test: 0.2247763	best: 0.2247763 (900)	total: 13.4s	remaining: 8.88s
1000:	learn: 0.2164250	test: 0.2241856	best: 0.2241856 (1000)	total: 14.8s	remaining: 7.37s
1100:	learn: 0.2152970	test: 0.2236974	best: 0.2236974 (1100)	total: 16.2s	remaining: 5.87s
1200:	learn: 0.2142396	test: 0.2232777	best: 0.2232777 (1200)	total: 17.6s	remaining: 4.38s
1300:	learn: 0.2132665	test: 0.2228991	best: 0.2228991 (1300)	total: 19s	remaining: 2.91s
1400:	learn: 0.2123392	test: 0.2226073	best: 0.2226073 (1400)	total: 20.4s	remaining: 1.44s
14

creating classifier
fitting
0:	learn: 0.6766192	test: 0.6766224	best: 0.6766224 (0)	total: 15.7ms	remaining: 23.6s
100:	learn: 0.2670243	test: 0.2669938	best: 0.2669938 (100)	total: 1.57s	remaining: 21.8s
200:	learn: 0.2396359	test: 0.2401824	best: 0.2401824 (200)	total: 3.13s	remaining: 20.2s
300:	learn: 0.2318052	test: 0.2329322	best: 0.2329322 (300)	total: 4.64s	remaining: 18.5s
400:	learn: 0.2276023	test: 0.2292946	best: 0.2292946 (400)	total: 6.12s	remaining: 16.8s
500:	learn: 0.2247884	test: 0.2270089	best: 0.2270089 (500)	total: 7.58s	remaining: 15.1s
600:	learn: 0.2226414	test: 0.2253806	best: 0.2253806 (600)	total: 9.03s	remaining: 13.5s
700:	learn: 0.2208170	test: 0.2240948	best: 0.2240948 (700)	total: 10.5s	remaining: 11.9s
800:	learn: 0.2193128	test: 0.2231143	best: 0.2231143 (800)	total: 11.9s	remaining: 10.4s
900:	learn: 0.2179011	test: 0.2222814	best: 0.2222814 (900)	total: 13.3s	remaining: 8.87s
1000:	learn: 0.2166605	test: 0.2216350	best: 0.2216350 (1000)	total: 14.8s	

[32m[I 2022-08-28 00:44:12,759][0m Trial 5 finished with value: 0.7895329974333694 and parameters: {'depth': 7.0, 'learning_rate': 0.01596012003813907, 'border_count': 35.0, 'l2_leaf_reg': 1.0, 'leaf_estimation_method': 'Gradient', 'feature_border_type': 'MinEntropy', 'bootstrap_type': 'Poisson'}. Best is trial 0 with value: 0.7921560538737504.[0m


1499:	learn: 0.2117007	test: 0.2197133	best: 0.2197133 (1499)	total: 21.9s	remaining: 0us
bestTest = 0.2197133263
bestIteration = 1499
predicting
count zerO: 458913
amex: 0.7904707858119357
=== FINISHED FOLD 10/10, log loss=0.21971329899955092, accuracy=0.9022466278791048, amex=0.7904707858119357 ===
time to run this fold: 24s
time to run folds: 4m, 2s
iteration logloss: 0.2206845481307167, iteration acc: 0.9030818486453951, Iteration amex: 0.7895329974333694
STARTING OPTIMIZATION iteration 7, data size: (458913, 1580), params={'max_depth': 3.0, 'learning_rate': 0.18245637639829326, 'border_count': 62.0, 'l2_leaf_reg': 1.0, 'leaf_estimation_method': 'Gradient', 'use_best_model': True, 'early_stopping_rounds': 10, 'iterations': 1500, 'feature_border_type': 'UniformAndQuantiles', 'task_type': 'GPU', 'bootstrap_type': 'Bernoulli', 'subsample': 0.9244927568088176, 'objective': 'Logloss'}
=== STARTING FOLD 1/10 (10) ===
creating classifier
fitting
0:	learn: 0.5410831	test: 0.5406841	best: 0

600:	learn: 0.2127445	test: 0.2188528	best: 0.2188528 (600)	total: 4.33s	remaining: 6.48s
700:	learn: 0.2111911	test: 0.2184384	best: 0.2184384 (700)	total: 5.03s	remaining: 5.73s
bestTest = 0.2183292722
bestIteration = 749
Shrink model to first 750 iterations.
predicting
count zerO: 413022
amex: 0.7935012337600709
=== FINISHED FOLD 9/10, log loss=0.21832924775455323, accuracy=0.9035976553136781, amex=0.7935012337600709 ===
time to run this fold: 7s
=== STARTING FOLD 10/10 (10) ===
creating classifier
fitting
0:	learn: 0.5420460	test: 0.5417205	best: 0.5417205 (0)	total: 8.3ms	remaining: 12.4s
100:	learn: 0.2275864	test: 0.2283750	best: 0.2283750 (100)	total: 768ms	remaining: 10.6s
200:	learn: 0.2216800	test: 0.2234776	best: 0.2234776 (200)	total: 1.5s	remaining: 9.7s
300:	learn: 0.2184716	test: 0.2214729	best: 0.2214729 (300)	total: 2.23s	remaining: 8.88s


[32m[I 2022-08-28 00:45:08,352][0m Trial 6 finished with value: 0.7890606852181979 and parameters: {'depth': 3.0, 'learning_rate': 0.18245637639829326, 'border_count': 62.0, 'l2_leaf_reg': 1.0, 'leaf_estimation_method': 'Gradient', 'feature_border_type': 'UniformAndQuantiles', 'bootstrap_type': 'Bernoulli', 'subsample': 0.9244927568088176}. Best is trial 0 with value: 0.7921560538737504.[0m


bestTest = 0.2209219903
bestIteration = 353
Shrink model to first 354 iterations.
predicting
count zerO: 458913
amex: 0.7881894341239681
=== FINISHED FOLD 10/10, log loss=0.22092196133981964, accuracy=0.9012006711555642, amex=0.7881894341239681 ===
time to run this fold: 4s
time to run folds: 55s
iteration logloss: 0.22056763737817953, iteration acc: 0.9027331943149758, Iteration amex: 0.7890606852181979
STARTING OPTIMIZATION iteration 8, data size: (458913, 1580), params={'max_depth': 10.0, 'learning_rate': 0.03138042609392179, 'border_count': 74.0, 'l2_leaf_reg': 3.0, 'leaf_estimation_method': 'Newton', 'use_best_model': True, 'early_stopping_rounds': 10, 'iterations': 1500, 'feature_border_type': 'GreedyLogSum', 'task_type': 'GPU', 'bootstrap_type': 'No', 'objective': 'Logloss'}
=== STARTING FOLD 1/10 (10) ===
creating classifier
fitting
0:	learn: 0.6491518	test: 0.6491356	best: 0.6491356 (0)	total: 65.8ms	remaining: 1m 38s
100:	learn: 0.2252417	test: 0.2332417	best: 0.2332417 (100)

800:	learn: 0.1786480	test: 0.2208555	best: 0.2208555 (800)	total: 45.4s	remaining: 39.6s
bestTest = 0.2208071419
bestIteration = 819
Shrink model to first 820 iterations.
predicting
count zerO: 321240
amex: 0.7906094111159355
=== FINISHED FOLD 7/10, log loss=0.22080709302484505, accuracy=0.9029875138916127, amex=0.7906094111159355 ===
time to run this fold: 48s
=== STARTING FOLD 8/10 (10) ===
creating classifier
fitting
0:	learn: 0.6489566	test: 0.6488126	best: 0.6488126 (0)	total: 64.4ms	remaining: 1m 36s
100:	learn: 0.2258302	test: 0.2288748	best: 0.2288748 (100)	total: 6.12s	remaining: 1m 24s
200:	learn: 0.2139585	test: 0.2216708	best: 0.2216708 (200)	total: 11.9s	remaining: 1m 16s
300:	learn: 0.2064943	test: 0.2190816	best: 0.2190816 (300)	total: 17.6s	remaining: 1m 9s
400:	learn: 0.2007016	test: 0.2176562	best: 0.2176562 (400)	total: 23.1s	remaining: 1m 3s
500:	learn: 0.1945850	test: 0.2167048	best: 0.2167048 (500)	total: 28.7s	remaining: 57.3s
600:	learn: 0.1890304	test: 0.21611

[32m[I 2022-08-28 00:53:13,793][0m Trial 7 finished with value: 0.7914097256431988 and parameters: {'depth': 10.0, 'learning_rate': 0.03138042609392179, 'border_count': 74.0, 'l2_leaf_reg': 3.0, 'leaf_estimation_method': 'Newton', 'feature_border_type': 'GreedyLogSum', 'bootstrap_type': 'No'}. Best is trial 0 with value: 0.7921560538737504.[0m


bestTest = 0.2174250832
bestIteration = 876
Shrink model to first 877 iterations.
predicting
count zerO: 458913
amex: 0.7906542097957594
=== FINISHED FOLD 10/10, log loss=0.21742505010642021, accuracy=0.9032925846026454, amex=0.7906542097957594 ===
time to run this fold: 52s
time to run folds: 8m, 5s
iteration logloss: 0.21831592539566413, iteration acc: 0.9041474077508911, Iteration amex: 0.7914097256431988
STARTING OPTIMIZATION iteration 9, data size: (458913, 1580), params={'max_depth': 3.0, 'learning_rate': 0.23205307026125302, 'border_count': 59.0, 'l2_leaf_reg': 0.0, 'leaf_estimation_method': 'Gradient', 'use_best_model': True, 'early_stopping_rounds': 10, 'iterations': 1500, 'feature_border_type': 'MaxLogSum', 'task_type': 'GPU', 'bootstrap_type': 'Poisson', 'objective': 'Logloss'}
=== STARTING FOLD 1/10 (10) ===
creating classifier
fitting
0:	learn: 0.5150317	test: 0.5148296	best: 0.5148296 (0)	total: 7.98ms	remaining: 12s
100:	learn: 0.2259934	test: 0.2300090	best: 0.2300090 (

400:	learn: 0.2162092	test: 0.2205816	best: 0.2205816 (399)	total: 2.96s	remaining: 8.11s


[32m[I 2022-08-28 00:54:11,853][0m Trial 8 finished with value: 0.7871476406413148 and parameters: {'depth': 3.0, 'learning_rate': 0.23205307026125302, 'border_count': 59.0, 'l2_leaf_reg': 0.0, 'leaf_estimation_method': 'Gradient', 'feature_border_type': 'MaxLogSum', 'bootstrap_type': 'Poisson'}. Best is trial 0 with value: 0.7921560538737504.[0m


bestTest = 0.2202050656
bestIteration = 468
Shrink model to first 469 iterations.
predicting
count zerO: 458913
amex: 0.7868306585426463
=== FINISHED FOLD 10/10, log loss=0.22020500592338485, accuracy=0.9028131877710226, amex=0.7868306585426463 ===
time to run this fold: 6s
time to run folds: 58s
iteration logloss: 0.22131867567707908, iteration acc: 0.9028312556067701, Iteration amex: 0.7871476406413148
STARTING OPTIMIZATION iteration 10, data size: (458913, 1580), params={'max_depth': 10.0, 'learning_rate': 0.015793084442224617, 'border_count': 82.0, 'l2_leaf_reg': 4.0, 'leaf_estimation_method': 'Gradient', 'use_best_model': True, 'early_stopping_rounds': 10, 'iterations': 1500, 'feature_border_type': 'UniformAndQuantiles', 'task_type': 'GPU', 'bootstrap_type': 'Bernoulli', 'subsample': 0.8877871631347805, 'objective': 'Logloss'}
=== STARTING FOLD 1/10 (10) ===
creating classifier
fitting
0:	learn: 0.6762544	test: 0.6762588	best: 0.6762588 (0)	total: 57.3ms	remaining: 1m 25s
100:	lea

400:	learn: 0.2157786	test: 0.2279370	best: 0.2279370 (400)	total: 22.1s	remaining: 1m
500:	learn: 0.2114878	test: 0.2260208	best: 0.2260208 (500)	total: 27.4s	remaining: 54.6s
600:	learn: 0.2077924	test: 0.2247253	best: 0.2247253 (600)	total: 32.6s	remaining: 48.8s
700:	learn: 0.2044564	test: 0.2237912	best: 0.2237912 (700)	total: 37.9s	remaining: 43.2s
800:	learn: 0.2013741	test: 0.2230013	best: 0.2230013 (800)	total: 43.1s	remaining: 37.6s
900:	learn: 0.1981567	test: 0.2223551	best: 0.2223551 (900)	total: 48.4s	remaining: 32.2s
1000:	learn: 0.1950223	test: 0.2218384	best: 0.2218384 (1000)	total: 53.8s	remaining: 26.8s
1100:	learn: 0.1921513	test: 0.2214987	best: 0.2214938 (1099)	total: 59.1s	remaining: 21.4s
1200:	learn: 0.1892668	test: 0.2211469	best: 0.2211469 (1200)	total: 1m 4s	remaining: 16s
bestTest = 0.2211103825
bestIteration = 1208
Shrink model to first 1209 iterations.
predicting
count zerO: 229458
amex: 0.7888322427953525
=== FINISHED FOLD 5/10, log loss=0.221110376675428

300:	learn: 0.2216020	test: 0.2291084	best: 0.2291084 (300)	total: 16.9s	remaining: 1m 7s
400:	learn: 0.2160047	test: 0.2259539	best: 0.2259539 (400)	total: 22.2s	remaining: 1m
500:	learn: 0.2114885	test: 0.2239335	best: 0.2239335 (500)	total: 27.6s	remaining: 55.1s
600:	learn: 0.2078811	test: 0.2225703	best: 0.2225703 (600)	total: 32.9s	remaining: 49.2s
700:	learn: 0.2045959	test: 0.2215307	best: 0.2215307 (700)	total: 38.1s	remaining: 43.4s
800:	learn: 0.2015002	test: 0.2207776	best: 0.2207776 (800)	total: 43.4s	remaining: 37.8s
900:	learn: 0.1982653	test: 0.2200866	best: 0.2200866 (900)	total: 48.7s	remaining: 32.4s
1000:	learn: 0.1951756	test: 0.2195553	best: 0.2195553 (1000)	total: 54s	remaining: 26.9s
1100:	learn: 0.1923154	test: 0.2191445	best: 0.2191445 (1100)	total: 59.3s	remaining: 21.5s


[32m[I 2022-08-28 01:07:13,637][0m Trial 9 finished with value: 0.7915203170260824 and parameters: {'depth': 10.0, 'learning_rate': 0.015793084442224617, 'border_count': 82.0, 'l2_leaf_reg': 4.0, 'leaf_estimation_method': 'Gradient', 'feature_border_type': 'UniformAndQuantiles', 'bootstrap_type': 'Bernoulli', 'subsample': 0.8877871631347805}. Best is trial 0 with value: 0.7921560538737504.[0m


bestTest = 0.2188328219
bestIteration = 1170
Shrink model to first 1171 iterations.
predicting
count zerO: 458913
amex: 0.7920449368309816
=== FINISHED FOLD 10/10, log loss=0.21883277972457751, accuracy=0.9023991632346212, amex=0.7920449368309816 ===
time to run this fold: 1m, 5s
time to run folds: 13m, 1s
iteration logloss: 0.21892264961057145, iteration acc: 0.9039295029965986, Iteration amex: 0.7915203170260824


In [14]:
sum(optuna_seconds)

2726

In [15]:
df_params = pd.DataFrame(optuna_parameters)
df_params["loss"] = optuna_losses
df_params["accuracy"] = optuna_accuracies
df_params["amex"] = optuna_amexes
df_params["time"] = optuna_seconds

#df_params

In [16]:
#df_params.to_csv("optimizer_results_cat_lagged_seed2.csv")
df_params.to_csv(OPTIMIZER_PARAMS_FILE)


In [17]:
optuna_parameters

[{'max_depth': 7.0,
  'learning_rate': 0.0431673514792702,
  'border_count': 128.0,
  'l2_leaf_reg': 3.0,
  'leaf_estimation_method': 'Gradient',
  'use_best_model': True,
  'early_stopping_rounds': 10,
  'iterations': 1500,
  'feature_border_type': 'MinEntropy',
  'task_type': 'GPU',
  'bootstrap_type': 'No',
  'objective': 'Logloss'},
 {'max_depth': 8.0,
  'learning_rate': 0.01314407695144808,
  'border_count': 165.0,
  'l2_leaf_reg': 3.0,
  'leaf_estimation_method': 'Newton',
  'use_best_model': True,
  'early_stopping_rounds': 10,
  'iterations': 1500,
  'feature_border_type': 'MaxLogSum',
  'task_type': 'GPU',
  'bootstrap_type': 'No',
  'objective': 'Logloss'},
 {'max_depth': 7.0,
  'learning_rate': 0.012050135586794328,
  'border_count': 98.0,
  'l2_leaf_reg': 0.0,
  'leaf_estimation_method': 'Newton',
  'use_best_model': True,
  'early_stopping_rounds': 10,
  'iterations': 1500,
  'feature_border_type': 'GreedyLogSum',
  'task_type': 'GPU',
  'bootstrap_type': 'No',
  'objectiv

In [18]:
best_idx = np.argmax(df_params["amex"])
best_idx

0

In [19]:
df_params.sort_values(by="amex", ascending=False).head(5)

Unnamed: 0,max_depth,learning_rate,border_count,l2_leaf_reg,leaf_estimation_method,use_best_model,early_stopping_rounds,iterations,feature_border_type,task_type,bootstrap_type,objective,subsample,loss,accuracy,amex,time
0,7.0,0.043167,128.0,3.0,Gradient,True,10,1500,MinEntropy,GPU,No,Logloss,,0.21843,0.904102,0.792156,250
7,10.0,0.015793,82.0,4.0,Gradient,True,10,1500,UniformAndQuantiles,GPU,Bernoulli,Logloss,0.887787,0.218923,0.90393,0.79152,781
5,10.0,0.03138,74.0,3.0,Newton,True,10,1500,GreedyLogSum,GPU,No,Logloss,,0.218316,0.904147,0.79141,485
1,8.0,0.013144,165.0,3.0,Newton,True,10,1500,MaxLogSum,GPU,No,Logloss,,0.21968,0.903481,0.790067,549
3,7.0,0.01596,35.0,1.0,Gradient,True,10,1500,MinEntropy,GPU,Poisson,Logloss,,0.220685,0.903082,0.789533,242


In [20]:
df_params.shape

(8, 17)

In [21]:
optuna_params = df_params.iloc[best_idx]
optuna_params = dict(optuna_params)


In [22]:
optuna_params

{'max_depth': 7.0,
 'learning_rate': 0.0431673514792702,
 'border_count': 128.0,
 'l2_leaf_reg': 3.0,
 'leaf_estimation_method': 'Gradient',
 'use_best_model': True,
 'early_stopping_rounds': 10,
 'iterations': 1500,
 'feature_border_type': 'MinEntropy',
 'task_type': 'GPU',
 'bootstrap_type': 'No',
 'objective': 'Logloss',
 'subsample': nan,
 'loss': 0.21842958993457792,
 'accuracy': 0.9041016494234061,
 'amex': 0.7921560538737504,
 'time': 250}

In [23]:
keys = list(optuna_params.keys())
for param in keys:
    if isinstance(optuna_params[param], (np.number)) and np.isnan(optuna_params[param]):
        print(f"delete: {param}")
        del optuna_params[param]
print("delete loss")
del optuna_params["loss"]
if "Unnamed: 0" in optuna_params:
    del optuna_params["Unnamed: 0"]
del optuna_params["accuracy"]
del optuna_params["time"]
del optuna_params["amex"]


delete: subsample
delete loss


# Train and Predict After Optimization

In [24]:
def train_and_predict(optuna_params, df_from):
    X = df_from.drop(["customer_ID", "target"], axis=1)
    y = df_from["target"]

    verbosity=50 #print status every 50 iteration of tree building
    optuna_eval_results = {}

    cv = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=1121218)

    cv_scores = np.empty(N_FOLDS)
    cv_accuracies = np.empty(N_FOLDS)
    time_start_all_folds = time.time()
    acc_score_total = 0
    models = []

    n_classes = 1
    train_preds = np.zeros((X.shape[0], n_classes))
    #balanced_preds = np.empty([X.shape[0], n_classes], dtype = float)

    for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):

        
        time_start_this_fold = time.time()
        print(f"=== STARTING FOLD {idx+1}/{N_FOLDS} ===")
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
#        evalset = [(X_train, y_train), (X_test,y_test)]
        evalset = [(X_test,y_test)]
        
        print("creating classifier")
        model = cb.CatBoostClassifier(**optuna_params)

        
        print("fitting")
        model.fit(
            X_train,
            y_train,
            verbose=100,
            eval_set=evalset,
        )
        
        print(f"predicting")
        preds = model.predict_proba(X_test)

        preds_true_amex = preds[:, 1]
        preds_true = preds[:, 1].flatten()
        preds_true.shape = (y_test.shape[0], 1)
        np.add.at(train_preds, test_idx, preds_true)
        print(f"count zerO: {np.count_nonzero(train_preds)}")
        
        amex_score = amex_metric_mod(y_test, preds_true_amex)        

        preds_this_round = (preds >= 0.5)[:,1]
        
        acc_score = accuracy_score(y_test, preds_this_round)
        acc_score_total += acc_score

        log_loss_fold = log_loss(y_test, preds)
        cv_scores[idx] = log_loss_fold
        cv_accuracies[idx] = acc_score
        print(f"=== FINISHED FOLD {idx+1}/{N_FOLDS}, log loss={log_loss_fold}, accuracy={acc_score}, amex={amex_score} ===")
        time_total_this_fold = time.time() - time_start_this_fold
        print(f"time to run this fold: {format_time(time_total_this_fold)}")        
        
        models.append(model)
        
    return models, train_preds


In [25]:
#optuna_params = df_params.iloc[best_idx]
#final_models, final_preds = train_and_predict(optuna_params, df_full_train[:50000])
final_models, final_preds = train_and_predict(optuna_params, df_train)


=== STARTING FOLD 1/10 ===
creating classifier
fitting
0:	learn: 0.6486545	test: 0.6485966	best: 0.6485966 (0)	total: 25.6ms	remaining: 38.4s
100:	learn: 0.2331780	test: 0.2318670	best: 0.2318670 (100)	total: 2.47s	remaining: 34.2s
200:	learn: 0.2235273	test: 0.2237682	best: 0.2237682 (200)	total: 4.74s	remaining: 30.6s
300:	learn: 0.2185227	test: 0.2204770	best: 0.2204770 (300)	total: 7s	remaining: 27.9s
400:	learn: 0.2145664	test: 0.2186514	best: 0.2186498 (399)	total: 9.26s	remaining: 25.4s
500:	learn: 0.2113695	test: 0.2176161	best: 0.2176161 (500)	total: 11.5s	remaining: 22.9s
600:	learn: 0.2085144	test: 0.2169876	best: 0.2169876 (600)	total: 13.7s	remaining: 20.5s
700:	learn: 0.2059022	test: 0.2164225	best: 0.2164225 (700)	total: 15.9s	remaining: 18.1s
800:	learn: 0.2035917	test: 0.2160731	best: 0.2160731 (800)	total: 18.1s	remaining: 15.8s
900:	learn: 0.2013122	test: 0.2158042	best: 0.2158012 (892)	total: 20.3s	remaining: 13.5s
1000:	learn: 0.1990975	test: 0.2155234	best: 0.2155

200:	learn: 0.2229548	test: 0.2278142	best: 0.2278142 (200)	total: 4.8s	remaining: 31s
300:	learn: 0.2180137	test: 0.2244066	best: 0.2244066 (300)	total: 7.06s	remaining: 28.1s
400:	learn: 0.2141097	test: 0.2225212	best: 0.2225202 (399)	total: 9.32s	remaining: 25.5s
500:	learn: 0.2109633	test: 0.2213019	best: 0.2213019 (500)	total: 11.5s	remaining: 23s
600:	learn: 0.2082115	test: 0.2205571	best: 0.2205571 (600)	total: 13.7s	remaining: 20.5s
700:	learn: 0.2056438	test: 0.2199556	best: 0.2199543 (699)	total: 15.9s	remaining: 18.1s
800:	learn: 0.2033361	test: 0.2196275	best: 0.2196275 (800)	total: 18.1s	remaining: 15.8s
900:	learn: 0.2010868	test: 0.2193099	best: 0.2193099 (900)	total: 20.2s	remaining: 13.4s
bestTest = 0.2190971201
bestIteration = 955
Shrink model to first 956 iterations.
predicting
count zerO: 321240
=== FINISHED FOLD 7/10, log loss=0.2190971141663266, accuracy=0.9033797476629405, amex=0.7905233652126531 ===
time to run this fold: 25s
=== STARTING FOLD 8/10 ===
creating 

## Save Final Train Predictions

These can be used to train an ensemble.

In [26]:
df_preds = pd.DataFrame()
df_preds["customer_ID"] = df_train["customer_ID"]
df_preds["prediction"] = final_preds

In [27]:
final_preds[:, 0].shape

(458913,)

In [28]:
amex_metric_mod(y, final_preds[:, 0])        


0.7922845612852325

In [29]:
#df_preds.to_csv("predictions_cat_lagged.csv")
df_preds.to_csv(PREDICTIONS_FILE)


In [30]:
#test.replace([np.inf, -np.inf], np.nan,inplace=True)
X = df_test.reset_index().drop(["customer_ID"], axis=1)


# Predict With all Final Models

In [31]:
from tqdm import tqdm

n_classes = 1
sub_preds = np.zeros((X.shape[0], n_classes))

#tqdm.auto()
tqdm.pandas()

for model in tqdm(final_models): #lqtm
    preds = model.predict_proba(X)
    preds_true = preds[:, 1]
    preds_true.shape = (X.shape[0], 1)
    sub_preds += preds_true
sub_preds /= N_FOLDS


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:07<00:00,  1.32it/s]


In [32]:
submission = pd.DataFrame()
submission["customer_ID"] = df_test.reset_index()["customer_ID"]
submission["prediction"] = sub_preds
submission

Unnamed: 0,customer_ID,prediction
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0.023526
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0.002717
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0.040681
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0.259924
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0.845987
...,...,...
924616,ffff952c631f2c911b8a2a8ca56ea6e656309a83d2f64c...,0.013381
924617,ffffcf5df59e5e0bba2a5ac4578a34e2b5aa64a1546cd3...,0.858926
924618,ffffd61f098cc056dbd7d2a21380c4804bbfe60856f475...,0.562557
924619,ffffddef1fc3643ea179c93245b68dca0f36941cd83977...,0.365587


In [33]:
#submission.to_csv("submission_lagged_cat_seed2.csv", index=False)
submission.to_csv(SUBMISSIONS_FILE, index=False)


In [35]:
import pickle

for idx, model in enumerate(final_models):    
    with open(f'final_model{idx}.pickle','wb') as f:
        pickle.dump(model,f)