# LGBM Optuna Optimizer

## For Kaggle AMEX default prediction competition data

https://www.kaggle.com/competitions/amex-default-prediction/data

The competition data is anonymized: the columns look random and no information about their meaning was given. This notebook mainly serves as a template that I can copy when optimizing LightGBM (LGBM) on other tabular datasets.

This assumes that LGBM and the various other Python libraries imported below are installed, and that a GPU is configured. I used a Docker image and a Pipfile with all of these installed.


In [1]:
#!pip install lightgbm


In [2]:
import pandas as pd
import numpy as np

import optuna  # pip install optuna
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from optuna.integration import LightGBMPruningCallback
from sklearn.model_selection import train_test_split
import lightgbm as lgbm
from tqdm import tqdm
import gc
from sklearn import preprocessing

tqdm.pandas()

import time

# Number of stratified cross-validation folds; read by both the Optuna
# objective and the final train_and_predict run.
N_FOLDS = 5

In [3]:
# https://www.kaggle.com/kyakovlev
# https://www.kaggle.com/competitions/amex-default-prediction/discussion/327534
def amex_metric_mod(y_true, y_pred):
    """Return the AMEX competition metric for binary labels and scores.

    The result is the average of two parts:

    * the share of all positives found among the highest-scored rows that
      make up the top 4% of total weight, and
    * a weighted Gini value of the predictions divided by the same value
      computed for a perfect ordering (rows sorted by the true label).

    Rows whose label is 0 weigh 20, every other row weighs 1.

    Args:
        y_true: Array-like of labels.
        y_pred: Array-like of predicted scores, aligned with ``y_true``.

    Returns:
        ``0.5 * (gini_of_predictions / gini_of_labels + top_four_share)``.
    """
    # Column 0 holds the label, column 1 the predicted score.
    stacked = np.array([y_true, y_pred]).T

    def _ordered_desc(column):
        # Rows sorted by the given column, largest value first.
        return stacked[stacked[:, column].argsort()[::-1]]

    def _weighted_gini(column):
        ordered = _ordered_desc(column)
        targets = ordered[:, 0]
        row_weight = np.where(targets == 0, 20, 1)
        random_curve = np.cumsum(row_weight / np.sum(row_weight))
        positives_total = np.sum(targets * row_weight)
        positives_found = np.cumsum(targets * row_weight)
        lorentz_curve = positives_found / positives_total
        return np.sum((lorentz_curve - random_curve) * row_weight)

    # Share of positives captured within the top 4% of cumulative weight.
    by_score = _ordered_desc(1)
    score_weights = np.where(by_score[:, 0] == 0, 20, 1)
    weight_budget = int(0.04 * np.sum(score_weights))
    captured = by_score[np.cumsum(score_weights) <= weight_budget]
    top_four = np.sum(captured[:, 0]) / np.sum(by_score[:, 0])

    gini_pred = _weighted_gini(1)
    gini_ideal = _weighted_gini(0)
    return 0.5 * (gini_pred / gini_ideal + top_four)


In [4]:
def format_time(seconds):
    """Format a duration given in seconds as a short human-readable string.

    The value is truncated to whole seconds. Hours and minutes are only
    shown when needed: ``"1h, 2m, 5s"``, ``"2m, 5s"`` or ``"5s"``.
    """
    minutes, seconds = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    if hours > 0:
        return f"{hours}h, {minutes}m, {seconds}s"
    elif minutes > 0:
        return f"{minutes}m, {seconds}s"
    else:
        return f"{seconds}s"

In [5]:
import pickle

def pickle_save(to_save, filename):
    """Pickle ``to_save`` into the file ``<filename>.pickle``.

    Args:
        to_save: Any picklable object.
        filename: Target path without the ``.pickle`` extension.
    """
    target_path = f'{filename}.pickle'
    with open(target_path, mode='wb') as out_file:
        pickle.dump(to_save, out_file)
    

# Load Data

In [6]:
# Load the training features from train_lagged.parquet (pyarrow engine).
# NOTE(review): presumably this file was produced by a separate
# feature-aggregation step that is not part of this notebook - confirm.
df_train = pd.read_parquet("train_lagged.parquet", engine="pyarrow")
# Last expression of the cell: shows the first rows as the cell output.
df_train.head()

Unnamed: 0,customer_ID,S_2_first,S_2_mean,S_2_std,S_2_min,S_2_max,S_2_last,P_2_first,P_2_mean,P_2_std,...,D_66_count,D_66_first,D_66_last,D_66_nunique,D_68_count,D_68_first,D_68_last,D_68_nunique,target,fake_splitter
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,0,0.0,0.0,0,0,0,0.94,0.933846,0.024337,...,13,0,0,1,13,7,7,1,0,True
1,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,0,0.0,0.0,0,0,0,0.93,0.899231,0.022899,...,13,0,0,1,13,7,7,1,0,True
2,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,0,0.0,0.0,0,0,0,0.88,0.877692,0.027735,...,13,0,0,1,13,7,7,1,0,True
3,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,0,0.0,0.0,0,0,0,0.57,0.597692,0.018777,...,13,0,0,1,13,3,4,3,0,True
4,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,0,0.0,0.0,0,0,0,0.94,0.893077,0.042502,...,13,2,2,1,13,7,7,1,0,True


In [7]:
#df_train["target"] > 0.6
# Keep the "fake_splitter" column aside (it is later combined with the target
# to stratify the CV folds) and drop it from the frame so it does not end up
# among the model features.
strat = df_train["fake_splitter"]
df_train = df_train.drop("fake_splitter", axis=1)

In [8]:
# Load the test features from test_lagged.parquet (pyarrow engine).
df_test = pd.read_parquet("test_lagged.parquet", engine="pyarrow")
# Last expression of the cell: shows the first rows as the cell output.
df_test.head()

Unnamed: 0,customer_ID,S_2_first,S_2_mean,S_2_std,S_2_min,S_2_max,S_2_last,P_2_first,P_2_mean,P_2_std,...,D_64_last,D_64_nunique,D_66_count,D_66_first,D_66_last,D_66_nunique,D_68_count,D_68_first,D_68_last,D_68_nunique
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0,0.0,0.0,0,0,0,0.63,0.602222,0.019861,...,4,2,9,0,0,1,9,0,7,2
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0,0.0,0.0,0,0,0,0.89,0.860769,0.030947,...,1,1,13,0,0,1,13,7,7,1
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0,0.0,0.0,0,0,0,0.77,0.748462,0.061893,...,4,2,13,2,2,1,13,7,5,2
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0,0.0,0.0,0,0,0,0.51,0.474615,0.026962,...,3,1,13,0,0,1,13,6,6,1
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0,0.0,0.0,0,0,0,0.29,0.323846,0.050751,...,3,2,13,0,0,1,13,7,6,2


In [11]:
def find_new_cat_cols(df_from, base_cols=None):
    """Expand base categorical feature names to the matching columns of a frame.

    A column matches a base name when it is exactly that name or starts with
    ``"<base>_"`` (for example ``D_63`` matches ``D_63_first``). Matching on
    the name boundary keeps a short base such as ``D_6`` from also capturing
    ``D_63_*`` columns.

    Args:
        df_from: Object with a ``columns`` attribute holding string names
            (e.g. a pandas DataFrame).
        base_cols: Iterable of base feature names. When omitted, the
            notebook-level ``cat_cols`` list is used, as before.

    Returns:
        List of matching column names, grouped by base name in the order of
        ``base_cols`` and, within a group, in column order. Each column
        appears at most once.
    """
    if base_cols is None:
        # Backward-compatible default: the global list defined in the notebook.
        base_cols = cat_cols

    # A dict is used as an insertion-ordered set to drop repeated matches.
    matched = {}
    for base in base_cols:
        derived_prefix = f"{base}_"
        for col in df_from.columns:
            if col == base or col.startswith(derived_prefix):
                matched[col] = None
    return list(matched)



In [12]:
# Base names of the categorical features. find_new_cat_cols reads this global,
# so it must be defined before that function is called.
cat_cols = ['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68'] 
# Rebind cat_cols to the df_train columns whose names start with one of the
# base names; this expanded list is what gets passed to LightGBM as
# categorical_feature.
cat_cols = find_new_cat_cols(df_train)

# LGBM Optuna Optimizer

In [37]:
# Module-level bookkeeping filled by optuna_objective.
# Dict handed to lgbm.record_evaluation for every fit.
optuna_eval_results = {}
# Incremented once per optuna_objective call; used in the log output.
optuna_iteration_index = 0
# Per completed iteration: mean log loss, accuracy and AMEX metric over the folds.
optuna_losses = []
optuna_accuracies = []
optuna_amexes = []
# Per completed iteration: the parameter dict actually passed to LGBMClassifier.
optuna_parameters = []
# Per completed iteration: wall-clock seconds spent on all folds.
optuna_seconds = []

def optuna_objective(trial, X, y, strat, verbosity=100, device="gpu"):
    """Optuna objective: cross-validate one sampled LightGBM configuration.

    Samples hyperparameters from ``trial``, fits one ``LGBMClassifier`` per
    stratified fold and scores each fold with log loss, accuracy and the AMEX
    metric.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature frame; rows are selected with ``.iloc``.
        y: Targets aligned with ``X``; rows are selected with ``.iloc``.
        strat: Per-row labels passed to ``StratifiedKFold.split``.
        verbosity: Log the validation metric every ``verbosity`` rounds.
        device: LightGBM ``device`` setting (previously hard-coded to "gpu").

    Returns:
        The mean AMEX metric over the folds. When a DART trial is abandoned
        early for scoring too low, the AMEX metric of the fold that triggered
        the drop is returned instead.

    Side effects:
        Increments ``optuna_iteration_index`` and, for trials that run all
        folds, appends to the module-level ``optuna_*`` lists. Reads the
        globals ``N_FOLDS`` and ``cat_cols``. The parameters really passed to
        LightGBM are stored on the trial as user attribute
        ``"effective_params"``: for DART they differ from ``trial.params``
        (``n_estimators`` is overridden), and early-dropped trials never reach
        the module-level lists.
    """
    global optuna_iteration_index
    optuna_iteration_index += 1
    # NOTE: the order of the trial.suggest_* calls is kept stable on purpose.
    param_grid = {
        "verbosity": -1,
        "n_estimators": trial.suggest_categorical("n_estimators", [12000]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=20),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 10000, step=100),
        # GPU only allows max bin 255 on LGBM
        "max_bin": trial.suggest_int("max_bin", 100, 255),
        "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
        "lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        # Bagging parameters are only sampled for non-GOSS boosting (below).
        "bagging_fraction": None,
        "bagging_freq": None,
        "feature_fraction": trial.suggest_float(
            "feature_fraction", 0.2, 0.95, step=0.05
        ),
        # sklearn-API aliases are blanked so the native names above apply.
        'subsample': None,  # overridden by bagging_fraction
        'reg_alpha': None,  # overridden by lambda_l1
        'reg_lambda': None,  # overridden by lambda_l2
        'min_sum_hessian_in_leaf': None,  # overrides min_child_weight
        'min_child_samples': None,  # overridden by min_data_in_leaf
        'colsample_bytree': None,  # overridden by feature_fraction
        'subsample_freq': None,  # bagging_freq
        'min_split_gain': None,  # min_gain_to_split
        'boosting_type': trial.suggest_categorical('boosting_type', ['gbdt', 'goss', 'dart']),
    }
    uses_dart = param_grid["boosting_type"] == "dart"

    if param_grid["boosting_type"] != "goss":
        param_grid["bagging_fraction"] = trial.suggest_float(
            "bagging_fraction", 0.2, 0.95, step=0.05
        )
        param_grid["bagging_freq"] = trial.suggest_categorical("bagging_freq", [1, 5, 10])

    if uses_dart:
        # TODO: maybe less in search, more in actual selected train
        # NOTE(review): the logged DART runs train all 2000 rounds, so early
        # stopping does not seem to take effect in DART mode - confirm.
        param_grid["n_estimators"] = 2000

    # trial.params still reports the sampled n_estimators (12000) for DART, so
    # keep the effective configuration on the trial itself.
    trial.set_user_attr("effective_params", dict(param_grid))

    print(f"STARTING OPTIMIZATION iteration {optuna_iteration_index}, data size: {X.shape}, params={param_grid}")
    # split data into N pieces, run the optimizer on only N_FOLDS_RUN of them for speed
    N_FOLDS_RUN = N_FOLDS
    cv = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=69)

    cv_scores = np.empty(N_FOLDS_RUN)
    cv_accuracies = np.empty(N_FOLDS_RUN)
    cv_amexes = np.empty(N_FOLDS_RUN)

    time_start_all_folds = time.time()
    # Out-of-fold positive-class probabilities; only used for the progress print.
    train_preds = np.zeros((X.shape[0], 1))

    for idx, (train_idx, test_idx) in enumerate(cv.split(X, strat)):
        time_start_this_fold = time.time()
        print(f"=== STARTING FOLD {idx+1}/{N_FOLDS_RUN} ({N_FOLDS}) ===")
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        print("creating classifier")
        model = lgbm.LGBMClassifier(objective="binary", **param_grid, device=device)
        print("fitting")
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
            eval_metric="binary_logloss",
            categorical_feature=cat_cols,
            callbacks=[
                lgbm.early_stopping(100, verbose=False),
                lgbm.record_evaluation(optuna_eval_results),
                lgbm.log_evaluation(verbosity),  # by default every round
            ],
        )

        print("predicting")

        # best iteration used by default https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html#lightgbm.LGBMClassifier.predict_proba
        preds = model.predict_proba(X_test)
        positive_proba = preds[:, 1]

        # Every row is in exactly one test fold, so plain assignment is enough.
        train_preds[test_idx, 0] = positive_proba
        print(f"count zerO: {np.count_nonzero(train_preds)}")

        acc_score = accuracy_score(y_test, positive_proba >= 0.5)
        amex_score = amex_metric_mod(y_test, positive_proba)
        log_loss_fold = log_loss(y_test, preds)

        cv_scores[idx] = log_loss_fold
        cv_accuracies[idx] = acc_score
        cv_amexes[idx] = amex_score
        print(f"amex: {amex_score}")

        print(f"=== FINISHED FOLD {idx+1}/{N_FOLDS_RUN} ({N_FOLDS}), log loss={log_loss_fold}, accuracy={acc_score}, amex={amex_score} ===")
        time_total_this_fold = time.time() - time_start_this_fold
        print(f"time to run this fold: {format_time(time_total_this_fold)}")

        if idx+1 >= N_FOLDS_RUN:
            break

        # DART is slow, so give up on the trial as soon as a fold scores low.
        if uses_dart:
            if (amex_score < 0.79 and idx == 0) or (amex_score < 0.78 and idx > 0):
                print("low score on amex, dropping loop")
                trial.set_user_attr("dropped_after_fold", idx + 1)
                return amex_score

    time_total_all_folds = time.time() - time_start_all_folds
    print(f"time to run folds: {format_time(time_total_all_folds)}")

    iteration_log_loss = np.mean(cv_scores)
    iteration_accuracy = np.mean(cv_accuracies)
    iteration_amex = np.mean(cv_amexes)
    optuna_losses.append(iteration_log_loss)
    optuna_amexes.append(iteration_amex)
    optuna_accuracies.append(iteration_accuracy)
    optuna_parameters.append(param_grid)
    optuna_seconds.append(int(time_total_all_folds))
    # Same metrics on the trial, so they stay aligned with trial numbers even
    # when other trials were dropped early.
    trial.set_user_attr("logloss", float(iteration_log_loss))
    trial.set_user_attr("accuracy", float(iteration_accuracy))
    trial.set_user_attr("seconds", int(time_total_all_folds))
    print(f"iteration logloss: {iteration_log_loss}, iteration acc: {iteration_accuracy}, Iteration amex: {iteration_amex}")
    return iteration_amex  # the study maximizes AMEX, not log loss

#TODO: use LGBM's unbalanced target weighting to try if it has any effect

# Train and Predict after Finding Optimized Params

In [10]:
def train_and_predict(optuna_params, df_from, strat, verbosity=100, device="gpu"):
    """Train one LightGBM model per CV fold and collect out-of-fold predictions.

    Args:
        optuna_params: Keyword arguments for ``lgbm.LGBMClassifier`` (for
            example one of the dicts collected in ``optuna_parameters``).
        df_from: Training frame containing the features plus the
            ``customer_ID`` and ``target`` columns.
        strat: Per-row labels passed to ``StratifiedKFold.split``.
        verbosity: Log the validation metric every ``verbosity`` boosting
            rounds (previously a hard-coded local).
        device: LightGBM ``device`` setting (previously hard-coded to "gpu").

    Returns:
        Tuple ``(models, train_preds)``: the fitted classifiers, one per fold,
        and an array of shape ``(n_rows, 1)`` holding each row's
        positive-class probability from the model that did not train on it.

    Reads the globals ``N_FOLDS`` and ``cat_cols``.
    """
    X = df_from.drop(["customer_ID", "target"], axis=1)
    y = df_from["target"]

    eval_results = {}  # filled by lgbm.record_evaluation on every fit
    cv = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=1121218)

    amex_score_total = 0
    models = []
    train_preds = np.zeros((X.shape[0], 1))

    for idx, (train_idx, test_idx) in enumerate(cv.split(X, strat)):
        time_start_this_fold = time.time()
        print(f"=== STARTING FOLD {idx+1}/{N_FOLDS} ===")
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        print("creating classifier")
        model = lgbm.LGBMClassifier(objective="binary", **optuna_params, device=device)
        print("fitting")
        # NOTE: the held-out fold is also the early-stopping set, so the fold
        # scores printed below can be slightly optimistic.
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
            eval_metric="binary_logloss",
            categorical_feature=cat_cols,
            callbacks=[
                lgbm.early_stopping(100, verbose=False),
                lgbm.record_evaluation(eval_results),
                lgbm.log_evaluation(verbosity),  # by default every round
            ],
        )
        models.append(model)

        print("predicting")
        preds = model.predict_proba(X_test)
        positive_proba = preds[:, 1]

        # Every row is in exactly one test fold, so plain assignment is enough.
        train_preds[test_idx, 0] = positive_proba
        print(f"count zerO: {np.count_nonzero(train_preds)}")

        acc_score = accuracy_score(y_test, positive_proba >= 0.5)
        amex_score = amex_metric_mod(y_test, positive_proba)
        amex_score_total += amex_score
        log_loss_fold = log_loss(y_test, preds)

        print(f"=== FINISHED FOLD {idx+1}/{N_FOLDS}, log loss={log_loss_fold}, accuracy={acc_score}, amex={amex_score} ===")
        time_total_this_fold = time.time() - time_start_this_fold
        print(f"time to run this fold: {format_time(time_total_this_fold)}")

    print(f"amex avg: {amex_score_total/N_FOLDS}")
    return models, train_preds


# Split Data and Run Optimizer Search

In [13]:
# Targets and feature matrix for the hyperparameter search.
y = df_train["target"]
X = df_train.drop(["customer_ID", "target"], axis=1)
# Stratify on the (fake_splitter, target) pair. Tuples break StratifiedKFold,
# so each pair is fused into a single string label such as "True-0".
stratzip = [f"{a}-{b}" for (a, b) in zip(strat, y)]


In [14]:
# Reset every piece of bookkeeping that optuna_objective fills, so results
# from an earlier search do not leak into this one.
optuna_eval_results = {}
optuna_iteration_index = 0
optuna_losses = []
optuna_accuracies = []
optuna_amexes = []  # was missing from this reset, so old AMEX scores piled up
optuna_parameters = []
optuna_seconds = []

# The objective returns the AMEX metric, hence direction="maximize".
study = optuna.create_study(direction="maximize", study_name="LGBM Classifier")
func = lambda trial: optuna_objective(trial, X, y, stratzip)
study.optimize(func, n_trials=20) #TODO: set to 50-100 for real search, just protoing now and 20 is faster

[32m[I 2022-09-09 10:16:32,894][0m A new study created in memory with name: LGBM Classifier[0m


STARTING OPTIMIZATION iteration 1, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 12000, 'learning_rate': 0.03680792839869354, 'num_leaves': 2040, 'max_depth': 4, 'min_data_in_leaf': 300, 'max_bin': 157, 'lambda_l1': 90, 'lambda_l2': 85, 'min_gain_to_split': 8.707453098189157, 'bagging_fraction': None, 'bagging_freq': None, 'feature_fraction': 0.45, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'goss'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting
[100]	valid_0's binary_logloss: 0.246895
[200]	valid_0's binary_logloss: 0.232113
[300]	valid_0's binary_logloss: 0.227489
[400]	valid_0's binary_logloss: 0.225009
[500]	valid_0's binary_logloss: 0.223552
[600]	valid_0's binary_logloss: 0.222639
[700]	valid_0's binary_logloss: 0.222075
[800]	valid_0's binary_logloss: 0.221662
[900]	valid_0's binary_l

[5400]	valid_0's binary_logloss: 0.218653
[5500]	valid_0's binary_logloss: 0.218653
predicting
count zerO: 275349
amex: 0.7900593995414786
=== FINISHED FOLD 3/5 (5), log loss=0.21865099606998023, accuracy=0.9028687229661266, amex=0.7900593995414786 ===
time to run this fold: 4m, 44s
=== STARTING FOLD 4/5 (5) ===
creating classifier
fitting
[100]	valid_0's binary_logloss: 0.247258
[200]	valid_0's binary_logloss: 0.231936
[300]	valid_0's binary_logloss: 0.226798
[400]	valid_0's binary_logloss: 0.224119
[500]	valid_0's binary_logloss: 0.222477
[600]	valid_0's binary_logloss: 0.221409
[700]	valid_0's binary_logloss: 0.220711
[800]	valid_0's binary_logloss: 0.220253
[900]	valid_0's binary_logloss: 0.219922
[1000]	valid_0's binary_logloss: 0.219646
[1100]	valid_0's binary_logloss: 0.219461
[1200]	valid_0's binary_logloss: 0.219318
[1300]	valid_0's binary_logloss: 0.219227
[1400]	valid_0's binary_logloss: 0.219116
[1500]	valid_0's binary_logloss: 0.219023
[1600]	valid_0's binary_logloss: 0.21

[32m[I 2022-09-09 10:40:57,417][0m Trial 0 finished with value: 0.7899761091944011 and parameters: {'n_estimators': 12000, 'learning_rate': 0.03680792839869354, 'num_leaves': 2040, 'max_depth': 4, 'min_data_in_leaf': 300, 'max_bin': 157, 'lambda_l1': 90, 'lambda_l2': 85, 'min_gain_to_split': 8.707453098189157, 'feature_fraction': 0.45, 'boosting_type': 'goss'}. Best is trial 0 with value: 0.7899761091944011.[0m


count zerO: 458913
amex: 0.7850556908956438
=== FINISHED FOLD 5/5 (5), log loss=0.22032579777273437, accuracy=0.9036630276088994, amex=0.7850556908956438 ===
time to run this fold: 5m, 38s
time to run folds: 24m, 24s
iteration logloss: 0.21884749788881802, iteration acc: 0.9035612414241146, Iteration amex: 0.7899761091944011
STARTING OPTIMIZATION iteration 2, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 12000, 'learning_rate': 0.22077379374058514, 'num_leaves': 2060, 'max_depth': 12, 'min_data_in_leaf': 7700, 'max_bin': 209, 'lambda_l1': 90, 'lambda_l2': 20, 'min_gain_to_split': 3.925552320660292, 'bagging_fraction': None, 'bagging_freq': None, 'feature_fraction': 0.7, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'goss'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting
[100]	valid_0's binary_l

[32m[I 2022-09-09 10:45:46,969][0m Trial 1 finished with value: 0.7860983048898584 and parameters: {'n_estimators': 12000, 'learning_rate': 0.22077379374058514, 'num_leaves': 2060, 'max_depth': 12, 'min_data_in_leaf': 7700, 'max_bin': 209, 'lambda_l1': 90, 'lambda_l2': 20, 'min_gain_to_split': 3.925552320660292, 'feature_fraction': 0.7, 'boosting_type': 'goss'}. Best is trial 0 with value: 0.7899761091944011.[0m


count zerO: 458913
amex: 0.7803631164622111
=== FINISHED FOLD 5/5 (5), log loss=0.22337700839511634, accuracy=0.9025299078250637, amex=0.7803631164622111 ===
time to run this fold: 54s
time to run folds: 4m, 49s
iteration logloss: 0.22144668096760625, iteration acc: 0.9023039237285208, Iteration amex: 0.7860983048898584
STARTING OPTIMIZATION iteration 3, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.2522936172843501, 'num_leaves': 620, 'max_depth': 10, 'min_data_in_leaf': 7500, 'max_bin': 146, 'lambda_l1': 25, 'lambda_l2': 0, 'min_gain_to_split': 2.7806868606837614, 'bagging_fraction': 0.8500000000000001, 'bagging_freq': 5, 'feature_fraction': 0.8, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.224524
[200]	valid_0's binary_logloss: 0.220707
[300]	valid_0's binary_logloss: 0.218933
[400]	valid_0's binary_logloss: 0.218286
[500]	valid_0's binary_logloss: 0.217904
[600]	valid_0's binary_logloss: 0.21779
[700]	valid_0's binary_logloss: 0.21746
[800]	valid_0's binary_logloss: 0.217444
[900]	valid_0's binary_logloss: 0.217348
[1000]	valid_0's binary_logloss: 0.217243
[1100]	valid_0's binary_logloss: 0.217243
[1200]	valid_0's binary_logloss: 0.217286
[1300]	valid_0's binary_logloss: 0.217174
[1400]	valid_0's binary_logloss: 0.217077
[1500]	valid_0's binary_logloss: 0.217087
[1600]	valid_0's binary_logloss: 0.217082
[1700]	valid_0's binary_logloss: 0.217065
[1800]	valid_0's binary_logloss: 0.217112
[1900]	valid_0's binary_logloss: 0.217072
[2000]	valid_0's binary_logloss: 0.217105
predicting
count zerO: 91783
amex: 0.7935586744860186
=== FINISHED FOLD 1/5 (5), log loss=0.21710450037847315, accuracy=0.9044703267489622, amex=0.7935586744860186 ===
tim



[100]	valid_0's binary_logloss: 0.223193
[200]	valid_0's binary_logloss: 0.218641
[300]	valid_0's binary_logloss: 0.216934
[400]	valid_0's binary_logloss: 0.216128
[500]	valid_0's binary_logloss: 0.215836
[600]	valid_0's binary_logloss: 0.215864
[700]	valid_0's binary_logloss: 0.215589
[800]	valid_0's binary_logloss: 0.21559
[900]	valid_0's binary_logloss: 0.215535
[1000]	valid_0's binary_logloss: 0.215476
[1100]	valid_0's binary_logloss: 0.215399
[1200]	valid_0's binary_logloss: 0.215356
[1300]	valid_0's binary_logloss: 0.215231
[1400]	valid_0's binary_logloss: 0.215264
[1500]	valid_0's binary_logloss: 0.21528
[1600]	valid_0's binary_logloss: 0.21524
[1700]	valid_0's binary_logloss: 0.215271
[1800]	valid_0's binary_logloss: 0.215244
[1900]	valid_0's binary_logloss: 0.215231
[2000]	valid_0's binary_logloss: 0.215248
predicting
count zerO: 183566
amex: 0.7934523195308447
=== FINISHED FOLD 2/5 (5), log loss=0.21524828439852645, accuracy=0.9054835862850419, amex=0.7934523195308447 ===
tim



[100]	valid_0's binary_logloss: 0.224501
[200]	valid_0's binary_logloss: 0.220619
[300]	valid_0's binary_logloss: 0.21844
[400]	valid_0's binary_logloss: 0.217653
[500]	valid_0's binary_logloss: 0.21723
[600]	valid_0's binary_logloss: 0.217218
[700]	valid_0's binary_logloss: 0.216985
[800]	valid_0's binary_logloss: 0.216969
[900]	valid_0's binary_logloss: 0.216737
[1000]	valid_0's binary_logloss: 0.216771
[1100]	valid_0's binary_logloss: 0.216726
[1200]	valid_0's binary_logloss: 0.216653
[1300]	valid_0's binary_logloss: 0.21675
[1400]	valid_0's binary_logloss: 0.216618
[1500]	valid_0's binary_logloss: 0.21655
[1600]	valid_0's binary_logloss: 0.216515
[1700]	valid_0's binary_logloss: 0.216498
[1800]	valid_0's binary_logloss: 0.216463
[1900]	valid_0's binary_logloss: 0.216527
[2000]	valid_0's binary_logloss: 0.216392
predicting
count zerO: 275349
amex: 0.7935352784364095
=== FINISHED FOLD 3/5 (5), log loss=0.2163918501574717, accuracy=0.903958249349008, amex=0.7935352784364095 ===
time t



[100]	valid_0's binary_logloss: 0.223845
[200]	valid_0's binary_logloss: 0.219574
[300]	valid_0's binary_logloss: 0.217511
[400]	valid_0's binary_logloss: 0.216467
[500]	valid_0's binary_logloss: 0.216218
[600]	valid_0's binary_logloss: 0.216217
[700]	valid_0's binary_logloss: 0.216145
[800]	valid_0's binary_logloss: 0.216069
[900]	valid_0's binary_logloss: 0.215994
[1000]	valid_0's binary_logloss: 0.215911
[1100]	valid_0's binary_logloss: 0.215784
[1200]	valid_0's binary_logloss: 0.215773
[1300]	valid_0's binary_logloss: 0.215824
[1400]	valid_0's binary_logloss: 0.21572
[1500]	valid_0's binary_logloss: 0.215688
[1600]	valid_0's binary_logloss: 0.215681
[1700]	valid_0's binary_logloss: 0.215708
[1800]	valid_0's binary_logloss: 0.215637
[1900]	valid_0's binary_logloss: 0.215637
[2000]	valid_0's binary_logloss: 0.215603
predicting
count zerO: 367131
amex: 0.7979801607688766
=== FINISHED FOLD 4/5 (5), log loss=0.2156031042873641, accuracy=0.9054716611100215, amex=0.7979801607688766 ===
ti



[100]	valid_0's binary_logloss: 0.225795
[200]	valid_0's binary_logloss: 0.221659
[300]	valid_0's binary_logloss: 0.21984
[400]	valid_0's binary_logloss: 0.219152
[500]	valid_0's binary_logloss: 0.218776
[600]	valid_0's binary_logloss: 0.218806
[700]	valid_0's binary_logloss: 0.218635
[800]	valid_0's binary_logloss: 0.21846
[900]	valid_0's binary_logloss: 0.218466
[1000]	valid_0's binary_logloss: 0.218429
[1100]	valid_0's binary_logloss: 0.218241
[1200]	valid_0's binary_logloss: 0.218304
[1300]	valid_0's binary_logloss: 0.218391
[1400]	valid_0's binary_logloss: 0.218388
[1500]	valid_0's binary_logloss: 0.218359
[1600]	valid_0's binary_logloss: 0.218272
[1700]	valid_0's binary_logloss: 0.21826
[1800]	valid_0's binary_logloss: 0.218269
[1900]	valid_0's binary_logloss: 0.218241
[2000]	valid_0's binary_logloss: 0.218266
predicting


[32m[I 2022-09-09 10:57:16,024][0m Trial 2 finished with value: 0.7932202487793407 and parameters: {'n_estimators': 12000, 'learning_rate': 0.2522936172843501, 'num_leaves': 620, 'max_depth': 10, 'min_data_in_leaf': 7500, 'max_bin': 146, 'lambda_l1': 25, 'lambda_l2': 0, 'min_gain_to_split': 2.7806868606837614, 'feature_fraction': 0.8, 'boosting_type': 'dart', 'bagging_fraction': 0.8500000000000001, 'bagging_freq': 5}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7875748106745544
=== FINISHED FOLD 5/5 (5), log loss=0.21826599296547014, accuracy=0.9037066091390469, amex=0.7875748106745544 ===
time to run this fold: 2m, 17s
time to run folds: 11m, 29s
iteration logloss: 0.2165227464374611, iteration acc: 0.9046180865264162, Iteration amex: 0.7932202487793407
STARTING OPTIMIZATION iteration 4, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 12000, 'learning_rate': 0.1557710035743997, 'num_leaves': 1440, 'max_depth': 3, 'min_data_in_leaf': 5900, 'max_bin': 251, 'lambda_l1': 40, 'lambda_l2': 80, 'min_gain_to_split': 2.331148733245221, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.8, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'gbdt'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting
[100]	valid_0's binary_logloss:

[32m[I 2022-09-09 10:58:59,043][0m Trial 3 finished with value: 0.7893963029981166 and parameters: {'n_estimators': 12000, 'learning_rate': 0.1557710035743997, 'num_leaves': 1440, 'max_depth': 3, 'min_data_in_leaf': 5900, 'max_bin': 251, 'lambda_l1': 40, 'lambda_l2': 80, 'min_gain_to_split': 2.331148733245221, 'feature_fraction': 0.8, 'boosting_type': 'gbdt', 'bagging_fraction': 0.5, 'bagging_freq': 1}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7838012452455423
=== FINISHED FOLD 5/5 (5), log loss=0.2217665782034657, accuracy=0.9023337909393999, amex=0.7838012452455423 ===
time to run this fold: 19s
time to run folds: 1m, 43s
iteration logloss: 0.21939213112144137, iteration acc: 0.9031973382589603, Iteration amex: 0.7893963029981166
STARTING OPTIMIZATION iteration 5, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.13321348370559116, 'num_leaves': 1380, 'max_depth': 5, 'min_data_in_leaf': 4800, 'max_bin': 133, 'lambda_l1': 0, 'lambda_l2': 50, 'min_gain_to_split': 14.919791424278888, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 5, 'feature_fraction': 0.7, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.235656
[200]	valid_0's binary_logloss: 0.227535
[300]	valid_0's binary_logloss: 0.223745
[400]	valid_0's binary_logloss: 0.221807
[500]	valid_0's binary_logloss: 0.220795
[600]	valid_0's binary_logloss: 0.220899
[700]	valid_0's binary_logloss: 0.220426
[800]	valid_0's binary_logloss: 0.220328
[900]	valid_0's binary_logloss: 0.220103
[1000]	valid_0's binary_logloss: 0.220115
[1100]	valid_0's binary_logloss: 0.220043
[1200]	valid_0's binary_logloss: 0.22005
[1300]	valid_0's binary_logloss: 0.21997
[1400]	valid_0's binary_logloss: 0.219996
[1500]	valid_0's binary_logloss: 0.219858
[1600]	valid_0's binary_logloss: 0.219864
[1700]	valid_0's binary_logloss: 0.219928
[1800]	valid_0's binary_logloss: 0.219908
[1900]	valid_0's binary_logloss: 0.219986
[2000]	valid_0's binary_logloss: 0.220008
predicting


[32m[I 2022-09-09 11:00:32,142][0m Trial 4 finished with value: 0.7897888925784864 and parameters: {'n_estimators': 12000, 'learning_rate': 0.13321348370559116, 'num_leaves': 1380, 'max_depth': 5, 'min_data_in_leaf': 4800, 'max_bin': 133, 'lambda_l1': 0, 'lambda_l2': 50, 'min_gain_to_split': 14.919791424278888, 'feature_fraction': 0.7, 'boosting_type': 'dart', 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 5}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 91783
amex: 0.7897888925784864
=== FINISHED FOLD 1/5 (5), log loss=0.2200078342638724, accuracy=0.9034134861575673, amex=0.7897888925784864 ===
time to run this fold: 1m, 32s
low score on amex, dropping loop
STARTING OPTIMIZATION iteration 6, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 12000, 'learning_rate': 0.13985719400576782, 'num_leaves': 2580, 'max_depth': 11, 'min_data_in_leaf': 800, 'max_bin': 190, 'lambda_l1': 45, 'lambda_l2': 60, 'min_gain_to_split': 6.485836227829765, 'bagging_fraction': 0.9000000000000001, 'bagging_freq': 5, 'feature_fraction': 0.8, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'gbdt'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting
[100]	valid_0's binary_logloss: 0.223163
[200]	valid_0's binary_logloss: 0.222288
predicting
count zerO: 91783
amex: 0.78

[32m[I 2022-09-09 11:01:52,897][0m Trial 5 finished with value: 0.7862605469898936 and parameters: {'n_estimators': 12000, 'learning_rate': 0.13985719400576782, 'num_leaves': 2580, 'max_depth': 11, 'min_data_in_leaf': 800, 'max_bin': 190, 'lambda_l1': 45, 'lambda_l2': 60, 'min_gain_to_split': 6.485836227829765, 'feature_fraction': 0.8, 'boosting_type': 'gbdt', 'bagging_fraction': 0.9000000000000001, 'bagging_freq': 5}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7813043016780714
=== FINISHED FOLD 5/5 (5), log loss=0.22323171137621067, accuracy=0.901309624980933, amex=0.7813043016780714 ===
time to run this fold: 16s
time to run folds: 1m, 20s
iteration logloss: 0.22163678470759085, iteration acc: 0.9020184631628496, Iteration amex: 0.7862605469898936
STARTING OPTIMIZATION iteration 7, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.142549577649394, 'num_leaves': 2800, 'max_depth': 3, 'min_data_in_leaf': 600, 'max_bin': 224, 'lambda_l1': 30, 'lambda_l2': 45, 'min_gain_to_split': 8.327826924074271, 'bagging_fraction': 0.55, 'bagging_freq': 1, 'feature_fraction': 0.9000000000000001, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.241751
[200]	valid_0's binary_logloss: 0.232607
[300]	valid_0's binary_logloss: 0.227715
[400]	valid_0's binary_logloss: 0.22481
[500]	valid_0's binary_logloss: 0.22308
[600]	valid_0's binary_logloss: 0.222777
[700]	valid_0's binary_logloss: 0.22167
[800]	valid_0's binary_logloss: 0.221478
[900]	valid_0's binary_logloss: 0.220882
[1000]	valid_0's binary_logloss: 0.220679
[1100]	valid_0's binary_logloss: 0.220341
[1200]	valid_0's binary_logloss: 0.220274
[1300]	valid_0's binary_logloss: 0.220161
[1400]	valid_0's binary_logloss: 0.219928
[1500]	valid_0's binary_logloss: 0.22005
[1600]	valid_0's binary_logloss: 0.219812
[1700]	valid_0's binary_logloss: 0.219685
[1800]	valid_0's binary_logloss: 0.219694
[1900]	valid_0's binary_logloss: 0.219668
[2000]	valid_0's binary_logloss: 0.219736
predicting
count zerO: 91783
amex: 0.7914212593788614
=== FINISHED FOLD 1/5 (5), log loss=0.21973607187019378, accuracy=0.9033372193107656, amex=0.7914212593788614 ===
time 



[100]	valid_0's binary_logloss: 0.241468
[200]	valid_0's binary_logloss: 0.231628
[300]	valid_0's binary_logloss: 0.22664
[400]	valid_0's binary_logloss: 0.223549
[500]	valid_0's binary_logloss: 0.221802
[600]	valid_0's binary_logloss: 0.221453
[700]	valid_0's binary_logloss: 0.220442
[800]	valid_0's binary_logloss: 0.219957
[900]	valid_0's binary_logloss: 0.219421
[1000]	valid_0's binary_logloss: 0.219217
[1100]	valid_0's binary_logloss: 0.218999
[1200]	valid_0's binary_logloss: 0.21891
[1300]	valid_0's binary_logloss: 0.218778
[1400]	valid_0's binary_logloss: 0.218442
[1500]	valid_0's binary_logloss: 0.218588
[1600]	valid_0's binary_logloss: 0.218411
[1700]	valid_0's binary_logloss: 0.218254
[1800]	valid_0's binary_logloss: 0.21826
[1900]	valid_0's binary_logloss: 0.218198
[2000]	valid_0's binary_logloss: 0.218211
predicting
count zerO: 183566
amex: 0.7900524343792136
=== FINISHED FOLD 2/5 (5), log loss=0.21821066364572855, accuracy=0.9034570672128825, amex=0.7900524343792136 ===
tim



[100]	valid_0's binary_logloss: 0.242164
[200]	valid_0's binary_logloss: 0.232751
[300]	valid_0's binary_logloss: 0.227668
[400]	valid_0's binary_logloss: 0.224866
[500]	valid_0's binary_logloss: 0.222913
[600]	valid_0's binary_logloss: 0.222735
[700]	valid_0's binary_logloss: 0.221771
[800]	valid_0's binary_logloss: 0.221416
[900]	valid_0's binary_logloss: 0.220825
[1000]	valid_0's binary_logloss: 0.220597
[1100]	valid_0's binary_logloss: 0.220341
[1200]	valid_0's binary_logloss: 0.22022
[1300]	valid_0's binary_logloss: 0.220122
[1400]	valid_0's binary_logloss: 0.219847
[1500]	valid_0's binary_logloss: 0.219875
[1600]	valid_0's binary_logloss: 0.219604
[1700]	valid_0's binary_logloss: 0.219443
[1800]	valid_0's binary_logloss: 0.219367
[1900]	valid_0's binary_logloss: 0.219394
[2000]	valid_0's binary_logloss: 0.219343
predicting
count zerO: 275349
amex: 0.7895136674583785
=== FINISHED FOLD 3/5 (5), log loss=0.21934328169778278, accuracy=0.9023457503023435, amex=0.7895136674583785 ===
t



[100]	valid_0's binary_logloss: 0.241773
[200]	valid_0's binary_logloss: 0.232076
[300]	valid_0's binary_logloss: 0.226875
[400]	valid_0's binary_logloss: 0.224084
[500]	valid_0's binary_logloss: 0.222162
[600]	valid_0's binary_logloss: 0.22186
[700]	valid_0's binary_logloss: 0.220813
[800]	valid_0's binary_logloss: 0.220299
[900]	valid_0's binary_logloss: 0.219742
[1000]	valid_0's binary_logloss: 0.219448
[1100]	valid_0's binary_logloss: 0.219215
[1200]	valid_0's binary_logloss: 0.219231
[1300]	valid_0's binary_logloss: 0.219106
[1400]	valid_0's binary_logloss: 0.218845
[1500]	valid_0's binary_logloss: 0.218834
[1600]	valid_0's binary_logloss: 0.218654
[1700]	valid_0's binary_logloss: 0.21855
[1800]	valid_0's binary_logloss: 0.218595
[1900]	valid_0's binary_logloss: 0.218589
[2000]	valid_0's binary_logloss: 0.218565
predicting
count zerO: 367131
amex: 0.7937313743356638
=== FINISHED FOLD 4/5 (5), log loss=0.21856486809284933, accuracy=0.9026715477980432, amex=0.7937313743356638 ===
ti



[100]	valid_0's binary_logloss: 0.243649
[200]	valid_0's binary_logloss: 0.234211
[300]	valid_0's binary_logloss: 0.229138
[400]	valid_0's binary_logloss: 0.226372
[500]	valid_0's binary_logloss: 0.224309
[600]	valid_0's binary_logloss: 0.224081
[700]	valid_0's binary_logloss: 0.223126
[800]	valid_0's binary_logloss: 0.222862
[900]	valid_0's binary_logloss: 0.222339
[1000]	valid_0's binary_logloss: 0.222022
[1100]	valid_0's binary_logloss: 0.221665
[1200]	valid_0's binary_logloss: 0.221549
[1300]	valid_0's binary_logloss: 0.221333
[1400]	valid_0's binary_logloss: 0.221163
[1500]	valid_0's binary_logloss: 0.221235
[1600]	valid_0's binary_logloss: 0.221066
[1700]	valid_0's binary_logloss: 0.220927
[1800]	valid_0's binary_logloss: 0.220853
[1900]	valid_0's binary_logloss: 0.220756
[2000]	valid_0's binary_logloss: 0.220834
predicting


[32m[I 2022-09-09 11:08:27,426][0m Trial 6 finished with value: 0.7899665073129227 and parameters: {'n_estimators': 12000, 'learning_rate': 0.142549577649394, 'num_leaves': 2800, 'max_depth': 3, 'min_data_in_leaf': 600, 'max_bin': 224, 'lambda_l1': 30, 'lambda_l2': 45, 'min_gain_to_split': 8.327826924074271, 'feature_fraction': 0.9000000000000001, 'boosting_type': 'dart', 'bagging_fraction': 0.55, 'bagging_freq': 1}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7851138010124958
=== FINISHED FOLD 5/5 (5), log loss=0.22083352380785282, accuracy=0.9028785600662439, amex=0.7851138010124958 ===
time to run this fold: 1m, 18s
time to run folds: 6m, 34s
iteration logloss: 0.21933768182288144, iteration acc: 0.9029380289380559, Iteration amex: 0.7899665073129227
STARTING OPTIMIZATION iteration 8, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.07184862285602481, 'num_leaves': 880, 'max_depth': 4, 'min_data_in_leaf': 600, 'max_bin': 103, 'lambda_l1': 100, 'lambda_l2': 80, 'min_gain_to_split': 3.943091149952671, 'bagging_fraction': 0.65, 'bagging_freq': 5, 'feature_fraction': 0.45, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.262589
[200]	valid_0's binary_logloss: 0.24419
[300]	valid_0's binary_logloss: 0.235214
[400]	valid_0's binary_logloss: 0.230817
[500]	valid_0's binary_logloss: 0.228001
[600]	valid_0's binary_logloss: 0.227521
[700]	valid_0's binary_logloss: 0.226031
[800]	valid_0's binary_logloss: 0.225301
[900]	valid_0's binary_logloss: 0.224377
[1000]	valid_0's binary_logloss: 0.223974
[1100]	valid_0's binary_logloss: 0.223467
[1200]	valid_0's binary_logloss: 0.223308
[1300]	valid_0's binary_logloss: 0.222998
[1400]	valid_0's binary_logloss: 0.222394
[1500]	valid_0's binary_logloss: 0.222387
[1600]	valid_0's binary_logloss: 0.221982
[1700]	valid_0's binary_logloss: 0.221692
[1800]	valid_0's binary_logloss: 0.221556
[1900]	valid_0's binary_logloss: 0.221412
[2000]	valid_0's binary_logloss: 0.221343
predicting


[32m[I 2022-09-09 11:10:27,267][0m Trial 7 finished with value: 0.7878625125723913 and parameters: {'n_estimators': 12000, 'learning_rate': 0.07184862285602481, 'num_leaves': 880, 'max_depth': 4, 'min_data_in_leaf': 600, 'max_bin': 103, 'lambda_l1': 100, 'lambda_l2': 80, 'min_gain_to_split': 3.943091149952671, 'feature_fraction': 0.45, 'boosting_type': 'dart', 'bagging_fraction': 0.65, 'bagging_freq': 5}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 91783
amex: 0.7878625125723913
=== FINISHED FOLD 1/5 (5), log loss=0.22134282959475213, accuracy=0.9032609524639639, amex=0.7878625125723913 ===
time to run this fold: 1m, 59s
low score on amex, dropping loop
STARTING OPTIMIZATION iteration 9, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 12000, 'learning_rate': 0.2753927301710471, 'num_leaves': 500, 'max_depth': 11, 'min_data_in_leaf': 7700, 'max_bin': 244, 'lambda_l1': 30, 'lambda_l2': 100, 'min_gain_to_split': 4.4505702920684325, 'bagging_fraction': 0.2, 'bagging_freq': 10, 'feature_fraction': 0.8, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'gbdt'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting
[100]	valid_0's binary_logloss: 0.229446
[200]	valid_0's binary_logloss: 0.228077
predicting
count zerO: 91783
amex: 0.77741323572356

[32m[I 2022-09-09 11:11:30,334][0m Trial 8 finished with value: 0.7772901900930114 and parameters: {'n_estimators': 12000, 'learning_rate': 0.2753927301710471, 'num_leaves': 500, 'max_depth': 11, 'min_data_in_leaf': 7700, 'max_bin': 244, 'lambda_l1': 30, 'lambda_l2': 100, 'min_gain_to_split': 4.4505702920684325, 'feature_fraction': 0.8, 'boosting_type': 'gbdt', 'bagging_fraction': 0.2, 'bagging_freq': 10}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7726890288676567
=== FINISHED FOLD 5/5 (5), log loss=0.2299031174224526, accuracy=0.8980519056024057, amex=0.7726890288676567 ===
time to run this fold: 12s
time to run folds: 1m, 3s
iteration logloss: 0.2275591815754078, iteration acc: 0.8988457504413507, Iteration amex: 0.7772901900930114
STARTING OPTIMIZATION iteration 10, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.24808427782994535, 'num_leaves': 1400, 'max_depth': 10, 'min_data_in_leaf': 1400, 'max_bin': 229, 'lambda_l1': 35, 'lambda_l2': 25, 'min_gain_to_split': 13.280226185266113, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.65, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.226735
[200]	valid_0's binary_logloss: 0.223488
[300]	valid_0's binary_logloss: 0.222417
[400]	valid_0's binary_logloss: 0.221624
[500]	valid_0's binary_logloss: 0.221269
[600]	valid_0's binary_logloss: 0.221304
[700]	valid_0's binary_logloss: 0.221045
[800]	valid_0's binary_logloss: 0.221159
[900]	valid_0's binary_logloss: 0.221127
[1000]	valid_0's binary_logloss: 0.220972
[1100]	valid_0's binary_logloss: 0.220967
[1200]	valid_0's binary_logloss: 0.221123
[1300]	valid_0's binary_logloss: 0.220989
[1400]	valid_0's binary_logloss: 0.221128
[1500]	valid_0's binary_logloss: 0.221069
[1600]	valid_0's binary_logloss: 0.221209
[1700]	valid_0's binary_logloss: 0.221079
[1800]	valid_0's binary_logloss: 0.221124
[1900]	valid_0's binary_logloss: 0.22111
[2000]	valid_0's binary_logloss: 0.221084
predicting


[32m[I 2022-09-09 11:12:44,890][0m Trial 9 finished with value: 0.788966496126196 and parameters: {'n_estimators': 12000, 'learning_rate': 0.24808427782994535, 'num_leaves': 1400, 'max_depth': 10, 'min_data_in_leaf': 1400, 'max_bin': 229, 'lambda_l1': 35, 'lambda_l2': 25, 'min_gain_to_split': 13.280226185266113, 'feature_fraction': 0.65, 'boosting_type': 'dart', 'bagging_fraction': 0.5, 'bagging_freq': 1}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 91783
amex: 0.788966496126196
=== FINISHED FOLD 1/5 (5), log loss=0.22108413329472892, accuracy=0.9024764934682893, amex=0.788966496126196 ===
time to run this fold: 1m, 14s
low score on amex, dropping loop
STARTING OPTIMIZATION iteration 11, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.297927410180956, 'num_leaves': 120, 'max_depth': 8, 'min_data_in_leaf': 10000, 'max_bin': 149, 'lambda_l1': 5, 'lambda_l2': 0, 'min_gain_to_split': 0.5879978189714938, 'bagging_fraction': 0.95, 'bagging_freq': 10, 'feature_fraction': 0.2, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.224422
[200]	valid_0's binary_logloss: 0.220871
[300]	valid_0's binary_logloss: 0.21913
[400]	valid_0's binary_logloss: 0.218511
[500]	valid_0's binary_logloss: 0.218303
[600]	valid_0's binary_logloss: 0.218117
[700]	valid_0's binary_logloss: 0.217941
[800]	valid_0's binary_logloss: 0.218146
[900]	valid_0's binary_logloss: 0.218236
[1000]	valid_0's binary_logloss: 0.218375
[1100]	valid_0's binary_logloss: 0.218534
[1200]	valid_0's binary_logloss: 0.218293
[1300]	valid_0's binary_logloss: 0.218387
[1400]	valid_0's binary_logloss: 0.218583
[1500]	valid_0's binary_logloss: 0.218461
[1600]	valid_0's binary_logloss: 0.218645
[1700]	valid_0's binary_logloss: 0.218974
[1800]	valid_0's binary_logloss: 0.219031
[1900]	valid_0's binary_logloss: 0.219218
[2000]	valid_0's binary_logloss: 0.219212
predicting
count zerO: 91783
amex: 0.7915647647797959
=== FINISHED FOLD 1/5 (5), log loss=0.21921245568870207, accuracy=0.9040345161958097, amex=0.7915647647797959 ===
ti



[100]	valid_0's binary_logloss: 0.22247
[200]	valid_0's binary_logloss: 0.218602
[300]	valid_0's binary_logloss: 0.216777
[400]	valid_0's binary_logloss: 0.215957
[500]	valid_0's binary_logloss: 0.215574
[600]	valid_0's binary_logloss: 0.215549
[700]	valid_0's binary_logloss: 0.215467
[800]	valid_0's binary_logloss: 0.215541
[900]	valid_0's binary_logloss: 0.216109
[1000]	valid_0's binary_logloss: 0.2164
[1100]	valid_0's binary_logloss: 0.216416
[1200]	valid_0's binary_logloss: 0.216376
[1300]	valid_0's binary_logloss: 0.21665
[1400]	valid_0's binary_logloss: 0.217196
[1500]	valid_0's binary_logloss: 0.217215
[1600]	valid_0's binary_logloss: 0.217403
[1700]	valid_0's binary_logloss: 0.217726
[1800]	valid_0's binary_logloss: 0.217861
[1900]	valid_0's binary_logloss: 0.218061
[2000]	valid_0's binary_logloss: 0.218135
predicting
count zerO: 183566
amex: 0.7897935019271123
=== FINISHED FOLD 2/5 (5), log loss=0.2181349430112848, accuracy=0.9040998877787826, amex=0.7897935019271123 ===
time 



[100]	valid_0's binary_logloss: 0.224038
[200]	valid_0's binary_logloss: 0.220265
[300]	valid_0's binary_logloss: 0.218357
[400]	valid_0's binary_logloss: 0.217403
[500]	valid_0's binary_logloss: 0.216777
[600]	valid_0's binary_logloss: 0.216641
[700]	valid_0's binary_logloss: 0.216836
[800]	valid_0's binary_logloss: 0.216913
[900]	valid_0's binary_logloss: 0.216965
[1000]	valid_0's binary_logloss: 0.217234
[1100]	valid_0's binary_logloss: 0.217293
[1200]	valid_0's binary_logloss: 0.217337
[1300]	valid_0's binary_logloss: 0.217444
[1400]	valid_0's binary_logloss: 0.217508
[1500]	valid_0's binary_logloss: 0.217657
[1600]	valid_0's binary_logloss: 0.217794
[1700]	valid_0's binary_logloss: 0.218023
[1800]	valid_0's binary_logloss: 0.218229
[1900]	valid_0's binary_logloss: 0.218583
[2000]	valid_0's binary_logloss: 0.218743
predicting
count zerO: 275349
amex: 0.7896041565355156
=== FINISHED FOLD 3/5 (5), log loss=0.21874255935236972, accuracy=0.9028033513831537, amex=0.7896041565355156 ===




[100]	valid_0's binary_logloss: 0.223345
[200]	valid_0's binary_logloss: 0.219123
[300]	valid_0's binary_logloss: 0.21783
[400]	valid_0's binary_logloss: 0.216809
[500]	valid_0's binary_logloss: 0.216458
[600]	valid_0's binary_logloss: 0.216458
[700]	valid_0's binary_logloss: 0.216247
[800]	valid_0's binary_logloss: 0.216414
[900]	valid_0's binary_logloss: 0.216723
[1000]	valid_0's binary_logloss: 0.216586
[1100]	valid_0's binary_logloss: 0.216875
[1200]	valid_0's binary_logloss: 0.216865
[1300]	valid_0's binary_logloss: 0.216988
[1400]	valid_0's binary_logloss: 0.217199
[1500]	valid_0's binary_logloss: 0.217434
[1600]	valid_0's binary_logloss: 0.217674
[1700]	valid_0's binary_logloss: 0.21782
[1800]	valid_0's binary_logloss: 0.218071
[1900]	valid_0's binary_logloss: 0.21819
[2000]	valid_0's binary_logloss: 0.218316
predicting
count zerO: 367131
amex: 0.7937820733220686
=== FINISHED FOLD 4/5 (5), log loss=0.2183155475424911, accuracy=0.903880935259637, amex=0.7937820733220686 ===
time 



[100]	valid_0's binary_logloss: 0.225388
[200]	valid_0's binary_logloss: 0.221627
[300]	valid_0's binary_logloss: 0.220041
[400]	valid_0's binary_logloss: 0.219238
[500]	valid_0's binary_logloss: 0.218939
[600]	valid_0's binary_logloss: 0.218665
[700]	valid_0's binary_logloss: 0.21883
[800]	valid_0's binary_logloss: 0.219069
[900]	valid_0's binary_logloss: 0.219533
[1000]	valid_0's binary_logloss: 0.21955
[1100]	valid_0's binary_logloss: 0.21957
[1200]	valid_0's binary_logloss: 0.219673
[1300]	valid_0's binary_logloss: 0.219809
[1400]	valid_0's binary_logloss: 0.219968
[1500]	valid_0's binary_logloss: 0.219926
[1600]	valid_0's binary_logloss: 0.220229
[1700]	valid_0's binary_logloss: 0.220445
[1800]	valid_0's binary_logloss: 0.220636
[1900]	valid_0's binary_logloss: 0.220722
[2000]	valid_0's binary_logloss: 0.220966
predicting


[32m[I 2022-09-09 11:26:28,720][0m Trial 10 finished with value: 0.7901124226536633 and parameters: {'n_estimators': 12000, 'learning_rate': 0.297927410180956, 'num_leaves': 120, 'max_depth': 8, 'min_data_in_leaf': 10000, 'max_bin': 149, 'lambda_l1': 5, 'lambda_l2': 0, 'min_gain_to_split': 0.5879978189714938, 'feature_fraction': 0.2, 'boosting_type': 'dart', 'bagging_fraction': 0.95, 'bagging_freq': 10}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7858176167038242
=== FINISHED FOLD 5/5 (5), log loss=0.22096625127341493, accuracy=0.902497221677453, amex=0.7858176167038242 ===
time to run this fold: 2m, 46s
time to run folds: 13m, 43s
iteration logloss: 0.21907435137365253, iteration acc: 0.9034631824589672, Iteration amex: 0.7901124226536633
STARTING OPTIMIZATION iteration 12, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.2957245449189293, 'num_leaves': 20, 'max_depth': 8, 'min_data_in_leaf': 9900, 'max_bin': 156, 'lambda_l1': 0, 'lambda_l2': 0, 'min_gain_to_split': 0.20312932401004247, 'bagging_fraction': 0.95, 'bagging_freq': 10, 'feature_fraction': 0.35000000000000003, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.223672
[200]	valid_0's binary_logloss: 0.22018
[300]	valid_0's binary_logloss: 0.218827
[400]	valid_0's binary_logloss: 0.218058
[500]	valid_0's binary_logloss: 0.217839
[600]	valid_0's binary_logloss: 0.217779
[700]	valid_0's binary_logloss: 0.217668
[800]	valid_0's binary_logloss: 0.217614
[900]	valid_0's binary_logloss: 0.217676
[1000]	valid_0's binary_logloss: 0.217655
[1100]	valid_0's binary_logloss: 0.217642
[1200]	valid_0's binary_logloss: 0.21803
[1300]	valid_0's binary_logloss: 0.218253
[1400]	valid_0's binary_logloss: 0.218605
[1500]	valid_0's binary_logloss: 0.218734
[1600]	valid_0's binary_logloss: 0.218871
[1700]	valid_0's binary_logloss: 0.219149
[1800]	valid_0's binary_logloss: 0.219413
[1900]	valid_0's binary_logloss: 0.219758
[2000]	valid_0's binary_logloss: 0.219975
predicting


[32m[I 2022-09-09 11:29:13,414][0m Trial 11 finished with value: 0.7886049642789308 and parameters: {'n_estimators': 12000, 'learning_rate': 0.2957245449189293, 'num_leaves': 20, 'max_depth': 8, 'min_data_in_leaf': 9900, 'max_bin': 156, 'lambda_l1': 0, 'lambda_l2': 0, 'min_gain_to_split': 0.20312932401004247, 'feature_fraction': 0.35000000000000003, 'boosting_type': 'dart', 'bagging_fraction': 0.95, 'bagging_freq': 10}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 91783
amex: 0.7886049642789308
=== FINISHED FOLD 1/5 (5), log loss=0.21997532693873334, accuracy=0.9035333340596843, amex=0.7886049642789308 ===
time to run this fold: 2m, 44s
low score on amex, dropping loop
STARTING OPTIMIZATION iteration 13, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.2153747549609356, 'num_leaves': 40, 'max_depth': 8, 'min_data_in_leaf': 10000, 'max_bin': 133, 'lambda_l1': 10, 'lambda_l2': 5, 'min_gain_to_split': 0.058912015315847555, 'bagging_fraction': 0.8, 'bagging_freq': 10, 'feature_fraction': 0.2, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.22777
[200]	valid_0's binary_logloss: 0.223168
[300]	valid_0's binary_logloss: 0.220625
[400]	valid_0's binary_logloss: 0.219745
[500]	valid_0's binary_logloss: 0.219143
[600]	valid_0's binary_logloss: 0.218842
[700]	valid_0's binary_logloss: 0.218358
[800]	valid_0's binary_logloss: 0.217955
[900]	valid_0's binary_logloss: 0.217889
[1000]	valid_0's binary_logloss: 0.217758
[1100]	valid_0's binary_logloss: 0.217783
[1200]	valid_0's binary_logloss: 0.217667
[1300]	valid_0's binary_logloss: 0.217601
[1400]	valid_0's binary_logloss: 0.217576
[1500]	valid_0's binary_logloss: 0.217657
[1600]	valid_0's binary_logloss: 0.217486
[1700]	valid_0's binary_logloss: 0.217761
[1800]	valid_0's binary_logloss: 0.217904
[1900]	valid_0's binary_logloss: 0.218049
[2000]	valid_0's binary_logloss: 0.218088
predicting
count zerO: 91783
amex: 0.7931182983825031
=== FINISHED FOLD 1/5 (5), log loss=0.21808758767825315, accuracy=0.9046991272893673, amex=0.7931182983825031 ===
ti



[100]	valid_0's binary_logloss: 0.226239
[200]	valid_0's binary_logloss: 0.220779
[300]	valid_0's binary_logloss: 0.218275
[400]	valid_0's binary_logloss: 0.217125
[500]	valid_0's binary_logloss: 0.216161
[600]	valid_0's binary_logloss: 0.216147
[700]	valid_0's binary_logloss: 0.215769
[800]	valid_0's binary_logloss: 0.215675
[900]	valid_0's binary_logloss: 0.215615
[1000]	valid_0's binary_logloss: 0.215734
[1100]	valid_0's binary_logloss: 0.215725
[1200]	valid_0's binary_logloss: 0.215524
[1300]	valid_0's binary_logloss: 0.215543
[1400]	valid_0's binary_logloss: 0.215609
[1500]	valid_0's binary_logloss: 0.215701
[1600]	valid_0's binary_logloss: 0.21584
[1700]	valid_0's binary_logloss: 0.215861
[1800]	valid_0's binary_logloss: 0.21581
[1900]	valid_0's binary_logloss: 0.216055
[2000]	valid_0's binary_logloss: 0.216277
predicting
count zerO: 183566
amex: 0.7923013081169783
=== FINISHED FOLD 2/5 (5), log loss=0.21627734584408623, accuracy=0.9041216783064402, amex=0.7923013081169783 ===
ti



[100]	valid_0's binary_logloss: 0.227744
[200]	valid_0's binary_logloss: 0.222831
[300]	valid_0's binary_logloss: 0.220259
[400]	valid_0's binary_logloss: 0.218774
[500]	valid_0's binary_logloss: 0.217961
[600]	valid_0's binary_logloss: 0.217616
[700]	valid_0's binary_logloss: 0.216975
[800]	valid_0's binary_logloss: 0.216858
[900]	valid_0's binary_logloss: 0.216699
[1000]	valid_0's binary_logloss: 0.216821
[1100]	valid_0's binary_logloss: 0.216799
[1200]	valid_0's binary_logloss: 0.216913
[1300]	valid_0's binary_logloss: 0.217076
[1400]	valid_0's binary_logloss: 0.217091
[1500]	valid_0's binary_logloss: 0.217131
[1600]	valid_0's binary_logloss: 0.217191
[1700]	valid_0's binary_logloss: 0.217197
[1800]	valid_0's binary_logloss: 0.217291
[1900]	valid_0's binary_logloss: 0.217356
[2000]	valid_0's binary_logloss: 0.217487
predicting
count zerO: 275349
amex: 0.7929097649667705
=== FINISHED FOLD 3/5 (5), log loss=0.217486599520607, accuracy=0.9033045335192792, amex=0.7929097649667705 ===
ti



[100]	valid_0's binary_logloss: 0.227544
[200]	valid_0's binary_logloss: 0.221875
[300]	valid_0's binary_logloss: 0.219367
[400]	valid_0's binary_logloss: 0.217875
[500]	valid_0's binary_logloss: 0.217151
[600]	valid_0's binary_logloss: 0.217034
[700]	valid_0's binary_logloss: 0.216618
[800]	valid_0's binary_logloss: 0.21649
[900]	valid_0's binary_logloss: 0.216285
[1000]	valid_0's binary_logloss: 0.216266
[1100]	valid_0's binary_logloss: 0.216199
[1200]	valid_0's binary_logloss: 0.216228
[1300]	valid_0's binary_logloss: 0.216424
[1400]	valid_0's binary_logloss: 0.216183
[1500]	valid_0's binary_logloss: 0.21607
[1600]	valid_0's binary_logloss: 0.216215
[1700]	valid_0's binary_logloss: 0.216412
[1800]	valid_0's binary_logloss: 0.216548
[1900]	valid_0's binary_logloss: 0.21677
[2000]	valid_0's binary_logloss: 0.216832
predicting
count zerO: 367131
amex: 0.7961273657571479
=== FINISHED FOLD 4/5 (5), log loss=0.2168320760150427, accuracy=0.9043821228563335, amex=0.7961273657571479 ===
time



[100]	valid_0's binary_logloss: 0.229294
[200]	valid_0's binary_logloss: 0.224013
[300]	valid_0's binary_logloss: 0.221463
[400]	valid_0's binary_logloss: 0.220078
[500]	valid_0's binary_logloss: 0.21938
[600]	valid_0's binary_logloss: 0.219292
[700]	valid_0's binary_logloss: 0.218948
[800]	valid_0's binary_logloss: 0.218897
[900]	valid_0's binary_logloss: 0.218693
[1000]	valid_0's binary_logloss: 0.218611
[1100]	valid_0's binary_logloss: 0.218482
[1200]	valid_0's binary_logloss: 0.218353
[1300]	valid_0's binary_logloss: 0.218361
[1400]	valid_0's binary_logloss: 0.218327
[1500]	valid_0's binary_logloss: 0.21839
[1600]	valid_0's binary_logloss: 0.218411
[1700]	valid_0's binary_logloss: 0.218433
[1800]	valid_0's binary_logloss: 0.218594
[1900]	valid_0's binary_logloss: 0.218702
[2000]	valid_0's binary_logloss: 0.218834
predicting


[32m[I 2022-09-09 11:42:13,971][0m Trial 12 finished with value: 0.7923910740319678 and parameters: {'n_estimators': 12000, 'learning_rate': 0.2153747549609356, 'num_leaves': 40, 'max_depth': 8, 'min_data_in_leaf': 10000, 'max_bin': 133, 'lambda_l1': 10, 'lambda_l2': 5, 'min_gain_to_split': 0.058912015315847555, 'feature_fraction': 0.2, 'boosting_type': 'dart', 'bagging_fraction': 0.8, 'bagging_freq': 10}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7874986329364393
=== FINISHED FOLD 5/5 (5), log loss=0.21883365607722646, accuracy=0.903510492253383, amex=0.7874986329364393 ===
time to run this fold: 2m, 35s
time to run folds: 13m, 0s
iteration logloss: 0.21750345302704313, iteration acc: 0.9040035908449606, Iteration amex: 0.7923910740319678
STARTING OPTIMIZATION iteration 14, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.19710663826339306, 'num_leaves': 740, 'max_depth': 9, 'min_data_in_leaf': 8100, 'max_bin': 116, 'lambda_l1': 15, 'lambda_l2': 20, 'min_gain_to_split': 1.5366657578893324, 'bagging_fraction': 0.75, 'bagging_freq': 5, 'feature_fraction': 0.2, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.228894
[200]	valid_0's binary_logloss: 0.223273
[300]	valid_0's binary_logloss: 0.220685
[400]	valid_0's binary_logloss: 0.219443
[500]	valid_0's binary_logloss: 0.218669
[600]	valid_0's binary_logloss: 0.218598
[700]	valid_0's binary_logloss: 0.217925
[800]	valid_0's binary_logloss: 0.217792
[900]	valid_0's binary_logloss: 0.217533
[1000]	valid_0's binary_logloss: 0.217441
[1100]	valid_0's binary_logloss: 0.217382
[1200]	valid_0's binary_logloss: 0.217389
[1300]	valid_0's binary_logloss: 0.217478
[1400]	valid_0's binary_logloss: 0.217357
[1500]	valid_0's binary_logloss: 0.217335
[1600]	valid_0's binary_logloss: 0.217373
[1700]	valid_0's binary_logloss: 0.217388
[1800]	valid_0's binary_logloss: 0.2174
[1900]	valid_0's binary_logloss: 0.217399
[2000]	valid_0's binary_logloss: 0.217433
predicting
count zerO: 91783
amex: 0.7933572769478512
=== FINISHED FOLD 1/5 (5), log loss=0.21743264622989536, accuracy=0.904132573570269, amex=0.7933572769478512 ===
time



[100]	valid_0's binary_logloss: 0.22773
[200]	valid_0's binary_logloss: 0.221527
[300]	valid_0's binary_logloss: 0.218475
[400]	valid_0's binary_logloss: 0.216928
[500]	valid_0's binary_logloss: 0.216287
[600]	valid_0's binary_logloss: 0.216292
[700]	valid_0's binary_logloss: 0.215729
[800]	valid_0's binary_logloss: 0.215668
[900]	valid_0's binary_logloss: 0.215487
[1000]	valid_0's binary_logloss: 0.2155
[1100]	valid_0's binary_logloss: 0.215237
[1200]	valid_0's binary_logloss: 0.215176
[1300]	valid_0's binary_logloss: 0.215143
[1400]	valid_0's binary_logloss: 0.215102
[1500]	valid_0's binary_logloss: 0.215025
[1600]	valid_0's binary_logloss: 0.21498
[1700]	valid_0's binary_logloss: 0.214974
[1800]	valid_0's binary_logloss: 0.214984
[1900]	valid_0's binary_logloss: 0.215
[2000]	valid_0's binary_logloss: 0.215108
predicting
count zerO: 183566
amex: 0.7935053350578316
=== FINISHED FOLD 2/5 (5), log loss=0.21510763217631107, accuracy=0.9056470152424742, amex=0.7935053350578316 ===
time to



[100]	valid_0's binary_logloss: 0.228849
[200]	valid_0's binary_logloss: 0.222657
[300]	valid_0's binary_logloss: 0.219914
[400]	valid_0's binary_logloss: 0.218526
[500]	valid_0's binary_logloss: 0.217672
[600]	valid_0's binary_logloss: 0.217559
[700]	valid_0's binary_logloss: 0.217259
[800]	valid_0's binary_logloss: 0.21722
[900]	valid_0's binary_logloss: 0.217121
[1000]	valid_0's binary_logloss: 0.216938
[1100]	valid_0's binary_logloss: 0.216908
[1200]	valid_0's binary_logloss: 0.216937
[1300]	valid_0's binary_logloss: 0.216868
[1400]	valid_0's binary_logloss: 0.21679
[1500]	valid_0's binary_logloss: 0.216791
[1600]	valid_0's binary_logloss: 0.216876
[1700]	valid_0's binary_logloss: 0.216886
[1800]	valid_0's binary_logloss: 0.216967
[1900]	valid_0's binary_logloss: 0.216983
[2000]	valid_0's binary_logloss: 0.21689
predicting
count zerO: 275349
amex: 0.7923756556733537
=== FINISHED FOLD 3/5 (5), log loss=0.2168904092877448, accuracy=0.9037076582809452, amex=0.7923756556733537 ===
time



[100]	valid_0's binary_logloss: 0.22778
[200]	valid_0's binary_logloss: 0.221807
[300]	valid_0's binary_logloss: 0.219001
[400]	valid_0's binary_logloss: 0.217852
[500]	valid_0's binary_logloss: 0.216903
[600]	valid_0's binary_logloss: 0.216594
[700]	valid_0's binary_logloss: 0.216102
[800]	valid_0's binary_logloss: 0.216046
[900]	valid_0's binary_logloss: 0.215795
[1000]	valid_0's binary_logloss: 0.215871
[1100]	valid_0's binary_logloss: 0.215651
[1200]	valid_0's binary_logloss: 0.215711
[1300]	valid_0's binary_logloss: 0.215716
[1400]	valid_0's binary_logloss: 0.215611
[1500]	valid_0's binary_logloss: 0.215811
[1600]	valid_0's binary_logloss: 0.215786
[1700]	valid_0's binary_logloss: 0.215742
[1800]	valid_0's binary_logloss: 0.215669
[1900]	valid_0's binary_logloss: 0.215554
[2000]	valid_0's binary_logloss: 0.215481
predicting
count zerO: 367131
amex: 0.7979877485893819
=== FINISHED FOLD 4/5 (5), log loss=0.2154810324337909, accuracy=0.9048397289228824, amex=0.7979877485893819 ===
ti



[100]	valid_0's binary_logloss: 0.230261
[200]	valid_0's binary_logloss: 0.224522
[300]	valid_0's binary_logloss: 0.221808
[400]	valid_0's binary_logloss: 0.220322
[500]	valid_0's binary_logloss: 0.219455
[600]	valid_0's binary_logloss: 0.219499
[700]	valid_0's binary_logloss: 0.21915
[800]	valid_0's binary_logloss: 0.219021
[900]	valid_0's binary_logloss: 0.21889
[1000]	valid_0's binary_logloss: 0.218781
[1100]	valid_0's binary_logloss: 0.218586
[1200]	valid_0's binary_logloss: 0.218373
[1300]	valid_0's binary_logloss: 0.218419
[1400]	valid_0's binary_logloss: 0.21825
[1500]	valid_0's binary_logloss: 0.218152
[1600]	valid_0's binary_logloss: 0.21818
[1700]	valid_0's binary_logloss: 0.218149
[1800]	valid_0's binary_logloss: 0.218192
[1900]	valid_0's binary_logloss: 0.21822
[2000]	valid_0's binary_logloss: 0.218324
predicting


[32m[I 2022-09-09 11:55:31,892][0m Trial 13 finished with value: 0.793005982382992 and parameters: {'n_estimators': 12000, 'learning_rate': 0.19710663826339306, 'num_leaves': 740, 'max_depth': 9, 'min_data_in_leaf': 8100, 'max_bin': 116, 'lambda_l1': 15, 'lambda_l2': 20, 'min_gain_to_split': 1.5366657578893324, 'feature_fraction': 0.2, 'boosting_type': 'dart', 'bagging_fraction': 0.75, 'bagging_freq': 5}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7878038956465416
=== FINISHED FOLD 5/5 (5), log loss=0.2183239682773132, accuracy=0.904153319823059, amex=0.7878038956465416 ===
time to run this fold: 2m, 38s
time to run folds: 13m, 17s
iteration logloss: 0.21664713768101107, iteration acc: 0.904496059167926, Iteration amex: 0.793005982382992
STARTING OPTIMIZATION iteration 15, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.20649827432251577, 'num_leaves': 820, 'max_depth': 10, 'min_data_in_leaf': 7600, 'max_bin': 109, 'lambda_l1': 65, 'lambda_l2': 25, 'min_gain_to_split': 6.259655318535742, 'bagging_fraction': 0.75, 'bagging_freq': 5, 'feature_fraction': 0.95, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.229027
[200]	valid_0's binary_logloss: 0.224179
[300]	valid_0's binary_logloss: 0.221895
[400]	valid_0's binary_logloss: 0.220813
[500]	valid_0's binary_logloss: 0.220361
[600]	valid_0's binary_logloss: 0.220549
[700]	valid_0's binary_logloss: 0.220314
[800]	valid_0's binary_logloss: 0.220203
[900]	valid_0's binary_logloss: 0.220094
[1000]	valid_0's binary_logloss: 0.220049
[1100]	valid_0's binary_logloss: 0.22002
[1200]	valid_0's binary_logloss: 0.219925
[1300]	valid_0's binary_logloss: 0.219855
[1400]	valid_0's binary_logloss: 0.219918
[1500]	valid_0's binary_logloss: 0.219852
[1600]	valid_0's binary_logloss: 0.219873
[1700]	valid_0's binary_logloss: 0.219907
[1800]	valid_0's binary_logloss: 0.219933
[1900]	valid_0's binary_logloss: 0.219854
[2000]	valid_0's binary_logloss: 0.219806
predicting
count zerO: 91783
amex: 0.7908257912424128
=== FINISHED FOLD 1/5 (5), log loss=0.21980572209447646, accuracy=0.9035333340596843, amex=0.7908257912424128 ===
ti



[100]	valid_0's binary_logloss: 0.22776
[200]	valid_0's binary_logloss: 0.222137
[300]	valid_0's binary_logloss: 0.219754
[400]	valid_0's binary_logloss: 0.218731
[500]	valid_0's binary_logloss: 0.218351
[600]	valid_0's binary_logloss: 0.218703
[700]	valid_0's binary_logloss: 0.218234
[800]	valid_0's binary_logloss: 0.218202
[900]	valid_0's binary_logloss: 0.218105
[1000]	valid_0's binary_logloss: 0.218102
[1100]	valid_0's binary_logloss: 0.217991
[1200]	valid_0's binary_logloss: 0.218024
[1300]	valid_0's binary_logloss: 0.217984
[1400]	valid_0's binary_logloss: 0.218022
[1500]	valid_0's binary_logloss: 0.218096
[1600]	valid_0's binary_logloss: 0.218075
[1700]	valid_0's binary_logloss: 0.218027
[1800]	valid_0's binary_logloss: 0.218053
[1900]	valid_0's binary_logloss: 0.218044
[2000]	valid_0's binary_logloss: 0.21804
predicting
count zerO: 183566
amex: 0.7913691529971241
=== FINISHED FOLD 2/5 (5), log loss=0.21803992673922598, accuracy=0.9039146682936927, amex=0.7913691529971241 ===
ti



[100]	valid_0's binary_logloss: 0.228593
[200]	valid_0's binary_logloss: 0.223408
[300]	valid_0's binary_logloss: 0.221231
[400]	valid_0's binary_logloss: 0.220279
[500]	valid_0's binary_logloss: 0.219946
[600]	valid_0's binary_logloss: 0.220089
[700]	valid_0's binary_logloss: 0.219758
[800]	valid_0's binary_logloss: 0.219705
[900]	valid_0's binary_logloss: 0.219539
[1000]	valid_0's binary_logloss: 0.219529
[1100]	valid_0's binary_logloss: 0.219504
[1200]	valid_0's binary_logloss: 0.219543
[1300]	valid_0's binary_logloss: 0.219434
[1400]	valid_0's binary_logloss: 0.219398
[1500]	valid_0's binary_logloss: 0.219452
[1600]	valid_0's binary_logloss: 0.219518
[1700]	valid_0's binary_logloss: 0.219544
[1800]	valid_0's binary_logloss: 0.219382
[1900]	valid_0's binary_logloss: 0.219501
[2000]	valid_0's binary_logloss: 0.21954
predicting
count zerO: 275349
amex: 0.7889925224386913
=== FINISHED FOLD 3/5 (5), log loss=0.21954030514557413, accuracy=0.9030539424512165, amex=0.7889925224386913 ===
t



[100]	valid_0's binary_logloss: 0.228755
[200]	valid_0's binary_logloss: 0.223502
[300]	valid_0's binary_logloss: 0.221008
[400]	valid_0's binary_logloss: 0.219848
[500]	valid_0's binary_logloss: 0.219415
[600]	valid_0's binary_logloss: 0.21958
[700]	valid_0's binary_logloss: 0.219214
[800]	valid_0's binary_logloss: 0.219192
[900]	valid_0's binary_logloss: 0.219002
[1000]	valid_0's binary_logloss: 0.218971
[1100]	valid_0's binary_logloss: 0.218852
[1200]	valid_0's binary_logloss: 0.218774
[1300]	valid_0's binary_logloss: 0.218729
[1400]	valid_0's binary_logloss: 0.218756
[1500]	valid_0's binary_logloss: 0.218765
[1600]	valid_0's binary_logloss: 0.218734
[1700]	valid_0's binary_logloss: 0.218758
[1800]	valid_0's binary_logloss: 0.218796
[1900]	valid_0's binary_logloss: 0.218766
[2000]	valid_0's binary_logloss: 0.218768
predicting
count zerO: 367131
amex: 0.7927763002465171
=== FINISHED FOLD 4/5 (5), log loss=0.21876801348978786, accuracy=0.9027260247107276, amex=0.7927763002465171 ===
t



[100]	valid_0's binary_logloss: 0.230549
[200]	valid_0's binary_logloss: 0.225209
[300]	valid_0's binary_logloss: 0.223072
[400]	valid_0's binary_logloss: 0.222056
[500]	valid_0's binary_logloss: 0.221685
[600]	valid_0's binary_logloss: 0.221831
[700]	valid_0's binary_logloss: 0.221435
[800]	valid_0's binary_logloss: 0.221288
[900]	valid_0's binary_logloss: 0.221282
[1000]	valid_0's binary_logloss: 0.22122
[1100]	valid_0's binary_logloss: 0.221163
[1200]	valid_0's binary_logloss: 0.221055
[1300]	valid_0's binary_logloss: 0.221118
[1400]	valid_0's binary_logloss: 0.221136
[1500]	valid_0's binary_logloss: 0.22112
[1600]	valid_0's binary_logloss: 0.221053
[1700]	valid_0's binary_logloss: 0.221102
[1800]	valid_0's binary_logloss: 0.221117
[1900]	valid_0's binary_logloss: 0.221042
[2000]	valid_0's binary_logloss: 0.221017
predicting


[32m[I 2022-09-09 12:03:41,181][0m Trial 14 finished with value: 0.7896969967905139 and parameters: {'n_estimators': 12000, 'learning_rate': 0.20649827432251577, 'num_leaves': 820, 'max_depth': 10, 'min_data_in_leaf': 7600, 'max_bin': 109, 'lambda_l1': 65, 'lambda_l2': 25, 'min_gain_to_split': 6.259655318535742, 'feature_fraction': 0.95, 'boosting_type': 'dart', 'bagging_fraction': 0.75, 'bagging_freq': 5}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7845212170278235
=== FINISHED FOLD 5/5 (5), log loss=0.22101676425575387, accuracy=0.9023446863219368, amex=0.7845212170278235 ===
time to run this fold: 1m, 36s
time to run folds: 8m, 9s
iteration logloss: 0.21943414634496367, iteration acc: 0.9031145311674516, Iteration amex: 0.7896969967905139
STARTING OPTIMIZATION iteration 16, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 12000, 'learning_rate': 0.19925325946768593, 'num_leaves': 780, 'max_depth': 6, 'min_data_in_leaf': 5600, 'max_bin': 121, 'lambda_l1': 20, 'lambda_l2': 15, 'min_gain_to_split': 10.898747185655086, 'bagging_fraction': None, 'bagging_freq': None, 'feature_fraction': 0.5, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'goss'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting
[100]	valid_0's binary_log

[32m[I 2022-09-09 12:07:54,309][0m Trial 15 finished with value: 0.7864754362400911 and parameters: {'n_estimators': 12000, 'learning_rate': 0.19925325946768593, 'num_leaves': 780, 'max_depth': 6, 'min_data_in_leaf': 5600, 'max_bin': 121, 'lambda_l1': 20, 'lambda_l2': 15, 'min_gain_to_split': 10.898747185655086, 'feature_fraction': 0.5, 'boosting_type': 'goss'}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7836450304778164
=== FINISHED FOLD 5/5 (5), log loss=0.22263383322007468, accuracy=0.9018979756379246, amex=0.7836450304778164 ===
time to run this fold: 59s
time to run folds: 4m, 13s
iteration logloss: 0.22138198453926677, iteration acc: 0.9024608149815986, Iteration amex: 0.7864754362400911
STARTING OPTIMIZATION iteration 17, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.1792069686497563, 'num_leaves': 600, 'max_depth': 9, 'min_data_in_leaf': 4000, 'max_bin': 172, 'lambda_l1': 60, 'lambda_l2': 35, 'min_gain_to_split': 2.205514337418423, 'bagging_fraction': 0.75, 'bagging_freq': 5, 'feature_fraction': 0.30000000000000004, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.229818
[200]	valid_0's binary_logloss: 0.224267
[300]	valid_0's binary_logloss: 0.221599
[400]	valid_0's binary_logloss: 0.22025
[500]	valid_0's binary_logloss: 0.219532
[600]	valid_0's binary_logloss: 0.219425
[700]	valid_0's binary_logloss: 0.218934
[800]	valid_0's binary_logloss: 0.218773
[900]	valid_0's binary_logloss: 0.218585
[1000]	valid_0's binary_logloss: 0.218582
[1100]	valid_0's binary_logloss: 0.218572
[1200]	valid_0's binary_logloss: 0.218611
[1300]	valid_0's binary_logloss: 0.218553
[1400]	valid_0's binary_logloss: 0.218424
[1500]	valid_0's binary_logloss: 0.218416
[1600]	valid_0's binary_logloss: 0.218401
[1700]	valid_0's binary_logloss: 0.218367
[1800]	valid_0's binary_logloss: 0.218287
[1900]	valid_0's binary_logloss: 0.218245
[2000]	valid_0's binary_logloss: 0.218181
predicting
count zerO: 91783
amex: 0.7930023765185292
=== FINISHED FOLD 1/5 (5), log loss=0.2181806906709857, accuracy=0.9042306309447283, amex=0.7930023765185292 ===
tim



[100]	valid_0's binary_logloss: 0.22884
[200]	valid_0's binary_logloss: 0.222697
[300]	valid_0's binary_logloss: 0.219333
[400]	valid_0's binary_logloss: 0.217725
[500]	valid_0's binary_logloss: 0.217109
[600]	valid_0's binary_logloss: 0.217195
[700]	valid_0's binary_logloss: 0.216751
[800]	valid_0's binary_logloss: 0.216722
[900]	valid_0's binary_logloss: 0.216498
[1000]	valid_0's binary_logloss: 0.216435
[1100]	valid_0's binary_logloss: 0.216252
[1200]	valid_0's binary_logloss: 0.216259
[1300]	valid_0's binary_logloss: 0.216229
[1400]	valid_0's binary_logloss: 0.216109
[1500]	valid_0's binary_logloss: 0.216184
[1600]	valid_0's binary_logloss: 0.216035
[1700]	valid_0's binary_logloss: 0.216017
[1800]	valid_0's binary_logloss: 0.216009
[1900]	valid_0's binary_logloss: 0.215962
[2000]	valid_0's binary_logloss: 0.215925
predicting
count zerO: 183566
amex: 0.7919441583468543
=== FINISHED FOLD 2/5 (5), log loss=0.21592543092574035, accuracy=0.9048080799276554, amex=0.7919441583468543 ===
t



[100]	valid_0's binary_logloss: 0.229879
[200]	valid_0's binary_logloss: 0.224009
[300]	valid_0's binary_logloss: 0.221204
[400]	valid_0's binary_logloss: 0.219772
[500]	valid_0's binary_logloss: 0.219015
[600]	valid_0's binary_logloss: 0.218931
[700]	valid_0's binary_logloss: 0.218608
[800]	valid_0's binary_logloss: 0.218495
[900]	valid_0's binary_logloss: 0.218218
[1000]	valid_0's binary_logloss: 0.218051
[1100]	valid_0's binary_logloss: 0.217918
[1200]	valid_0's binary_logloss: 0.217925
[1300]	valid_0's binary_logloss: 0.217846
[1400]	valid_0's binary_logloss: 0.217624
[1500]	valid_0's binary_logloss: 0.217671
[1600]	valid_0's binary_logloss: 0.217534
[1700]	valid_0's binary_logloss: 0.217515
[1800]	valid_0's binary_logloss: 0.217463
[1900]	valid_0's binary_logloss: 0.217393
[2000]	valid_0's binary_logloss: 0.217375
predicting
count zerO: 275349
amex: 0.7925217688027115
=== FINISHED FOLD 3/5 (5), log loss=0.21737455799692992, accuracy=0.9036313914341436, amex=0.7925217688027115 ===




[100]	valid_0's binary_logloss: 0.229752
[200]	valid_0's binary_logloss: 0.223451
[300]	valid_0's binary_logloss: 0.220524
[400]	valid_0's binary_logloss: 0.219021
[500]	valid_0's binary_logloss: 0.218168
[600]	valid_0's binary_logloss: 0.218202
[700]	valid_0's binary_logloss: 0.217632
[800]	valid_0's binary_logloss: 0.217448
[900]	valid_0's binary_logloss: 0.217174
[1000]	valid_0's binary_logloss: 0.21713
[1100]	valid_0's binary_logloss: 0.217068
[1200]	valid_0's binary_logloss: 0.217038
[1300]	valid_0's binary_logloss: 0.217045
[1400]	valid_0's binary_logloss: 0.216848
[1500]	valid_0's binary_logloss: 0.217025
[1600]	valid_0's binary_logloss: 0.216926
[1700]	valid_0's binary_logloss: 0.216824
[1800]	valid_0's binary_logloss: 0.216846
[1900]	valid_0's binary_logloss: 0.216772
[2000]	valid_0's binary_logloss: 0.216748
predicting
count zerO: 367131
amex: 0.7964007812290314
=== FINISHED FOLD 4/5 (5), log loss=0.21674835097092857, accuracy=0.9038373537294895, amex=0.7964007812290314 ===
t



[100]	valid_0's binary_logloss: 0.2315
[200]	valid_0's binary_logloss: 0.225373
[300]	valid_0's binary_logloss: 0.222791
[400]	valid_0's binary_logloss: 0.221311
[500]	valid_0's binary_logloss: 0.220513
[600]	valid_0's binary_logloss: 0.220657
[700]	valid_0's binary_logloss: 0.220177
[800]	valid_0's binary_logloss: 0.220065
[900]	valid_0's binary_logloss: 0.219825
[1000]	valid_0's binary_logloss: 0.21969
[1100]	valid_0's binary_logloss: 0.219501
[1200]	valid_0's binary_logloss: 0.21948
[1300]	valid_0's binary_logloss: 0.21944
[1400]	valid_0's binary_logloss: 0.219301
[1500]	valid_0's binary_logloss: 0.219305
[1600]	valid_0's binary_logloss: 0.219141
[1700]	valid_0's binary_logloss: 0.219069
[1800]	valid_0's binary_logloss: 0.21905
[1900]	valid_0's binary_logloss: 0.219038
[2000]	valid_0's binary_logloss: 0.219088
predicting


[32m[I 2022-09-09 12:20:33,971][0m Trial 16 finished with value: 0.7923011097773573 and parameters: {'n_estimators': 12000, 'learning_rate': 0.1792069686497563, 'num_leaves': 600, 'max_depth': 9, 'min_data_in_leaf': 4000, 'max_bin': 172, 'lambda_l1': 60, 'lambda_l2': 35, 'min_gain_to_split': 2.205514337418423, 'feature_fraction': 0.30000000000000004, 'boosting_type': 'dart', 'bagging_fraction': 0.75, 'bagging_freq': 5}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.78763646398966
=== FINISHED FOLD 5/5 (5), log loss=0.21908787684255054, accuracy=0.9037610860517312, amex=0.78763646398966 ===
time to run this fold: 2m, 30s
time to run folds: 12m, 39s
iteration logloss: 0.21746338148142702, iteration acc: 0.9040537084175495, Iteration amex: 0.7923011097773573
STARTING OPTIMIZATION iteration 18, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 2000, 'learning_rate': 0.2519559845717807, 'num_leaves': 1100, 'max_depth': 7, 'min_data_in_leaf': 6900, 'max_bin': 185, 'lambda_l1': 20, 'lambda_l2': 10, 'min_gain_to_split': 3.085752106751605, 'bagging_fraction': 0.8, 'bagging_freq': 5, 'feature_fraction': 0.55, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'dart'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.225331
[200]	valid_0's binary_logloss: 0.221219
[300]	valid_0's binary_logloss: 0.219422
[400]	valid_0's binary_logloss: 0.218383
[500]	valid_0's binary_logloss: 0.217806
[600]	valid_0's binary_logloss: 0.217852
[700]	valid_0's binary_logloss: 0.217654
[800]	valid_0's binary_logloss: 0.21756
[900]	valid_0's binary_logloss: 0.217356
[1000]	valid_0's binary_logloss: 0.217323
[1100]	valid_0's binary_logloss: 0.217225
[1200]	valid_0's binary_logloss: 0.217249
[1300]	valid_0's binary_logloss: 0.217166
[1400]	valid_0's binary_logloss: 0.217286
[1500]	valid_0's binary_logloss: 0.217308
[1600]	valid_0's binary_logloss: 0.217309
[1700]	valid_0's binary_logloss: 0.217352
[1800]	valid_0's binary_logloss: 0.217256
[1900]	valid_0's binary_logloss: 0.217312
[2000]	valid_0's binary_logloss: 0.217203
predicting
count zerO: 91783
amex: 0.7930995298531127
=== FINISHED FOLD 1/5 (5), log loss=0.21720335807072805, accuracy=0.9051240425786911, amex=0.7930995298531127 ===
ti



[100]	valid_0's binary_logloss: 0.22388
[200]	valid_0's binary_logloss: 0.219361
[300]	valid_0's binary_logloss: 0.217409
[400]	valid_0's binary_logloss: 0.216346
[500]	valid_0's binary_logloss: 0.215732
[600]	valid_0's binary_logloss: 0.215735
[700]	valid_0's binary_logloss: 0.215265
[800]	valid_0's binary_logloss: 0.215087
[900]	valid_0's binary_logloss: 0.215035
[1000]	valid_0's binary_logloss: 0.215075
[1100]	valid_0's binary_logloss: 0.214967
[1200]	valid_0's binary_logloss: 0.214966
[1300]	valid_0's binary_logloss: 0.214984
[1400]	valid_0's binary_logloss: 0.214989
[1500]	valid_0's binary_logloss: 0.21503
[1600]	valid_0's binary_logloss: 0.214921
[1700]	valid_0's binary_logloss: 0.215071
[1800]	valid_0's binary_logloss: 0.215081
[1900]	valid_0's binary_logloss: 0.21508
[2000]	valid_0's binary_logloss: 0.21505
predicting
count zerO: 183566
amex: 0.7938383134324931
=== FINISHED FOLD 2/5 (5), log loss=0.21505012372757593, accuracy=0.905243890480808, amex=0.7938383134324931 ===
time 



[100]	valid_0's binary_logloss: 0.224926
[200]	valid_0's binary_logloss: 0.220625
[300]	valid_0's binary_logloss: 0.218554
[400]	valid_0's binary_logloss: 0.21776
[500]	valid_0's binary_logloss: 0.217322
[600]	valid_0's binary_logloss: 0.217227
[700]	valid_0's binary_logloss: 0.216873
[800]	valid_0's binary_logloss: 0.216734
[900]	valid_0's binary_logloss: 0.216769
[1000]	valid_0's binary_logloss: 0.216668
[1100]	valid_0's binary_logloss: 0.216686
[1200]	valid_0's binary_logloss: 0.216778
[1300]	valid_0's binary_logloss: 0.21676
[1400]	valid_0's binary_logloss: 0.216763
[1500]	valid_0's binary_logloss: 0.21668
[1600]	valid_0's binary_logloss: 0.216622
[1700]	valid_0's binary_logloss: 0.216663
[1800]	valid_0's binary_logloss: 0.216668
[1900]	valid_0's binary_logloss: 0.216755
[2000]	valid_0's binary_logloss: 0.216646
predicting
count zerO: 275349
amex: 0.7923433595124041
=== FINISHED FOLD 3/5 (5), log loss=0.21664587605991376, accuracy=0.9037621346000894, amex=0.7923433595124041 ===
tim



[100]	valid_0's binary_logloss: 0.22451
[200]	valid_0's binary_logloss: 0.220018
[300]	valid_0's binary_logloss: 0.218045
[400]	valid_0's binary_logloss: 0.217209
[500]	valid_0's binary_logloss: 0.216639
[600]	valid_0's binary_logloss: 0.216558
[700]	valid_0's binary_logloss: 0.216293
[800]	valid_0's binary_logloss: 0.216269
[900]	valid_0's binary_logloss: 0.216014
[1000]	valid_0's binary_logloss: 0.215964
[1100]	valid_0's binary_logloss: 0.215853
[1200]	valid_0's binary_logloss: 0.215729
[1300]	valid_0's binary_logloss: 0.21581
[1400]	valid_0's binary_logloss: 0.215671
[1500]	valid_0's binary_logloss: 0.215718
[1600]	valid_0's binary_logloss: 0.215782
[1700]	valid_0's binary_logloss: 0.215784
[1800]	valid_0's binary_logloss: 0.215819
[1900]	valid_0's binary_logloss: 0.215844
[2000]	valid_0's binary_logloss: 0.215749
predicting
count zerO: 367131
amex: 0.7977138072376417
=== FINISHED FOLD 4/5 (5), log loss=0.2157493638354905, accuracy=0.9049268919831776, amex=0.7977138072376417 ===
tim



[100]	valid_0's binary_logloss: 0.226287
[200]	valid_0's binary_logloss: 0.222094
[300]	valid_0's binary_logloss: 0.220291
[400]	valid_0's binary_logloss: 0.219401
[500]	valid_0's binary_logloss: 0.218877
[600]	valid_0's binary_logloss: 0.218856
[700]	valid_0's binary_logloss: 0.218783
[800]	valid_0's binary_logloss: 0.218662
[900]	valid_0's binary_logloss: 0.218662
[1000]	valid_0's binary_logloss: 0.218626
[1100]	valid_0's binary_logloss: 0.218425
[1200]	valid_0's binary_logloss: 0.218339
[1300]	valid_0's binary_logloss: 0.218344
[1400]	valid_0's binary_logloss: 0.21825
[1500]	valid_0's binary_logloss: 0.218252
[1600]	valid_0's binary_logloss: 0.218197
[1700]	valid_0's binary_logloss: 0.218219
[1800]	valid_0's binary_logloss: 0.218296
[1900]	valid_0's binary_logloss: 0.218225
[2000]	valid_0's binary_logloss: 0.218302
predicting


[32m[I 2022-09-09 12:32:59,206][0m Trial 17 finished with value: 0.7930696238566487 and parameters: {'n_estimators': 12000, 'learning_rate': 0.2519559845717807, 'num_leaves': 1100, 'max_depth': 7, 'min_data_in_leaf': 6900, 'max_bin': 185, 'lambda_l1': 20, 'lambda_l2': 10, 'min_gain_to_split': 3.085752106751605, 'feature_fraction': 0.55, 'boosting_type': 'dart', 'bagging_fraction': 0.8, 'bagging_freq': 5}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7883531092475912
=== FINISHED FOLD 5/5 (5), log loss=0.21830215181657397, accuracy=0.903880935259637, amex=0.7883531092475912 ===
time to run this fold: 2m, 27s
time to run folds: 12m, 25s
iteration logloss: 0.21659017470205644, iteration acc: 0.9045875789804807, Iteration amex: 0.7930696238566487
STARTING OPTIMIZATION iteration 19, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 12000, 'learning_rate': 0.2530154318515711, 'num_leaves': 1140, 'max_depth': 6, 'min_data_in_leaf': 6500, 'max_bin': 190, 'lambda_l1': 55, 'lambda_l2': 10, 'min_gain_to_split': 5.3843132626962475, 'bagging_fraction': None, 'bagging_freq': None, 'feature_fraction': 0.6000000000000001, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'goss'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting
[100]	vali

[32m[I 2022-09-09 12:36:39,724][0m Trial 18 finished with value: 0.7858538314512433 and parameters: {'n_estimators': 12000, 'learning_rate': 0.2530154318515711, 'num_leaves': 1140, 'max_depth': 6, 'min_data_in_leaf': 6500, 'max_bin': 190, 'lambda_l1': 55, 'lambda_l2': 10, 'min_gain_to_split': 5.3843132626962475, 'feature_fraction': 0.6000000000000001, 'boosting_type': 'goss'}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7799241803066352
=== FINISHED FOLD 5/5 (5), log loss=0.2243157882712578, accuracy=0.9007539604715522, amex=0.7799241803066352 ===
time to run this fold: 36s
time to run folds: 3m, 40s
iteration logloss: 0.22191976487962303, iteration acc: 0.9017787663377262, Iteration amex: 0.7858538314512433
STARTING OPTIMIZATION iteration 20, data size: (458913, 1580), params={'verbosity': -1, 'n_estimators': 12000, 'learning_rate': 0.09469158229493857, 'num_leaves': 1840, 'max_depth': 7, 'min_data_in_leaf': 4000, 'max_bin': 177, 'lambda_l1': 20, 'lambda_l2': 35, 'min_gain_to_split': 2.823134449326043, 'bagging_fraction': 0.35000000000000003, 'bagging_freq': 5, 'feature_fraction': 0.55, 'subsample': None, 'reg_alpha': None, 'reg_lambda': None, 'min_sum_hessian_in_leaf': None, 'min_child_samples': None, 'colsample_bytree': None, 'subsample_freq': None, 'min_split_gain': None, 'boosting_type': 'gbdt'}
=== STARTING FOLD 1/5 (5) ===
creating classifier
fitting
[100]	valid_0's b

[32m[I 2022-09-09 12:39:04,007][0m Trial 19 finished with value: 0.7889612678747275 and parameters: {'n_estimators': 12000, 'learning_rate': 0.09469158229493857, 'num_leaves': 1840, 'max_depth': 7, 'min_data_in_leaf': 4000, 'max_bin': 177, 'lambda_l1': 20, 'lambda_l2': 35, 'min_gain_to_split': 2.823134449326043, 'feature_fraction': 0.55, 'boosting_type': 'gbdt', 'bagging_fraction': 0.35000000000000003, 'bagging_freq': 5}. Best is trial 2 with value: 0.7932202487793407.[0m


count zerO: 458913
amex: 0.7848318326750809
=== FINISHED FOLD 5/5 (5), log loss=0.2214314717300738, accuracy=0.9028785600662439, amex=0.7848318326750809 ===
time to run this fold: 30s
time to run folds: 2m, 24s
iteration logloss: 0.21950076865634038, iteration acc: 0.9032801425014754, Iteration amex: 0.7889612678747275


# Check Optimizer Results

In [15]:
# One row per optimizer iteration: the parameters that were tried, followed by
# the metrics and the run time recorded for that iteration.
df_params = pd.DataFrame(optuna_parameters).assign(
    loss=optuna_losses,
    accuracy=optuna_accuracies,
    amex=optuna_amexes,
    time=optuna_seconds,
)


In [16]:
# Persist the per-iteration results. The DataFrame index is written as well
# (no index=False), so it shows up as an "Unnamed: 0" column when the file is
# read back with pd.read_csv.
df_params.to_csv("optimizer_results_lgbm_lagged.csv")

In [17]:
# Load the stored optimizer results from disk (presumably so the cells below
# can be run from the CSV without repeating the optimization — confirm).
df_params = pd.read_csv("optimizer_results_lgbm_lagged.csv")

In [18]:
# Peek at the first five rows of the optimizer results.
df_params.head(5)

Unnamed: 0.1,Unnamed: 0,verbosity,n_estimators,learning_rate,num_leaves,max_depth,min_data_in_leaf,max_bin,lambda_l1,lambda_l2,...,min_sum_hessian_in_leaf,min_child_samples,colsample_bytree,subsample_freq,min_split_gain,boosting_type,loss,accuracy,amex,time
0,0,-1,12000,0.036808,2040,4,300,157,90,85,...,,,,,,goss,0.218847,0.903561,0.789976,1464
1,1,-1,12000,0.220774,2060,12,7700,209,90,20,...,,,,,,goss,0.221447,0.902304,0.786098,289
2,2,-1,2000,0.252294,620,10,7500,146,25,0,...,,,,,,dart,0.216523,0.904618,0.79322,689
3,3,-1,12000,0.155771,1440,3,5900,251,40,80,...,,,,,,gbdt,0.219392,0.903197,0.789396,103
4,4,-1,12000,0.139857,2580,11,800,190,45,60,...,,,,,,gbdt,0.221637,0.902018,0.786261,80


In [19]:
# Ten best iterations, highest "amex" score first.
df_params.sort_values(by="amex", ascending=False).head(10)

Unnamed: 0.1,Unnamed: 0,verbosity,n_estimators,learning_rate,num_leaves,max_depth,min_data_in_leaf,max_bin,lambda_l1,lambda_l2,...,min_sum_hessian_in_leaf,min_child_samples,colsample_bytree,subsample_freq,min_split_gain,boosting_type,loss,accuracy,amex,time
2,2,-1,2000,0.252294,620,10,7500,146,25,0,...,,,,,,dart,0.216523,0.904618,0.79322,689
13,13,-1,2000,0.251956,1100,7,6900,185,20,10,...,,,,,,dart,0.21659,0.904588,0.79307,745
9,9,-1,2000,0.197107,740,9,8100,116,15,20,...,,,,,,dart,0.216647,0.904496,0.793006,797
8,8,-1,2000,0.215375,40,8,10000,133,10,5,...,,,,,,dart,0.217503,0.904004,0.792391,780
12,12,-1,2000,0.179207,600,9,4000,172,60,35,...,,,,,,dart,0.217463,0.904054,0.792301,759
7,7,-1,2000,0.297927,120,8,10000,149,5,0,...,,,,,,dart,0.219074,0.903463,0.790112,823
0,0,-1,12000,0.036808,2040,4,300,157,90,85,...,,,,,,goss,0.218847,0.903561,0.789976,1464
5,5,-1,2000,0.14255,2800,3,600,224,30,45,...,,,,,,dart,0.219338,0.902938,0.789967,394
10,10,-1,2000,0.206498,820,10,7600,109,65,25,...,,,,,,dart,0.219434,0.903115,0.789697,489
3,3,-1,12000,0.155771,1440,3,5900,251,40,80,...,,,,,,gbdt,0.219392,0.903197,0.789396,103


In [20]:
# Position (not label) of the row with the highest "amex" score; it is used
# with .iloc further down.
best_idx = df_params["amex"].argmax()
best_idx

2

In [21]:
#df_train = df_train.reset_index(drop=True)
#df_train

In [22]:
#optuna_parameters

In [23]:
# Best-scoring row as a {column name: value} dict, displayed for inspection.
optuna_params = dict(df_params.iloc[best_idx])
optuna_params

{'Unnamed: 0': 2,
 'verbosity': -1,
 'n_estimators': 2000,
 'learning_rate': 0.2522936172843501,
 'num_leaves': 620,
 'max_depth': 10,
 'min_data_in_leaf': 7500,
 'max_bin': 146,
 'lambda_l1': 25,
 'lambda_l2': 0,
 'min_gain_to_split': 2.7806868606837614,
 'bagging_fraction': 0.8500000000000001,
 'bagging_freq': 5.0,
 'feature_fraction': 0.8,
 'subsample': nan,
 'reg_alpha': nan,
 'reg_lambda': nan,
 'min_sum_hessian_in_leaf': nan,
 'min_child_samples': nan,
 'colsample_bytree': nan,
 'subsample_freq': nan,
 'min_split_gain': nan,
 'boosting_type': 'dart',
 'loss': 0.2165227464374611,
 'accuracy': 0.9046180865264162,
 'amex': 0.7932202487793407,
 'time': 689}

In [24]:
# Re-read the stored optimizer results from disk.
df_params = pd.read_csv("optimizer_results_lgbm_lagged.csv")

# Clean Up Stored Optimizer Best Params for Use

Because the code above sometimes stores a few params that were not used in a trial (they end up as NaN in the results table), the classifier might complain about them, so they are removed here.

In [25]:
# Take the iteration with the highest "amex" score and turn its row into a
# plain dict that can be handed to the classifier.
best_row = df_params.sort_values(by="amex", ascending=False).iloc[0]
optuna_params = dict(best_row)

# Parameters that were not set for this iteration are missing (NaN) in the
# table; drop them so they are not passed on. pd.isna recognises NaN/None
# whatever the scalar type is and returns False for strings.
for param in list(optuna_params):
    if pd.isna(optuna_params[param]):
        print(f"delete: {param}")
        del optuna_params[param]

# bagging_freq is read back from the CSV as a float (e.g. 5.0); restore the
# integer value.
if "bagging_freq" in optuna_params:
    optuna_params["bagging_freq"] = int(optuna_params["bagging_freq"])

# Bookkeeping columns are not model parameters. pop() with a default keeps
# this cell working when a column is absent (e.g. the CSV was written without
# its index) or was already removed above because its value was NaN.
for column in ("loss", "Unnamed: 0", "accuracy", "time", "amex"):
    optuna_params.pop(column, None)


delete: subsample
delete: reg_alpha
delete: reg_lambda
delete: min_sum_hessian_in_leaf
delete: min_child_samples
delete: colsample_bytree
delete: subsample_freq
delete: min_split_gain


In [26]:
# Display the parameter dict that is passed to the final training run.
optuna_params

{'verbosity': -1,
 'n_estimators': 2000,
 'learning_rate': 0.2522936172843501,
 'num_leaves': 620,
 'max_depth': 10,
 'min_data_in_leaf': 7500,
 'max_bin': 146,
 'lambda_l1': 25,
 'lambda_l2': 0,
 'min_gain_to_split': 2.7806868606837614,
 'bagging_fraction': 0.8500000000000001,
 'bagging_freq': 5,
 'feature_fraction': 0.8,
 'boosting_type': 'dart'}

In [27]:
# dart is slow with a large number of estimators (iterations), so a fixed
# iteration count is used for the final dart model.
# NOTE(review): this assigns 3000 unconditionally rather than capping the
# tuned value; for the best trial shown (n_estimators=2000) it raises the
# count. Confirm this is intended.
if optuna_params["boosting_type"] == "dart":
    optuna_params["n_estimators"] = 3000


# Run Actual Train and Predict Using Found Optimized Params

On the training data


In [28]:
# TODO: max iterations 10k
# Sometimes it is useful to run on a smaller subset of the data to get initial
# results faster:
#final_models, final_preds = train_and_predict(optuna_params, df_full_train[:50000])
# Train with the selected parameters. The returned models are reused for the
# test-set prediction and the returned predictions are scored against the
# training labels in the cells that follow.
# NOTE(review): `strat` is defined in an earlier cell — presumably the
# stratification target for the folds; confirm.
final_models, final_preds = train_and_predict(optuna_params, df_train, strat)


=== STARTING FOLD 1/5 ===
creating classifier
fitting




[100]	valid_0's binary_logloss: 0.224798
[200]	valid_0's binary_logloss: 0.220708
[300]	valid_0's binary_logloss: 0.218625
[400]	valid_0's binary_logloss: 0.217829
[500]	valid_0's binary_logloss: 0.217437
[600]	valid_0's binary_logloss: 0.217381
[700]	valid_0's binary_logloss: 0.217128
[800]	valid_0's binary_logloss: 0.217094
[900]	valid_0's binary_logloss: 0.216946
[1000]	valid_0's binary_logloss: 0.216867
[1100]	valid_0's binary_logloss: 0.216733
[1200]	valid_0's binary_logloss: 0.216645
[1300]	valid_0's binary_logloss: 0.216784
[1400]	valid_0's binary_logloss: 0.216734
[1500]	valid_0's binary_logloss: 0.216787
[1600]	valid_0's binary_logloss: 0.216793
[1700]	valid_0's binary_logloss: 0.216786
[1800]	valid_0's binary_logloss: 0.216809
[1900]	valid_0's binary_logloss: 0.216846
[2000]	valid_0's binary_logloss: 0.216877
[2100]	valid_0's binary_logloss: 0.216865
[2200]	valid_0's binary_logloss: 0.21688
[2300]	valid_0's binary_logloss: 0.21687
[2400]	valid_0's binary_logloss: 0.216802
[25



[100]	valid_0's binary_logloss: 0.224663
[200]	valid_0's binary_logloss: 0.220955
[300]	valid_0's binary_logloss: 0.219001
[400]	valid_0's binary_logloss: 0.218211
[500]	valid_0's binary_logloss: 0.218021
[600]	valid_0's binary_logloss: 0.217894
[700]	valid_0's binary_logloss: 0.217591
[800]	valid_0's binary_logloss: 0.217605
[900]	valid_0's binary_logloss: 0.217472
[1000]	valid_0's binary_logloss: 0.217343
[1100]	valid_0's binary_logloss: 0.217343
[1200]	valid_0's binary_logloss: 0.217322
[1300]	valid_0's binary_logloss: 0.217307
[1400]	valid_0's binary_logloss: 0.217222
[1500]	valid_0's binary_logloss: 0.217259
[1600]	valid_0's binary_logloss: 0.217215
[1700]	valid_0's binary_logloss: 0.217204
[1800]	valid_0's binary_logloss: 0.21734
[1900]	valid_0's binary_logloss: 0.217297
[2000]	valid_0's binary_logloss: 0.217342
[2100]	valid_0's binary_logloss: 0.217244
[2200]	valid_0's binary_logloss: 0.217137
[2300]	valid_0's binary_logloss: 0.217121
[2400]	valid_0's binary_logloss: 0.217062
[2



[100]	valid_0's binary_logloss: 0.222895
[200]	valid_0's binary_logloss: 0.218318
[300]	valid_0's binary_logloss: 0.216428
[400]	valid_0's binary_logloss: 0.21554
[500]	valid_0's binary_logloss: 0.215202
[600]	valid_0's binary_logloss: 0.215305
[700]	valid_0's binary_logloss: 0.215164
[800]	valid_0's binary_logloss: 0.215023
[900]	valid_0's binary_logloss: 0.214875
[1000]	valid_0's binary_logloss: 0.214677
[1100]	valid_0's binary_logloss: 0.214653
[1200]	valid_0's binary_logloss: 0.214551
[1300]	valid_0's binary_logloss: 0.214517
[1400]	valid_0's binary_logloss: 0.214438
[1500]	valid_0's binary_logloss: 0.214489
[1600]	valid_0's binary_logloss: 0.214366
[1700]	valid_0's binary_logloss: 0.214376
[1800]	valid_0's binary_logloss: 0.214385
[1900]	valid_0's binary_logloss: 0.214396
[2000]	valid_0's binary_logloss: 0.214355
[2100]	valid_0's binary_logloss: 0.214352
[2200]	valid_0's binary_logloss: 0.214318
[2300]	valid_0's binary_logloss: 0.214303
[2400]	valid_0's binary_logloss: 0.214359
[2



[100]	valid_0's binary_logloss: 0.223752
[200]	valid_0's binary_logloss: 0.219598
[300]	valid_0's binary_logloss: 0.21803
[400]	valid_0's binary_logloss: 0.217307
[500]	valid_0's binary_logloss: 0.216936
[600]	valid_0's binary_logloss: 0.216858
[700]	valid_0's binary_logloss: 0.216568
[800]	valid_0's binary_logloss: 0.216585
[900]	valid_0's binary_logloss: 0.216519
[1000]	valid_0's binary_logloss: 0.216373
[1100]	valid_0's binary_logloss: 0.216396
[1200]	valid_0's binary_logloss: 0.216521
[1300]	valid_0's binary_logloss: 0.216577
[1400]	valid_0's binary_logloss: 0.216491
[1500]	valid_0's binary_logloss: 0.216593
[1600]	valid_0's binary_logloss: 0.216528
[1700]	valid_0's binary_logloss: 0.216539
[1800]	valid_0's binary_logloss: 0.216577
[1900]	valid_0's binary_logloss: 0.216577
[2000]	valid_0's binary_logloss: 0.216517
[2100]	valid_0's binary_logloss: 0.216523
[2200]	valid_0's binary_logloss: 0.216519
[2300]	valid_0's binary_logloss: 0.216521
[2400]	valid_0's binary_logloss: 0.216541
[2



[100]	valid_0's binary_logloss: 0.226351
[200]	valid_0's binary_logloss: 0.222044
[300]	valid_0's binary_logloss: 0.219996
[400]	valid_0's binary_logloss: 0.219142
[500]	valid_0's binary_logloss: 0.218923
[600]	valid_0's binary_logloss: 0.218881
[700]	valid_0's binary_logloss: 0.218575
[800]	valid_0's binary_logloss: 0.218607
[900]	valid_0's binary_logloss: 0.218561
[1000]	valid_0's binary_logloss: 0.218532
[1100]	valid_0's binary_logloss: 0.21847
[1200]	valid_0's binary_logloss: 0.218425
[1300]	valid_0's binary_logloss: 0.218373
[1400]	valid_0's binary_logloss: 0.218263
[1500]	valid_0's binary_logloss: 0.218174
[1600]	valid_0's binary_logloss: 0.218194
[1700]	valid_0's binary_logloss: 0.218116
[1800]	valid_0's binary_logloss: 0.218121
[1900]	valid_0's binary_logloss: 0.218039
[2000]	valid_0's binary_logloss: 0.218038
[2100]	valid_0's binary_logloss: 0.217989
[2200]	valid_0's binary_logloss: 0.218035
[2300]	valid_0's binary_logloss: 0.21801
[2400]	valid_0's binary_logloss: 0.218046
[25

In [29]:
# Training-set predictions keyed by customer: start from the ID column of
# df_train (keeping its index) and attach the predictions next to it.
df_preds = df_train[["customer_ID"]].copy()
df_preds["prediction"] = final_preds

In [30]:
# Competition metric for the training-set predictions. final_preds is 2-D, so
# its first column is passed as the 1-D score vector.
# NOTE(review): `y` comes from an earlier cell — presumably the training
# target aligned with df_train; confirm.
amex_metric_mod(y, final_preds[:, 0])

0.793689046981534

In [31]:
# Save the training-set predictions. The DataFrame index is written as the
# first CSV column (no index=False).
df_preds.to_csv("predictions_lgbm_lagged.csv")

# Predict the Kaggle Test Set

In [32]:
# Feature matrix for the test set: every column except the customer identifier.
# (An earlier variant also dropped "S_2":
#  X = df_test.drop(["customer_ID", "S_2"], axis=1))
X = df_test.drop(columns=["customer_ID"])

In [33]:
from tqdm import tqdm

# Average the predicted probability of the second class (column 1 of
# predict_proba, i.e. the positive label for a 0/1 target) over all models
# returned by the final training run.
if not final_models:
    # Without this check an empty model list would silently yield a
    # submission without any real predictions.
    raise ValueError("final_models is empty; nothing to predict with")

n_classes = 1
sub_preds = np.zeros((df_test.shape[0], n_classes))

for model in tqdm(final_models):
    # could define num_iteration but default values should cover it
    # https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html#lightgbm.LGBMClassifier.predict_proba
    preds = model.predict_proba(X)
    # reshape() returns an (n_rows, 1) column without assigning to .shape in
    # place, which NumPy discourages.
    preds_true = preds[:, 1].reshape(-1, n_classes)
    sub_preds += preds_true

# Divide by the number of models actually summed instead of the N_FOLDS
# constant, so the mean stays correct if the two ever differ.
sub_preds /= len(final_models)


100%|████████████████████████████████████████████████████████████████████| 5/5 [01:59<00:00, 23.81s/it]


In [34]:
# Submission frame: the customer IDs of the test set with the averaged
# prediction attached; displayed for a quick sanity check.
submission = df_test[["customer_ID"]].copy()
submission["prediction"] = sub_preds
submission

Unnamed: 0,customer_ID,prediction
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0.021787
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0.000869
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0.052829
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0.196066
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0.862293
...,...,...
924616,ffff952c631f2c911b8a2a8ca56ea6e656309a83d2f64c...,0.008613
924617,ffffcf5df59e5e0bba2a5ac4578a34e2b5aa64a1546cd3...,0.772294
924618,ffffd61f098cc056dbd7d2a21380c4804bbfe60856f475...,0.556056
924619,ffffddef1fc3643ea179c93245b68dca0f36941cd83977...,0.251052


In [35]:
# Write the submission file; index=False keeps only the customer_ID and
# prediction columns in the CSV.
submission.to_csv("submission_lgbm_lagged_seed2.csv", index=False)

In [36]:
#0.794