In [1]:
%pip install matplotlib pandas numpy seaborn catboost xfeat

Note: you may need to restart the kernel to use updated packages.


In [2]:
# ========================================
# Library
# ========================================
import math
import random
import pickle
import itertools
import warnings
warnings.filterwarnings('ignore')
import time
import os

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import japanize_matplotlib
# import jpholiday
from glob import glob
from tqdm import tqdm
from sklearn.model_selection import (
    TimeSeriesSplit,
    StratifiedKFold,
    KFold,
    GroupKFold,
    StratifiedGroupKFold,
)
from xfeat import SelectCategorical, LabelEncoder, LambdaEncoder, Pipeline, ConcatCombination, SelectNumerical, \
    ArithmeticCombinations, TargetEncoder, aggregation, GBDTFeatureSelector, GBDTFeatureExplorer 
# ←←←いくつかいらないものがありそう。xfeatの使い方をおさらいしないと
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score, roc_auc_score, classification_report
from scipy.optimize import minimize
import lightgbm as lgb
import optuna
# catBoost
from catboost import CatBoost, CatBoostClassifier, Pool
# XGBoost

from expressway_router import ExpresswayRouter

from collections import Counter

In [3]:
i_path = '../../train/'
o_path = '../model/'

TARGET = ['is_congestion']

threshold = 0.5

## Load datas

In [4]:
train_df = pd.read_csv(i_path + 'train.csv')
road_df = pd.read_csv(i_path + 'road_local.csv')
search_spec_df = pd.read_csv(i_path + 'search_specified.csv')
search_unspec_df = pd.read_csv(i_path + 'search_unspecified.csv')

## Preprocessing

In [5]:
def expand_datetime(df):
    if 'datetime' in df.columns:
        df['year'] = df['datetime'].dt.year
        df['month'] = df['datetime'].dt.month
        df['day'] = df['datetime'].dt.day
        df['hour'] = df['datetime'].dt.hour
    if 'date' in df.columns:
        df['year'] = df['date'].dt.year
        df['month'] = df['date'].dt.month
        df['day'] = df['date'].dt.day
    return df

In [6]:
def preprocess_fit(train_df, search_spec_df, search_unspec_df):
    train_df['datetime'] = pd.to_datetime(train_df['datetime'])
    search_spec_df['datetime'] = pd.to_datetime(search_spec_df['datetime'])
    search_unspec_df['date'] = pd.to_datetime(search_unspec_df['date'])

    train_df = expand_datetime(train_df)
    search_unspec_df = expand_datetime(search_unspec_df)

    train_df = train_df.merge(search_spec_df, on=['datetime', 'start_code', 'end_code'], how='left')
    train_df = train_df.merge(search_unspec_df, on=['year', 'month', 'day', 'start_code', 'end_code'], how='left')
    train_df = train_df.merge(road_df.drop(['start_name', 'end_name'], axis=1), on=['start_code', 'end_code'], how='left')

    train_df['dayofweek'] = train_df['datetime'].dt.weekday

    train_df['section'] = train_df['start_code'].astype(str) + '_' + train_df['KP'].astype(str) + '_' + train_df['end_code'].astype(str)

    return train_df

In [7]:
train = preprocess_fit(train_df, search_spec_df, search_unspec_df)

In [8]:
train.head(5)

Unnamed: 0,datetime,start_code,end_code,KP,OCC,allCars,speed,is_congestion,year,month,day,hour,search_specified,date,search_unspecified,road_code,direction,limit_speed,start_KP,end_KP,start_pref_code,end_pref_code,start_lat,end_lat,start_lng,end_lng,start_degree,end_degree,dayofweek,section
0,2021-04-08 00:00:00,1110210,1800006,2.48,1.833333,507,94.208661,0,2021,4,8,0,15.0,2021-04-08,3419.0,1800,下り,100.0,0.8,9.4,13,11,35.75582,35.80615,139.601514,139.535511,4.0,2.0,3,1110210_2.48_1800006
1,2021-04-08 01:00:00,1110210,1800006,2.48,1.75,444,94.469663,0,2021,4,8,1,6.0,2021-04-08,3419.0,1800,下り,100.0,0.8,9.4,13,11,35.75582,35.80615,139.601514,139.535511,4.0,2.0,3,1110210_2.48_1800006
2,2021-04-08 02:00:00,1110210,1800006,2.48,1.5,363,92.593407,0,2021,4,8,2,3.0,2021-04-08,3419.0,1800,下り,100.0,0.8,9.4,13,11,35.75582,35.80615,139.601514,139.535511,4.0,2.0,3,1110210_2.48_1800006
3,2021-04-08 03:00:00,1110210,1800006,2.48,1.583333,430,94.50116,0,2021,4,8,3,26.0,2021-04-08,3419.0,1800,下り,100.0,0.8,9.4,13,11,35.75582,35.80615,139.601514,139.535511,4.0,2.0,3,1110210_2.48_1800006
4,2021-04-08 04:00:00,1110210,1800006,2.48,1.75,500,94.07984,0,2021,4,8,4,30.0,2021-04-08,3419.0,1800,下り,100.0,0.8,9.4,13,11,35.75582,35.80615,139.601514,139.535511,4.0,2.0,3,1110210_2.48_1800006


In [9]:
#########################################################
# ここからtrainのデータの整形などを実施する(ドラプラの元データとかは使わんでもある程度出来そうな気がしてる。。。。。。。。)
#########################################################


def preprocess(df):
    agg_dfs = []
    num_df = SelectNumerical().fit_transform(df)
    encoder = Pipeline([
        SelectCategorical(),
        LabelEncoder(output_suffix=""),
        ])
    le_df = encoder.fit_transform(df)
    # agg_dfs.append(get_agg_df()) # group byすることによるそれぞれの
    preprocessed_df = pd.concat([num_df,le_df]+agg_dfs, axis=1)
    return preprocessed_df

In [10]:
cat_cols = ['road_code', 'start_code', 'end_code', 'section', 'direction', 'dayofweek'] # この箇所については、select_numericalあたりを使ったほうが良さそう

num_cols = ['year', 'month', 'day', 'hour', 'search_specified', 'search_unspecified', 'KP', 'start_KP', 'end_KP', 'limit_speed', 'OCC']
feature_cols = cat_cols + num_cols

In [11]:
test = train.loc[(train['year']==2021) & ((train['month']==4) | (train['month']==5))]
train = train.loc[~((train['year']==2021) & ((train['month']==4) | (train['month']==5)))]

In [12]:
train[feature_cols].head(5)

Unnamed: 0,road_code,start_code,end_code,section,direction,dayofweek,year,month,day,hour,search_specified,search_unspecified,KP,start_KP,end_KP,limit_speed,OCC
1296,1800,1110210,1800006,1110210_2.48_1800006,下り,1,2021,6,1,0,12.0,2656.0,2.48,0.8,9.4,100.0,2.083333
1297,1800,1110210,1800006,1110210_2.48_1800006,下り,1,2021,6,1,1,3.0,2656.0,2.48,0.8,9.4,100.0,1.75
1298,1800,1110210,1800006,1110210_2.48_1800006,下り,1,2021,6,1,2,11.0,2656.0,2.48,0.8,9.4,100.0,1.25
1299,1800,1110210,1800006,1110210_2.48_1800006,下り,1,2021,6,1,3,28.0,2656.0,2.48,0.8,9.4,100.0,1.916667
1300,1800,1110210,1800006,1110210_2.48_1800006,下り,1,2021,6,1,4,15.0,2656.0,2.48,0.8,9.4,100.0,2.0


In [13]:
train.columns

Index(['datetime', 'start_code', 'end_code', 'KP', 'OCC', 'allCars', 'speed',
       'is_congestion', 'year', 'month', 'day', 'hour', 'search_specified',
       'date', 'search_unspecified', 'road_code', 'direction', 'limit_speed',
       'start_KP', 'end_KP', 'start_pref_code', 'end_pref_code', 'start_lat',
       'end_lat', 'start_lng', 'end_lng', 'start_degree', 'end_degree',
       'dayofweek', 'section'],
      dtype='object')

In [14]:
le_dict = {}
for c in tqdm(cat_cols):
    le = LabelEncoder()
    train[c] = le.fit_transform(train[c])
    le_dict[c] = le

100%|██████████| 6/6 [00:01<00:00,  3.82it/s]


## Training

In [15]:
def train_lgbm(X, y, cv, model_path=[], params={}, verbose=100):

    models = []
    n_records = len(X)
    oof_pred = np.zeros((n_records), dtype=np.float32)

    def objective(trial):
        lgb_params = {
            "objective": "binary",
            "metric": "binary_logloss",
            "boosting_type": "rf",
            "verbosity": -1,
            "boost_from_average": False,
            "random_seed": 42,
            "feature_pre_filter": False,
            "max_depth": trial.suggest_int('max_depth', 4, 8),
            "num_leaves": trial.suggest_int("num_leaves", 2, 100),
            "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1),
            "feature_fraction": trial.suggest_float("feature_fraction", 0.1, 1.0),
            "bagging_fraction": trial.suggest_float("bagging_fraction", 0.1, 1.0),
            "min_child_samples": trial.suggest_int("min_child_samples", 1, 25),
            "min_data_in_leaf": trial.suggest_int('min_data_in_leaf', 1, 4),
            "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
            "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
            "min_split_gain": trial.suggest_float("min_split_gain", 1e-4, 1e-1, log=True),
            "bagging_freq": trial.suggest_int("bagging_freq", 1, 10),
            "bagging_seed": 42,
            "verbosity": -1,
        }


        model = lgb.LGBMClassifier(**lgb_params)

        model.fit(x_train, y_train,
                  eval_set=[(x_valid, y_valid)],
                  callbacks=[
                      lgb.early_stopping(stopping_rounds=50, verbose=True),
                      lgb.log_evaluation(100)
                  ]
                  )

        pred_y = model.predict_proba(x_valid)[:, 1]
        auc = roc_auc_score(y_valid, pred_y)

        return auc

    for i, (idx_train, idx_valid) in enumerate(cv):
        x_train, y_train = X[idx_train], y[idx_train]
        x_valid, y_valid = X[idx_valid], y[idx_valid]

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=10)

        best_params = study.best_params
        clf = lgb.LGBMClassifier(**best_params)

        clf.fit(x_train, y_train,
                eval_set=[(x_valid, y_valid)],
                callbacks=[
                    lgb.early_stopping(stopping_rounds=50, verbose=True),
                    lgb.log_evaluation(100)
                ]
                )

        pred_i = clf.predict_proba(x_valid)[:, 1]
        oof_pred[idx_valid] = pred_i
        models.append(clf)
        score = roc_auc_score(y_valid, pred_i)
        print(f" - fold{i + 1} - {score:.4f}")

    score = roc_auc_score(y, oof_pred)

    print("=" * 50)
    print(f"FINISH: CV Score: {score:.4f}")
    return score, oof_pred, models

In [16]:
N_SPLIT = 5
kf = StratifiedGroupKFold(N_SPLIT)
cv_list = list(kf.split(train, y=train[TARGET], groups=train['date']))

X = train[feature_cols].values
y = train[TARGET].values

print('train shape:', train.shape)

# training
lgb_score, lgb_oof_pred, lgb_models = train_lgbm(X, y=y, cv=cv_list)

train shape: (5429424, 30)


[I 2024-03-20 05:39:05,112] A new study created in memory with name: no-name-73424d81-db60-4e18-86a0-11d84e1b2ed1


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:39:08,994] Trial 0 finished with value: 0.9850188152480994 and parameters: {'max_depth': 5, 'num_leaves': 96, 'learning_rate': 0.04600508397026657, 'feature_fraction': 0.26600824398443706, 'bagging_fraction': 0.1511654842770972, 'min_child_samples': 25, 'min_data_in_leaf': 4, 'lambda_l1': 0.012025760088628401, 'lambda_l2': 4.286588091207017e-05, 'min_split_gain': 0.0006508859042694555, 'bagging_freq': 2}. Best is trial 0 with value: 0.9850188152480994.


Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.128938
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[26]	valid_0's binary_logloss: 0.133176


[I 2024-03-20 05:39:15,529] Trial 1 finished with value: 0.9831566097061428 and parameters: {'max_depth': 7, 'num_leaves': 46, 'learning_rate': 0.06754632161607779, 'feature_fraction': 0.2284878864146806, 'bagging_fraction': 0.7358189699781701, 'min_child_samples': 10, 'min_data_in_leaf': 1, 'lambda_l1': 0.038474344291531605, 'lambda_l2': 2.4305559521946854e-06, 'min_split_gain': 0.04193722424074639, 'bagging_freq': 3}. Best is trial 0 with value: 0.9850188152480994.


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:39:19,411] Trial 2 finished with value: 0.9765733807360025 and parameters: {'max_depth': 5, 'num_leaves': 24, 'learning_rate': 0.029683297729590424, 'feature_fraction': 0.30032765760993074, 'bagging_fraction': 0.46517405628004, 'min_child_samples': 21, 'min_data_in_leaf': 1, 'lambda_l1': 0.008549058194786095, 'lambda_l2': 0.014524984609202481, 'min_split_gain': 0.0010267460482076165, 'bagging_freq': 7}. Best is trial 0 with value: 0.9850188152480994.


Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.128908
Training until validation scores don't improve for 50 rounds
[100]	valid_0's binary_logloss: 0.128865
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.128865


[I 2024-03-20 05:39:29,410] Trial 3 finished with value: 0.9903411168857658 and parameters: {'max_depth': 6, 'num_leaves': 66, 'learning_rate': 0.0668009088057607, 'feature_fraction': 0.9721743290383759, 'bagging_fraction': 0.7879314007335999, 'min_child_samples': 24, 'min_data_in_leaf': 2, 'lambda_l1': 0.004503239597664615, 'lambda_l2': 0.2999416687833924, 'min_split_gain': 0.06889608909289538, 'bagging_freq': 6}. Best is trial 3 with value: 0.9903411168857658.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.130206


[I 2024-03-20 05:39:32,962] Trial 4 finished with value: 0.9745366485485873 and parameters: {'max_depth': 6, 'num_leaves': 40, 'learning_rate': 0.07338402854503519, 'feature_fraction': 0.5037044605350357, 'bagging_fraction': 0.39850903471261845, 'min_child_samples': 19, 'min_data_in_leaf': 1, 'lambda_l1': 9.24574794313825e-05, 'lambda_l2': 4.597533592729655, 'min_split_gain': 0.002583397747912006, 'bagging_freq': 8}. Best is trial 3 with value: 0.9903411168857658.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.130189


[I 2024-03-20 05:39:37,636] Trial 5 finished with value: 0.9808904871263978 and parameters: {'max_depth': 4, 'num_leaves': 75, 'learning_rate': 0.0979538494008963, 'feature_fraction': 0.3882074507089421, 'bagging_fraction': 0.8398869557812964, 'min_child_samples': 13, 'min_data_in_leaf': 4, 'lambda_l1': 0.013759344288281405, 'lambda_l2': 0.0019616795134124515, 'min_split_gain': 0.0006323942385822037, 'bagging_freq': 8}. Best is trial 3 with value: 0.9903411168857658.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[11]	valid_0's binary_logloss: 0.12895


[I 2024-03-20 05:39:43,606] Trial 6 finished with value: 0.9826705659877636 and parameters: {'max_depth': 4, 'num_leaves': 65, 'learning_rate': 0.03195660374794549, 'feature_fraction': 0.9392937972332555, 'bagging_fraction': 0.9002339539857596, 'min_child_samples': 17, 'min_data_in_leaf': 4, 'lambda_l1': 3.596670521871557e-06, 'lambda_l2': 0.6735524696941584, 'min_split_gain': 0.042597027613837775, 'bagging_freq': 2}. Best is trial 3 with value: 0.9903411168857658.


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:39:48,060] Trial 7 finished with value: 0.9810171275086252 and parameters: {'max_depth': 6, 'num_leaves': 27, 'learning_rate': 0.04913749150390846, 'feature_fraction': 0.40021336767654886, 'bagging_fraction': 0.6564335529439961, 'min_child_samples': 21, 'min_data_in_leaf': 3, 'lambda_l1': 0.08980124861250796, 'lambda_l2': 4.488754145041574e-08, 'min_split_gain': 0.000144199007313941, 'bagging_freq': 1}. Best is trial 3 with value: 0.9903411168857658.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.130125
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[47]	valid_0's binary_logloss: 0.133902


[I 2024-03-20 05:39:53,066] Trial 8 finished with value: 0.9779256399406923 and parameters: {'max_depth': 7, 'num_leaves': 24, 'learning_rate': 0.05809177648908203, 'feature_fraction': 0.12894162232909262, 'bagging_fraction': 0.6220024761847026, 'min_child_samples': 20, 'min_data_in_leaf': 1, 'lambda_l1': 2.8618994610240454, 'lambda_l2': 0.4045814364430871, 'min_split_gain': 0.00019188108509860828, 'bagging_freq': 1}. Best is trial 3 with value: 0.9903411168857658.


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:39:57,027] Trial 9 finished with value: 0.9852584680994426 and parameters: {'max_depth': 6, 'num_leaves': 60, 'learning_rate': 0.01681814460485387, 'feature_fraction': 0.7582649423533457, 'bagging_fraction': 0.28610818111618724, 'min_child_samples': 9, 'min_data_in_leaf': 4, 'lambda_l1': 1.888152010675401e-07, 'lambda_l2': 0.20193940955265094, 'min_split_gain': 0.0001882516014355837, 'bagging_freq': 9}. Best is trial 3 with value: 0.9903411168857658.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.128925
[LightGBM] [Info] Number of positive: 15976, number of negative: 4322072
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.070676 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 4338048, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003683 -> initscore=-5.600403
[LightGBM] [Info] Start training from score -5.600403
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.00965335
 - fold1 - 0.9892


[I 2024-03-20 05:40:03,368] A new study created in memory with name: no-name-6d082048-961a-4a78-91ea-7c488a43b4b1


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.128923


[I 2024-03-20 05:40:08,815] Trial 0 finished with value: 0.9852511090625745 and parameters: {'max_depth': 5, 'num_leaves': 77, 'learning_rate': 0.05878020958881207, 'feature_fraction': 0.8747843636303151, 'bagging_fraction': 0.9910105744084203, 'min_child_samples': 11, 'min_data_in_leaf': 4, 'lambda_l1': 2.5221306947340774e-07, 'lambda_l2': 0.6118936095801454, 'min_split_gain': 0.002489869807655913, 'bagging_freq': 9}. Best is trial 0 with value: 0.9852511090625745.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[26]	valid_0's binary_logloss: 0.133219


[I 2024-03-20 05:40:12,213] Trial 1 finished with value: 0.9772393321618044 and parameters: {'max_depth': 4, 'num_leaves': 56, 'learning_rate': 0.04043208250925138, 'feature_fraction': 0.2429826572537338, 'bagging_fraction': 0.1278056398782444, 'min_child_samples': 7, 'min_data_in_leaf': 1, 'lambda_l1': 0.8535210283492536, 'lambda_l2': 2.0965914697183553e-06, 'min_split_gain': 0.004571036114657694, 'bagging_freq': 6}. Best is trial 0 with value: 0.9852511090625745.


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:40:16,152] Trial 2 finished with value: 0.9835324308677782 and parameters: {'max_depth': 5, 'num_leaves': 81, 'learning_rate': 0.03312828002245941, 'feature_fraction': 0.6369072860071773, 'bagging_fraction': 0.27145669713951, 'min_child_samples': 11, 'min_data_in_leaf': 2, 'lambda_l1': 0.03782922527358656, 'lambda_l2': 5.877535151705891e-05, 'min_split_gain': 0.0003852429338154961, 'bagging_freq': 4}. Best is trial 0 with value: 0.9852511090625745.


Early stopping, best iteration is:
[2]	valid_0's binary_logloss: 0.128925
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.128923


[I 2024-03-20 05:40:20,078] Trial 3 finished with value: 0.9835292204176306 and parameters: {'max_depth': 6, 'num_leaves': 34, 'learning_rate': 0.029674589782641144, 'feature_fraction': 0.37557002654351024, 'bagging_fraction': 0.2836637080264471, 'min_child_samples': 2, 'min_data_in_leaf': 4, 'lambda_l1': 0.29594872500177677, 'lambda_l2': 0.00037906356689000153, 'min_split_gain': 0.0020490432234271734, 'bagging_freq': 1}. Best is trial 0 with value: 0.9852511090625745.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[47]	valid_0's binary_logloss: 0.133944


[I 2024-03-20 05:40:25,731] Trial 4 finished with value: 0.9789452062227038 and parameters: {'max_depth': 7, 'num_leaves': 92, 'learning_rate': 0.03959287073322788, 'feature_fraction': 0.14391024010403136, 'bagging_fraction': 0.6192964471458448, 'min_child_samples': 20, 'min_data_in_leaf': 3, 'lambda_l1': 1.311138708561964, 'lambda_l2': 1.922196575127191e-05, 'min_split_gain': 0.03924598309366541, 'bagging_freq': 6}. Best is trial 0 with value: 0.9852511090625745.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[26]	valid_0's binary_logloss: 0.133043


[I 2024-03-20 05:40:31,046] Trial 5 finished with value: 0.979890086323477 and parameters: {'max_depth': 8, 'num_leaves': 64, 'learning_rate': 0.09185351862416916, 'feature_fraction': 0.24272688114827803, 'bagging_fraction': 0.35736450291770017, 'min_child_samples': 19, 'min_data_in_leaf': 1, 'lambda_l1': 0.07389985663306384, 'lambda_l2': 0.013457978772995597, 'min_split_gain': 0.01754821187616311, 'bagging_freq': 9}. Best is trial 0 with value: 0.9852511090625745.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[26]	valid_0's binary_logloss: 0.133198


[I 2024-03-20 05:40:35,078] Trial 6 finished with value: 0.9767896304313525 and parameters: {'max_depth': 4, 'num_leaves': 96, 'learning_rate': 0.08565502170296614, 'feature_fraction': 0.22218957060368805, 'bagging_fraction': 0.5258593872956169, 'min_child_samples': 21, 'min_data_in_leaf': 2, 'lambda_l1': 7.441708478203351e-05, 'lambda_l2': 3.9761818344157747, 'min_split_gain': 0.006073861953868727, 'bagging_freq': 9}. Best is trial 0 with value: 0.9852511090625745.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[20]	valid_0's binary_logloss: 0.128849


[I 2024-03-20 05:40:40,944] Trial 7 finished with value: 0.9818155011701418 and parameters: {'max_depth': 4, 'num_leaves': 51, 'learning_rate': 0.018825777427194188, 'feature_fraction': 0.9779249808594653, 'bagging_fraction': 0.8869637515497494, 'min_child_samples': 6, 'min_data_in_leaf': 1, 'lambda_l1': 0.0007827687651488105, 'lambda_l2': 1.3925081344429534e-06, 'min_split_gain': 0.0011582902987542114, 'bagging_freq': 10}. Best is trial 0 with value: 0.9852511090625745.


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:40:45,790] Trial 8 finished with value: 0.9852807658066776 and parameters: {'max_depth': 5, 'num_leaves': 30, 'learning_rate': 0.05417829813079787, 'feature_fraction': 0.6904564038291795, 'bagging_fraction': 0.631991721855314, 'min_child_samples': 23, 'min_data_in_leaf': 4, 'lambda_l1': 0.0031479968195406226, 'lambda_l2': 1.682632176291287e-06, 'min_split_gain': 0.08314442791123393, 'bagging_freq': 5}. Best is trial 8 with value: 0.9852807658066776.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.128815
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.130109


[I 2024-03-20 05:40:51,133] Trial 9 finished with value: 0.9774416610017288 and parameters: {'max_depth': 8, 'num_leaves': 21, 'learning_rate': 0.056818732931108826, 'feature_fraction': 0.6075936539855981, 'bagging_fraction': 0.9915664925770354, 'min_child_samples': 4, 'min_data_in_leaf': 1, 'lambda_l1': 0.4832915288359298, 'lambda_l2': 7.235695019581464e-08, 'min_split_gain': 0.011306289103237869, 'bagging_freq': 10}. Best is trial 8 with value: 0.9852807658066776.


[LightGBM] [Info] Number of positive: 16177, number of negative: 4328735
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.068657 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1531
[LightGBM] [Info] Number of data points in the train set: 4344912, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003723 -> initscore=-5.589440
[LightGBM] [Info] Start training from score -5.589440
Training until validation scores don't improve for 50 rounds
[100]	valid_0's binary_logloss: 0.0034184
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.0034184
 - fold2 - 0.9970


[I 2024-03-20 05:41:01,545] A new study created in memory with name: no-name-9347366f-dbd1-4067-9fcb-218229c3b393


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:41:06,103] Trial 0 finished with value: 0.9854218326843767 and parameters: {'max_depth': 7, 'num_leaves': 86, 'learning_rate': 0.0817346637066886, 'feature_fraction': 0.8535673566209931, 'bagging_fraction': 0.30721471066048195, 'min_child_samples': 21, 'min_data_in_leaf': 1, 'lambda_l1': 7.719490447860812e-07, 'lambda_l2': 4.5920502699539315e-07, 'min_split_gain': 0.01480811940202855, 'bagging_freq': 1}. Best is trial 0 with value: 0.9854218326843767.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.128625
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:41:10,258] Trial 1 finished with value: 0.969311508460736 and parameters: {'max_depth': 5, 'num_leaves': 8, 'learning_rate': 0.08227353980640674, 'feature_fraction': 0.773819603853611, 'bagging_fraction': 0.5737738699052018, 'min_child_samples': 25, 'min_data_in_leaf': 1, 'lambda_l1': 3.112281412762012e-07, 'lambda_l2': 1.575712564060986, 'min_split_gain': 0.0004145279146212854, 'bagging_freq': 2}. Best is trial 0 with value: 0.9854218326843767.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.128853
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[35]	valid_0's binary_logloss: 0.133356


[I 2024-03-20 05:41:15,646] Trial 2 finished with value: 0.9771165844720635 and parameters: {'max_depth': 7, 'num_leaves': 94, 'learning_rate': 0.06291683577507445, 'feature_fraction': 0.18699655597370227, 'bagging_fraction': 0.8237656511968903, 'min_child_samples': 12, 'min_data_in_leaf': 3, 'lambda_l1': 0.004837607576131123, 'lambda_l2': 2.527034816330811, 'min_split_gain': 0.022761358400149458, 'bagging_freq': 10}. Best is trial 0 with value: 0.9854218326843767.


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:41:18,986] Trial 3 finished with value: 0.9844313831912851 and parameters: {'max_depth': 5, 'num_leaves': 17, 'learning_rate': 0.05213963301564274, 'feature_fraction': 0.7122879361164854, 'bagging_fraction': 0.2757363800563751, 'min_child_samples': 12, 'min_data_in_leaf': 3, 'lambda_l1': 6.776498944626922e-05, 'lambda_l2': 0.42703073059053415, 'min_split_gain': 0.009563290340714585, 'bagging_freq': 8}. Best is trial 0 with value: 0.9854218326843767.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.128831
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:41:23,025] Trial 4 finished with value: 0.9840208140374054 and parameters: {'max_depth': 4, 'num_leaves': 93, 'learning_rate': 0.01398013660653421, 'feature_fraction': 0.7754531172007164, 'bagging_fraction': 0.8555568910218883, 'min_child_samples': 10, 'min_data_in_leaf': 4, 'lambda_l1': 6.332590666509822e-05, 'lambda_l2': 2.538968738855862e-08, 'min_split_gain': 0.0012802994549751098, 'bagging_freq': 1}. Best is trial 0 with value: 0.9854218326843767.


Early stopping, best iteration is:
[2]	valid_0's binary_logloss: 0.128744
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:41:26,932] Trial 5 finished with value: 0.9760027849664724 and parameters: {'max_depth': 4, 'num_leaves': 76, 'learning_rate': 0.0826774512740098, 'feature_fraction': 0.4040353675919893, 'bagging_fraction': 0.42758537689317605, 'min_child_samples': 21, 'min_data_in_leaf': 4, 'lambda_l1': 0.006502225474174752, 'lambda_l2': 4.864176249427763, 'min_split_gain': 0.00031758739393781513, 'bagging_freq': 1}. Best is trial 0 with value: 0.9854218326843767.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.130154
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:41:31,029] Trial 6 finished with value: 0.9771573997759228 and parameters: {'max_depth': 8, 'num_leaves': 27, 'learning_rate': 0.0174541831739854, 'feature_fraction': 0.8313669689030461, 'bagging_fraction': 0.4236189777305203, 'min_child_samples': 15, 'min_data_in_leaf': 1, 'lambda_l1': 0.0023297999164704724, 'lambda_l2': 2.871333806734671, 'min_split_gain': 0.0003395140624926228, 'bagging_freq': 2}. Best is trial 0 with value: 0.9854218326843767.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.128802
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:41:34,346] Trial 7 finished with value: 0.9704631951019685 and parameters: {'max_depth': 8, 'num_leaves': 77, 'learning_rate': 0.012202899312167812, 'feature_fraction': 0.8641802307276373, 'bagging_fraction': 0.11338271694892738, 'min_child_samples': 20, 'min_data_in_leaf': 3, 'lambda_l1': 0.004460848632695317, 'lambda_l2': 3.5484815941576544, 'min_split_gain': 0.01419628914386047, 'bagging_freq': 1}. Best is trial 0 with value: 0.9854218326843767.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.128887
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:41:38,386] Trial 8 finished with value: 0.9840555255842967 and parameters: {'max_depth': 4, 'num_leaves': 43, 'learning_rate': 0.04178244374236702, 'feature_fraction': 0.8263495780895942, 'bagging_fraction': 0.49762969163672793, 'min_child_samples': 3, 'min_data_in_leaf': 2, 'lambda_l1': 0.00954367491988547, 'lambda_l2': 0.008658360344409186, 'min_split_gain': 0.012862204289113178, 'bagging_freq': 7}. Best is trial 0 with value: 0.9854218326843767.


Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.128749
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:41:42,419] Trial 9 finished with value: 0.976015435924938 and parameters: {'max_depth': 8, 'num_leaves': 44, 'learning_rate': 0.022634513996213367, 'feature_fraction': 0.6261051642975975, 'bagging_fraction': 0.5456289789845342, 'min_child_samples': 3, 'min_data_in_leaf': 1, 'lambda_l1': 2.6668903612563137, 'lambda_l2': 0.014031891929626276, 'min_split_gain': 0.04890857662364207, 'bagging_freq': 10}. Best is trial 0 with value: 0.9854218326843767.


Early stopping, best iteration is:
[2]	valid_0's binary_logloss: 0.128757
[LightGBM] [Info] Number of positive: 16311, number of negative: 4328601
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067743 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1532
[LightGBM] [Info] Number of data points in the train set: 4344912, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003754 -> initscore=-5.581160
[LightGBM] [Info] Start training from score -5.581160
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.0108482
 - fold3 - 0.9835


[I 2024-03-20 05:41:48,388] A new study created in memory with name: no-name-540f3cee-37a2-4a8a-ab60-35351b0d13a5


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[35]	valid_0's binary_logloss: 0.13381


[I 2024-03-20 05:41:53,259] Trial 0 finished with value: 0.967394144108714 and parameters: {'max_depth': 5, 'num_leaves': 4, 'learning_rate': 0.0517850894805237, 'feature_fraction': 0.19753424685924822, 'bagging_fraction': 0.9735055411407932, 'min_child_samples': 10, 'min_data_in_leaf': 2, 'lambda_l1': 0.4762277196807044, 'lambda_l2': 2.7410187153672396, 'min_split_gain': 0.08114010355039201, 'bagging_freq': 9}. Best is trial 0 with value: 0.967394144108714.


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:41:57,944] Trial 1 finished with value: 0.9741669209148462 and parameters: {'max_depth': 8, 'num_leaves': 43, 'learning_rate': 0.020585079413942938, 'feature_fraction': 0.49591454838142185, 'bagging_fraction': 0.6743624278332951, 'min_child_samples': 8, 'min_data_in_leaf': 3, 'lambda_l1': 6.370612468136081e-06, 'lambda_l2': 0.019553942328708895, 'min_split_gain': 0.0009780195695377834, 'bagging_freq': 9}. Best is trial 1 with value: 0.9741669209148462.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.130118
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[47]	valid_0's binary_logloss: 0.134093


[I 2024-03-20 05:42:02,895] Trial 2 finished with value: 0.9783409363339441 and parameters: {'max_depth': 5, 'num_leaves': 16, 'learning_rate': 0.04048238454050914, 'feature_fraction': 0.13905529009320242, 'bagging_fraction': 0.14860192902041106, 'min_child_samples': 15, 'min_data_in_leaf': 4, 'lambda_l1': 3.837156816913534e-08, 'lambda_l2': 4.7640487468544064e-05, 'min_split_gain': 0.0001822508440588827, 'bagging_freq': 4}. Best is trial 2 with value: 0.9783409363339441.


Training until validation scores don't improve for 50 rounds
[100]	valid_0's binary_logloss: 0.128797
Early stopping, best iteration is:
[50]	valid_0's binary_logloss: 0.128792


[I 2024-03-20 05:42:11,484] Trial 3 finished with value: 0.9861035218761939 and parameters: {'max_depth': 7, 'num_leaves': 89, 'learning_rate': 0.034413597976852006, 'feature_fraction': 0.9807319699658906, 'bagging_fraction': 0.774737849523241, 'min_child_samples': 7, 'min_data_in_leaf': 3, 'lambda_l1': 1.6263125723424326e-08, 'lambda_l2': 6.402644331767288e-05, 'min_split_gain': 0.0002991343555479293, 'bagging_freq': 5}. Best is trial 3 with value: 0.9861035218761939.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.130099


[I 2024-03-20 05:42:16,975] Trial 4 finished with value: 0.9807797602236187 and parameters: {'max_depth': 7, 'num_leaves': 64, 'learning_rate': 0.004935814436507654, 'feature_fraction': 0.4817646319810334, 'bagging_fraction': 0.6635981349091956, 'min_child_samples': 23, 'min_data_in_leaf': 4, 'lambda_l1': 2.995389601750742e-06, 'lambda_l2': 0.03232153591628277, 'min_split_gain': 0.0010690781928359102, 'bagging_freq': 2}. Best is trial 3 with value: 0.9861035218761939.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.129063


[I 2024-03-20 05:42:21,122] Trial 5 finished with value: 0.9639395155363163 and parameters: {'max_depth': 6, 'num_leaves': 8, 'learning_rate': 0.07380694444551229, 'feature_fraction': 0.7559906310439949, 'bagging_fraction': 0.6987049312253792, 'min_child_samples': 24, 'min_data_in_leaf': 1, 'lambda_l1': 1.0315958314282467, 'lambda_l2': 4.155712604648093e-05, 'min_split_gain': 0.018205332360120677, 'bagging_freq': 2}. Best is trial 3 with value: 0.9861035218761939.


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:42:24,723] Trial 6 finished with value: 0.9675907024325602 and parameters: {'max_depth': 6, 'num_leaves': 4, 'learning_rate': 0.07141403178190746, 'feature_fraction': 0.9532095812680926, 'bagging_fraction': 0.5118659055468018, 'min_child_samples': 9, 'min_data_in_leaf': 1, 'lambda_l1': 2.1273077135839604, 'lambda_l2': 0.0029402464365493913, 'min_split_gain': 0.0006966015146465477, 'bagging_freq': 8}. Best is trial 3 with value: 0.9861035218761939.


Early stopping, best iteration is:
[11]	valid_0's binary_logloss: 0.129207
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:42:29,307] Trial 7 finished with value: 0.9742927738055881 and parameters: {'max_depth': 7, 'num_leaves': 27, 'learning_rate': 0.09101174890244437, 'feature_fraction': 0.3720372074974685, 'bagging_fraction': 0.8849843082908866, 'min_child_samples': 17, 'min_data_in_leaf': 3, 'lambda_l1': 0.003010932680333237, 'lambda_l2': 9.894644323336319e-06, 'min_split_gain': 0.0003280198531086194, 'bagging_freq': 8}. Best is trial 3 with value: 0.9861035218761939.


Early stopping, best iteration is:
[2]	valid_0's binary_logloss: 0.128905
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.130298


[I 2024-03-20 05:42:34,440] Trial 8 finished with value: 0.9793423162173157 and parameters: {'max_depth': 4, 'num_leaves': 63, 'learning_rate': 0.060936219674752774, 'feature_fraction': 0.569033663894265, 'bagging_fraction': 0.703694189090676, 'min_child_samples': 23, 'min_data_in_leaf': 1, 'lambda_l1': 0.2427183146112743, 'lambda_l2': 0.000210592290306184, 'min_split_gain': 0.06939371814735258, 'bagging_freq': 2}. Best is trial 3 with value: 0.9861035218761939.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[35]	valid_0's binary_logloss: 0.133712


[I 2024-03-20 05:42:38,813] Trial 9 finished with value: 0.9814432945178775 and parameters: {'max_depth': 5, 'num_leaves': 71, 'learning_rate': 0.020136356104536988, 'feature_fraction': 0.15664572232754478, 'bagging_fraction': 0.10601639788379856, 'min_child_samples': 5, 'min_data_in_leaf': 1, 'lambda_l1': 0.012330895166422984, 'lambda_l2': 0.2107982534828656, 'min_split_gain': 0.0008375883043988781, 'bagging_freq': 1}. Best is trial 3 with value: 0.9861035218761939.


[LightGBM] [Info] Number of positive: 16037, number of negative: 4328875
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066744 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 4344912, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003691 -> initscore=-5.598164
[LightGBM] [Info] Start training from score -5.598164
Training until validation scores don't improve for 50 rounds
[100]	valid_0's binary_logloss: 0.00281273
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.00281273
 - fold4 - 0.9988


[I 2024-03-20 05:42:49,714] A new study created in memory with name: no-name-2df52dd7-7d95-4e00-8388-b6fa35aec287


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:42:54,238] Trial 0 finished with value: 0.9704184564118846 and parameters: {'max_depth': 8, 'num_leaves': 86, 'learning_rate': 0.02705796714787235, 'feature_fraction': 0.6455943325015605, 'bagging_fraction': 0.6538539440672636, 'min_child_samples': 14, 'min_data_in_leaf': 4, 'lambda_l1': 1.9332225667343186, 'lambda_l2': 1.198382810378367e-06, 'min_split_gain': 0.00012536059060031549, 'bagging_freq': 9}. Best is trial 0 with value: 0.9704184564118846.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.12895
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:42:59,025] Trial 1 finished with value: 0.9723059112416114 and parameters: {'max_depth': 5, 'num_leaves': 18, 'learning_rate': 0.003461540918859745, 'feature_fraction': 0.7410943109756366, 'bagging_fraction': 0.4442678532727629, 'min_child_samples': 5, 'min_data_in_leaf': 4, 'lambda_l1': 3.4018921808910114e-07, 'lambda_l2': 2.361184607917412e-07, 'min_split_gain': 0.000273691006237045, 'bagging_freq': 1}. Best is trial 1 with value: 0.9723059112416114.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.128915
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.1289


[I 2024-03-20 05:43:05,308] Trial 2 finished with value: 0.9806011120450525 and parameters: {'max_depth': 5, 'num_leaves': 69, 'learning_rate': 0.047207832535270715, 'feature_fraction': 0.6920299671180382, 'bagging_fraction': 0.9898709682681982, 'min_child_samples': 21, 'min_data_in_leaf': 3, 'lambda_l1': 1.313337203202679e-08, 'lambda_l2': 0.001199132359370741, 'min_split_gain': 0.0015441096464804327, 'bagging_freq': 3}. Best is trial 2 with value: 0.9806011120450525.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[11]	valid_0's binary_logloss: 0.128784


[I 2024-03-20 05:43:10,096] Trial 3 finished with value: 0.9861656319271503 and parameters: {'max_depth': 6, 'num_leaves': 65, 'learning_rate': 0.05465138587820461, 'feature_fraction': 0.9392666622259336, 'bagging_fraction': 0.25173416333674, 'min_child_samples': 8, 'min_data_in_leaf': 4, 'lambda_l1': 1.8059402002920946e-07, 'lambda_l2': 0.0002767524467166091, 'min_split_gain': 0.007614241374595518, 'bagging_freq': 4}. Best is trial 3 with value: 0.9861656319271503.


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:43:13,355] Trial 4 finished with value: 0.9770735326865363 and parameters: {'max_depth': 4, 'num_leaves': 89, 'learning_rate': 0.06845230395805685, 'feature_fraction': 0.5154463790267569, 'bagging_fraction': 0.2172050195881215, 'min_child_samples': 4, 'min_data_in_leaf': 1, 'lambda_l1': 8.450127385478773e-06, 'lambda_l2': 0.4904575360780777, 'min_split_gain': 0.005028449352573066, 'bagging_freq': 9}. Best is trial 3 with value: 0.9861656319271503.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.130256
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:43:16,634] Trial 5 finished with value: 0.9703199052581146 and parameters: {'max_depth': 8, 'num_leaves': 44, 'learning_rate': 0.04616237140396398, 'feature_fraction': 0.5744635925600599, 'bagging_fraction': 0.16263501258002058, 'min_child_samples': 1, 'min_data_in_leaf': 4, 'lambda_l1': 0.0009705633330306041, 'lambda_l2': 5.998500594289373e-07, 'min_split_gain': 0.005644722632832368, 'bagging_freq': 4}. Best is trial 3 with value: 0.9861656319271503.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.130063
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[11]	valid_0's binary_logloss: 0.129047


[I 2024-03-20 05:43:21,585] Trial 6 finished with value: 0.9782069097394983 and parameters: {'max_depth': 4, 'num_leaves': 15, 'learning_rate': 0.029566835814131557, 'feature_fraction': 0.9648798187962058, 'bagging_fraction': 0.7286826289078313, 'min_child_samples': 24, 'min_data_in_leaf': 4, 'lambda_l1': 0.06318483585455757, 'lambda_l2': 0.5775857562532368, 'min_split_gain': 0.018571256441806384, 'bagging_freq': 7}. Best is trial 3 with value: 0.9861656319271503.


Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:43:27,228] Trial 7 finished with value: 0.9806183958220257 and parameters: {'max_depth': 6, 'num_leaves': 59, 'learning_rate': 0.040010273184827, 'feature_fraction': 0.8003071540512136, 'bagging_fraction': 0.8468859148402039, 'min_child_samples': 18, 'min_data_in_leaf': 3, 'lambda_l1': 2.3182835577167392e-06, 'lambda_l2': 9.802229363308487e-07, 'min_split_gain': 0.017010778182255108, 'bagging_freq': 5}. Best is trial 3 with value: 0.9861656319271503.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.128837
Training until validation scores don't improve for 50 rounds


[I 2024-03-20 05:43:32,019] Trial 8 finished with value: 0.9765446971736508 and parameters: {'max_depth': 7, 'num_leaves': 65, 'learning_rate': 0.08788056211108848, 'feature_fraction': 0.7264600910430782, 'bagging_fraction': 0.5759790229277664, 'min_child_samples': 22, 'min_data_in_leaf': 4, 'lambda_l1': 0.00047537800634342577, 'lambda_l2': 1.5467102660566567e-08, 'min_split_gain': 0.0019344255461316556, 'bagging_freq': 4}. Best is trial 3 with value: 0.9861656319271503.


Early stopping, best iteration is:
[3]	valid_0's binary_logloss: 0.12882
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[11]	valid_0's binary_logloss: 0.128814


[I 2024-03-20 05:43:36,998] Trial 9 finished with value: 0.9710756124562612 and parameters: {'max_depth': 7, 'num_leaves': 51, 'learning_rate': 0.03535917566943197, 'feature_fraction': 0.9561999016312624, 'bagging_fraction': 0.7272600792478007, 'min_child_samples': 13, 'min_data_in_leaf': 3, 'lambda_l1': 0.0005875263401978008, 'lambda_l2': 5.4149808710628045e-05, 'min_split_gain': 0.0018382408211251647, 'bagging_freq': 4}. Best is trial 3 with value: 0.9861656319271503.


[LightGBM] [Info] Number of positive: 16227, number of negative: 4328685
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067517 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1533
[LightGBM] [Info] Number of data points in the train set: 4344912, number of used features: 17
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003735 -> initscore=-5.586343
[LightGBM] [Info] Start training from score -5.586343
Training until validation scores don't improve for 50 rounds
[100]	valid_0's binary_logloss: 0.0129304
Did not meet early stopping. Best iteration is:
[81]	valid_0's binary_logloss: 0.00815623
 - fold5 - 0.9735
FINISH: CV Score: 0.9881


In [17]:
def train_catboost(X, y, cv, model_path=[], params={}, verbose=100):

    models = []
    n_records = len(X)
    oof_pred = np.zeros((n_records), dtype=np.float32)

    def objective(trial):
        cb_params = {
            "iterations": trial.suggest_int('iterations', 2, 6),
            "objective": "Logloss",
            "eval_metric": "Logloss",
            "boosting_type": "Plain",
            "boost_from_average": False,
            "random_seed": 42,
            "max_depth": trial.suggest_int('max_depth', 4, 8),
            "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 100.0, log=True),
            "subsample": trial.suggest_float("subsample", 0.1, 1.0),
            "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.1, 1.0),
            "grow_policy": "Lossguide",
        }


        model = CatBoost(cb_params)

        model.fit(c_train)

        pred_y = model.predict(c_valid)
        auc = roc_auc_score(y_valid, pred_y)

        return auc

    for i, (idx_train, idx_valid) in enumerate(cv):
        x_train, y_train = X[idx_train], y[idx_train]
        x_valid, y_valid = X[idx_valid], y[idx_valid]
        c_train = Pool(x_train, label=y_train)
        c_valid = Pool(x_valid, label=y_valid)
        
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=10)

        best_params = study.best_params
        model = CatBoostClassifier(**best_params)

        model.fit(c_train)

        c
        oof_pred[idx_valid] = pred_y
        models.append(model)
        score = roc_auc_score(y_valid, pred_y)
        print(f" - fold{i + 1} - {score:.4f}")

    score = roc_auc_score(y, oof_pred)

    print("=" * 50)
    print(f"FINISH: CV Score: {score:.4f}")
    return score, oof_pred, models

In [18]:
cb_score, cb_oof_pred, cb_models = train_catboost(X, y=y, cv=cv_list)

[I 2024-03-20 05:44:02,791] A new study created in memory with name: no-name-6eb59d0d-2ab8-4776-a0df-4229d99931d8


0:	learn: 0.6580338	total: 179ms	remaining: 358ms
1:	learn: 0.6250497	total: 296ms	remaining: 148ms
2:	learn: 0.5946475	total: 330ms	remaining: 0us


[I 2024-03-20 05:44:03,932] Trial 0 finished with value: 0.966357770072902 and parameters: {'iterations': 3, 'max_depth': 6, 'learning_rate': 0.03622715393576466, 'reg_lambda': 4.845627015197551e-06, 'subsample': 0.569992941751225, 'colsample_bylevel': 0.13615863016378052}. Best is trial 0 with value: 0.966357770072902.


0:	learn: 0.6880220	total: 168ms	remaining: 335ms
1:	learn: 0.6829514	total: 282ms	remaining: 141ms


[I 2024-03-20 05:44:04,978] Trial 1 finished with value: 0.9694402942134253 and parameters: {'iterations': 3, 'max_depth': 6, 'learning_rate': 0.005145151282299943, 'reg_lambda': 4.573850429723252e-08, 'subsample': 0.1537637127281278, 'colsample_bylevel': 0.7988603655483268}. Best is trial 1 with value: 0.9694402942134253.


2:	learn: 0.6779345	total: 389ms	remaining: 0us
0:	learn: 0.6225574	total: 360ms	remaining: 360ms


[I 2024-03-20 05:44:06,097] Trial 2 finished with value: 0.9616880408976787 and parameters: {'iterations': 2, 'max_depth': 6, 'learning_rate': 0.07345024568800422, 'reg_lambda': 9.939621209062241e-08, 'subsample': 0.949558793940755, 'colsample_bylevel': 0.8434006670583232}. Best is trial 1 with value: 0.9694402942134253.


1:	learn: 0.5617023	total: 623ms	remaining: 0us
0:	learn: 0.6811327	total: 58.6ms	remaining: 235ms
1:	learn: 0.6692863	total: 154ms	remaining: 231ms
2:	learn: 0.6577192	total: 355ms	remaining: 237ms
3:	learn: 0.6464324	total: 559ms	remaining: 140ms


[I 2024-03-20 05:44:07,318] Trial 3 finished with value: 0.9754053036556136 and parameters: {'iterations': 5, 'max_depth': 4, 'learning_rate': 0.012242737809631093, 'reg_lambda': 5.8018156634056426e-08, 'subsample': 0.9460369793600333, 'colsample_bylevel': 0.2912089974895725}. Best is trial 3 with value: 0.9754053036556136.


4:	learn: 0.6354104	total: 762ms	remaining: 0us
0:	learn: 0.6604224	total: 91.9ms	remaining: 184ms
1:	learn: 0.6298330	total: 184ms	remaining: 92ms
2:	learn: 0.6011877	total: 274ms	remaining: 0us


[I 2024-03-20 05:44:08,009] Trial 4 finished with value: 0.9583313258483834 and parameters: {'iterations': 3, 'max_depth': 8, 'learning_rate': 0.033334308574016765, 'reg_lambda': 4.2576228955320634e-08, 'subsample': 0.4431093771906668, 'colsample_bylevel': 0.8905456183360395}. Best is trial 3 with value: 0.9754053036556136.


0:	learn: 0.6169103	total: 44.2ms	remaining: 88.3ms
1:	learn: 0.5512938	total: 110ms	remaining: 54.8ms


[I 2024-03-20 05:44:08,689] Trial 5 finished with value: 0.96628147241392 and parameters: {'iterations': 3, 'max_depth': 4, 'learning_rate': 0.08051130146335858, 'reg_lambda': 0.0006186782652117521, 'subsample': 0.5004378728925183, 'colsample_bylevel': 0.31773938197011053}. Best is trial 3 with value: 0.9754053036556136.


2:	learn: 0.4947331	total: 242ms	remaining: 0us
0:	learn: 0.6424030	total: 192ms	remaining: 961ms
1:	learn: 0.5967573	total: 374ms	remaining: 748ms
2:	learn: 0.5554234	total: 561ms	remaining: 561ms
3:	learn: 0.5178515	total: 752ms	remaining: 376ms
4:	learn: 0.4835343	total: 941ms	remaining: 188ms


[I 2024-03-20 05:44:10,294] Trial 6 finished with value: 0.985889447988592 and parameters: {'iterations': 6, 'max_depth': 4, 'learning_rate': 0.05221425171828699, 'reg_lambda': 1.9432361960037734, 'subsample': 0.5183614219298069, 'colsample_bylevel': 0.8511629421888155}. Best is trial 6 with value: 0.985889447988592.


5:	learn: 0.4521226	total: 1.14s	remaining: 0us
0:	learn: 0.6608119	total: 222ms	remaining: 222ms


[I 2024-03-20 05:44:11,192] Trial 7 finished with value: 0.9862858121635905 and parameters: {'iterations': 2, 'max_depth': 5, 'learning_rate': 0.03292636073281975, 'reg_lambda': 0.4160898902178385, 'subsample': 0.450024379192106, 'colsample_bylevel': 0.9558765528968268}. Best is trial 7 with value: 0.9862858121635905.


1:	learn: 0.6305870	total: 438ms	remaining: 0us
0:	learn: 0.6545968	total: 51.7ms	remaining: 155ms
1:	learn: 0.6186025	total: 224ms	remaining: 224ms
2:	learn: 0.5852881	total: 392ms	remaining: 131ms
3:	learn: 0.5546251	total: 578ms	remaining: 0us


[I 2024-03-20 05:44:12,219] Trial 8 finished with value: 0.9714049286160047 and parameters: {'iterations': 4, 'max_depth': 4, 'learning_rate': 0.03985113430073847, 'reg_lambda': 0.015749574624655975, 'subsample': 0.7874457386982546, 'colsample_bylevel': 0.26080595275755336}. Best is trial 7 with value: 0.9862858121635905.


0:	learn: 0.6182265	total: 38.9ms	remaining: 194ms
1:	learn: 0.5535746	total: 88.6ms	remaining: 177ms
2:	learn: 0.4983733	total: 192ms	remaining: 192ms
3:	learn: 0.4495934	total: 295ms	remaining: 148ms
4:	learn: 0.4067779	total: 390ms	remaining: 78ms
5:	learn: 0.3693399	total: 492ms	remaining: 0us


[I 2024-03-20 05:44:13,169] Trial 9 finished with value: 0.9744040423527868 and parameters: {'iterations': 6, 'max_depth': 4, 'learning_rate': 0.0790648425779014, 'reg_lambda': 0.01547148991256482, 'subsample': 0.26246931548999775, 'colsample_bylevel': 0.17753601009028597}. Best is trial 7 with value: 0.9862858121635905.


0:	learn: 0.6608469	total: 111ms	remaining: 111ms
1:	learn: 0.6306251	total: 207ms	remaining: 0us
 - fold1 - 0.9150


[I 2024-03-20 05:44:31,749] A new study created in memory with name: no-name-834c37cb-14db-4e01-93f2-5088f5843b72


0:	learn: 0.6752138	total: 335ms	remaining: 1s
1:	learn: 0.6579342	total: 637ms	remaining: 637ms
2:	learn: 0.6412716	total: 947ms	remaining: 316ms


[I 2024-03-20 05:44:33,483] Trial 0 finished with value: 0.977577946955999 and parameters: {'iterations': 4, 'max_depth': 5, 'learning_rate': 0.018123047361472477, 'reg_lambda': 0.00010924532292497749, 'subsample': 0.8311014176823753, 'colsample_bylevel': 0.7796834957915723}. Best is trial 0 with value: 0.977577946955999.


3:	learn: 0.6251790	total: 1.25s	remaining: 0us
0:	learn: 0.6509351	total: 182ms	remaining: 730ms
1:	learn: 0.6123611	total: 367ms	remaining: 551ms
2:	learn: 0.5767589	total: 496ms	remaining: 331ms
3:	learn: 0.5439214	total: 670ms	remaining: 168ms


[I 2024-03-20 05:44:34,784] Trial 1 finished with value: 0.9549834676895629 and parameters: {'iterations': 5, 'max_depth': 7, 'learning_rate': 0.0432481556658053, 'reg_lambda': 2.5794592312776884e-08, 'subsample': 0.5344234379546441, 'colsample_bylevel': 0.7229240929477218}. Best is trial 0 with value: 0.977577946955999.


4:	learn: 0.5135217	total: 855ms	remaining: 0us
0:	learn: 0.6657176	total: 203ms	remaining: 1.01s
1:	learn: 0.6397979	total: 543ms	remaining: 1.09s
2:	learn: 0.6152647	total: 724ms	remaining: 724ms
3:	learn: 0.5919958	total: 986ms	remaining: 493ms
4:	learn: 0.5699091	total: 1.32s	remaining: 264ms


[I 2024-03-20 05:44:36,891] Trial 2 finished with value: 0.9853831270107263 and parameters: {'iterations': 6, 'max_depth': 6, 'learning_rate': 0.027860901913870466, 'reg_lambda': 0.00022149867576944066, 'subsample': 0.7260260101971551, 'colsample_bylevel': 0.9861680616706482}. Best is trial 2 with value: 0.9853831270107263.


5:	learn: 0.5489015	total: 1.65s	remaining: 0us
0:	learn: 0.6168204	total: 147ms	remaining: 589ms
1:	learn: 0.5515338	total: 284ms	remaining: 426ms
2:	learn: 0.4953104	total: 394ms	remaining: 263ms
3:	learn: 0.4465625	total: 612ms	remaining: 153ms
4:	learn: 0.4035647	total: 691ms	remaining: 0us


[I 2024-03-20 05:44:38,042] Trial 3 finished with value: 0.9650029622023729 and parameters: {'iterations': 5, 'max_depth': 7, 'learning_rate': 0.08004135175929981, 'reg_lambda': 1.0009989454261798, 'subsample': 0.46798143919821883, 'colsample_bylevel': 0.5588239017940853}. Best is trial 2 with value: 0.9853831270107263.


0:	learn: 0.6277032	total: 237ms	remaining: 237ms


[I 2024-03-20 05:44:38,959] Trial 4 finished with value: 0.9713194760733463 and parameters: {'iterations': 2, 'max_depth': 6, 'learning_rate': 0.06789683682080522, 'reg_lambda': 7.91538282464134, 'subsample': 0.5296003763191351, 'colsample_bylevel': 0.5825092893285987}. Best is trial 2 with value: 0.9853831270107263.


1:	learn: 0.5707217	total: 468ms	remaining: 0us
0:	learn: 0.6864403	total: 106ms	remaining: 106ms
1:	learn: 0.6798286	total: 248ms	remaining: 0us


[I 2024-03-20 05:44:39,659] Trial 5 finished with value: 0.9686385945649848 and parameters: {'iterations': 2, 'max_depth': 8, 'learning_rate': 0.006742659833144009, 'reg_lambda': 0.11563766364478664, 'subsample': 0.16142548148891697, 'colsample_bylevel': 0.4422090409311584}. Best is trial 2 with value: 0.9853831270107263.


0:	learn: 0.6117068	total: 191ms	remaining: 956ms
1:	learn: 0.5430744	total: 372ms	remaining: 745ms
2:	learn: 0.4843297	total: 557ms	remaining: 557ms
3:	learn: 0.4336257	total: 746ms	remaining: 373ms
4:	learn: 0.3898981	total: 899ms	remaining: 180ms
5:	learn: 0.3511251	total: 1.08s	remaining: 0us


[I 2024-03-20 05:44:41,214] Trial 6 finished with value: 0.9721685151209482 and parameters: {'iterations': 6, 'max_depth': 5, 'learning_rate': 0.08556023119940104, 'reg_lambda': 0.05608127015508775, 'subsample': 0.6117651932542418, 'colsample_bylevel': 0.26535710811130575}. Best is trial 2 with value: 0.9853831270107263.
[I 2024-03-20 05:44:41,815] Trial 7 finished with value: 0.9315851751737017 and parameters: {'iterations': 2, 'max_depth': 7, 'learning_rate': 0.0445638344522749, 'reg_lambda': 0.10190728623873888, 'subsample': 0.1298774634327195, 'colsample_bylevel': 0.13099537307455417}. Best is trial 2 with value: 0.9853831270107263.


0:	learn: 0.6498472	total: 66.7ms	remaining: 66.7ms
1:	learn: 0.6103979	total: 161ms	remaining: 0us
0:	learn: 0.6116628	total: 256ms	remaining: 1.28s
1:	learn: 0.5430921	total: 502ms	remaining: 1s
2:	learn: 0.4844759	total: 761ms	remaining: 761ms
3:	learn: 0.4338910	total: 1.02s	remaining: 512ms
4:	learn: 0.3898120	total: 1.26s	remaining: 253ms


[I 2024-03-20 05:44:43,780] Trial 8 finished with value: 0.9851230347941333 and parameters: {'iterations': 6, 'max_depth': 5, 'learning_rate': 0.08530005460037629, 'reg_lambda': 3.713583045521075e-05, 'subsample': 0.567302353714491, 'colsample_bylevel': 0.9200198524098088}. Best is trial 2 with value: 0.9853831270107263.


5:	learn: 0.3511255	total: 1.52s	remaining: 0us
0:	learn: 0.6093429	total: 200ms	remaining: 599ms
1:	learn: 0.5387312	total: 320ms	remaining: 320ms
2:	learn: 0.4786578	total: 434ms	remaining: 145ms
3:	learn: 0.4274963	total: 609ms	remaining: 0us


[I 2024-03-20 05:44:44,830] Trial 9 finished with value: 0.9776995782556218 and parameters: {'iterations': 4, 'max_depth': 8, 'learning_rate': 0.08855510651228041, 'reg_lambda': 1.3156789098695641e-08, 'subsample': 0.710041121968358, 'colsample_bylevel': 0.1725116300386258}. Best is trial 2 with value: 0.9853831270107263.


0:	learn: 0.6657393	total: 135ms	remaining: 675ms
1:	learn: 0.6398530	total: 278ms	remaining: 557ms
2:	learn: 0.6153290	total: 419ms	remaining: 419ms
3:	learn: 0.5920792	total: 561ms	remaining: 281ms
4:	learn: 0.5700025	total: 683ms	remaining: 137ms
5:	learn: 0.5490062	total: 797ms	remaining: 0us
 - fold2 - 0.9350


[I 2024-03-20 05:45:04,600] A new study created in memory with name: no-name-32a2db51-9d47-4f5d-b286-adf4920ecb3a


0:	learn: 0.6403087	total: 344ms	remaining: 1.72s
1:	learn: 0.5929686	total: 601ms	remaining: 1.2s
2:	learn: 0.5502859	total: 856ms	remaining: 856ms
3:	learn: 0.5116053	total: 1.12s	remaining: 559ms
4:	learn: 0.4764352	total: 1.35s	remaining: 270ms


[I 2024-03-20 05:45:06,723] Trial 0 finished with value: 0.9792277794137403 and parameters: {'iterations': 6, 'max_depth': 4, 'learning_rate': 0.05445829780885526, 'reg_lambda': 0.005715217874202884, 'subsample': 0.8372076985033007, 'colsample_bylevel': 0.7447951535153253}. Best is trial 0 with value: 0.9792277794137403.


5:	learn: 0.4442895	total: 1.59s	remaining: 0us
0:	learn: 0.6830129	total: 287ms	remaining: 1.44s
1:	learn: 0.6730863	total: 545ms	remaining: 1.09s
2:	learn: 0.6633675	total: 811ms	remaining: 811ms
3:	learn: 0.6538366	total: 1.09s	remaining: 548ms
4:	learn: 0.6444956	total: 1.35s	remaining: 271ms


[I 2024-03-20 05:45:08,878] Trial 1 finished with value: 0.9856910569423653 and parameters: {'iterations': 6, 'max_depth': 5, 'learning_rate': 0.010200904939034744, 'reg_lambda': 0.025127896963112545, 'subsample': 0.5797036126898163, 'colsample_bylevel': 0.9543004136684674}. Best is trial 1 with value: 0.9856910569423653.


5:	learn: 0.6353377	total: 1.61s	remaining: 0us
0:	learn: 0.6286313	total: 199ms	remaining: 795ms
1:	learn: 0.5719404	total: 383ms	remaining: 574ms
2:	learn: 0.5219870	total: 470ms	remaining: 313ms
3:	learn: 0.4776764	total: 691ms	remaining: 173ms
4:	learn: 0.4384977	total: 860ms	remaining: 0us


[I 2024-03-20 05:45:10,192] Trial 2 finished with value: 0.9651232910582827 and parameters: {'iterations': 5, 'max_depth': 8, 'learning_rate': 0.06730265645124066, 'reg_lambda': 8.414171353843706e-06, 'subsample': 0.6019200498174362, 'colsample_bylevel': 0.33785510075184016}. Best is trial 1 with value: 0.9856910569423653.


0:	learn: 0.6174585	total: 181ms	remaining: 725ms
1:	learn: 0.5532886	total: 351ms	remaining: 527ms
2:	learn: 0.4978347	total: 499ms	remaining: 333ms
3:	learn: 0.4493097	total: 666ms	remaining: 167ms


[I 2024-03-20 05:45:11,483] Trial 3 finished with value: 0.9631191310757464 and parameters: {'iterations': 5, 'max_depth': 5, 'learning_rate': 0.07905019604558876, 'reg_lambda': 0.008693497449239614, 'subsample': 0.5899024842624929, 'colsample_bylevel': 0.21599528500011153}. Best is trial 1 with value: 0.9856910569423653.


4:	learn: 0.4067208	total: 836ms	remaining: 0us
0:	learn: 0.6186058	total: 172ms	remaining: 344ms
1:	learn: 0.5543654	total: 320ms	remaining: 160ms


[I 2024-03-20 05:45:12,419] Trial 4 finished with value: 0.9617431387168421 and parameters: {'iterations': 3, 'max_depth': 6, 'learning_rate': 0.07838984620707548, 'reg_lambda': 0.22126203349480064, 'subsample': 0.3788865432930342, 'colsample_bylevel': 0.35172838668254525}. Best is trial 1 with value: 0.9856910569423653.


2:	learn: 0.4989205	total: 496ms	remaining: 0us
0:	learn: 0.6331028	total: 170ms	remaining: 848ms
1:	learn: 0.5801341	total: 339ms	remaining: 678ms
2:	learn: 0.5330211	total: 510ms	remaining: 510ms
3:	learn: 0.4908559	total: 678ms	remaining: 339ms
4:	learn: 0.4529291	total: 844ms	remaining: 169ms
5:	learn: 0.4186450	total: 1.01s	remaining: 0us


[I 2024-03-20 05:45:13,884] Trial 5 finished with value: 0.9750729984459832 and parameters: {'iterations': 6, 'max_depth': 4, 'learning_rate': 0.0621372670103547, 'reg_lambda': 0.002380790078227978, 'subsample': 0.4422089671274576, 'colsample_bylevel': 0.7562984484778843}. Best is trial 1 with value: 0.9856910569423653.


0:	learn: 0.6432977	total: 260ms	remaining: 1.3s
1:	learn: 0.5983582	total: 491ms	remaining: 983ms
2:	learn: 0.5576293	total: 740ms	remaining: 740ms
3:	learn: 0.5205456	total: 991ms	remaining: 495ms
4:	learn: 0.4866440	total: 1.23s	remaining: 247ms


[I 2024-03-20 05:45:15,817] Trial 6 finished with value: 0.9695525669390089 and parameters: {'iterations': 6, 'max_depth': 4, 'learning_rate': 0.05126929795827078, 'reg_lambda': 0.12080708778150692, 'subsample': 0.7195631890460319, 'colsample_bylevel': 0.9606559902082852}. Best is trial 1 with value: 0.9856910569423653.


5:	learn: 0.4555142	total: 1.48s	remaining: 0us
0:	learn: 0.6127142	total: 293ms	remaining: 880ms
1:	learn: 0.5448311	total: 523ms	remaining: 523ms
2:	learn: 0.4867304	total: 664ms	remaining: 221ms


[I 2024-03-20 05:45:17,165] Trial 7 finished with value: 0.9728190979150123 and parameters: {'iterations': 4, 'max_depth': 6, 'learning_rate': 0.084222514263553, 'reg_lambda': 15.30132054164793, 'subsample': 0.8334242201606875, 'colsample_bylevel': 0.3733878163608369}. Best is trial 1 with value: 0.9856910569423653.


3:	learn: 0.4367789	total: 899ms	remaining: 0us
0:	learn: 0.6512619	total: 288ms	remaining: 575ms
1:	learn: 0.6128570	total: 527ms	remaining: 263ms


[I 2024-03-20 05:45:18,367] Trial 8 finished with value: 0.9551859016273063 and parameters: {'iterations': 3, 'max_depth': 7, 'learning_rate': 0.04291927044609393, 'reg_lambda': 0.20497821406885297, 'subsample': 0.9328061313918085, 'colsample_bylevel': 0.4468855179598754}. Best is trial 1 with value: 0.9856910569423653.


2:	learn: 0.5775140	total: 735ms	remaining: 0us
0:	learn: 0.6128113	total: 70.7ms	remaining: 141ms
1:	learn: 0.5450224	total: 142ms	remaining: 70.8ms
2:	learn: 0.4869520	total: 238ms	remaining: 0us


[I 2024-03-20 05:45:19,050] Trial 9 finished with value: 0.9610145850528975 and parameters: {'iterations': 3, 'max_depth': 7, 'learning_rate': 0.08405744631710388, 'reg_lambda': 1.4295009450673474e-05, 'subsample': 0.22449577158390902, 'colsample_bylevel': 0.9975846514317621}. Best is trial 1 with value: 0.9856910569423653.


0:	learn: 0.6830209	total: 144ms	remaining: 722ms
1:	learn: 0.6731003	total: 251ms	remaining: 503ms
2:	learn: 0.6633788	total: 363ms	remaining: 363ms
3:	learn: 0.6538559	total: 467ms	remaining: 234ms
4:	learn: 0.6445164	total: 565ms	remaining: 113ms
5:	learn: 0.6353665	total: 677ms	remaining: 0us
 - fold3 - 0.9266


[I 2024-03-20 05:45:38,081] A new study created in memory with name: no-name-a0a0b768-ca05-4334-926f-9a6d0f759617


0:	learn: 0.6290857	total: 281ms	remaining: 281ms


[I 2024-03-20 05:45:39,186] Trial 0 finished with value: 0.947623778759191 and parameters: {'iterations': 2, 'max_depth': 6, 'learning_rate': 0.06647758286241855, 'reg_lambda': 0.0014278988900304592, 'subsample': 0.6779683809406977, 'colsample_bylevel': 0.4191730235204868}. Best is trial 0 with value: 0.947623778759191.


1:	learn: 0.5730683	total: 529ms	remaining: 0us
0:	learn: 0.6482506	total: 255ms	remaining: 510ms
1:	learn: 0.6073719	total: 432ms	remaining: 216ms


[I 2024-03-20 05:45:40,237] Trial 1 finished with value: 0.9834728113763217 and parameters: {'iterations': 3, 'max_depth': 6, 'learning_rate': 0.046040265171620694, 'reg_lambda': 0.00017919188933078554, 'subsample': 0.4608320294159271, 'colsample_bylevel': 0.5842394981111572}. Best is trial 1 with value: 0.9834728113763217.


2:	learn: 0.5699717	total: 596ms	remaining: 0us


[I 2024-03-20 05:45:40,822] Trial 2 finished with value: 0.9672146113400819 and parameters: {'iterations': 2, 'max_depth': 7, 'learning_rate': 0.0580557920030238, 'reg_lambda': 0.0007438537009157415, 'subsample': 0.17171910097392654, 'colsample_bylevel': 0.8836281723959388}. Best is trial 1 with value: 0.9834728113763217.


0:	learn: 0.6368760	total: 66.4ms	remaining: 66.4ms
1:	learn: 0.5868448	total: 145ms	remaining: 0us
0:	learn: 0.6729167	total: 189ms	remaining: 189ms
1:	learn: 0.6536025	total: 331ms	remaining: 0us


[I 2024-03-20 05:45:41,595] Trial 3 finished with value: 0.9188181728350113 and parameters: {'iterations': 2, 'max_depth': 6, 'learning_rate': 0.020536805765322796, 'reg_lambda': 0.00019868196655762322, 'subsample': 0.617058879144735, 'colsample_bylevel': 0.24040722077690505}. Best is trial 1 with value: 0.9834728113763217.


0:	learn: 0.6429481	total: 118ms	remaining: 355ms
1:	learn: 0.5977200	total: 193ms	remaining: 193ms
2:	learn: 0.5570435	total: 264ms	remaining: 88.1ms


[I 2024-03-20 05:45:42,363] Trial 4 finished with value: 0.9854021311926456 and parameters: {'iterations': 4, 'max_depth': 8, 'learning_rate': 0.05164344977936916, 'reg_lambda': 0.0007223298780745926, 'subsample': 0.1327230386529416, 'colsample_bylevel': 0.6016683685671178}. Best is trial 4 with value: 0.9854021311926456.


3:	learn: 0.5197262	total: 333ms	remaining: 0us
0:	learn: 0.6174431	total: 138ms	remaining: 138ms
1:	learn: 0.5528471	total: 289ms	remaining: 0us


[I 2024-03-20 05:45:43,086] Trial 5 finished with value: 0.9444118309780946 and parameters: {'iterations': 2, 'max_depth': 8, 'learning_rate': 0.0791223188273213, 'reg_lambda': 1.0012415840074915, 'subsample': 0.3674073691896671, 'colsample_bylevel': 0.4453657856286598}. Best is trial 4 with value: 0.9854021311926456.
[I 2024-03-20 05:45:43,575] Trial 6 finished with value: 0.5 and parameters: {'iterations': 2, 'max_depth': 5, 'learning_rate': 0.08111302419463796, 'reg_lambda': 3.610487231431475e-05, 'subsample': 0.5697141323598091, 'colsample_bylevel': 0.10173264098339263}. Best is trial 4 with value: 0.9854021311926456.


0:	learn: 0.6163679	total: 44.3ms	remaining: 44.3ms
1:	learn: 0.5511757	total: 77.1ms	remaining: 0us
0:	learn: 0.6097464	total: 98.4ms	remaining: 197ms
1:	learn: 0.5398563	total: 228ms	remaining: 114ms


[I 2024-03-20 05:45:44,359] Trial 7 finished with value: 0.9704298267861873 and parameters: {'iterations': 3, 'max_depth': 6, 'learning_rate': 0.0874528984028572, 'reg_lambda': 0.5012363758588892, 'subsample': 0.17371303238184152, 'colsample_bylevel': 0.4797961094749963}. Best is trial 4 with value: 0.9854021311926456.


2:	learn: 0.4802941	total: 346ms	remaining: 0us
0:	learn: 0.6235267	total: 170ms	remaining: 340ms
1:	learn: 0.5633506	total: 322ms	remaining: 161ms


[I 2024-03-20 05:45:45,246] Trial 8 finished with value: 0.9725935303874975 and parameters: {'iterations': 3, 'max_depth': 7, 'learning_rate': 0.07243440457626432, 'reg_lambda': 0.27834520383507116, 'subsample': 0.36019396475739085, 'colsample_bylevel': 0.7436485927869313}. Best is trial 4 with value: 0.9854021311926456.


2:	learn: 0.5107630	total: 449ms	remaining: 0us
0:	learn: 0.6252652	total: 161ms	remaining: 161ms
1:	learn: 0.5663900	total: 312ms	remaining: 0us


[I 2024-03-20 05:45:45,982] Trial 9 finished with value: 0.9553111821025149 and parameters: {'iterations': 2, 'max_depth': 4, 'learning_rate': 0.07056713865543736, 'reg_lambda': 59.944266389885335, 'subsample': 0.3402461890056357, 'colsample_bylevel': 0.9879008788084138}. Best is trial 4 with value: 0.9854021311926456.


0:	learn: 0.6429699	total: 90.5ms	remaining: 272ms
1:	learn: 0.5977461	total: 176ms	remaining: 176ms
2:	learn: 0.5567652	total: 262ms	remaining: 87.3ms
3:	learn: 0.5194768	total: 351ms	remaining: 0us
 - fold4 - 0.9240


[I 2024-03-20 05:46:05,234] A new study created in memory with name: no-name-f4bd6bee-b9dc-44b1-ae67-0de52b6a7513


0:	learn: 0.6031583	total: 253ms	remaining: 253ms
1:	learn: 0.5290464	total: 380ms	remaining: 0us


[I 2024-03-20 05:46:06,115] Trial 0 finished with value: 0.9578116448246964 and parameters: {'iterations': 2, 'max_depth': 8, 'learning_rate': 0.09471478240423838, 'reg_lambda': 0.40927168147129617, 'subsample': 0.67922156936905, 'colsample_bylevel': 0.4883137393348616}. Best is trial 0 with value: 0.9578116448246964.


0:	learn: 0.6284744	total: 125ms	remaining: 501ms
1:	learn: 0.5717931	total: 342ms	remaining: 513ms
2:	learn: 0.5221186	total: 433ms	remaining: 289ms
3:	learn: 0.4783094	total: 476ms	remaining: 119ms


[I 2024-03-20 05:46:07,143] Trial 1 finished with value: 0.9452070233099462 and parameters: {'iterations': 5, 'max_depth': 8, 'learning_rate': 0.06735551661499606, 'reg_lambda': 1.05399233029862e-08, 'subsample': 0.59594267949969, 'colsample_bylevel': 0.13388612579213088}. Best is trial 0 with value: 0.9578116448246964.


4:	learn: 0.4392042	total: 556ms	remaining: 0us
0:	learn: 0.5995837	total: 97.3ms	remaining: 487ms
1:	learn: 0.5229212	total: 188ms	remaining: 376ms
2:	learn: 0.4588707	total: 385ms	remaining: 385ms
3:	learn: 0.4043706	total: 521ms	remaining: 261ms
4:	learn: 0.3577485	total: 605ms	remaining: 121ms
5:	learn: 0.3175453	total: 725ms	remaining: 0us


[I 2024-03-20 05:46:08,339] Trial 2 finished with value: 0.9560176372585723 and parameters: {'iterations': 6, 'max_depth': 8, 'learning_rate': 0.09903133387464738, 'reg_lambda': 2.6098244349522423e-08, 'subsample': 0.4908384927569587, 'colsample_bylevel': 0.5499236333480244}. Best is trial 0 with value: 0.9578116448246964.


0:	learn: 0.6228729	total: 181ms	remaining: 542ms
1:	learn: 0.5624363	total: 274ms	remaining: 274ms
2:	learn: 0.5094617	total: 363ms	remaining: 121ms


[I 2024-03-20 05:46:09,272] Trial 3 finished with value: 0.9512386948040132 and parameters: {'iterations': 4, 'max_depth': 8, 'learning_rate': 0.0731562081758012, 'reg_lambda': 1.0107799523891255e-06, 'subsample': 0.422327468713652, 'colsample_bylevel': 0.7169960792908876}. Best is trial 0 with value: 0.9578116448246964.


3:	learn: 0.4628820	total: 465ms	remaining: 0us
0:	learn: 0.6150426	total: 229ms	remaining: 1.14s
1:	learn: 0.5487766	total: 461ms	remaining: 923ms
2:	learn: 0.4918101	total: 672ms	remaining: 672ms
3:	learn: 0.4423740	total: 878ms	remaining: 439ms
4:	learn: 0.3990724	total: 1.09s	remaining: 219ms


[I 2024-03-20 05:46:11,035] Trial 4 finished with value: 0.9781145167385561 and parameters: {'iterations': 6, 'max_depth': 4, 'learning_rate': 0.0816340043744661, 'reg_lambda': 0.07188679187717663, 'subsample': 0.7720326815149601, 'colsample_bylevel': 0.5954514531860144}. Best is trial 4 with value: 0.9781145167385561.


5:	learn: 0.3609608	total: 1.31s	remaining: 0us
0:	learn: 0.6083181	total: 117ms	remaining: 467ms
1:	learn: 0.5373891	total: 228ms	remaining: 342ms
2:	learn: 0.4772782	total: 343ms	remaining: 229ms
3:	learn: 0.4255370	total: 457ms	remaining: 114ms


[I 2024-03-20 05:46:12,083] Trial 5 finished with value: 0.9578274008509403 and parameters: {'iterations': 5, 'max_depth': 8, 'learning_rate': 0.08898139618938766, 'reg_lambda': 0.026928698095439886, 'subsample': 0.6279434967586403, 'colsample_bylevel': 0.9547846049568111}. Best is trial 4 with value: 0.9781145167385561.


4:	learn: 0.3806967	total: 616ms	remaining: 0us
0:	learn: 0.6023091	total: 225ms	remaining: 899ms
1:	learn: 0.5276603	total: 325ms	remaining: 487ms
2:	learn: 0.4643136	total: 550ms	remaining: 366ms
3:	learn: 0.4111920	total: 641ms	remaining: 160ms


[I 2024-03-20 05:46:13,345] Trial 6 finished with value: 0.9554143024286368 and parameters: {'iterations': 5, 'max_depth': 5, 'learning_rate': 0.09660122713867356, 'reg_lambda': 0.002665017732078746, 'subsample': 0.9848500185617645, 'colsample_bylevel': 0.14260801198097217}. Best is trial 4 with value: 0.9781145167385561.


4:	learn: 0.3656332	total: 818ms	remaining: 0us
0:	learn: 0.6205990	total: 99.7ms	remaining: 299ms
1:	learn: 0.5583155	total: 222ms	remaining: 222ms
2:	learn: 0.5042111	total: 304ms	remaining: 101ms
3:	learn: 0.4568106	total: 386ms	remaining: 0us


[I 2024-03-20 05:46:14,197] Trial 7 finished with value: 0.9625130503613339 and parameters: {'iterations': 4, 'max_depth': 6, 'learning_rate': 0.07556295120616312, 'reg_lambda': 1.5110248924137426e-05, 'subsample': 0.15685771304089383, 'colsample_bylevel': 0.7906938712726943}. Best is trial 4 with value: 0.9781145167385561.


0:	learn: 0.6496362	total: 159ms	remaining: 477ms
1:	learn: 0.6098922	total: 317ms	remaining: 317ms
2:	learn: 0.5733836	total: 469ms	remaining: 156ms
3:	learn: 0.5397599	total: 629ms	remaining: 0us


[I 2024-03-20 05:46:15,258] Trial 8 finished with value: 0.9840798459672528 and parameters: {'iterations': 4, 'max_depth': 4, 'learning_rate': 0.04462557089973648, 'reg_lambda': 2.4018678641558386, 'subsample': 0.6625401258635375, 'colsample_bylevel': 0.31805089467726344}. Best is trial 8 with value: 0.9840798459672528.


0:	learn: 0.6460733	total: 142ms	remaining: 283ms
1:	learn: 0.6033387	total: 232ms	remaining: 116ms


[I 2024-03-20 05:46:16,125] Trial 9 finished with value: 0.9569003367178752 and parameters: {'iterations': 3, 'max_depth': 8, 'learning_rate': 0.04843779190463181, 'reg_lambda': 2.7239091253696996e-07, 'subsample': 0.4085133143667451, 'colsample_bylevel': 0.4247063307396602}. Best is trial 8 with value: 0.9840798459672528.


2:	learn: 0.5643861	total: 432ms	remaining: 0us
0:	learn: 0.6496495	total: 107ms	remaining: 320ms
1:	learn: 0.6103103	total: 214ms	remaining: 214ms
2:	learn: 0.5742214	total: 316ms	remaining: 105ms
3:	learn: 0.5405578	total: 417ms	remaining: 0us
 - fold5 - 0.5000
FINISH: CV Score: 0.8417


In [19]:
def train_xgb(X, y, cv, model_path=[], params={}, verbose=100):

    models = []
    n_records = len(X)
    oof_pred = np.zeros((n_records), dtype=np.float32)

    def objective(trial):
        xgb_params = {
            "objective": "binary:logistic",
            "eval_metric": "logloss",
            "booster": "gbtree",
            # "verbosity": 0,
            "n_jobs": -1,
            "seed": 42,
            "max_depth": trial.suggest_int('max_depth', 4, 8),
            "min_child_weight": trial.suggest_float("min_child_weight", 1e-5, 1e2, log=True),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1),
            "gamma": trial.suggest_float("gamma", 0, 1.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
        }

        dtrain = xgb.DMatrix(X_train, label=y_train)
        dvalid = xgb.DMatrix(X_valid, label=y_valid)
        evals = [(dtrain, 'train'), (dvalid, 'eval')]

        model = xgb.train(xgb_params, dtrain, num_boost_round=1000,
                          early_stopping_rounds=50, evals=evals, verbose_eval=verbose)

        pred_y = model.predict(dvalid, ntree_limit=model.best_ntree_limit)
        auc = roc_auc_score(y_valid, pred_y)

        return auc

    for i, (idx_train, idx_valid) in enumerate(cv):
        X_train, y_train = X[idx_train], y[idx_train]
        X_valid, y_valid = X[idx_valid], y[idx_valid]

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=10)

        best_params = study.best_params
        xgb_params = {
            "objective": "binary:logistic",
            "eval_metric": "logloss",
            "booster": "gbtree",
            "verbosity": 0,
            "n_jobs": -1,
            "seed": 42,
            **best_params,
        }

        dtrain = xgb.DMatrix(X_train, label=y_train)
        dvalid = xgb.DMatrix(X_valid, label=y_valid)
        evals = [(dtrain, 'train'), (dvalid, 'eval')]

        model = xgb.train(xgb_params, dtrain, num_boost_round=1000,
                          early_stopping_rounds=50, evals=evals, verbose_eval=verbose)

        pred_i = model.predict(dvalid, ntree_limit=model.best_ntree_limit)
        oof_pred[idx_valid] = pred_i
        models.append(model)
        score = roc_auc_score(y_valid, pred_i)
        print(f" - fold{i + 1} - {score:.4f}")

    score = roc_auc_score(y, oof_pred)

    print("=" * 50)
    print(f"FINISH: CV Score: {score:.4f}")
    return score, oof_pred, models

In [None]:
xgb_score, xgb_oof_pred, xgb_models = train_catboost(X, y=y, cv=cv_list)

[I 2024-03-20 05:46:36,058] A new study created in memory with name: no-name-04b2299f-f668-4dfe-9d1f-6e0a466e59a3


0:	learn: 0.6267388	total: 266ms	remaining: 266ms
1:	learn: 0.5689424	total: 453ms	remaining: 0us


[I 2024-03-20 05:46:37,128] Trial 0 finished with value: 0.952396034815814 and parameters: {'iterations': 2, 'max_depth': 8, 'learning_rate': 0.0689476623087299, 'reg_lambda': 4.4583556630536574e-07, 'subsample': 0.7826883563194972, 'colsample_bylevel': 0.5831409400263383}. Best is trial 0 with value: 0.952396034815814.


0:	learn: 0.6631919	total: 109ms	remaining: 544ms
1:	learn: 0.6347265	total: 192ms	remaining: 384ms
2:	learn: 0.6079236	total: 363ms	remaining: 363ms
3:	learn: 0.5826613	total: 436ms	remaining: 218ms
4:	learn: 0.5589862	total: 577ms	remaining: 115ms
5:	learn: 0.5364986	total: 727ms	remaining: 0us


[I 2024-03-20 05:46:38,424] Trial 1 finished with value: 0.9716386532688309 and parameters: {'iterations': 6, 'max_depth': 5, 'learning_rate': 0.030817002919564015, 'reg_lambda': 5.812843005717101e-05, 'subsample': 0.6480239388396605, 'colsample_bylevel': 0.18634694566298424}. Best is trial 1 with value: 0.9716386532688309.


0:	learn: 0.6420072	total: 53.3ms	remaining: 53.3ms
1:	learn: 0.5955440	total: 200ms	remaining: 0us


[I 2024-03-20 05:46:39,101] Trial 2 finished with value: 0.9257283051301544 and parameters: {'iterations': 2, 'max_depth': 5, 'learning_rate': 0.05323728530767126, 'reg_lambda': 3.078288569502599e-06, 'subsample': 0.49933455182917796, 'colsample_bylevel': 0.22757017263779727}. Best is trial 1 with value: 0.9716386532688309.
[I 2024-03-20 05:46:39,782] Trial 3 finished with value: 0.9426788345863499 and parameters: {'iterations': 2, 'max_depth': 8, 'learning_rate': 0.030099842055506826, 'reg_lambda': 2.0543759162520258e-07, 'subsample': 0.46555516743032355, 'colsample_bylevel': 0.6365889692783574}. Best is trial 1 with value: 0.9716386532688309.


0:	learn: 0.6635565	total: 145ms	remaining: 145ms
1:	learn: 0.6357369	total: 233ms	remaining: 0us
0:	learn: 0.6900181	total: 110ms	remaining: 110ms
1:	learn: 0.6869122	total: 284ms	remaining: 0us


[I 2024-03-20 05:46:40,491] Trial 4 finished with value: 0.9587740810881158 and parameters: {'iterations': 2, 'max_depth': 7, 'learning_rate': 0.0031366763407918215, 'reg_lambda': 9.413704791707366, 'subsample': 0.5414526203587652, 'colsample_bylevel': 0.9453803154259177}. Best is trial 1 with value: 0.9716386532688309.


0:	learn: 0.6296859	total: 44.2ms	remaining: 44.2ms
1:	learn: 0.5735226	total: 196ms	remaining: 0us


[I 2024-03-20 05:46:41,119] Trial 5 finished with value: 0.9190553194182906 and parameters: {'iterations': 2, 'max_depth': 7, 'learning_rate': 0.06653019277670111, 'reg_lambda': 0.10090434545256362, 'subsample': 0.5463237552611666, 'colsample_bylevel': 0.3860371935736353}. Best is trial 1 with value: 0.9716386532688309.


0:	learn: 0.6902325	total: 185ms	remaining: 370ms
1:	learn: 0.6873374	total: 313ms	remaining: 156ms


[I 2024-03-20 05:46:42,171] Trial 6 finished with value: 0.9799677876027869 and parameters: {'iterations': 3, 'max_depth': 7, 'learning_rate': 0.0029217710121678827, 'reg_lambda': 37.272183086425066, 'subsample': 0.8644158588246991, 'colsample_bylevel': 0.7672072659452166}. Best is trial 6 with value: 0.9799677876027869.


2:	learn: 0.6844601	total: 629ms	remaining: 0us
0:	learn: 0.6898217	total: 259ms	remaining: 1.29s
1:	learn: 0.6865198	total: 472ms	remaining: 944ms
2:	learn: 0.6832416	total: 712ms	remaining: 712ms
3:	learn: 0.6799843	total: 932ms	remaining: 466ms
4:	learn: 0.6767482	total: 1.15s	remaining: 230ms


[I 2024-03-20 05:46:43,985] Trial 7 finished with value: 0.9859585378810586 and parameters: {'iterations': 6, 'max_depth': 4, 'learning_rate': 0.0033347430411297947, 'reg_lambda': 0.018125888080842784, 'subsample': 0.6965499760081317, 'colsample_bylevel': 0.7387151789401631}. Best is trial 7 with value: 0.9859585378810586.


5:	learn: 0.6735351	total: 1.36s	remaining: 0us
0:	learn: 0.6898113	total: 178ms	remaining: 712ms
1:	learn: 0.6864965	total: 327ms	remaining: 490ms
2:	learn: 0.6832109	total: 535ms	remaining: 357ms
3:	learn: 0.6799444	total: 801ms	remaining: 200ms
4:	learn: 0.6766966	total: 936ms	remaining: 0us


[I 2024-03-20 05:46:45,383] Trial 8 finished with value: 0.9808814771175518 and parameters: {'iterations': 5, 'max_depth': 7, 'learning_rate': 0.003344693039121857, 'reg_lambda': 0.00028282981879593596, 'subsample': 0.775942054779506, 'colsample_bylevel': 0.9619363437960172}. Best is trial 7 with value: 0.9859585378810586.
[I 2024-03-20 05:46:45,895] Trial 9 finished with value: 0.966357770072902 and parameters: {'iterations': 2, 'max_depth': 4, 'learning_rate': 0.049308373660589655, 'reg_lambda': 24.906447627088234, 'subsample': 0.6177856434715019, 'colsample_bylevel': 0.12516681972916935}. Best is trial 7 with value: 0.9859585378810586.


0:	learn: 0.6456863	total: 46.2ms	remaining: 46.2ms
1:	learn: 0.6022450	total: 106ms	remaining: 0us
0:	learn: 0.6898231	total: 112ms	remaining: 558ms
1:	learn: 0.6865243	total: 221ms	remaining: 441ms
2:	learn: 0.6832463	total: 333ms	remaining: 333ms
3:	learn: 0.6799919	total: 453ms	remaining: 226ms
4:	learn: 0.6767584	total: 564ms	remaining: 113ms
5:	learn: 0.6735456	total: 686ms	remaining: 0us
 - fold1 - 0.9282


[I 2024-03-20 05:47:06,045] A new study created in memory with name: no-name-7047184c-8b4d-4cbc-b8e6-6f4aa55c65fb
[I 2024-03-20 05:47:06,887] Trial 0 finished with value: 0.9685595132262476 and parameters: {'iterations': 2, 'max_depth': 7, 'learning_rate': 0.037623697002957555, 'reg_lambda': 0.012240076668969615, 'subsample': 0.15208935574669702, 'colsample_bylevel': 0.8418673511597535}. Best is trial 0 with value: 0.9685595132262476.


0:	learn: 0.6562963	total: 97ms	remaining: 97ms
1:	learn: 0.6221573	total: 161ms	remaining: 0us
0:	learn: 0.6118329	total: 344ms	remaining: 1.72s
1:	learn: 0.5433380	total: 607ms	remaining: 1.21s
2:	learn: 0.4848225	total: 857ms	remaining: 857ms
3:	learn: 0.4342600	total: 1.11s	remaining: 557ms
4:	learn: 0.3901966	total: 1.38s	remaining: 275ms


[I 2024-03-20 05:47:09,017] Trial 1 finished with value: 0.9874055639809364 and parameters: {'iterations': 6, 'max_depth': 5, 'learning_rate': 0.0851142039186763, 'reg_lambda': 0.08476332007272144, 'subsample': 0.5727355677538953, 'colsample_bylevel': 0.9038272029124303}. Best is trial 1 with value: 0.9874055639809364.


5:	learn: 0.3515659	total: 1.64s	remaining: 0us


[I 2024-03-20 05:47:09,725] Trial 2 finished with value: 0.9616891489859584 and parameters: {'iterations': 2, 'max_depth': 5, 'learning_rate': 0.09660378148511475, 'reg_lambda': 1.654180017593957e-06, 'subsample': 0.3870425525736755, 'colsample_bylevel': 0.35509613749329777}. Best is trial 1 with value: 0.9874055639809364.


0:	learn: 0.6015960	total: 171ms	remaining: 171ms
1:	learn: 0.5260715	total: 245ms	remaining: 0us
0:	learn: 0.6738400	total: 325ms	remaining: 651ms
1:	learn: 0.6552927	total: 595ms	remaining: 298ms


[I 2024-03-20 05:47:11,062] Trial 3 finished with value: 0.9715377285657573 and parameters: {'iterations': 3, 'max_depth': 5, 'learning_rate': 0.01953035757748089, 'reg_lambda': 4.608014088139528e-06, 'subsample': 0.7880711218718826, 'colsample_bylevel': 0.6878672741218052}. Best is trial 1 with value: 0.9874055639809364.


2:	learn: 0.6374564	total: 883ms	remaining: 0us
0:	learn: 0.6732557	total: 192ms	remaining: 767ms
1:	learn: 0.6542209	total: 396ms	remaining: 594ms
2:	learn: 0.6358701	total: 594ms	remaining: 396ms
3:	learn: 0.6182293	total: 806ms	remaining: 202ms


[I 2024-03-20 05:47:12,566] Trial 4 finished with value: 0.9759649062743418 and parameters: {'iterations': 5, 'max_depth': 5, 'learning_rate': 0.020130944006746473, 'reg_lambda': 0.0007807057417392377, 'subsample': 0.46163892320486405, 'colsample_bylevel': 0.5965319452828525}. Best is trial 1 with value: 0.9874055639809364.


4:	learn: 0.6012481	total: 1.01s	remaining: 0us
0:	learn: 0.6535771	total: 170ms	remaining: 509ms
1:	learn: 0.6172807	total: 211ms	remaining: 211ms
2:	learn: 0.5834697	total: 376ms	remaining: 125ms
3:	learn: 0.5523861	total: 471ms	remaining: 0us


[I 2024-03-20 05:47:13,493] Trial 5 finished with value: 0.9270364294825126 and parameters: {'iterations': 4, 'max_depth': 7, 'learning_rate': 0.04077548892153065, 'reg_lambda': 0.00038112888699675214, 'subsample': 0.5927773062842323, 'colsample_bylevel': 0.13486666904757513}. Best is trial 1 with value: 0.9874055639809364.


0:	learn: 0.6137606	total: 187ms	remaining: 560ms
1:	learn: 0.5466360	total: 373ms	remaining: 373ms
2:	learn: 0.4890570	total: 544ms	remaining: 181ms
3:	learn: 0.4391244	total: 710ms	remaining: 0us


[I 2024-03-20 05:47:14,650] Trial 6 finished with value: 0.9878388966444724 and parameters: {'iterations': 4, 'max_depth': 5, 'learning_rate': 0.08301031049786371, 'reg_lambda': 8.552146500548944e-07, 'subsample': 0.26237695041724, 'colsample_bylevel': 0.9004778503444115}. Best is trial 6 with value: 0.9878388966444724.


0:	learn: 0.6665296	total: 128ms	remaining: 128ms
1:	learn: 0.6413300	total: 253ms	remaining: 0us


[I 2024-03-20 05:47:15,348] Trial 7 finished with value: 0.9628545687331294 and parameters: {'iterations': 2, 'max_depth': 4, 'learning_rate': 0.02702937159482659, 'reg_lambda': 0.03296168299978079, 'subsample': 0.2042209960581529, 'colsample_bylevel': 0.942693480298621}. Best is trial 6 with value: 0.9878388966444724.


0:	learn: 0.6864629	total: 117ms	remaining: 117ms
1:	learn: 0.6798541	total: 246ms	remaining: 0us


[I 2024-03-20 05:47:16,033] Trial 8 finished with value: 0.9674192945768453 and parameters: {'iterations': 2, 'max_depth': 4, 'learning_rate': 0.006736564259720137, 'reg_lambda': 7.429914605132722e-05, 'subsample': 0.41013888867305015, 'colsample_bylevel': 0.4288935703036621}. Best is trial 6 with value: 0.9878388966444724.


0:	learn: 0.6290041	total: 281ms	remaining: 1.13s
1:	learn: 0.5729262	total: 604ms	remaining: 906ms
2:	learn: 0.5234201	total: 736ms	remaining: 491ms
3:	learn: 0.4794275	total: 1s	remaining: 250ms


[I 2024-03-20 05:47:17,793] Trial 9 finished with value: 0.9826336536445408 and parameters: {'iterations': 5, 'max_depth': 7, 'learning_rate': 0.06650251942498385, 'reg_lambda': 10.2658386200019, 'subsample': 0.9949091457353634, 'colsample_bylevel': 0.6860011551808817}. Best is trial 6 with value: 0.9878388966444724.


4:	learn: 0.4401092	total: 1.31s	remaining: 0us
0:	learn: 0.6138249	total: 85.1ms	remaining: 255ms
1:	learn: 0.5467108	total: 169ms	remaining: 169ms
2:	learn: 0.4891249	total: 249ms	remaining: 83ms
3:	learn: 0.4392229	total: 330ms	remaining: 0us
 - fold2 - 0.9437


[I 2024-03-20 05:47:38,688] A new study created in memory with name: no-name-4222accd-1917-43b2-870c-b5bab068857e


0:	learn: 0.6702323	total: 293ms	remaining: 586ms
1:	learn: 0.6483779	total: 537ms	remaining: 268ms


[I 2024-03-20 05:47:40,189] Trial 0 finished with value: 0.9720886227377904 and parameters: {'iterations': 3, 'max_depth': 6, 'learning_rate': 0.023216093746773817, 'reg_lambda': 3.1620318348240513e-06, 'subsample': 0.5793560106135314, 'colsample_bylevel': 0.8784115730141324}. Best is trial 0 with value: 0.9720886227377904.


2:	learn: 0.6275099	total: 798ms	remaining: 0us
0:	learn: 0.6332095	total: 264ms	remaining: 528ms
1:	learn: 0.5803218	total: 468ms	remaining: 234ms
2:	learn: 0.5336520	total: 637ms	remaining: 0us


[I 2024-03-20 05:47:41,310] Trial 1 finished with value: 0.9830351864955847 and parameters: {'iterations': 3, 'max_depth': 7, 'learning_rate': 0.062025885422958164, 'reg_lambda': 46.16858309191071, 'subsample': 0.7136683325988206, 'colsample_bylevel': 0.6114952096328136}. Best is trial 1 with value: 0.9830351864955847.


0:	learn: 0.6400617	total: 247ms	remaining: 988ms
1:	learn: 0.5925544	total: 581ms	remaining: 871ms
2:	learn: 0.5497179	total: 701ms	remaining: 467ms
3:	learn: 0.5110256	total: 1.01s	remaining: 252ms
4:	learn: 0.4757612	total: 1.15s	remaining: 0us


[I 2024-03-20 05:47:42,928] Trial 2 finished with value: 0.9669702436201366 and parameters: {'iterations': 5, 'max_depth': 8, 'learning_rate': 0.054716176868583544, 'reg_lambda': 20.59623123690918, 'subsample': 0.7341894520108085, 'colsample_bylevel': 0.4796627334200511}. Best is trial 1 with value: 0.9830351864955847.


0:	learn: 0.6401358	total: 244ms	remaining: 489ms
1:	learn: 0.5924767	total: 500ms	remaining: 250ms


[I 2024-03-20 05:47:44,183] Trial 3 finished with value: 0.9616062430875738 and parameters: {'iterations': 3, 'max_depth': 6, 'learning_rate': 0.05490608472868229, 'reg_lambda': 0.00039429402739347356, 'subsample': 0.8676556799766583, 'colsample_bylevel': 0.38005446741203996}. Best is trial 1 with value: 0.9830351864955847.


2:	learn: 0.5496996	total: 790ms	remaining: 0us
0:	learn: 0.6796124	total: 287ms	remaining: 1.43s
1:	learn: 0.6663639	total: 553ms	remaining: 1.1s
2:	learn: 0.6534902	total: 840ms	remaining: 840ms
3:	learn: 0.6409431	total: 1.13s	remaining: 564ms
4:	learn: 0.6287503	total: 1.39s	remaining: 278ms


[I 2024-03-20 05:47:46,307] Trial 4 finished with value: 0.9730930525667219 and parameters: {'iterations': 6, 'max_depth': 5, 'learning_rate': 0.013734932235868077, 'reg_lambda': 0.19279258252760595, 'subsample': 0.7447281073301849, 'colsample_bylevel': 0.6980653877689033}. Best is trial 1 with value: 0.9830351864955847.


5:	learn: 0.6168567	total: 1.65s	remaining: 0us
0:	learn: 0.6702848	total: 168ms	remaining: 673ms
1:	learn: 0.6483502	total: 344ms	remaining: 517ms
2:	learn: 0.6274029	total: 468ms	remaining: 312ms
3:	learn: 0.6074043	total: 632ms	remaining: 158ms


[I 2024-03-20 05:47:47,577] Trial 5 finished with value: 0.9724847290260282 and parameters: {'iterations': 5, 'max_depth': 6, 'learning_rate': 0.02330064858031478, 'reg_lambda': 0.0054632208751502565, 'subsample': 0.35822582662777497, 'colsample_bylevel': 0.3871127374834318}. Best is trial 1 with value: 0.9830351864955847.


4:	learn: 0.5882477	total: 792ms	remaining: 0us
0:	learn: 0.6038016	total: 82.6ms	remaining: 413ms
1:	learn: 0.5298370	total: 199ms	remaining: 397ms
2:	learn: 0.4676705	total: 275ms	remaining: 275ms
3:	learn: 0.4145581	total: 351ms	remaining: 176ms
4:	learn: 0.3688275	total: 426ms	remaining: 85.2ms
5:	learn: 0.3291769	total: 498ms	remaining: 0us


[I 2024-03-20 05:47:48,508] Trial 6 finished with value: 0.9621469862795831 and parameters: {'iterations': 6, 'max_depth': 8, 'learning_rate': 0.09398401120526768, 'reg_lambda': 0.00022546432520394657, 'subsample': 0.29755220373847746, 'colsample_bylevel': 0.9631021930353415}. Best is trial 1 with value: 0.9830351864955847.


0:	learn: 0.6908676	total: 110ms	remaining: 331ms
1:	learn: 0.6885997	total: 213ms	remaining: 213ms
2:	learn: 0.6863444	total: 331ms	remaining: 110ms
3:	learn: 0.6840983	total: 459ms	remaining: 0us


[I 2024-03-20 05:47:49,414] Trial 7 finished with value: 0.9852834059164496 and parameters: {'iterations': 4, 'max_depth': 4, 'learning_rate': 0.0022833460218836888, 'reg_lambda': 1.8515897759740404e-08, 'subsample': 0.12414851652477858, 'colsample_bylevel': 0.833198321431102}. Best is trial 7 with value: 0.9852834059164496.


0:	learn: 0.6664743	total: 109ms	remaining: 544ms
1:	learn: 0.6412430	total: 207ms	remaining: 415ms
2:	learn: 0.6173429	total: 304ms	remaining: 304ms
3:	learn: 0.5946292	total: 439ms	remaining: 219ms
4:	learn: 0.5730251	total: 540ms	remaining: 108ms


[I 2024-03-20 05:47:50,586] Trial 8 finished with value: 0.9580662350890632 and parameters: {'iterations': 6, 'max_depth': 8, 'learning_rate': 0.027083400512097767, 'reg_lambda': 1.5752410715239626, 'subsample': 0.5050929517741607, 'colsample_bylevel': 0.9640299261877819}. Best is trial 7 with value: 0.9852834059164496.


5:	learn: 0.5524671	total: 684ms	remaining: 0us
0:	learn: 0.6047227	total: 109ms	remaining: 326ms
1:	learn: 0.5314017	total: 256ms	remaining: 256ms
2:	learn: 0.4695978	total: 412ms	remaining: 137ms
3:	learn: 0.4167060	total: 551ms	remaining: 0us


[I 2024-03-20 05:47:51,615] Trial 9 finished with value: 0.971013494822409 and parameters: {'iterations': 4, 'max_depth': 6, 'learning_rate': 0.09306438821245168, 'reg_lambda': 0.09670564912627963, 'subsample': 0.30145149807154203, 'colsample_bylevel': 0.4970299872193257}. Best is trial 7 with value: 0.9852834059164496.


0:	learn: 0.6908684	total: 57.3ms	remaining: 172ms
1:	learn: 0.6886043	total: 112ms	remaining: 112ms
2:	learn: 0.6863484	total: 171ms	remaining: 56.8ms
3:	learn: 0.6841042	total: 225ms	remaining: 0us
 - fold3 - 0.9465


[I 2024-03-20 05:48:12,403] A new study created in memory with name: no-name-cdb548a8-83a9-4a9a-982a-054d0de54654


0:	learn: 0.6907976	total: 201ms	remaining: 604ms
1:	learn: 0.6884578	total: 360ms	remaining: 360ms
2:	learn: 0.6861556	total: 397ms	remaining: 132ms


[I 2024-03-20 05:48:13,565] Trial 0 finished with value: 0.973591215194473 and parameters: {'iterations': 4, 'max_depth': 5, 'learning_rate': 0.002356731697826061, 'reg_lambda': 0.11717879303995585, 'subsample': 0.4008672128300185, 'colsample_bylevel': 0.19156422409840748}. Best is trial 0 with value: 0.973591215194473.


3:	learn: 0.6838549	total: 501ms	remaining: 0us
0:	learn: 0.6533025	total: 200ms	remaining: 399ms
1:	learn: 0.6166198	total: 354ms	remaining: 177ms


[I 2024-03-20 05:48:14,642] Trial 1 finished with value: 0.9701781753801857 and parameters: {'iterations': 3, 'max_depth': 6, 'learning_rate': 0.040744063553040825, 'reg_lambda': 3.083280933050544e-08, 'subsample': 0.5830344970048673, 'colsample_bylevel': 0.8103209933840131}. Best is trial 0 with value: 0.973591215194473.


2:	learn: 0.5827238	total: 582ms	remaining: 0us
0:	learn: 0.6250549	total: 120ms	remaining: 479ms
1:	learn: 0.5660582	total: 300ms	remaining: 450ms
2:	learn: 0.5143976	total: 497ms	remaining: 331ms
3:	learn: 0.4687668	total: 690ms	remaining: 173ms


[I 2024-03-20 05:48:15,899] Trial 2 finished with value: 0.9819928885064804 and parameters: {'iterations': 5, 'max_depth': 7, 'learning_rate': 0.07072400591583834, 'reg_lambda': 2.2672612578774892e-05, 'subsample': 0.4304755819596619, 'colsample_bylevel': 0.8293294625574535}. Best is trial 2 with value: 0.9819928885064804.


4:	learn: 0.4281873	total: 790ms	remaining: 0us
0:	learn: 0.6819760	total: 297ms	remaining: 1.19s
1:	learn: 0.6710608	total: 571ms	remaining: 856ms
2:	learn: 0.6603888	total: 877ms	remaining: 585ms
3:	learn: 0.6499530	total: 1.21s	remaining: 303ms


[I 2024-03-20 05:48:17,912] Trial 3 finished with value: 0.9845227367298944 and parameters: {'iterations': 5, 'max_depth': 5, 'learning_rate': 0.011248703724988382, 'reg_lambda': 0.0033597208781794446, 'subsample': 0.9880498629357614, 'colsample_bylevel': 0.5256292709641319}. Best is trial 3 with value: 0.9845227367298944.


4:	learn: 0.6397632	total: 1.53s	remaining: 0us
0:	learn: 0.6610359	total: 233ms	remaining: 932ms
1:	learn: 0.6309321	total: 452ms	remaining: 678ms
2:	learn: 0.6026739	total: 659ms	remaining: 439ms
3:	learn: 0.5761150	total: 855ms	remaining: 214ms


[I 2024-03-20 05:48:19,588] Trial 4 finished with value: 0.9690245778664123 and parameters: {'iterations': 5, 'max_depth': 4, 'learning_rate': 0.03280169466438717, 'reg_lambda': 48.74698099570142, 'subsample': 0.688362677924533, 'colsample_bylevel': 0.561248686702904}. Best is trial 3 with value: 0.9845227367298944.


4:	learn: 0.5511036	total: 1.06s	remaining: 0us
0:	learn: 0.6551567	total: 103ms	remaining: 103ms
1:	learn: 0.6200824	total: 205ms	remaining: 0us


[I 2024-03-20 05:48:20,219] Trial 5 finished with value: 0.9558306560935734 and parameters: {'iterations': 2, 'max_depth': 8, 'learning_rate': 0.0388233830994156, 'reg_lambda': 6.708756460707221e-06, 'subsample': 0.25411534414589854, 'colsample_bylevel': 0.47713975926255453}. Best is trial 3 with value: 0.9845227367298944.


0:	learn: 0.6336366	total: 164ms	remaining: 493ms
1:	learn: 0.5810416	total: 245ms	remaining: 245ms
2:	learn: 0.5342048	total: 333ms	remaining: 111ms


[I 2024-03-20 05:48:21,105] Trial 6 finished with value: 0.9686776313624196 and parameters: {'iterations': 4, 'max_depth': 8, 'learning_rate': 0.061586626652899525, 'reg_lambda': 8.734522327463714e-08, 'subsample': 0.4796276273760423, 'colsample_bylevel': 0.6876963799293759}. Best is trial 3 with value: 0.9845227367298944.


3:	learn: 0.4922661	total: 446ms	remaining: 0us
0:	learn: 0.6362924	total: 117ms	remaining: 466ms
1:	learn: 0.5856387	total: 235ms	remaining: 353ms
2:	learn: 0.5404167	total: 351ms	remaining: 234ms
3:	learn: 0.4996356	total: 466ms	remaining: 116ms


[I 2024-03-20 05:48:22,130] Trial 7 finished with value: 0.9656620587926628 and parameters: {'iterations': 5, 'max_depth': 5, 'learning_rate': 0.0589443119305102, 'reg_lambda': 0.019897895482057112, 'subsample': 0.12922080793384322, 'colsample_bylevel': 0.40506394278968255}. Best is trial 3 with value: 0.9845227367298944.


4:	learn: 0.4631398	total: 571ms	remaining: 0us
0:	learn: 0.6201890	total: 256ms	remaining: 512ms
1:	learn: 0.5581349	total: 307ms	remaining: 154ms


[I 2024-03-20 05:48:23,076] Trial 8 finished with value: 0.9688356587953594 and parameters: {'iterations': 3, 'max_depth': 6, 'learning_rate': 0.07626032702640256, 'reg_lambda': 0.2520039638502834, 'subsample': 0.9297500746503258, 'colsample_bylevel': 0.35662692286875786}. Best is trial 3 with value: 0.9845227367298944.


2:	learn: 0.5035675	total: 492ms	remaining: 0us
0:	learn: 0.6364073	total: 68.3ms	remaining: 205ms
1:	learn: 0.5860043	total: 164ms	remaining: 164ms
2:	learn: 0.5409173	total: 229ms	remaining: 76.3ms


[I 2024-03-20 05:48:23,881] Trial 9 finished with value: 0.9719500166544734 and parameters: {'iterations': 4, 'max_depth': 7, 'learning_rate': 0.05856117429652094, 'reg_lambda': 1.1145647496070223e-07, 'subsample': 0.21054847148689843, 'colsample_bylevel': 0.8593886025980819}. Best is trial 3 with value: 0.9845227367298944.


3:	learn: 0.5003302	total: 357ms	remaining: 0us
0:	learn: 0.6819949	total: 164ms	remaining: 656ms
1:	learn: 0.6710895	total: 324ms	remaining: 487ms
2:	learn: 0.6604270	total: 494ms	remaining: 330ms
3:	learn: 0.6499961	total: 653ms	remaining: 163ms
4:	learn: 0.6397957	total: 822ms	remaining: 0us
 - fold4 - 0.8913


In [None]:
# 最適な閾値を探索
def tune_threshold(train, oof_pred):
    """
    閾値の探索
    """
    def func(x_list, df, oof):
        score = f1_score(df[TARGET], oof>x_list[0])
        return -score
    x0 = [0.5]
    result = minimize(func, x0,  args=(train, oof_pred), method="nelder-mead")
    threshold = result.x[0]
    return threshold

In [None]:
lgb_threshold = tune_threshold(train, lgb_oof_pred)
cb_threshold = tune_threshold(train, cb_oof_pred)
xgb_threshold = tune_threshold(train, xgb_oof_pred)
print(len(lgb_threshold), len(cb_threshold), len(xgb_threshold))

In [None]:
# testデータのpredict
def test_predict(model, threshold, test=test, how='lgb'):
    """modelのpredictを行ってくれる関数"""
    if how='lgb':
        dataset = Pool(test.drop(columns=TARGET), label=test[TARGET])
    elif how='cb':
        x_train, y_train = X[idx_train], y[idx_train]
        c_train = Pool(x_train, label=y_train)
        pred_y = model.predict(c_valid)
    elif how='xgb':
        dvalid = xgb.DMatrix(X_valid, label=y_valid)
        pred_y = model.predict(dvalid, ntree_limit=model.best_ntree_limit)
        auc = roc_auc_score(y_valid, pred_y)
    probabilities = model.predict_proba(test)[:, 1]
    return (probabilities > threshold).astype(int)

In [None]:
lgb_predict = test_predict(lgb_model[0], lgb_threshold, test=test, how='lgb')

In [None]:
lgb_models[0].predict(Pool(test.drop(columns=TARGET), label=test[TARGET]))

In [None]:
test.drop(columns=TARGET)

In [None]:
all_predictions = [lgb_predict, cb_predict, xgb_predict]
combined_predictions = np.vstack(all_predictions)
final_predictions = [Counter(row).most_common(1)[0][0] for row in combined_predictions]

In [None]:
f1_score()

In [None]:
# with open('../model/model.pickle', mode='wb') as f:
#     pickle.dump(models,f,protocol=2)

In [None]:
# # ========================================
# # feature importance
# # ========================================
# def visualize_importance(models, feat_train_df):
#     feature_importance_df = pd.DataFrame()
#     for i, model in enumerate(models):
#         _df = pd.DataFrame()
#         _df["feature_importance"] = model.feature_importances_
#         _df["column"] = feat_train_df.columns
#         _df["fold"] = i + 1
#         feature_importance_df = pd.concat([feature_importance_df, _df],
#                                           axis=0, ignore_index=True)

#     order = feature_importance_df.groupby("column")\
#         .sum()[["feature_importance"]]\
#         .sort_values("feature_importance", ascending=False).index

#     fig, ax = plt.subplots(figsize=(12, max(6, len(order) * .25)))
#     sns.boxplot(data=feature_importance_df,
#                   x="feature_importance",
#                   y="column",
#                   order=order,
#                   ax=ax,
#                   palette="viridis",
#                   orient="h")
#     ax.tick_params(axis="x", rotation=90)
#     ax.set_title("Importance")
#     ax.grid()
#     fig.tight_layout()
#     return fig, ax, feature_importance_df

# fig, ax, feature_importance_df = visualize_importance(models, train[feature_cols])

In [None]:
models