In [1]:
import gc
from glob import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import yaml

import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics

import optuna
from optuna.visualization import (
    plot_contour
    , plot_edf
    , plot_intermediate_values
    , plot_optimization_history
    , plot_parallel_coordinate
    , plot_param_importances
    , plot_slice
)

import sys
sys.path.append("../utils")
from metrics import compute_recall_at4, compute_normalized_gini, compute_amex_metric

# Seed NumPy's global random number generator.
np.random.seed(2112)
# Remove the limit on how many DataFrame columns pandas displays.
pd.set_option('display.max_columns', None)

In [2]:
import plotly.express as px
import plotly.io as pio
# Default plotly renderer used for the figures shown in this notebook.
pio.renderers.default = "jupyterlab"

In [3]:
# Record the LightGBM version used for this run.
lgb.__version__

'3.3.2'

***
## load and prepare data

In [4]:
# List the files available in the processed dsv04 data directory.
!ls ../data/processed/dsv04

test.parquet  train.parquet


In [5]:
# Processed training table from the dsv04 directory.
train = pd.read_parquet("../data/processed/dsv04/train.parquet")
# Labels are indexed by customer_ID so they can be joined to `train` on the index.
train_labels = pd.read_csv("../data/raw/train_labels.csv", index_col="customer_ID")

In [6]:
# Snapshot the model input columns now, before the labels are merged in,
# so that `target` never ends up among the features.
input_feats = list(train.columns)
len(input_feats)

1639

In [7]:
# Attach the labels by index (inner join), then move the index into a regular
# column so the frame gets a fresh 0..n-1 index.
train = train.merge(
    train_labels, how="inner", left_index=True, right_index=True
).reset_index()

# The labels now live inside `train`; release the standalone copy.
del train_labels
gc.collect()

0

***
## model tuning

Each Optuna trial trains one model per fold of a stratified 3-fold split and is scored on the out-of-fold predictions.

In [8]:
# Shuffled, seeded 3-fold stratified split, materialised once so that every
# call that receives `skf_split` works on exactly the same folds.
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=2112)
skf_split = [
    (train_idx, valid_idx)
    for train_idx, valid_idx in skf.split(train, train["target"].values)
]

In [9]:
# Base LightGBM parameters; the `with` block closes the file on exit, so no
# explicit close() is needed.
with open("../data/config/lgbm-bce-dsv04.yml", "r") as file:
    default_params = yaml.safe_load(file)

# `num_iterations` is sampled per trial in `objective`, so drop any fixed value
# (tolerating configs that do not define it) and switch the booster to DART.
default_params.pop("num_iterations", None)
default_params["boosting"] = "dart"

display(default_params)

{'bagging_fraction': 1.0,
 'bagging_freq': 1,
 'bin_construct_sample_cnt': 100000000,
 'feature_fraction': 0.15000000000000002,
 'feature_pre_filter': True,
 'force_col_wise': True,
 'lambda_l1': 5.996099571922015,
 'lambda_l2': 2.8900783163910697,
 'learning_rate': 0.05,
 'max_bin': 63,
 'metric': 'None',
 'min_data_in_leaf': 1000,
 'min_gain_to_split': 0.313937968985787,
 'num_leaves': 15,
 'objective': 'binary',
 'path_smooth': 1.476306537276899,
 'seed': 2112,
 'verbosity': -1,
 'boosting': 'dart'}

In [10]:
def train_models(dataframe: pd.DataFrame, split: list, model_params: dict) -> pd.DataFrame:
    """Train one LightGBM model per fold and collect out-of-fold predictions.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Data containing the columns listed in the module-level ``input_feats``
        plus a ``target`` column.
    split : list
        ``(train_idx, valid_idx)`` pairs of positional row indices, e.g. the
        materialised output of ``StratifiedKFold.split``.
    model_params : dict
        Parameters forwarded to ``lgb.train``.

    Returns
    -------
    pd.DataFrame
        Copy of the ``target`` column plus a float ``pred`` column holding each
        row's out-of-fold prediction. Rows that never appear in a validation
        fold keep the sentinel value ``-1.0``.
    """
    # Float buffer: predictions are floats, so starting from an integer -1
    # column would force pandas to upcast on the first assignment.
    oof_pred = np.full(len(dataframe), -1.0, dtype=np.float64)

    for train_idx, valid_idx in split:
        # Fold indices are positions, not labels, so select with .iloc; this
        # stays correct even when the frame's index is not 0..n-1.
        train_df = dataframe.iloc[train_idx]
        valid_df = dataframe.iloc[valid_idx]

        train_dset = lgb.Dataset(
            data=train_df[input_feats],
            label=train_df["target"].values,
            free_raw_data=True
        )
        model = lgb.train(
            params=model_params,
            train_set=train_dset
        )
        oof_pred[valid_idx] = model.predict(valid_df[input_feats])

        # Release the fold's dataset and booster before the next fold.
        del train_dset, model
        gc.collect()

    # dataframe to store the oof predictions
    oof = dataframe[["target"]].copy()
    oof["pred"] = oof_pred
    return oof

In [11]:
def objective(trial):
    """Optuna objective: cross-validated AMEX metric for one DART configuration.

    Samples ``num_iterations`` and the DART dropout parameters from ``trial``,
    overlays them on the module-level ``default_params``, trains on the
    ``skf_split`` folds of ``train`` and returns the metric computed on the
    out-of-fold predictions.
    """
    sampled_params = dict(
        num_iterations = trial.suggest_int("num_iterations", 1000, 7000, step=50),
        # dart
        # suggest_float(..., step=q) replaces the deprecated
        # suggest_discrete_uniform(..., q) and samples the same grid.
        drop_rate = trial.suggest_float("drop_rate", 0.05, 0.3, step=0.01),
        max_drop = trial.suggest_int("max_drop", 10, 500, step=10),
        skip_drop = trial.suggest_float("skip_drop", 0.2, 0.8, step=0.05),
    )
    # Sampled values take precedence over the defaults on key collisions.
    model_params = {**default_params, **sampled_params}

    oof = train_models(train, skf_split, model_params)
    return compute_amex_metric(oof.target.values, oof.pred.values)

In [12]:
# Set to False to only load the stored study without running new trials.
do_optimize = True

# The study is stored in SQLite and reloaded when it already exists, so the
# search continues from the trials recorded so far.
study = optuna.create_study(
    storage="sqlite:///lgbm-dart-bce-dsv04.db",
    study_name="lgbm-dart-bce-dsv04",
    direction="maximize",
    load_if_exists=True,
)

if do_optimize:
    study.optimize(
        func=objective,
        n_trials=1000,
        timeout=4 * 24 * 60 * 60,  # seconds, i.e. 4 days
        n_jobs=1,
        gc_after_trial=True,
    )

[32m[I 2022-08-04 03:41:47,137][0m Using an existing study with name 'lgbm-dart-bce-dsv04' instead of creating a new one.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 04:19:02,155][0m Trial 68 finished with value: 0.7957112516672616 and parameters: {'num_iterations': 2750, 'drop_rate': 0.060000000000000005, 'max_drop': 120, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 05:07:05,040][0m Trial 69 finished with value: 0.7964764858825374 and parameters: {'num_iterations': 4300, 'drop_rate': 0.09, 'max_drop': 100, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 05:53:48,153][0m Trial 70 finished with value: 0.7954780192239252 and parameters: {'num_iterations': 4350, 'drop_rate': 0.09, 'max_drop': 90, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 06:29:29,620][0m Trial 71 finished with value: 0.796387006592817 and parameters: {'num_iterations': 4950, 'drop_rate': 0.07, 'max_drop': 30, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 07:33:26,039][0m Trial 72 finished with value: 0.7957550371208463 and parameters: {'num_iterations': 4600, 'drop_rate': 0.1, 'max_drop': 110, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 08:10:53,655][0m Trial 73 finished with value: 0.7957111458081036 and parameters: {'num_iterations': 4050, 'drop_rate': 0.14, 'max_drop': 80, 'skip_drop': 0.8}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 09:51:07,518][0m Trial 74 finished with value: 0.7962731045832812 and parameters: {'num_iterations': 4300, 'drop_rate': 0.13, 'max_drop': 220, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 10:33:23,400][0m Trial 75 finished with value: 0.7960451055762632 and parameters: {'num_iterations': 5200, 'drop_rate': 0.09, 'max_drop': 50, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 12:48:35,999][0m Trial 76 finished with value: 0.7913334344772897 and parameters: {'num_iterations': 3750, 'drop_rate': 0.11, 'max_drop': 270, 'skip_drop': 0.35000000000000003}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 14:19:17,957][0m Trial 77 finished with value: 0.795657199688902 and parameters: {'num_iterations': 3200, 'drop_rate': 0.12000000000000001, 'max_drop': 480, 'skip_drop': 0.6000000000000001}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 15:33:18,096][0m Trial 78 finished with value: 0.7960413707037921 and parameters: {'num_iterations': 4850, 'drop_rate': 0.060000000000000005, 'max_drop': 200, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 16:23:17,091][0m Trial 79 finished with value: 0.7965725304554911 and parameters: {'num_iterations': 4650, 'drop_rate': 0.15000000000000002, 'max_drop': 60, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 17:07:50,908][0m Trial 80 finished with value: 0.795862327608742 and parameters: {'num_iterations': 4700, 'drop_rate': 0.16, 'max_drop': 70, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 17:43:36,947][0m Trial 81 finished with value: 0.7962678033163437 and parameters: {'num_iterations': 4150, 'drop_rate': 0.15000000000000002, 'max_drop': 40, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 18:31:00,015][0m Trial 82 finished with value: 0.7962281462784069 and parameters: {'num_iterations': 4400, 'drop_rate': 0.12000000000000001, 'max_drop': 60, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 19:32:35,599][0m Trial 83 finished with value: 0.7959700346908047 and parameters: {'num_iterations': 4600, 'drop_rate': 0.21000000000000002, 'max_drop': 100, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 21:22:42,834][0m Trial 84 finished with value: 0.7960564301759181 and parameters: {'num_iterations': 5050, 'drop_rate': 0.08, 'max_drop': 190, 'skip_drop': 0.6000000000000001}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 21:42:13,306][0m Trial 85 finished with value: 0.7961715317009572 and parameters: {'num_iterations': 3500, 'drop_rate': 0.13, 'max_drop': 10, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-04 23:56:39,944][0m Trial 86 finished with value: 0.7962268129065593 and parameters: {'num_iterations': 5300, 'drop_rate': 0.14, 'max_drop': 240, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 01:42:17,830][0m Trial 87 finished with value: 0.7962566448887831 and parameters: {'num_iterations': 4900, 'drop_rate': 0.16, 'max_drop': 140, 'skip_drop': 0.55}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 03:52:00,649][0m Trial 88 finished with value: 0.7962392314462488 and parameters: {'num_iterations': 5600, 'drop_rate': 0.15000000000000002, 'max_drop': 170, 'skip_drop': 0.6000000000000001}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 04:56:33,971][0m Trial 89 finished with value: 0.7954019640791886 and parameters: {'num_iterations': 4050, 'drop_rate': 0.16999999999999998, 'max_drop': 230, 'skip_drop': 0.8}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 07:16:57,192][0m Trial 90 finished with value: 0.796166565254393 and parameters: {'num_iterations': 4400, 'drop_rate': 0.19, 'max_drop': 330, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 07:52:44,614][0m Trial 91 finished with value: 0.796387006592817 and parameters: {'num_iterations': 4950, 'drop_rate': 0.07, 'max_drop': 30, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 08:25:07,321][0m Trial 92 finished with value: 0.7960357106719247 and parameters: {'num_iterations': 5150, 'drop_rate': 0.07, 'max_drop': 20, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 09:15:49,739][0m Trial 93 finished with value: 0.7964109441907348 and parameters: {'num_iterations': 4700, 'drop_rate': 0.1, 'max_drop': 60, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 10:53:47,289][0m Trial 94 finished with value: 0.7958739663029102 and parameters: {'num_iterations': 4250, 'drop_rate': 0.09, 'max_drop': 420, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 11:52:09,678][0m Trial 95 finished with value: 0.7960926991363448 and parameters: {'num_iterations': 4700, 'drop_rate': 0.11, 'max_drop': 80, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 12:36:17,160][0m Trial 96 finished with value: 0.7959541870636127 and parameters: {'num_iterations': 3850, 'drop_rate': 0.1, 'max_drop': 60, 'skip_drop': 0.6000000000000001}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 14:18:54,283][0m Trial 97 finished with value: 0.7955628397897097 and parameters: {'num_iterations': 4550, 'drop_rate': 0.08, 'max_drop': 210, 'skip_drop': 0.6000000000000001}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 16:56:20,085][0m Trial 98 finished with value: 0.7959127943303909 and parameters: {'num_iterations': 5350, 'drop_rate': 0.18, 'max_drop': 270, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 17:42:29,194][0m Trial 99 finished with value: 0.795649271894906 and parameters: {'num_iterations': 4200, 'drop_rate': 0.2, 'max_drop': 90, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 21:38:17,442][0m Trial 100 finished with value: 0.7959708998279383 and parameters: {'num_iterations': 6050, 'drop_rate': 0.16999999999999998, 'max_drop': 300, 'skip_drop': 0.55}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 22:17:02,506][0m Trial 101 finished with value: 0.7960900573253732 and parameters: {'num_iterations': 4850, 'drop_rate': 0.07, 'max_drop': 40, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 22:46:56,621][0m Trial 102 finished with value: 0.7959105672173384 and parameters: {'num_iterations': 4450, 'drop_rate': 0.060000000000000005, 'max_drop': 30, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-05 23:37:31,325][0m Trial 103 finished with value: 0.7963323522827768 and parameters: {'num_iterations': 5100, 'drop_rate': 0.1, 'max_drop': 60, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 00:09:02,622][0m Trial 104 finished with value: 0.7963250791502111 and parameters: {'num_iterations': 4750, 'drop_rate': 0.08, 'max_drop': 20, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 00:55:02,713][0m Trial 105 finished with value: 0.7955504115342283 and parameters: {'num_iterations': 5800, 'drop_rate': 0.05, 'max_drop': 40, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 01:38:49,240][0m Trial 106 finished with value: 0.7964391972407712 and parameters: {'num_iterations': 4950, 'drop_rate': 0.1, 'max_drop': 50, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 02:31:04,999][0m Trial 107 finished with value: 0.7960261528940157 and parameters: {'num_iterations': 4600, 'drop_rate': 0.1, 'max_drop': 70, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 03:29:38,132][0m Trial 108 finished with value: 0.7954391711154651 and parameters: {'num_iterations': 4000, 'drop_rate': 0.09, 'max_drop': 230, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 04:44:15,264][0m Trial 109 finished with value: 0.7961090191865334 and parameters: {'num_iterations': 4700, 'drop_rate': 0.12000000000000001, 'max_drop': 100, 'skip_drop': 0.6000000000000001}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 05:46:25,253][0m Trial 110 finished with value: 0.7960570963525793 and parameters: {'num_iterations': 5000, 'drop_rate': 0.11, 'max_drop': 80, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 06:20:11,090][0m Trial 111 finished with value: 0.7958391011291436 and parameters: {'num_iterations': 5400, 'drop_rate': 0.09, 'max_drop': 20, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 07:03:24,932][0m Trial 112 finished with value: 0.7966806850684205 and parameters: {'num_iterations': 4950, 'drop_rate': 0.19, 'max_drop': 50, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 07:52:42,742][0m Trial 113 finished with value: 0.7962602057968124 and parameters: {'num_iterations': 5550, 'drop_rate': 0.21000000000000002, 'max_drop': 50, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 08:38:52,892][0m Trial 114 finished with value: 0.7966629375923067 and parameters: {'num_iterations': 4300, 'drop_rate': 0.2, 'max_drop': 60, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 09:21:30,747][0m Trial 115 finished with value: 0.796021675257926 and parameters: {'num_iterations': 4350, 'drop_rate': 0.22000000000000003, 'max_drop': 60, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 10:01:13,318][0m Trial 116 finished with value: 0.7961556841381535 and parameters: {'num_iterations': 4800, 'drop_rate': 0.2, 'max_drop': 50, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 11:05:41,018][0m Trial 117 finished with value: 0.7958277662708002 and parameters: {'num_iterations': 4600, 'drop_rate': 0.19, 'max_drop': 70, 'skip_drop': 0.5}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 12:15:33,541][0m Trial 118 finished with value: 0.7957529957626814 and parameters: {'num_iterations': 5150, 'drop_rate': 0.19, 'max_drop': 90, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 12:49:30,494][0m Trial 119 finished with value: 0.7961947948318175 and parameters: {'num_iterations': 4250, 'drop_rate': 0.18, 'max_drop': 40, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 13:15:10,562][0m Trial 120 finished with value: 0.7959120238279729 and parameters: {'num_iterations': 4500, 'drop_rate': 0.2, 'max_drop': 10, 'skip_drop': 0.6000000000000001}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 15:05:03,834][0m Trial 121 finished with value: 0.7960847071977042 and parameters: {'num_iterations': 4900, 'drop_rate': 0.15000000000000002, 'max_drop': 200, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 16:13:51,587][0m Trial 122 finished with value: 0.7958950773727684 and parameters: {'num_iterations': 4450, 'drop_rate': 0.14, 'max_drop': 110, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 17:56:52,683][0m Trial 123 finished with value: 0.7961906667189884 and parameters: {'num_iterations': 4150, 'drop_rate': 0.18, 'max_drop': 220, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 20:06:59,028][0m Trial 124 finished with value: 0.7954966143437976 and parameters: {'num_iterations': 3750, 'drop_rate': 0.22000000000000003, 'max_drop': 490, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 22:15:49,240][0m Trial 125 finished with value: 0.7958063638425524 and parameters: {'num_iterations': 4650, 'drop_rate': 0.21000000000000002, 'max_drop': 250, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 22:42:53,127][0m Trial 126 finished with value: 0.7933970367522607 and parameters: {'num_iterations': 1550, 'drop_rate': 0.13, 'max_drop': 380, 'skip_drop': 0.6000000000000001}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-06 23:25:34,987][0m Trial 127 finished with value: 0.79568753700347 and parameters: {'num_iterations': 4800, 'drop_rate': 0.2, 'max_drop': 60, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 00:56:30,039][0m Trial 128 finished with value: 0.7960124819536183 and parameters: {'num_iterations': 3900, 'drop_rate': 0.11, 'max_drop': 270, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 03:47:49,328][0m Trial 129 finished with value: 0.7955087254158286 and parameters: {'num_iterations': 5250, 'drop_rate': 0.19, 'max_drop': 420, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 05:27:19,224][0m Trial 130 finished with value: 0.7958284514939185 and parameters: {'num_iterations': 4300, 'drop_rate': 0.16, 'max_drop': 180, 'skip_drop': 0.6000000000000001}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 06:02:04,186][0m Trial 131 finished with value: 0.7961017274214817 and parameters: {'num_iterations': 4850, 'drop_rate': 0.08, 'max_drop': 30, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 06:38:00,209][0m Trial 132 finished with value: 0.7958209172633458 and parameters: {'num_iterations': 5000, 'drop_rate': 0.1, 'max_drop': 30, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 07:34:57,634][0m Trial 133 finished with value: 0.7959387555028534 and parameters: {'num_iterations': 5150, 'drop_rate': 0.07, 'max_drop': 80, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 08:18:33,801][0m Trial 134 finished with value: 0.7957199647668938 and parameters: {'num_iterations': 5050, 'drop_rate': 0.09, 'max_drop': 40, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 09:37:32,756][0m Trial 135 finished with value: 0.7958069778086923 and parameters: {'num_iterations': 4950, 'drop_rate': 0.08, 'max_drop': 450, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 10:20:58,279][0m Trial 136 finished with value: 0.7958467823262184 and parameters: {'num_iterations': 4500, 'drop_rate': 0.060000000000000005, 'max_drop': 50, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 11:04:52,462][0m Trial 137 finished with value: 0.7962203124268008 and parameters: {'num_iterations': 4050, 'drop_rate': 0.16999999999999998, 'max_drop': 70, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 14:07:24,740][0m Trial 138 finished with value: 0.7901948509223892 and parameters: {'num_iterations': 4750, 'drop_rate': 0.12000000000000001, 'max_drop': 240, 'skip_drop': 0.30000000000000004}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 15:04:35,159][0m Trial 139 finished with value: 0.7956689023864849 and parameters: {'num_iterations': 4350, 'drop_rate': 0.18, 'max_drop': 160, 'skip_drop': 0.8}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 15:20:26,090][0m Trial 140 finished with value: 0.7958133456991754 and parameters: {'num_iterations': 2600, 'drop_rate': 0.1, 'max_drop': 10, 'skip_drop': 0.6000000000000001}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 15:49:49,426][0m Trial 141 finished with value: 0.7961668075327544 and parameters: {'num_iterations': 4650, 'drop_rate': 0.07, 'max_drop': 20, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 16:25:27,962][0m Trial 142 finished with value: 0.796387006592817 and parameters: {'num_iterations': 4950, 'drop_rate': 0.07, 'max_drop': 30, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 17:08:20,537][0m Trial 143 finished with value: 0.7961019240560185 and parameters: {'num_iterations': 5300, 'drop_rate': 0.09, 'max_drop': 50, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 17:51:28,700][0m Trial 144 finished with value: 0.7960634142192957 and parameters: {'num_iterations': 4950, 'drop_rate': 0.05, 'max_drop': 40, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 18:35:24,056][0m Trial 145 finished with value: 0.7963947725585567 and parameters: {'num_iterations': 6200, 'drop_rate': 0.07, 'max_drop': 30, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 19:38:02,675][0m Trial 146 finished with value: 0.7953540671955556 and parameters: {'num_iterations': 6400, 'drop_rate': 0.28, 'max_drop': 60, 'skip_drop': 0.7}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-07 22:39:44,677][0m Trial 147 finished with value: 0.7961780762580597 and parameters: {'num_iterations': 6000, 'drop_rate': 0.2, 'max_drop': 210, 'skip_drop': 0.55}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-08 02:36:08,665][0m Trial 148 finished with value: 0.7957706039391443 and parameters: {'num_iterations': 5900, 'drop_rate': 0.08, 'max_drop': 340, 'skip_drop': 0.45}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-08 03:39:03,699][0m Trial 149 finished with value: 0.7962627545979319 and parameters: {'num_iterations': 5500, 'drop_rate': 0.14, 'max_drop': 70, 'skip_drop': 0.65}. Best is trial 62 with value: 0.7968637138993222.[0m



Found `num_iterations` in params. Will use it instead of argument



[32m[I 2022-08-08 04:48:05,968][0m Trial 150 finished with value: 0.7957108310503919 and parameters: {'num_iterations': 4450, 'drop_rate': 0.09, 'max_drop': 290, 'skip_drop': 0.75}. Best is trial 62 with value: 0.7968637138993222.[0m


In [13]:
# Trials with the highest objective value first.
study.trials_dataframe().sort_values("value", ascending=False).head(20)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_drop_rate,params_max_drop,params_num_iterations,params_skip_drop,state
62,62,0.796864,2022-08-03 22:14:26.784423,2022-08-03 22:50:00.732739,0 days 00:35:33.948316,0.1,40,4350,0.7,COMPLETE
112,112,0.796681,2022-08-06 06:20:11.251245,2022-08-06 07:03:24.908742,0 days 00:43:13.657497,0.19,50,4950,0.7,COMPLETE
114,114,0.796663,2022-08-06 07:52:42.895430,2022-08-06 08:38:52.869665,0 days 00:46:09.974235,0.2,60,4300,0.65,COMPLETE
16,16,0.796638,2022-07-29 14:42:08.222255,2022-07-29 17:21:58.205647,0 days 02:39:49.983392,0.14,220,5750,0.6,COMPLETE
79,79,0.796573,2022-08-04 15:33:18.245685,2022-08-04 16:23:17.068902,0 days 00:49:58.823217,0.15,60,4650,0.65,COMPLETE
28,28,0.796567,2022-07-30 17:30:28.167280,2022-07-30 20:32:23.234022,0 days 03:01:55.066742,0.2,490,5250,0.7,COMPLETE
50,50,0.796478,2022-08-01 23:37:29.570490,2022-08-02 02:05:19.308352,0 days 02:27:49.737862,0.17,250,4800,0.6,COMPLETE
69,69,0.796476,2022-08-04 04:19:02.323358,2022-08-04 05:07:05.016649,0 days 00:48:02.693291,0.09,100,4300,0.75,COMPLETE
22,22,0.796458,2022-07-30 07:36:39.928330,2022-07-30 08:55:42.709861,0 days 01:19:02.781531,0.08,210,4250,0.65,COMPLETE
44,44,0.796451,2022-08-01 09:50:23.076169,2022-08-01 12:27:05.024984,0 days 02:36:41.948815,0.19,360,4750,0.65,COMPLETE


In [14]:
# Optimization history of the trials in the study.
plot_optimization_history(study)

In [15]:
# The importance evaluation can fail (for example when the study holds too few
# completed trials); report the reason instead of silently hiding it. The
# figure is displayed explicitly because an expression inside a `try` block is
# not rendered as the cell output.
try:
    display(plot_param_importances(study))
except Exception as err:
    print(f"Could not plot parameter importances: {err!r}")

In [16]:
# Slice plot of the objective value against each sampled parameter.
plot_slice(study)

In [17]:
# Empirical distribution function of the study's objective values.
plot_edf(study)

In [18]:
# Parallel-coordinate view of the sampled parameters and the objective value.
plot_parallel_coordinate(study)

In [19]:
# Full parameter set of the best trial: the base configuration overlaid with
# the values found by the study (the study's values win on key collisions).
best_params = {**default_params, **study.best_params}
best_params

{'bagging_fraction': 1.0,
 'bagging_freq': 1,
 'bin_construct_sample_cnt': 100000000,
 'feature_fraction': 0.15000000000000002,
 'feature_pre_filter': True,
 'force_col_wise': True,
 'lambda_l1': 5.996099571922015,
 'lambda_l2': 2.8900783163910697,
 'learning_rate': 0.05,
 'max_bin': 63,
 'metric': 'None',
 'min_data_in_leaf': 1000,
 'min_gain_to_split': 0.313937968985787,
 'num_leaves': 15,
 'objective': 'binary',
 'path_smooth': 1.476306537276899,
 'seed': 2112,
 'verbosity': -1,
 'boosting': 'dart',
 'drop_rate': 0.1,
 'max_drop': 40,
 'num_iterations': 4350,
 'skip_drop': 0.7}

In [20]:
# Persist the best configuration; the `with` block closes the file on exit,
# so no explicit close() is needed.
with open("../data/config/lgbm-dart-bce-dsv04.yml", "w") as file:
    yaml.dump(best_params, file, default_flow_style=False)

***