In [1]:
# Required Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import lightgbm
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score, classification_report,confusion_matrix, ConfusionMatrixDisplay, roc_auc_score, auc, roc_curve
import pickle
import logging
import sys
import optuna
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load Dataset
data = pd.read_csv("../Data/preprocessed_data")

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9730347 entries, 0 to 9730346
Data columns (total 30 columns):
 #   Column                          Dtype  
---  ------                          -----  
 0   Unnamed: 0                      int64  
 1   order_id                        int64  
 2   product_id                      int64  
 3   add_to_cart_order               int64  
 4   reordered                       int64  
 5   user_id                         int64  
 6   order_number                    int64  
 7   days_since_prior_order          float64
 8   average_days_between_purchases  float64
 9   product_purchase_frequency      int64  
 10  total_purchases                 int64  
 11  interval_std_dev                float64
 12  product_reorder_rate            float64
 13  users_general_reorder_rate      float64
 14  avg_add_to_cart_order           float64
 15  order_frequency_by_dow          int64  
 16  aisle_target_enc                float64
 17  department_target_enc      

In [5]:
# Data Preview
data.head()

Unnamed: 0.1,Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,user_id,order_number,days_since_prior_order,average_days_between_purchases,product_purchase_frequency,...,dow_1,dow_2,dow_3,dow_4,dow_5,dow_6,order_hour_sin,order_hour_cos,add_to_cart_order_encoded,days_since_prior_order_binned
0,0,2722718,8619,9,0,41627,5,15.0,15.0,1,...,1,0,0,0,0,0,0.258819,-0.965926,8.0,8-15
1,1,2089674,13870,10,0,146296,28,29.0,29.0,1,...,0,0,0,1,0,0,0.8660254,-0.5,9.0,24-31
2,2,3024155,2029,1,0,6200,2,4.0,4.0,1,...,0,0,0,0,1,0,0.7071068,-0.707107,0.0,0-7
3,3,2890872,16062,2,0,8337,32,2.0,2.0,1,...,0,0,0,0,1,0,1.224647e-16,-1.0,1.0,0-7
4,4,1798802,14335,3,0,155167,1,0.0,7.166667,6,...,0,1,0,0,0,0,-0.9659258,-0.258819,2.0,Unknown


In [6]:
data = data.drop(columns=("Unnamed: 0"))

In [7]:
data.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,user_id,order_number,days_since_prior_order,average_days_between_purchases,product_purchase_frequency,total_purchases,...,dow_1,dow_2,dow_3,dow_4,dow_5,dow_6,order_hour_sin,order_hour_cos,add_to_cart_order_encoded,days_since_prior_order_binned
0,2722718,8619,9,0,41627,5,15.0,15.0,1,1,...,1,0,0,0,0,0,0.258819,-0.965926,8.0,8-15
1,2089674,13870,10,0,146296,28,29.0,29.0,1,1,...,0,0,0,1,0,0,0.8660254,-0.5,9.0,24-31
2,3024155,2029,1,0,6200,2,4.0,4.0,1,1,...,0,0,0,0,1,0,0.7071068,-0.707107,0.0,0-7
3,2890872,16062,2,0,8337,32,2.0,2.0,1,1,...,0,0,0,0,1,0,1.224647e-16,-1.0,1.0,0-7
4,1798802,14335,3,0,155167,1,0.0,7.166667,6,6,...,0,1,0,0,0,0,-0.9659258,-0.258819,2.0,Unknown


In [8]:
data['reordered'].unique()

array([0, 1])

In [9]:
data['reordered'].value_counts()

reordered
1    5737961
0    3992386
Name: count, dtype: int64

# Selected Model - LightGBM & The Hyperparameters to consider

## Learning Parameters
* **learning_rate** - A smaller value may increas overall performance but will require more iterations
* **n_estimators** - Number of boosting rounds
* **num_leaves** - Maximum number of leaves in a tree. Larger values increase accuracy but risk overfitting
* **max_depth** - Maximum Depth of a tree
* **min_data_in_leaf** - Minimum number of samples per leaf. Higher Values prevent overfitting

## Regularization Parameters
* **lambda_l1** - L1 regularization term on weights. Adds sparsity to the model
* **lambda_l2** - L2 regularization term on weights. Helps control overfitting
* **min_gain_to_split** - Minimum loss reduction required to split a leaf node. Larger values make the model training faster
* **early_stopping_round** - Helps stop training when performance does not improve

## Data Sampling Parameters
* **bagging_fraction** - Fraction of data used for each iteration (for bagging). Helps with overfitting
* **bagging_freq** - Frequency of bagging. Also helps deal with overfittiing

### Hyperparameter tuning Technique used is Bayesian Optimization Technique

In [10]:
#Train Test Split
data = data.drop(columns=(["order_id","product_id","user_id","days_since_prior_order_binned"]))
X = data.drop(columns="reordered")
y = data["reordered"]
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
print("X_train Shape:",X_train.shape)
print("X_test Shape:",X_test.shape)
print("y_train Shape:", y_train.shape)
print("y_test Shape:",y_test.shape)

X_train Shape: (7784277, 24)
X_test Shape: (1946070, 24)
y_train Shape: (7784277,)
y_test Shape: (1946070,)


In [4]:
# ObJective Function which will be evaluated 
def objective_function(trial):
    # Hyperparameter Search Space
    param_grid = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 1000),
        "num_leaves": trial.suggest_int("num_leaves", 20, 200),
        "max_depth": trial.suggest_int("max_depth", -1, 15),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 10, 100),
        "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-4, 10.0),
        "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-4, 10.0),
        "min_gain_to_split": trial.suggest_loguniform("min_gain_to_split", 1e-4, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.6, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 10),
    }
    print(param_grid)
    model = lightgbm.LGBMClassifier(**param_grid, n_jobs=1, learning_rate=0.01, random_state=42)
    model.fit(X_train.fillna(0),y_train)
    y_pred = model.predict(X_test.fillna(0))
    
    return f1_score(y_test,y_pred)



**Day 1 of Hyperparameter tuning**

In [None]:
optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
study_name = "purchase_hp_tuning"
storage_name = "sqlite:///{}.db".format(study_name)

study = optuna.create_study(study_name = study_name, storage=storage_name, load_if_exists=True, direction='maximize')
study.optimize(objective_function, n_trials=50, n_jobs=2)


[I 2024-11-21 12:40:30,276] A new study created in RDB with name: purchase_hp_tuning


A new study created in RDB with name: purchase_hp_tuning
{'n_estimators': 493, 'num_leaves': 88, 'max_depth': 8, 'min_data_in_leaf': 32, 'lambda_l1': 0.0003197464313668558, 'lambda_l2': 0.3148857374630904, 'min_gain_to_split': 0.047546251367120755, 'bagging_fraction': 0.7560656997649566, 'bagging_freq': 1}
{'n_estimators': 399, 'num_leaves': 51, 'max_depth': 0, 'min_data_in_leaf': 41, 'lambda_l1': 0.2878043360786342, 'lambda_l2': 0.008851088544823778, 'min_gain_to_split': 0.003415565231852756, 'bagging_fraction': 0.7681675131542959, 'bagging_freq': 5}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.239290 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data

[I 2024-11-21 12:49:29,585] Trial 0 finished with value: 0.8466903830583237 and parameters: {'n_estimators': 493, 'num_leaves': 88, 'max_depth': 8, 'min_data_in_leaf': 32, 'lambda_l1': 0.0003197464313668558, 'lambda_l2': 0.3148857374630904, 'min_gain_to_split': 0.047546251367120755, 'bagging_fraction': 0.7560656997649566, 'bagging_freq': 1}. Best is trial 0 with value: 0.8466903830583237.


Trial 0 finished with value: 0.8466903830583237 and parameters: {'n_estimators': 493, 'num_leaves': 88, 'max_depth': 8, 'min_data_in_leaf': 32, 'lambda_l1': 0.0003197464313668558, 'lambda_l2': 0.3148857374630904, 'min_gain_to_split': 0.047546251367120755, 'bagging_fraction': 0.7560656997649566, 'bagging_freq': 1}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 275, 'num_leaves': 45, 'max_depth': 1, 'min_data_in_leaf': 14, 'lambda_l1': 0.02542367993824871, 'lambda_l2': 0.0022976806895551975, 'min_gain_to_split': 0.0025181238264553817, 'bagging_fraction': 0.8893307966155001, 'bagging_freq': 2}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.233316 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the t

[I 2024-11-21 12:50:27,576] Trial 1 finished with value: 0.8465953782461692 and parameters: {'n_estimators': 399, 'num_leaves': 51, 'max_depth': 0, 'min_data_in_leaf': 41, 'lambda_l1': 0.2878043360786342, 'lambda_l2': 0.008851088544823778, 'min_gain_to_split': 0.003415565231852756, 'bagging_fraction': 0.7681675131542959, 'bagging_freq': 5}. Best is trial 0 with value: 0.8466903830583237.


Trial 1 finished with value: 0.8465953782461692 and parameters: {'n_estimators': 399, 'num_leaves': 51, 'max_depth': 0, 'min_data_in_leaf': 41, 'lambda_l1': 0.2878043360786342, 'lambda_l2': 0.008851088544823778, 'min_gain_to_split': 0.003415565231852756, 'bagging_fraction': 0.7681675131542959, 'bagging_freq': 5}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 368, 'num_leaves': 67, 'max_depth': 9, 'min_data_in_leaf': 27, 'lambda_l1': 0.00014330200570152266, 'lambda_l2': 0.0005646851407321899, 'min_gain_to_split': 0.0025090133547010906, 'bagging_fraction': 0.8591359744414431, 'bagging_freq': 2}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.240920 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the

[I 2024-11-21 12:51:44,377] Trial 2 finished with value: 0.8296936905881171 and parameters: {'n_estimators': 275, 'num_leaves': 45, 'max_depth': 1, 'min_data_in_leaf': 14, 'lambda_l1': 0.02542367993824871, 'lambda_l2': 0.0022976806895551975, 'min_gain_to_split': 0.0025181238264553817, 'bagging_fraction': 0.8893307966155001, 'bagging_freq': 2}. Best is trial 0 with value: 0.8466903830583237.


Trial 2 finished with value: 0.8296936905881171 and parameters: {'n_estimators': 275, 'num_leaves': 45, 'max_depth': 1, 'min_data_in_leaf': 14, 'lambda_l1': 0.02542367993824871, 'lambda_l2': 0.0022976806895551975, 'min_gain_to_split': 0.0025181238264553817, 'bagging_fraction': 0.8893307966155001, 'bagging_freq': 2}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 151, 'num_leaves': 67, 'max_depth': 3, 'min_data_in_leaf': 18, 'lambda_l1': 0.0019692064974112253, 'lambda_l2': 0.7463893515176858, 'min_gain_to_split': 0.08869090189807251, 'bagging_fraction': 0.7032484686571885, 'bagging_freq': 1}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.245153 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the tr

[I 2024-11-21 12:53:15,418] Trial 4 finished with value: 0.8296936905881171 and parameters: {'n_estimators': 151, 'num_leaves': 67, 'max_depth': 3, 'min_data_in_leaf': 18, 'lambda_l1': 0.0019692064974112253, 'lambda_l2': 0.7463893515176858, 'min_gain_to_split': 0.08869090189807251, 'bagging_fraction': 0.7032484686571885, 'bagging_freq': 1}. Best is trial 0 with value: 0.8466903830583237.


Trial 4 finished with value: 0.8296936905881171 and parameters: {'n_estimators': 151, 'num_leaves': 67, 'max_depth': 3, 'min_data_in_leaf': 18, 'lambda_l1': 0.0019692064974112253, 'lambda_l2': 0.7463893515176858, 'min_gain_to_split': 0.08869090189807251, 'bagging_fraction': 0.7032484686571885, 'bagging_freq': 1}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 243, 'num_leaves': 32, 'max_depth': 3, 'min_data_in_leaf': 28, 'lambda_l1': 0.009303958513247985, 'lambda_l2': 0.09124779085684168, 'min_gain_to_split': 0.029481873254158356, 'bagging_fraction': 0.982866839230788, 'bagging_freq': 2}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.234297 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train

[I 2024-11-21 12:56:27,638] Trial 5 finished with value: 0.8306096571639574 and parameters: {'n_estimators': 243, 'num_leaves': 32, 'max_depth': 3, 'min_data_in_leaf': 28, 'lambda_l1': 0.009303958513247985, 'lambda_l2': 0.09124779085684168, 'min_gain_to_split': 0.029481873254158356, 'bagging_fraction': 0.982866839230788, 'bagging_freq': 2}. Best is trial 0 with value: 0.8466903830583237.


Trial 5 finished with value: 0.8306096571639574 and parameters: {'n_estimators': 243, 'num_leaves': 32, 'max_depth': 3, 'min_data_in_leaf': 28, 'lambda_l1': 0.009303958513247985, 'lambda_l2': 0.09124779085684168, 'min_gain_to_split': 0.029481873254158356, 'bagging_fraction': 0.982866839230788, 'bagging_freq': 2}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 182, 'num_leaves': 48, 'max_depth': 0, 'min_data_in_leaf': 12, 'lambda_l1': 0.1855059279124565, 'lambda_l2': 0.0018751973684356903, 'min_gain_to_split': 0.021772897336598953, 'bagging_fraction': 0.7501684403361888, 'bagging_freq': 2}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.260007 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the trai

[I 2024-11-21 13:00:25,106] Trial 3 finished with value: 0.8462274041420372 and parameters: {'n_estimators': 368, 'num_leaves': 67, 'max_depth': 9, 'min_data_in_leaf': 27, 'lambda_l1': 0.00014330200570152266, 'lambda_l2': 0.0005646851407321899, 'min_gain_to_split': 0.0025090133547010906, 'bagging_fraction': 0.8591359744414431, 'bagging_freq': 2}. Best is trial 0 with value: 0.8466903830583237.


Trial 3 finished with value: 0.8462274041420372 and parameters: {'n_estimators': 368, 'num_leaves': 67, 'max_depth': 9, 'min_data_in_leaf': 27, 'lambda_l1': 0.00014330200570152266, 'lambda_l2': 0.0005646851407321899, 'min_gain_to_split': 0.0025090133547010906, 'bagging_fraction': 0.8591359744414431, 'bagging_freq': 2}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 424, 'num_leaves': 23, 'max_depth': 0, 'min_data_in_leaf': 12, 'lambda_l1': 0.00015932659098261617, 'lambda_l2': 0.006129852868567366, 'min_gain_to_split': 0.3183289222708715, 'bagging_fraction': 0.9476145823547178, 'bagging_freq': 5}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.225796 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in t

[I 2024-11-21 13:00:46,439] Trial 6 finished with value: 0.8449038645228316 and parameters: {'n_estimators': 182, 'num_leaves': 48, 'max_depth': 0, 'min_data_in_leaf': 12, 'lambda_l1': 0.1855059279124565, 'lambda_l2': 0.0018751973684356903, 'min_gain_to_split': 0.021772897336598953, 'bagging_fraction': 0.7501684403361888, 'bagging_freq': 2}. Best is trial 0 with value: 0.8466903830583237.


Trial 6 finished with value: 0.8449038645228316 and parameters: {'n_estimators': 182, 'num_leaves': 48, 'max_depth': 0, 'min_data_in_leaf': 12, 'lambda_l1': 0.1855059279124565, 'lambda_l2': 0.0018751973684356903, 'min_gain_to_split': 0.021772897336598953, 'bagging_fraction': 0.7501684403361888, 'bagging_freq': 2}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 447, 'num_leaves': 40, 'max_depth': 10, 'min_data_in_leaf': 18, 'lambda_l1': 0.012944472326855815, 'lambda_l2': 0.13994380821261976, 'min_gain_to_split': 0.005614883765746764, 'bagging_fraction': 0.9304797533212285, 'bagging_freq': 2}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.226090 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the tr

[I 2024-11-21 13:08:51,480] Trial 7 finished with value: 0.8451391918859571 and parameters: {'n_estimators': 424, 'num_leaves': 23, 'max_depth': 0, 'min_data_in_leaf': 12, 'lambda_l1': 0.00015932659098261617, 'lambda_l2': 0.006129852868567366, 'min_gain_to_split': 0.3183289222708715, 'bagging_fraction': 0.9476145823547178, 'bagging_freq': 5}. Best is trial 0 with value: 0.8466903830583237.


Trial 7 finished with value: 0.8451391918859571 and parameters: {'n_estimators': 424, 'num_leaves': 23, 'max_depth': 0, 'min_data_in_leaf': 12, 'lambda_l1': 0.00015932659098261617, 'lambda_l2': 0.006129852868567366, 'min_gain_to_split': 0.3183289222708715, 'bagging_fraction': 0.9476145823547178, 'bagging_freq': 5}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 384, 'num_leaves': 47, 'max_depth': 2, 'min_data_in_leaf': 45, 'lambda_l1': 0.027983667813180024, 'lambda_l2': 0.04969925652658965, 'min_gain_to_split': 0.0025066061070099207, 'bagging_fraction': 0.8903597361083495, 'bagging_freq': 1}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.288801 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the t

[I 2024-11-21 13:11:48,456] Trial 9 finished with value: 0.8303964248636165 and parameters: {'n_estimators': 384, 'num_leaves': 47, 'max_depth': 2, 'min_data_in_leaf': 45, 'lambda_l1': 0.027983667813180024, 'lambda_l2': 0.04969925652658965, 'min_gain_to_split': 0.0025066061070099207, 'bagging_fraction': 0.8903597361083495, 'bagging_freq': 1}. Best is trial 0 with value: 0.8466903830583237.


Trial 9 finished with value: 0.8303964248636165 and parameters: {'n_estimators': 384, 'num_leaves': 47, 'max_depth': 2, 'min_data_in_leaf': 45, 'lambda_l1': 0.027983667813180024, 'lambda_l2': 0.04969925652658965, 'min_gain_to_split': 0.0025066061070099207, 'bagging_fraction': 0.8903597361083495, 'bagging_freq': 1}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 434, 'num_leaves': 29, 'max_depth': 6, 'min_data_in_leaf': 33, 'lambda_l1': 0.007096645924927689, 'lambda_l2': 0.0017848948601145074, 'min_gain_to_split': 0.022812888598522558, 'bagging_fraction': 0.7528807202536052, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.225397 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the 

[I 2024-11-21 13:12:10,067] Trial 8 finished with value: 0.8464928296004521 and parameters: {'n_estimators': 447, 'num_leaves': 40, 'max_depth': 10, 'min_data_in_leaf': 18, 'lambda_l1': 0.012944472326855815, 'lambda_l2': 0.13994380821261976, 'min_gain_to_split': 0.005614883765746764, 'bagging_fraction': 0.9304797533212285, 'bagging_freq': 2}. Best is trial 0 with value: 0.8466903830583237.


Trial 8 finished with value: 0.8464928296004521 and parameters: {'n_estimators': 447, 'num_leaves': 40, 'max_depth': 10, 'min_data_in_leaf': 18, 'lambda_l1': 0.012944472326855815, 'lambda_l2': 0.13994380821261976, 'min_gain_to_split': 0.005614883765746764, 'bagging_fraction': 0.9304797533212285, 'bagging_freq': 2}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 107, 'num_leaves': 97, 'max_depth': 7, 'min_data_in_leaf': 37, 'lambda_l1': 0.0010218701878397744, 'lambda_l2': 0.998828814055777, 'min_gain_to_split': 0.00019136430379201072, 'bagging_fraction': 0.8007750643703423, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.227442 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the t

[I 2024-11-21 13:14:42,543] Trial 11 finished with value: 0.846282666713258 and parameters: {'n_estimators': 107, 'num_leaves': 97, 'max_depth': 7, 'min_data_in_leaf': 37, 'lambda_l1': 0.0010218701878397744, 'lambda_l2': 0.998828814055777, 'min_gain_to_split': 0.00019136430379201072, 'bagging_fraction': 0.8007750643703423, 'bagging_freq': 4}. Best is trial 0 with value: 0.8466903830583237.


Trial 11 finished with value: 0.846282666713258 and parameters: {'n_estimators': 107, 'num_leaves': 97, 'max_depth': 7, 'min_data_in_leaf': 37, 'lambda_l1': 0.0010218701878397744, 'lambda_l2': 0.998828814055777, 'min_gain_to_split': 0.00019136430379201072, 'bagging_fraction': 0.8007750643703423, 'bagging_freq': 4}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 500, 'num_leaves': 91, 'max_depth': 6, 'min_data_in_leaf': 38, 'lambda_l1': 0.6040011483399675, 'lambda_l2': 0.019264605810054337, 'min_gain_to_split': 0.00022967241814195936, 'bagging_fraction': 0.7824352051795272, 'bagging_freq': 5}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.238971 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the t

[I 2024-11-21 13:19:20,116] Trial 10 finished with value: 0.8438610332823527 and parameters: {'n_estimators': 434, 'num_leaves': 29, 'max_depth': 6, 'min_data_in_leaf': 33, 'lambda_l1': 0.007096645924927689, 'lambda_l2': 0.0017848948601145074, 'min_gain_to_split': 0.022812888598522558, 'bagging_fraction': 0.7528807202536052, 'bagging_freq': 4}. Best is trial 0 with value: 0.8466903830583237.


Trial 10 finished with value: 0.8438610332823527 and parameters: {'n_estimators': 434, 'num_leaves': 29, 'max_depth': 6, 'min_data_in_leaf': 33, 'lambda_l1': 0.007096645924927689, 'lambda_l2': 0.0017848948601145074, 'min_gain_to_split': 0.022812888598522558, 'bagging_fraction': 0.7528807202536052, 'bagging_freq': 4}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 491, 'num_leaves': 86, 'max_depth': 5, 'min_data_in_leaf': 46, 'lambda_l1': 0.6880093582581738, 'lambda_l2': 0.019613609530793802, 'min_gain_to_split': 0.0003162398997813507, 'bagging_fraction': 0.7994780335754141, 'bagging_freq': 5}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.238562 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the 

[I 2024-11-21 13:23:38,861] Trial 12 finished with value: 0.8447820124261931 and parameters: {'n_estimators': 500, 'num_leaves': 91, 'max_depth': 6, 'min_data_in_leaf': 38, 'lambda_l1': 0.6040011483399675, 'lambda_l2': 0.019264605810054337, 'min_gain_to_split': 0.00022967241814195936, 'bagging_fraction': 0.7824352051795272, 'bagging_freq': 5}. Best is trial 0 with value: 0.8466903830583237.


Trial 12 finished with value: 0.8447820124261931 and parameters: {'n_estimators': 500, 'num_leaves': 91, 'max_depth': 6, 'min_data_in_leaf': 38, 'lambda_l1': 0.6040011483399675, 'lambda_l2': 0.019264605810054337, 'min_gain_to_split': 0.00022967241814195936, 'bagging_fraction': 0.7824352051795272, 'bagging_freq': 5}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 346, 'num_leaves': 82, 'max_depth': 8, 'min_data_in_leaf': 46, 'lambda_l1': 0.15561383788069386, 'lambda_l2': 0.00013976950243964792, 'min_gain_to_split': 0.39596607947656115, 'bagging_fraction': 0.7065225356359828, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.233652 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the 

[I 2024-11-21 13:26:45,182] Trial 13 finished with value: 0.8432263381442427 and parameters: {'n_estimators': 491, 'num_leaves': 86, 'max_depth': 5, 'min_data_in_leaf': 46, 'lambda_l1': 0.6880093582581738, 'lambda_l2': 0.019613609530793802, 'min_gain_to_split': 0.0003162398997813507, 'bagging_fraction': 0.7994780335754141, 'bagging_freq': 5}. Best is trial 0 with value: 0.8466903830583237.


Trial 13 finished with value: 0.8432263381442427 and parameters: {'n_estimators': 491, 'num_leaves': 86, 'max_depth': 5, 'min_data_in_leaf': 46, 'lambda_l1': 0.6880093582581738, 'lambda_l2': 0.019613609530793802, 'min_gain_to_split': 0.0003162398997813507, 'bagging_fraction': 0.7994780335754141, 'bagging_freq': 5}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 339, 'num_leaves': 78, 'max_depth': 8, 'min_data_in_leaf': 50, 'lambda_l1': 0.07786244362755763, 'lambda_l2': 0.00010041587679773909, 'min_gain_to_split': 0.1467888804152651, 'bagging_fraction': 0.7099334786297835, 'bagging_freq': 3}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.254798 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the tr

[I 2024-11-21 13:31:36,479] Trial 14 finished with value: 0.8456888879328657 and parameters: {'n_estimators': 346, 'num_leaves': 82, 'max_depth': 8, 'min_data_in_leaf': 46, 'lambda_l1': 0.15561383788069386, 'lambda_l2': 0.00013976950243964792, 'min_gain_to_split': 0.39596607947656115, 'bagging_fraction': 0.7065225356359828, 'bagging_freq': 4}. Best is trial 0 with value: 0.8466903830583237.


Trial 14 finished with value: 0.8456888879328657 and parameters: {'n_estimators': 346, 'num_leaves': 82, 'max_depth': 8, 'min_data_in_leaf': 46, 'lambda_l1': 0.15561383788069386, 'lambda_l2': 0.00013976950243964792, 'min_gain_to_split': 0.39596607947656115, 'bagging_fraction': 0.7065225356359828, 'bagging_freq': 4}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 321, 'num_leaves': 60, 'max_depth': -1, 'min_data_in_leaf': 38, 'lambda_l1': 0.0006771717222795387, 'lambda_l2': 0.22288396117031878, 'min_gain_to_split': 0.07755978252473082, 'bagging_fraction': 0.8311623017672382, 'bagging_freq': 3}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.221263 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the 

[I 2024-11-21 13:34:39,936] Trial 15 finished with value: 0.8455589371367981 and parameters: {'n_estimators': 339, 'num_leaves': 78, 'max_depth': 8, 'min_data_in_leaf': 50, 'lambda_l1': 0.07786244362755763, 'lambda_l2': 0.00010041587679773909, 'min_gain_to_split': 0.1467888804152651, 'bagging_fraction': 0.7099334786297835, 'bagging_freq': 3}. Best is trial 0 with value: 0.8466903830583237.


Trial 15 finished with value: 0.8455589371367981 and parameters: {'n_estimators': 339, 'num_leaves': 78, 'max_depth': 8, 'min_data_in_leaf': 50, 'lambda_l1': 0.07786244362755763, 'lambda_l2': 0.00010041587679773909, 'min_gain_to_split': 0.1467888804152651, 'bagging_fraction': 0.7099334786297835, 'bagging_freq': 3}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 299, 'num_leaves': 61, 'max_depth': -1, 'min_data_in_leaf': 39, 'lambda_l1': 0.0007202602358759796, 'lambda_l2': 0.20429059859161802, 'min_gain_to_split': 0.001164047823646269, 'bagging_fraction': 0.7573959648278977, 'bagging_freq': 3}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.262766 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the 

[I 2024-11-21 13:39:36,113] Trial 16 finished with value: 0.8459691368704731 and parameters: {'n_estimators': 321, 'num_leaves': 60, 'max_depth': -1, 'min_data_in_leaf': 38, 'lambda_l1': 0.0006771717222795387, 'lambda_l2': 0.22288396117031878, 'min_gain_to_split': 0.07755978252473082, 'bagging_fraction': 0.8311623017672382, 'bagging_freq': 3}. Best is trial 0 with value: 0.8466903830583237.


Trial 16 finished with value: 0.8459691368704731 and parameters: {'n_estimators': 321, 'num_leaves': 60, 'max_depth': -1, 'min_data_in_leaf': 38, 'lambda_l1': 0.0006771717222795387, 'lambda_l2': 0.22288396117031878, 'min_gain_to_split': 0.07755978252473082, 'bagging_fraction': 0.8311623017672382, 'bagging_freq': 3}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 411, 'num_leaves': 56, 'max_depth': 4, 'min_data_in_leaf': 24, 'lambda_l1': 0.002901399821318662, 'lambda_l2': 0.00660364753391927, 'min_gain_to_split': 0.0010690698918110171, 'bagging_fraction': 0.7519632709042083, 'bagging_freq': 1}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.238529 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the 

[I 2024-11-21 13:41:47,498] Trial 17 finished with value: 0.845813075270125 and parameters: {'n_estimators': 299, 'num_leaves': 61, 'max_depth': -1, 'min_data_in_leaf': 39, 'lambda_l1': 0.0007202602358759796, 'lambda_l2': 0.20429059859161802, 'min_gain_to_split': 0.001164047823646269, 'bagging_fraction': 0.7573959648278977, 'bagging_freq': 3}. Best is trial 0 with value: 0.8466903830583237.


Trial 17 finished with value: 0.845813075270125 and parameters: {'n_estimators': 299, 'num_leaves': 61, 'max_depth': -1, 'min_data_in_leaf': 39, 'lambda_l1': 0.0007202602358759796, 'lambda_l2': 0.20429059859161802, 'min_gain_to_split': 0.001164047823646269, 'bagging_fraction': 0.7573959648278977, 'bagging_freq': 3}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 394, 'num_leaves': 74, 'max_depth': 4, 'min_data_in_leaf': 23, 'lambda_l1': 0.002961018741216511, 'lambda_l2': 0.007417400876079939, 'min_gain_to_split': 0.011644369234741976, 'bagging_fraction': 0.8298629291374493, 'bagging_freq': 1}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.233640 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the 

[I 2024-11-21 13:43:56,946] Trial 18 finished with value: 0.8391669920816445 and parameters: {'n_estimators': 411, 'num_leaves': 56, 'max_depth': 4, 'min_data_in_leaf': 24, 'lambda_l1': 0.002901399821318662, 'lambda_l2': 0.00660364753391927, 'min_gain_to_split': 0.0010690698918110171, 'bagging_fraction': 0.7519632709042083, 'bagging_freq': 1}. Best is trial 0 with value: 0.8466903830583237.


Trial 18 finished with value: 0.8391669920816445 and parameters: {'n_estimators': 411, 'num_leaves': 56, 'max_depth': 4, 'min_data_in_leaf': 24, 'lambda_l1': 0.002901399821318662, 'lambda_l2': 0.00660364753391927, 'min_gain_to_split': 0.0010690698918110171, 'bagging_fraction': 0.7519632709042083, 'bagging_freq': 1}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 465, 'num_leaves': 77, 'max_depth': 10, 'min_data_in_leaf': 33, 'lambda_l1': 0.00035623684760966974, 'lambda_l2': 0.0420126788237583, 'min_gain_to_split': 0.009339113457204255, 'bagging_fraction': 0.8350410077825369, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.241582 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the

[I 2024-11-21 13:45:58,993] Trial 19 finished with value: 0.8387436271253699 and parameters: {'n_estimators': 394, 'num_leaves': 74, 'max_depth': 4, 'min_data_in_leaf': 23, 'lambda_l1': 0.002961018741216511, 'lambda_l2': 0.007417400876079939, 'min_gain_to_split': 0.011644369234741976, 'bagging_fraction': 0.8298629291374493, 'bagging_freq': 1}. Best is trial 0 with value: 0.8466903830583237.


Trial 19 finished with value: 0.8387436271253699 and parameters: {'n_estimators': 394, 'num_leaves': 74, 'max_depth': 4, 'min_data_in_leaf': 23, 'lambda_l1': 0.002961018741216511, 'lambda_l2': 0.007417400876079939, 'min_gain_to_split': 0.011644369234741976, 'bagging_fraction': 0.8298629291374493, 'bagging_freq': 1}. Best is trial 0 with value: 0.8466903830583237.
{'n_estimators': 463, 'num_leaves': 72, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00032280984080187275, 'lambda_l2': 0.03900412304355575, 'min_gain_to_split': 0.008543524849183625, 'bagging_fraction': 0.7888876601009247, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.223013 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in th

[I 2024-11-21 13:55:47,124] Trial 20 finished with value: 0.8471655713377146 and parameters: {'n_estimators': 465, 'num_leaves': 77, 'max_depth': 10, 'min_data_in_leaf': 33, 'lambda_l1': 0.00035623684760966974, 'lambda_l2': 0.0420126788237583, 'min_gain_to_split': 0.009339113457204255, 'bagging_fraction': 0.8350410077825369, 'bagging_freq': 4}. Best is trial 20 with value: 0.8471655713377146.


Trial 20 finished with value: 0.8471655713377146 and parameters: {'n_estimators': 465, 'num_leaves': 77, 'max_depth': 10, 'min_data_in_leaf': 33, 'lambda_l1': 0.00035623684760966974, 'lambda_l2': 0.0420126788237583, 'min_gain_to_split': 0.009339113457204255, 'bagging_fraction': 0.8350410077825369, 'bagging_freq': 4}. Best is trial 20 with value: 0.8471655713377146.
{'n_estimators': 452, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 32, 'lambda_l1': 0.0003038437017413591, 'lambda_l2': 0.04181118416064641, 'min_gain_to_split': 0.00876765068601062, 'bagging_fraction': 0.7819739430346578, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.225293 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in t

[I 2024-11-21 13:57:16,197] Trial 21 finished with value: 0.8471681481101274 and parameters: {'n_estimators': 463, 'num_leaves': 72, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00032280984080187275, 'lambda_l2': 0.03900412304355575, 'min_gain_to_split': 0.008543524849183625, 'bagging_fraction': 0.7888876601009247, 'bagging_freq': 4}. Best is trial 21 with value: 0.8471681481101274.


Trial 21 finished with value: 0.8471681481101274 and parameters: {'n_estimators': 463, 'num_leaves': 72, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00032280984080187275, 'lambda_l2': 0.03900412304355575, 'min_gain_to_split': 0.008543524849183625, 'bagging_fraction': 0.7888876601009247, 'bagging_freq': 4}. Best is trial 21 with value: 0.8471681481101274.
{'n_estimators': 465, 'num_leaves': 74, 'max_depth': 10, 'min_data_in_leaf': 34, 'lambda_l1': 0.00031041899191162133, 'lambda_l2': 0.05159077903758788, 'min_gain_to_split': 0.01050078193376217, 'bagging_fraction': 0.8173514840684977, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.218067 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in 

[I 2024-11-21 14:07:31,846] Trial 22 finished with value: 0.8472194433527457 and parameters: {'n_estimators': 452, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 32, 'lambda_l1': 0.0003038437017413591, 'lambda_l2': 0.04181118416064641, 'min_gain_to_split': 0.00876765068601062, 'bagging_fraction': 0.7819739430346578, 'bagging_freq': 4}. Best is trial 22 with value: 0.8472194433527457.


Trial 22 finished with value: 0.8472194433527457 and parameters: {'n_estimators': 452, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 32, 'lambda_l1': 0.0003038437017413591, 'lambda_l2': 0.04181118416064641, 'min_gain_to_split': 0.00876765068601062, 'bagging_fraction': 0.7819739430346578, 'bagging_freq': 4}. Best is trial 22 with value: 0.8472194433527457.
{'n_estimators': 459, 'num_leaves': 96, 'max_depth': 10, 'min_data_in_leaf': 34, 'lambda_l1': 0.0003264693918762482, 'lambda_l2': 0.05039865747223907, 'min_gain_to_split': 0.010219142340477973, 'bagging_fraction': 0.8509226436506518, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.223558 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in t

[I 2024-11-21 14:08:47,724] Trial 23 finished with value: 0.8471747406535517 and parameters: {'n_estimators': 465, 'num_leaves': 74, 'max_depth': 10, 'min_data_in_leaf': 34, 'lambda_l1': 0.00031041899191162133, 'lambda_l2': 0.05159077903758788, 'min_gain_to_split': 0.01050078193376217, 'bagging_fraction': 0.8173514840684977, 'bagging_freq': 4}. Best is trial 22 with value: 0.8472194433527457.


Trial 23 finished with value: 0.8471747406535517 and parameters: {'n_estimators': 465, 'num_leaves': 74, 'max_depth': 10, 'min_data_in_leaf': 34, 'lambda_l1': 0.00031041899191162133, 'lambda_l2': 0.05159077903758788, 'min_gain_to_split': 0.01050078193376217, 'bagging_fraction': 0.8173514840684977, 'bagging_freq': 4}. Best is trial 22 with value: 0.8472194433527457.
{'n_estimators': 453, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.0001046423545327664, 'lambda_l2': 0.045763436034925845, 'min_gain_to_split': 0.012304957820122324, 'bagging_fraction': 0.7995226531201381, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.227673 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in

[I 2024-11-21 14:20:36,999] Trial 24 finished with value: 0.8472556501582889 and parameters: {'n_estimators': 459, 'num_leaves': 96, 'max_depth': 10, 'min_data_in_leaf': 34, 'lambda_l1': 0.0003264693918762482, 'lambda_l2': 0.05039865747223907, 'min_gain_to_split': 0.010219142340477973, 'bagging_fraction': 0.8509226436506518, 'bagging_freq': 4}. Best is trial 24 with value: 0.8472556501582889.


Trial 24 finished with value: 0.8472556501582889 and parameters: {'n_estimators': 459, 'num_leaves': 96, 'max_depth': 10, 'min_data_in_leaf': 34, 'lambda_l1': 0.0003264693918762482, 'lambda_l2': 0.05039865747223907, 'min_gain_to_split': 0.010219142340477973, 'bagging_fraction': 0.8509226436506518, 'bagging_freq': 4}. Best is trial 24 with value: 0.8472556501582889.
{'n_estimators': 236, 'num_leaves': 96, 'max_depth': 9, 'min_data_in_leaf': 29, 'lambda_l1': 0.00011168403713809393, 'lambda_l2': 0.01943842030516792, 'min_gain_to_split': 0.015135754836616498, 'bagging_fraction': 0.8717695119709388, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.258615 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in t

[I 2024-11-21 14:21:32,891] Trial 25 finished with value: 0.847211075934245 and parameters: {'n_estimators': 453, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.0001046423545327664, 'lambda_l2': 0.045763436034925845, 'min_gain_to_split': 0.012304957820122324, 'bagging_fraction': 0.7995226531201381, 'bagging_freq': 4}. Best is trial 24 with value: 0.8472556501582889.


Trial 25 finished with value: 0.847211075934245 and parameters: {'n_estimators': 453, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.0001046423545327664, 'lambda_l2': 0.045763436034925845, 'min_gain_to_split': 0.012304957820122324, 'bagging_fraction': 0.7995226531201381, 'bagging_freq': 4}. Best is trial 24 with value: 0.8472556501582889.
{'n_estimators': 52, 'num_leaves': 98, 'max_depth': 9, 'min_data_in_leaf': 30, 'lambda_l1': 0.00012338204890208603, 'lambda_l2': 0.021329998337834354, 'min_gain_to_split': 0.0007344700778356326, 'bagging_fraction': 0.864740526268772, 'bagging_freq': 3}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.299071 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in 

[I 2024-11-21 14:23:16,416] Trial 27 finished with value: 0.8452538493778555 and parameters: {'n_estimators': 52, 'num_leaves': 98, 'max_depth': 9, 'min_data_in_leaf': 30, 'lambda_l1': 0.00012338204890208603, 'lambda_l2': 0.021329998337834354, 'min_gain_to_split': 0.0007344700778356326, 'bagging_fraction': 0.864740526268772, 'bagging_freq': 3}. Best is trial 24 with value: 0.8472556501582889.


Trial 27 finished with value: 0.8452538493778555 and parameters: {'n_estimators': 52, 'num_leaves': 98, 'max_depth': 9, 'min_data_in_leaf': 30, 'lambda_l1': 0.00012338204890208603, 'lambda_l2': 0.021329998337834354, 'min_gain_to_split': 0.0007344700778356326, 'bagging_fraction': 0.864740526268772, 'bagging_freq': 3}. Best is trial 24 with value: 0.8472556501582889.
{'n_estimators': 243, 'num_leaves': 93, 'max_depth': 9, 'min_data_in_leaf': 42, 'lambda_l1': 0.0013328525373152269, 'lambda_l2': 0.1012631875208148, 'min_gain_to_split': 0.005613312097323764, 'bagging_fraction': 0.8960104361164523, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.259854 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the

[I 2024-11-21 14:27:30,236] Trial 26 finished with value: 0.845802657271946 and parameters: {'n_estimators': 236, 'num_leaves': 96, 'max_depth': 9, 'min_data_in_leaf': 29, 'lambda_l1': 0.00011168403713809393, 'lambda_l2': 0.01943842030516792, 'min_gain_to_split': 0.015135754836616498, 'bagging_fraction': 0.8717695119709388, 'bagging_freq': 4}. Best is trial 24 with value: 0.8472556501582889.


Trial 26 finished with value: 0.845802657271946 and parameters: {'n_estimators': 236, 'num_leaves': 96, 'max_depth': 9, 'min_data_in_leaf': 29, 'lambda_l1': 0.00011168403713809393, 'lambda_l2': 0.01943842030516792, 'min_gain_to_split': 0.015135754836616498, 'bagging_fraction': 0.8717695119709388, 'bagging_freq': 4}. Best is trial 24 with value: 0.8472556501582889.
{'n_estimators': 430, 'num_leaves': 92, 'max_depth': 7, 'min_data_in_leaf': 42, 'lambda_l1': 0.0014768857659330484, 'lambda_l2': 0.0810868345873645, 'min_gain_to_split': 0.004235127166065474, 'bagging_fraction': 0.7323047005404864, 'bagging_freq': 3}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.238507 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the 

[I 2024-11-21 14:30:11,172] Trial 28 finished with value: 0.8457417650535638 and parameters: {'n_estimators': 243, 'num_leaves': 93, 'max_depth': 9, 'min_data_in_leaf': 42, 'lambda_l1': 0.0013328525373152269, 'lambda_l2': 0.1012631875208148, 'min_gain_to_split': 0.005613312097323764, 'bagging_fraction': 0.8960104361164523, 'bagging_freq': 4}. Best is trial 24 with value: 0.8472556501582889.


Trial 28 finished with value: 0.8457417650535638 and parameters: {'n_estimators': 243, 'num_leaves': 93, 'max_depth': 9, 'min_data_in_leaf': 42, 'lambda_l1': 0.0013328525373152269, 'lambda_l2': 0.1012631875208148, 'min_gain_to_split': 0.005613312097323764, 'bagging_fraction': 0.8960104361164523, 'bagging_freq': 4}. Best is trial 24 with value: 0.8472556501582889.
{'n_estimators': 358, 'num_leaves': 86, 'max_depth': 7, 'min_data_in_leaf': 24, 'lambda_l1': 0.0004485890591057891, 'lambda_l2': 0.36415172527194584, 'min_gain_to_split': 0.051072551392503364, 'bagging_fraction': 0.7275287693637206, 'bagging_freq': 5}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.226976 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the 

[I 2024-11-21 14:36:29,722] Trial 29 finished with value: 0.8452693182903903 and parameters: {'n_estimators': 430, 'num_leaves': 92, 'max_depth': 7, 'min_data_in_leaf': 42, 'lambda_l1': 0.0014768857659330484, 'lambda_l2': 0.0810868345873645, 'min_gain_to_split': 0.004235127166065474, 'bagging_fraction': 0.7323047005404864, 'bagging_freq': 3}. Best is trial 24 with value: 0.8472556501582889.


Trial 29 finished with value: 0.8452693182903903 and parameters: {'n_estimators': 430, 'num_leaves': 92, 'max_depth': 7, 'min_data_in_leaf': 42, 'lambda_l1': 0.0014768857659330484, 'lambda_l2': 0.0810868345873645, 'min_gain_to_split': 0.004235127166065474, 'bagging_fraction': 0.7323047005404864, 'bagging_freq': 3}. Best is trial 24 with value: 0.8472556501582889.
{'n_estimators': 357, 'num_leaves': 86, 'max_depth': 7, 'min_data_in_leaf': 25, 'lambda_l1': 0.00023918672578613118, 'lambda_l2': 0.36329345609816727, 'min_gain_to_split': 0.038816490337086784, 'bagging_fraction': 0.8133902117958752, 'bagging_freq': 5}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.229644 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the

[I 2024-11-21 14:37:35,636] Trial 30 finished with value: 0.8444098433827493 and parameters: {'n_estimators': 358, 'num_leaves': 86, 'max_depth': 7, 'min_data_in_leaf': 24, 'lambda_l1': 0.0004485890591057891, 'lambda_l2': 0.36415172527194584, 'min_gain_to_split': 0.051072551392503364, 'bagging_fraction': 0.7275287693637206, 'bagging_freq': 5}. Best is trial 24 with value: 0.8472556501582889.


Trial 30 finished with value: 0.8444098433827493 and parameters: {'n_estimators': 358, 'num_leaves': 86, 'max_depth': 7, 'min_data_in_leaf': 24, 'lambda_l1': 0.0004485890591057891, 'lambda_l2': 0.36415172527194584, 'min_gain_to_split': 0.051072551392503364, 'bagging_fraction': 0.7275287693637206, 'bagging_freq': 5}. Best is trial 24 with value: 0.8472556501582889.
{'n_estimators': 472, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00022263396207797137, 'lambda_l2': 0.03404409194756445, 'min_gain_to_split': 0.03997431193169202, 'bagging_fraction': 0.8147245992254589, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.243283 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in t

[I 2024-11-21 14:44:15,369] Trial 31 finished with value: 0.8443984698925877 and parameters: {'n_estimators': 357, 'num_leaves': 86, 'max_depth': 7, 'min_data_in_leaf': 25, 'lambda_l1': 0.00023918672578613118, 'lambda_l2': 0.36329345609816727, 'min_gain_to_split': 0.038816490337086784, 'bagging_fraction': 0.8133902117958752, 'bagging_freq': 5}. Best is trial 24 with value: 0.8472556501582889.


Trial 31 finished with value: 0.8443984698925877 and parameters: {'n_estimators': 357, 'num_leaves': 86, 'max_depth': 7, 'min_data_in_leaf': 25, 'lambda_l1': 0.00023918672578613118, 'lambda_l2': 0.36329345609816727, 'min_gain_to_split': 0.038816490337086784, 'bagging_fraction': 0.8133902117958752, 'bagging_freq': 5}. Best is trial 24 with value: 0.8472556501582889.
{'n_estimators': 476, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00022587921190618158, 'lambda_l2': 0.04709362563936111, 'min_gain_to_split': 0.01583586676625686, 'bagging_fraction': 0.8147438025482064, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.245986 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in 

[I 2024-11-21 14:50:24,381] Trial 32 finished with value: 0.8473495380911816 and parameters: {'n_estimators': 472, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00022263396207797137, 'lambda_l2': 0.03404409194756445, 'min_gain_to_split': 0.03997431193169202, 'bagging_fraction': 0.8147245992254589, 'bagging_freq': 4}. Best is trial 32 with value: 0.8473495380911816.


Trial 32 finished with value: 0.8473495380911816 and parameters: {'n_estimators': 472, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00022263396207797137, 'lambda_l2': 0.03404409194756445, 'min_gain_to_split': 0.03997431193169202, 'bagging_fraction': 0.8147245992254589, 'bagging_freq': 4}. Best is trial 32 with value: 0.8473495380911816.
{'n_estimators': 485, 'num_leaves': 98, 'max_depth': 9, 'min_data_in_leaf': 31, 'lambda_l1': 0.0002056362076497198, 'lambda_l2': 0.010452132525854979, 'min_gain_to_split': 0.01774293615061127, 'bagging_fraction': 0.7790527376033966, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.247528 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in t

[I 2024-11-21 14:57:02,582] Trial 33 finished with value: 0.847310351232179 and parameters: {'n_estimators': 476, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00022587921190618158, 'lambda_l2': 0.04709362563936111, 'min_gain_to_split': 0.01583586676625686, 'bagging_fraction': 0.8147438025482064, 'bagging_freq': 4}. Best is trial 32 with value: 0.8473495380911816.


Trial 33 finished with value: 0.847310351232179 and parameters: {'n_estimators': 476, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00022587921190618158, 'lambda_l2': 0.04709362563936111, 'min_gain_to_split': 0.01583586676625686, 'bagging_fraction': 0.8147438025482064, 'bagging_freq': 4}. Best is trial 32 with value: 0.8473495380911816.
{'n_estimators': 484, 'num_leaves': 100, 'max_depth': 9, 'min_data_in_leaf': 31, 'lambda_l1': 0.0005194573920006524, 'lambda_l2': 0.003496296782240002, 'min_gain_to_split': 0.06339161950714192, 'bagging_fraction': 0.7766216783151613, 'bagging_freq': 4}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.241266 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in t

[I 2024-11-21 15:02:40,657] Trial 34 finished with value: 0.8471259310545065 and parameters: {'n_estimators': 485, 'num_leaves': 98, 'max_depth': 9, 'min_data_in_leaf': 31, 'lambda_l1': 0.0002056362076497198, 'lambda_l2': 0.010452132525854979, 'min_gain_to_split': 0.01774293615061127, 'bagging_fraction': 0.7790527376033966, 'bagging_freq': 4}. Best is trial 32 with value: 0.8473495380911816.


Trial 34 finished with value: 0.8471259310545065 and parameters: {'n_estimators': 485, 'num_leaves': 98, 'max_depth': 9, 'min_data_in_leaf': 31, 'lambda_l1': 0.0002056362076497198, 'lambda_l2': 0.010452132525854979, 'min_gain_to_split': 0.01774293615061127, 'bagging_fraction': 0.7790527376033966, 'bagging_freq': 4}. Best is trial 32 with value: 0.8473495380911816.
{'n_estimators': 391, 'num_leaves': 91, 'max_depth': 9, 'min_data_in_leaf': 31, 'lambda_l1': 0.0005559699391079436, 'lambda_l2': 0.0038056082825090654, 'min_gain_to_split': 0.05994535690662769, 'bagging_fraction': 0.8477176637574408, 'bagging_freq': 5}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228072 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in th

[I 2024-11-21 15:09:16,875] Trial 35 finished with value: 0.8471333369066123 and parameters: {'n_estimators': 484, 'num_leaves': 100, 'max_depth': 9, 'min_data_in_leaf': 31, 'lambda_l1': 0.0005194573920006524, 'lambda_l2': 0.003496296782240002, 'min_gain_to_split': 0.06339161950714192, 'bagging_fraction': 0.7766216783151613, 'bagging_freq': 4}. Best is trial 32 with value: 0.8473495380911816.


Trial 35 finished with value: 0.8471333369066123 and parameters: {'n_estimators': 484, 'num_leaves': 100, 'max_depth': 9, 'min_data_in_leaf': 31, 'lambda_l1': 0.0005194573920006524, 'lambda_l2': 0.003496296782240002, 'min_gain_to_split': 0.06339161950714192, 'bagging_fraction': 0.7766216783151613, 'bagging_freq': 4}. Best is trial 32 with value: 0.8473495380911816.
{'n_estimators': 414, 'num_leaves': 92, 'max_depth': 8, 'min_data_in_leaf': 36, 'lambda_l1': 0.0008407971367883483, 'lambda_l2': 0.010524297459055157, 'min_gain_to_split': 0.14266946264219435, 'bagging_fraction': 0.8510250566001288, 'bagging_freq': 5}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.240086 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in th

[I 2024-11-21 15:12:51,607] Trial 36 finished with value: 0.8465263090087285 and parameters: {'n_estimators': 391, 'num_leaves': 91, 'max_depth': 9, 'min_data_in_leaf': 31, 'lambda_l1': 0.0005559699391079436, 'lambda_l2': 0.0038056082825090654, 'min_gain_to_split': 0.05994535690662769, 'bagging_fraction': 0.8477176637574408, 'bagging_freq': 5}. Best is trial 32 with value: 0.8473495380911816.


Trial 36 finished with value: 0.8465263090087285 and parameters: {'n_estimators': 391, 'num_leaves': 91, 'max_depth': 9, 'min_data_in_leaf': 31, 'lambda_l1': 0.0005559699391079436, 'lambda_l2': 0.0038056082825090654, 'min_gain_to_split': 0.05994535690662769, 'bagging_fraction': 0.8477176637574408, 'bagging_freq': 5}. Best is trial 32 with value: 0.8473495380911816.
{'n_estimators': 422, 'num_leaves': 82, 'max_depth': 8, 'min_data_in_leaf': 27, 'lambda_l1': 0.004790446752160439, 'lambda_l2': 0.011536608587824303, 'min_gain_to_split': 0.20292100734405055, 'bagging_fraction': 0.8488399094680075, 'bagging_freq': 3}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.249869 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the

In [15]:
# Best trial
print("Best trial parameters:", study.best_params)
print("Best trial score:", study.best_value)

Best trial parameters: {'n_estimators': 472, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00022263396207797137, 'lambda_l2': 0.03404409194756445, 'min_gain_to_split': 0.03997431193169202, 'bagging_fraction': 0.8147245992254589, 'bagging_freq': 4}
Best trial score: 0.8473495380911816


**Day 2 of Hyperparameter Tuning**

In [12]:
study_name = "purchase_hp_tuning"
storage_name = "sqlite:///{}.db".format(study_name)

study1 = optuna.load_study(study_name=study_name, storage=storage_name)
print("Best trial parameters:", study1.best_params)
print("Best trial score:", study1.best_value)

Best trial parameters: {'n_estimators': 472, 'num_leaves': 100, 'max_depth': 10, 'min_data_in_leaf': 35, 'lambda_l1': 0.00022263396207797137, 'lambda_l2': 0.03404409194756445, 'min_gain_to_split': 0.03997431193169202, 'bagging_fraction': 0.8147245992254589, 'bagging_freq': 4}
Best trial score: 0.8473495380911816


In [13]:
study1.optimize(objective_function, n_trials=50, n_jobs=2)

{'n_estimators': 580, 'num_leaves': 128, 'max_depth': 13, 'min_data_in_leaf': 66, 'lambda_l1': 9.818222835036108, 'lambda_l2': 0.9108136174317132, 'min_gain_to_split': 0.007381564861038712, 'bagging_fraction': 0.6290078644213785, 'bagging_freq': 6}
{'n_estimators': 581, 'num_leaves': 136, 'max_depth': 13, 'min_data_in_leaf': 62, 'lambda_l1': 0.016734542565462994, 'lambda_l2': 0.6168662711899038, 'min_gain_to_split': 0.006741367107895389, 'bagging_fraction': 0.9005260047576603, 'bagging_freq': 6}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.217732 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used feature

[I 2024-11-29 10:53:22,995] Trial 50 finished with value: 0.8481961160694583 and parameters: {'n_estimators': 580, 'num_leaves': 128, 'max_depth': 13, 'min_data_in_leaf': 66, 'lambda_l1': 9.818222835036108, 'lambda_l2': 0.9108136174317132, 'min_gain_to_split': 0.007381564861038712, 'bagging_fraction': 0.6290078644213785, 'bagging_freq': 6}. Best is trial 50 with value: 0.8481961160694583.


{'n_estimators': 587, 'num_leaves': 138, 'max_depth': 13, 'min_data_in_leaf': 64, 'lambda_l1': 0.06339283716206691, 'lambda_l2': 4.310763423562925, 'min_gain_to_split': 0.006156028802268479, 'bagging_fraction': 0.6425911214891878, 'bagging_freq': 7}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228715 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 10:55:29,393] Trial 51 finished with value: 0.848234670480591 and parameters: {'n_estimators': 581, 'num_leaves': 136, 'max_depth': 13, 'min_data_in_leaf': 62, 'lambda_l1': 0.016734542565462994, 'lambda_l2': 0.6168662711899038, 'min_gain_to_split': 0.006741367107895389, 'bagging_fraction': 0.9005260047576603, 'bagging_freq': 6}. Best is trial 51 with value: 0.848234670480591.


{'n_estimators': 577, 'num_leaves': 127, 'max_depth': 13, 'min_data_in_leaf': 66, 'lambda_l1': 7.332015954838193, 'lambda_l2': 5.084231068986714, 'min_gain_to_split': 0.005673102094047818, 'bagging_fraction': 0.6658263654260979, 'bagging_freq': 8}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.239290 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 11:10:38,122] Trial 52 finished with value: 0.848277239268015 and parameters: {'n_estimators': 587, 'num_leaves': 138, 'max_depth': 13, 'min_data_in_leaf': 64, 'lambda_l1': 0.06339283716206691, 'lambda_l2': 4.310763423562925, 'min_gain_to_split': 0.006156028802268479, 'bagging_fraction': 0.6425911214891878, 'bagging_freq': 7}. Best is trial 52 with value: 0.848277239268015.


{'n_estimators': 588, 'num_leaves': 130, 'max_depth': 13, 'min_data_in_leaf': 64, 'lambda_l1': 3.4838002781642707, 'lambda_l2': 5.5332049755098955, 'min_gain_to_split': 0.691428193654404, 'bagging_fraction': 0.6122765237338306, 'bagging_freq': 7}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228388 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 11:12:12,623] Trial 53 finished with value: 0.8481988144704681 and parameters: {'n_estimators': 577, 'num_leaves': 127, 'max_depth': 13, 'min_data_in_leaf': 66, 'lambda_l1': 7.332015954838193, 'lambda_l2': 5.084231068986714, 'min_gain_to_split': 0.005673102094047818, 'bagging_fraction': 0.6658263654260979, 'bagging_freq': 8}. Best is trial 52 with value: 0.848277239268015.


{'n_estimators': 598, 'num_leaves': 137, 'max_depth': 13, 'min_data_in_leaf': 69, 'lambda_l1': 6.769400664515117, 'lambda_l2': 5.414449062772567, 'min_gain_to_split': 0.0017720089374739987, 'bagging_fraction': 0.6357247012113031, 'bagging_freq': 8}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.231507 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 11:27:28,731] Trial 54 finished with value: 0.848219411569635 and parameters: {'n_estimators': 588, 'num_leaves': 130, 'max_depth': 13, 'min_data_in_leaf': 64, 'lambda_l1': 3.4838002781642707, 'lambda_l2': 5.5332049755098955, 'min_gain_to_split': 0.691428193654404, 'bagging_fraction': 0.6122765237338306, 'bagging_freq': 7}. Best is trial 52 with value: 0.848277239268015.


{'n_estimators': 616, 'num_leaves': 136, 'max_depth': 14, 'min_data_in_leaf': 66, 'lambda_l1': 9.962637573498188, 'lambda_l2': 5.712695370285232, 'min_gain_to_split': 0.7631694806918913, 'bagging_fraction': 0.6242637620604123, 'bagging_freq': 8}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.239544 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 11:29:31,238] Trial 55 finished with value: 0.8482909925980603 and parameters: {'n_estimators': 598, 'num_leaves': 137, 'max_depth': 13, 'min_data_in_leaf': 69, 'lambda_l1': 6.769400664515117, 'lambda_l2': 5.414449062772567, 'min_gain_to_split': 0.0017720089374739987, 'bagging_fraction': 0.6357247012113031, 'bagging_freq': 8}. Best is trial 55 with value: 0.8482909925980603.


{'n_estimators': 592, 'num_leaves': 134, 'max_depth': 14, 'min_data_in_leaf': 66, 'lambda_l1': 8.87872291793136, 'lambda_l2': 5.541443459781396, 'min_gain_to_split': 0.6542560956996818, 'bagging_fraction': 0.6182389142721794, 'bagging_freq': 8}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.249823 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 11:45:24,447] Trial 56 finished with value: 0.8483002438920029 and parameters: {'n_estimators': 616, 'num_leaves': 136, 'max_depth': 14, 'min_data_in_leaf': 66, 'lambda_l1': 9.962637573498188, 'lambda_l2': 5.712695370285232, 'min_gain_to_split': 0.7631694806918913, 'bagging_fraction': 0.6242637620604123, 'bagging_freq': 8}. Best is trial 56 with value: 0.8483002438920029.


{'n_estimators': 734, 'num_leaves': 161, 'max_depth': 14, 'min_data_in_leaf': 65, 'lambda_l1': 9.188474639416581, 'lambda_l2': 6.301290892639148, 'min_gain_to_split': 0.9650265734645751, 'bagging_fraction': 0.6171509720593636, 'bagging_freq': 8}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.239719 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 11:46:34,952] Trial 57 finished with value: 0.8482584636069692 and parameters: {'n_estimators': 592, 'num_leaves': 134, 'max_depth': 14, 'min_data_in_leaf': 66, 'lambda_l1': 8.87872291793136, 'lambda_l2': 5.541443459781396, 'min_gain_to_split': 0.6542560956996818, 'bagging_fraction': 0.6182389142721794, 'bagging_freq': 8}. Best is trial 56 with value: 0.8483002438920029.


{'n_estimators': 720, 'num_leaves': 157, 'max_depth': 15, 'min_data_in_leaf': 72, 'lambda_l1': 3.855655150853066, 'lambda_l2': 7.857779609116667, 'min_gain_to_split': 0.5169044574667718, 'bagging_fraction': 0.6008518975403975, 'bagging_freq': 8}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.230448 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 12:08:24,096] Trial 58 finished with value: 0.8486034278788634 and parameters: {'n_estimators': 734, 'num_leaves': 161, 'max_depth': 14, 'min_data_in_leaf': 65, 'lambda_l1': 9.188474639416581, 'lambda_l2': 6.301290892639148, 'min_gain_to_split': 0.9650265734645751, 'bagging_fraction': 0.6171509720593636, 'bagging_freq': 8}. Best is trial 58 with value: 0.8486034278788634.


{'n_estimators': 697, 'num_leaves': 162, 'max_depth': 15, 'min_data_in_leaf': 80, 'lambda_l1': 2.2749797086092207, 'lambda_l2': 9.761753440458039, 'min_gain_to_split': 0.798531756673978, 'bagging_fraction': 0.6582101553496833, 'bagging_freq': 9}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.227744 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 12:09:23,326] Trial 59 finished with value: 0.8486220667172276 and parameters: {'n_estimators': 720, 'num_leaves': 157, 'max_depth': 15, 'min_data_in_leaf': 72, 'lambda_l1': 3.855655150853066, 'lambda_l2': 7.857779609116667, 'min_gain_to_split': 0.5169044574667718, 'bagging_fraction': 0.6008518975403975, 'bagging_freq': 8}. Best is trial 59 with value: 0.8486220667172276.


{'n_estimators': 723, 'num_leaves': 169, 'max_depth': 15, 'min_data_in_leaf': 79, 'lambda_l1': 2.420587368742679, 'lambda_l2': 8.92074589120954, 'min_gain_to_split': 0.6476562407693731, 'bagging_fraction': 0.6516672347946925, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.275209 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 12:31:05,773] Trial 60 finished with value: 0.8486315893596231 and parameters: {'n_estimators': 697, 'num_leaves': 162, 'max_depth': 15, 'min_data_in_leaf': 80, 'lambda_l1': 2.2749797086092207, 'lambda_l2': 9.761753440458039, 'min_gain_to_split': 0.798531756673978, 'bagging_fraction': 0.6582101553496833, 'bagging_freq': 9}. Best is trial 60 with value: 0.8486315893596231.


{'n_estimators': 723, 'num_leaves': 171, 'max_depth': 15, 'min_data_in_leaf': 80, 'lambda_l1': 2.4147317157324264, 'lambda_l2': 3.0936951195361506, 'min_gain_to_split': 0.9346951166366092, 'bagging_fraction': 0.6514917219151862, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.237298 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 12:33:12,988] Trial 61 finished with value: 0.8487124425277115 and parameters: {'n_estimators': 723, 'num_leaves': 169, 'max_depth': 15, 'min_data_in_leaf': 79, 'lambda_l1': 2.420587368742679, 'lambda_l2': 8.92074589120954, 'min_gain_to_split': 0.6476562407693731, 'bagging_fraction': 0.6516672347946925, 'bagging_freq': 10}. Best is trial 61 with value: 0.8487124425277115.


{'n_estimators': 728, 'num_leaves': 173, 'max_depth': 15, 'min_data_in_leaf': 82, 'lambda_l1': 1.9550945511449571, 'lambda_l2': 9.973530348867941, 'min_gain_to_split': 0.9223844103809695, 'bagging_fraction': 0.6480280664963405, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.252383 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 12:55:13,252] Trial 62 finished with value: 0.8486796357744959 and parameters: {'n_estimators': 723, 'num_leaves': 171, 'max_depth': 15, 'min_data_in_leaf': 80, 'lambda_l1': 2.4147317157324264, 'lambda_l2': 3.0936951195361506, 'min_gain_to_split': 0.9346951166366092, 'bagging_fraction': 0.6514917219151862, 'bagging_freq': 10}. Best is trial 61 with value: 0.8487124425277115.


{'n_estimators': 769, 'num_leaves': 170, 'max_depth': 15, 'min_data_in_leaf': 80, 'lambda_l1': 2.4169055646244706, 'lambda_l2': 9.170947266332275, 'min_gain_to_split': 0.9069450300819447, 'bagging_fraction': 0.6664727068874228, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.227698 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 12:57:10,091] Trial 63 finished with value: 0.8487623524213063 and parameters: {'n_estimators': 728, 'num_leaves': 173, 'max_depth': 15, 'min_data_in_leaf': 82, 'lambda_l1': 1.9550945511449571, 'lambda_l2': 9.973530348867941, 'min_gain_to_split': 0.9223844103809695, 'bagging_fraction': 0.6480280664963405, 'bagging_freq': 10}. Best is trial 63 with value: 0.8487623524213063.


{'n_estimators': 769, 'num_leaves': 172, 'max_depth': 15, 'min_data_in_leaf': 82, 'lambda_l1': 1.6621169292272655, 'lambda_l2': 8.954984418368104, 'min_gain_to_split': 0.44679433205297975, 'bagging_fraction': 0.6670867100475298, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.297945 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 13:21:35,572] Trial 64 finished with value: 0.8488001104780403 and parameters: {'n_estimators': 769, 'num_leaves': 170, 'max_depth': 15, 'min_data_in_leaf': 80, 'lambda_l1': 2.4169055646244706, 'lambda_l2': 9.170947266332275, 'min_gain_to_split': 0.9069450300819447, 'bagging_fraction': 0.6664727068874228, 'bagging_freq': 10}. Best is trial 64 with value: 0.8488001104780403.


{'n_estimators': 762, 'num_leaves': 175, 'max_depth': 15, 'min_data_in_leaf': 82, 'lambda_l1': 1.9394664539880553, 'lambda_l2': 9.642111516888106, 'min_gain_to_split': 0.4460398044547496, 'bagging_fraction': 0.6003969811478104, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.232863 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 13:23:34,889] Trial 65 finished with value: 0.8487851166769133 and parameters: {'n_estimators': 769, 'num_leaves': 172, 'max_depth': 15, 'min_data_in_leaf': 82, 'lambda_l1': 1.6621169292272655, 'lambda_l2': 8.954984418368104, 'min_gain_to_split': 0.44679433205297975, 'bagging_fraction': 0.6670867100475298, 'bagging_freq': 10}. Best is trial 64 with value: 0.8488001104780403.


{'n_estimators': 818, 'num_leaves': 187, 'max_depth': 15, 'min_data_in_leaf': 83, 'lambda_l1': 1.4696121636104367, 'lambda_l2': 2.50024026322588, 'min_gain_to_split': 0.4534604502812257, 'bagging_fraction': 0.6795247053908916, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228465 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 13:45:10,508] Trial 66 finished with value: 0.8488065787521701 and parameters: {'n_estimators': 762, 'num_leaves': 175, 'max_depth': 15, 'min_data_in_leaf': 82, 'lambda_l1': 1.9394664539880553, 'lambda_l2': 9.642111516888106, 'min_gain_to_split': 0.4460398044547496, 'bagging_fraction': 0.6003969811478104, 'bagging_freq': 10}. Best is trial 66 with value: 0.8488065787521701.


{'n_estimators': 841, 'num_leaves': 192, 'max_depth': 15, 'min_data_in_leaf': 84, 'lambda_l1': 1.0898682222853961, 'lambda_l2': 2.651497207725445, 'min_gain_to_split': 0.33010290361613104, 'bagging_fraction': 0.6752415365831845, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.231124 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 13:50:08,816] Trial 67 finished with value: 0.848928862081054 and parameters: {'n_estimators': 818, 'num_leaves': 187, 'max_depth': 15, 'min_data_in_leaf': 83, 'lambda_l1': 1.4696121636104367, 'lambda_l2': 2.50024026322588, 'min_gain_to_split': 0.4534604502812257, 'bagging_fraction': 0.6795247053908916, 'bagging_freq': 10}. Best is trial 67 with value: 0.848928862081054.


{'n_estimators': 890, 'num_leaves': 190, 'max_depth': 15, 'min_data_in_leaf': 95, 'lambda_l1': 1.0351965249006005, 'lambda_l2': 2.8198659724604758, 'min_gain_to_split': 0.28259223405574774, 'bagging_fraction': 0.6908719473605575, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.254749 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 14:19:09,038] Trial 68 finished with value: 0.8490039453793452 and parameters: {'n_estimators': 841, 'num_leaves': 192, 'max_depth': 15, 'min_data_in_leaf': 84, 'lambda_l1': 1.0898682222853961, 'lambda_l2': 2.651497207725445, 'min_gain_to_split': 0.33010290361613104, 'bagging_fraction': 0.6752415365831845, 'bagging_freq': 10}. Best is trial 68 with value: 0.8490039453793452.


{'n_estimators': 907, 'num_leaves': 196, 'max_depth': 12, 'min_data_in_leaf': 98, 'lambda_l1': 0.9246984423656615, 'lambda_l2': 2.1760485832789027, 'min_gain_to_split': 0.2926903048079212, 'bagging_fraction': 0.6821414952193527, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.287690 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 14:24:39,492] Trial 69 finished with value: 0.8489561568967775 and parameters: {'n_estimators': 890, 'num_leaves': 190, 'max_depth': 15, 'min_data_in_leaf': 95, 'lambda_l1': 1.0351965249006005, 'lambda_l2': 2.8198659724604758, 'min_gain_to_split': 0.28259223405574774, 'bagging_fraction': 0.6908719473605575, 'bagging_freq': 10}. Best is trial 68 with value: 0.8490039453793452.


{'n_estimators': 896, 'num_leaves': 199, 'max_depth': 12, 'min_data_in_leaf': 96, 'lambda_l1': 0.7818862166822925, 'lambda_l2': 2.689305061409751, 'min_gain_to_split': 0.32873196537104754, 'bagging_fraction': 0.6815124772530332, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.198499 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 14:48:43,210] Trial 70 finished with value: 0.8488486936625694 and parameters: {'n_estimators': 907, 'num_leaves': 196, 'max_depth': 12, 'min_data_in_leaf': 98, 'lambda_l1': 0.9246984423656615, 'lambda_l2': 2.1760485832789027, 'min_gain_to_split': 0.2926903048079212, 'bagging_fraction': 0.6821414952193527, 'bagging_freq': 10}. Best is trial 68 with value: 0.8490039453793452.


{'n_estimators': 907, 'num_leaves': 200, 'max_depth': 12, 'min_data_in_leaf': 98, 'lambda_l1': 0.8351489322152781, 'lambda_l2': 2.0265460314121295, 'min_gain_to_split': 0.33647892791852574, 'bagging_fraction': 0.6793664671719118, 'bagging_freq': 10}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.264010 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 14:58:07,889] Trial 71 finished with value: 0.8488317913127147 and parameters: {'n_estimators': 896, 'num_leaves': 199, 'max_depth': 12, 'min_data_in_leaf': 96, 'lambda_l1': 0.7818862166822925, 'lambda_l2': 2.689305061409751, 'min_gain_to_split': 0.32873196537104754, 'bagging_fraction': 0.6815124772530332, 'bagging_freq': 10}. Best is trial 68 with value: 0.8490039453793452.


{'n_estimators': 913, 'num_leaves': 198, 'max_depth': 14, 'min_data_in_leaf': 98, 'lambda_l1': 0.8545888850259123, 'lambda_l2': 1.9370204406076479, 'min_gain_to_split': 0.29200229817882095, 'bagging_fraction': 0.6785186169854743, 'bagging_freq': 9}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.263952 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 15:17:06,984] Trial 72 finished with value: 0.8488339407417109 and parameters: {'n_estimators': 907, 'num_leaves': 200, 'max_depth': 12, 'min_data_in_leaf': 98, 'lambda_l1': 0.8351489322152781, 'lambda_l2': 2.0265460314121295, 'min_gain_to_split': 0.33647892791852574, 'bagging_fraction': 0.6793664671719118, 'bagging_freq': 10}. Best is trial 68 with value: 0.8490039453793452.


{'n_estimators': 917, 'num_leaves': 198, 'max_depth': 12, 'min_data_in_leaf': 97, 'lambda_l1': 0.7874011450940903, 'lambda_l2': 2.091625917436643, 'min_gain_to_split': 0.27354303133238606, 'bagging_fraction': 0.6854499927179337, 'bagging_freq': 9}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.220414 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 15:28:39,246] Trial 73 finished with value: 0.8490418110706236 and parameters: {'n_estimators': 913, 'num_leaves': 198, 'max_depth': 14, 'min_data_in_leaf': 98, 'lambda_l1': 0.8545888850259123, 'lambda_l2': 1.9370204406076479, 'min_gain_to_split': 0.29200229817882095, 'bagging_fraction': 0.6785186169854743, 'bagging_freq': 9}. Best is trial 73 with value: 0.8490418110706236.


{'n_estimators': 933, 'num_leaves': 200, 'max_depth': 12, 'min_data_in_leaf': 100, 'lambda_l1': 0.8334329005096631, 'lambda_l2': 1.934523954870038, 'min_gain_to_split': 0.2924322485905712, 'bagging_fraction': 0.684436953229155, 'bagging_freq': 9}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.249274 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 15:45:17,238] Trial 74 finished with value: 0.848856879994119 and parameters: {'n_estimators': 917, 'num_leaves': 198, 'max_depth': 12, 'min_data_in_leaf': 97, 'lambda_l1': 0.7874011450940903, 'lambda_l2': 2.091625917436643, 'min_gain_to_split': 0.27354303133238606, 'bagging_fraction': 0.6854499927179337, 'bagging_freq': 9}. Best is trial 73 with value: 0.8490418110706236.


{'n_estimators': 930, 'num_leaves': 200, 'max_depth': 12, 'min_data_in_leaf': 98, 'lambda_l1': 0.5891322610259099, 'lambda_l2': 1.8126635148007308, 'min_gain_to_split': 0.22757515405598033, 'bagging_fraction': 0.6893543920321131, 'bagging_freq': 9}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.230337 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 15:57:06,672] Trial 75 finished with value: 0.8488794516080456 and parameters: {'n_estimators': 933, 'num_leaves': 200, 'max_depth': 12, 'min_data_in_leaf': 100, 'lambda_l1': 0.8334329005096631, 'lambda_l2': 1.934523954870038, 'min_gain_to_split': 0.2924322485905712, 'bagging_fraction': 0.684436953229155, 'bagging_freq': 9}. Best is trial 73 with value: 0.8490418110706236.


{'n_estimators': 956, 'num_leaves': 200, 'max_depth': 12, 'min_data_in_leaf': 96, 'lambda_l1': 0.5486605773722949, 'lambda_l2': 1.6462310434939174, 'min_gain_to_split': 0.22819265147730255, 'bagging_fraction': 0.6869936115417846, 'bagging_freq': 9}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.236306 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 16:14:16,534] Trial 76 finished with value: 0.8488604349741958 and parameters: {'n_estimators': 930, 'num_leaves': 200, 'max_depth': 12, 'min_data_in_leaf': 98, 'lambda_l1': 0.5891322610259099, 'lambda_l2': 1.8126635148007308, 'min_gain_to_split': 0.22757515405598033, 'bagging_fraction': 0.6893543920321131, 'bagging_freq': 9}. Best is trial 73 with value: 0.8490418110706236.


{'n_estimators': 966, 'num_leaves': 190, 'max_depth': 11, 'min_data_in_leaf': 93, 'lambda_l1': 0.3947363797713747, 'lambda_l2': 1.3825414480066722, 'min_gain_to_split': 0.2581538075953894, 'bagging_fraction': 0.6886740991589452, 'bagging_freq': 9}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.236530 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


[I 2024-11-29 16:27:20,279] Trial 77 finished with value: 0.8489227993183261 and parameters: {'n_estimators': 956, 'num_leaves': 200, 'max_depth': 12, 'min_data_in_leaf': 96, 'lambda_l1': 0.5486605773722949, 'lambda_l2': 1.6462310434939174, 'min_gain_to_split': 0.22819265147730255, 'bagging_fraction': 0.6869936115417846, 'bagging_freq': 9}. Best is trial 73 with value: 0.8490418110706236.


{'n_estimators': 971, 'num_leaves': 190, 'max_depth': 14, 'min_data_in_leaf': 92, 'lambda_l1': 0.3439142698852568, 'lambda_l2': 1.3288318289273937, 'min_gain_to_split': 0.24035195998981623, 'bagging_fraction': 0.699417894976562, 'bagging_freq': 9}
[LightGBM] [Info] Number of positive: 4591576, number of negative: 3192701
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.239932 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2105
[LightGBM] [Info] Number of data points in the train set: 7784277, number of used features: 24
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.589853 -> initscore=0.363356
[LightGBM] [Info] Start training from score 0.363356


KeyboardInterrupt: 

In [17]:
# Best trial
print("Best trial parameters:", study1.best_params)
print("Best trial score:", study1.best_value)

Best trial parameters: {'n_estimators': 913, 'num_leaves': 198, 'max_depth': 14, 'min_data_in_leaf': 98, 'lambda_l1': 0.8545888850259123, 'lambda_l2': 1.9370204406076479, 'min_gain_to_split': 0.29200229817882095, 'bagging_fraction': 0.6785186169854743, 'bagging_freq': 9}
Best trial score: 0.8490418110706236


In [2]:
study_name = "purchase_hp_tuning"
storage_name = "sqlite:///{}.db".format(study_name)

study2 = optuna.load_study(study_name=study_name, storage=storage_name)
print("Best trial parameters:", study2.best_params)
print("Best trial score:", study2.best_value)

Best trial parameters: {'n_estimators': 971, 'num_leaves': 190, 'max_depth': 14, 'min_data_in_leaf': 92, 'lambda_l1': 0.3439142698852568, 'lambda_l2': 1.3288318289273937, 'min_gain_to_split': 0.24035195998981623, 'bagging_fraction': 0.699417894976562, 'bagging_freq': 9}
Best trial score: 0.8490936029671841


In [3]:
len(study2.trials)

80