This will be the final notebook in the modeling and expermentation phase, after this i'll have the final model which i can use for the final pipeline and further in testing and deployment 

# Importing Libraries

In [1]:
import pandas as pd
import optuna # For hyperparameter optimization
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import lightgbm as lgb
import json

# Load the Engineered data

In [2]:
# Define file paths
ENGINEERED_DATA_PATH = '../data/processed/application_train_engineered.csv'
TOP_FEATURES_PATH = '../config/top_features.json'

# Load the engineered data
app_eng_df = pd.read_csv(ENGINEERED_DATA_PATH)
with open(TOP_FEATURES_PATH, 'r') as f:
    top_features = json.load(f)

In [3]:
app_eng_df.head()

Unnamed: 0,TARGET,FLAG_OWN_CAR,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,...,EMERGENCYSTATE_MODE_Yes,CREDIT_INCOME_PERCENT,ANNUITY_INCOME_PERCENT,PAYMENT_RATE,EXT_SOURCE_PRODUCT,INCOME_QUANTILE_BINS,CREDIT_QUANTILE_BINS,ANNUITY_QUANTILE_BINS,GOODS_PRICE_QUANTILE_BINS,SK_ID_CURR
0,1,0,0.0,202500.0,406597.5,24700.5,351000.0,0.018801,-9461.0,-637.0,...,False,2.007889,0.121978,0.060749,0.036649,3,1,2,1,100002
1,0,0,0.0,270000.0,1293502.5,35698.5,1129500.0,0.003541,-16765.0,-1188.0,...,False,4.79075,0.132217,0.027598,0.333073,4,4,3,4,100003
2,0,1,0.0,67500.0,135000.0,6750.0,135000.0,0.010032,-19046.0,-225.0,...,False,2.0,0.1,0.05,0.405575,0,0,0,0,100004
3,0,0,0.0,135000.0,312682.5,29686.5,297000.0,0.008019,-19005.0,-3039.0,...,False,2.316167,0.2199,0.094941,0.348166,1,1,3,1,100006
4,0,0,0.0,121500.0,513000.0,21865.5,513000.0,0.028663,-19932.0,-3038.0,...,False,4.222222,0.179963,0.042623,0.172754,1,2,1,2,100007


In [4]:
print(len(top_features), "features loaded.")

60 features loaded.


In [5]:
print("Data loaded successfully. Shape:", app_eng_df.shape)

Data loaded successfully. Shape: (307511, 114)


In [6]:
# Clean column names to match those in top_features
app_eng_df.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in app_eng_df.columns]

# Preparing Data

In [7]:
# select features and target
X = app_eng_df[top_features]
y = app_eng_df['TARGET']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

Training set shape: (246008, 60)
Test set shape: (61503, 60)


# Hyperparameter Tuning 

For this i'll be using the Optuna framework. It's highly efficient and easy to use, much better than grid search or random search. Optuna quickly focuses on the most promising hyperparameter regions and spends less time on bad ones. It also has a pruning feature, which allows it to stop unpromising trials early, saving a huge amount of time.

In [8]:
# Define Optuna objective function
def objective(trial):
    param = {
        'objective': 'binary',
        'metric': 'auc',
        'n_estinimators': trial.suggest_int('n_estimators', 100, 2000),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.2, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'num_leaves': trial.suggest_int('num_leaves', 20, 400),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
        'random_state': 42,
        'n_jobs': -1
    }

    model = lgb.LGBMClassifier(**param)
    model.fit(X_train, y_train, 
            eval_set=[(X_test, y_test)], 
            eval_metric='auc', 
            callbacks=[lgb.early_stopping(50, verbose=False)]
            )
    preds = model.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, preds)
    return auc

# Run Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2025-08-24 15:30:40,366] A new study created in memory with name: no-name-149ca9e5-90bb-4d4a-be1e-1d910432ba43


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043619 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:30:50,980] Trial 0 finished with value: 0.7559630562617083 and parameters: {'n_estimators': 736, 'learning_rate': 0.06788429490190807, 'max_depth': 11, 'num_leaves': 337, 'min_data_in_leaf': 45, 'feature_fraction': 0.7675761951338131, 'bagging_fraction': 0.6583515016501184, 'bagging_freq': 2, 'subsample': 0.8702235379870574, 'colsample_bytree': 0.4705199351934558, 'reg_alpha': 0.0014600003671825835, 'reg_lambda': 0.029873369877351477}. Best is trial 0 with value: 0.7559630562617083.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.021127 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:30:54,359] Trial 1 finished with value: 0.7606598447792441 and parameters: {'n_estimators': 1623, 'learning_rate': 0.14769977135435663, 'max_depth': 4, 'num_leaves': 218, 'min_data_in_leaf': 49, 'feature_fraction': 0.7331405160021978, 'bagging_fraction': 0.5312820816945389, 'bagging_freq': 7, 'subsample': 0.7736852353814642, 'colsample_bytree': 0.7703205665103576, 'reg_alpha': 0.018583052904241757, 'reg_lambda': 0.018640735723171747}. Best is trial 1 with value: 0.7606598447792441.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054778 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:02,133] Trial 2 finished with value: 0.7527367364825562 and parameters: {'n_estimators': 1711, 'learning_rate': 0.016820955570004337, 'max_depth': 12, 'num_leaves': 276, 'min_data_in_leaf': 79, 'feature_fraction': 0.8106412736949853, 'bagging_fraction': 0.5143082549564828, 'bagging_freq': 5, 'subsample': 0.8294445993691002, 'colsample_bytree': 0.8627431678553443, 'reg_alpha': 0.1381864439657951, 'reg_lambda': 0.0015110640428551854}. Best is trial 1 with value: 0.7606598447792441.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041099 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:04,462] Trial 3 finished with value: 0.7409348530733565 and parameters: {'n_estimators': 594, 'learning_rate': 0.0029596136031891703, 'max_depth': 13, 'num_leaves': 58, 'min_data_in_leaf': 67, 'feature_fraction': 0.5667018871418739, 'bagging_fraction': 0.8649271294769416, 'bagging_freq': 5, 'subsample': 0.9259094831552588, 'colsample_bytree': 0.7555493831716873, 'reg_alpha': 0.04858196491728288, 'reg_lambda': 0.01687512396760829}. Best is trial 1 with value: 0.7606598447792441.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041291 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:07,770] Trial 4 finished with value: 0.7522704422485219 and parameters: {'n_estimators': 1408, 'learning_rate': 0.016202233624283906, 'max_depth': 9, 'num_leaves': 226, 'min_data_in_leaf': 29, 'feature_fraction': 0.5365470028197299, 'bagging_fraction': 0.8835554209904146, 'bagging_freq': 4, 'subsample': 0.5460872708306861, 'colsample_bytree': 0.6211825675349881, 'reg_alpha': 0.015422002966251885, 'reg_lambda': 0.4221120390521773}. Best is trial 1 with value: 0.7606598447792441.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.035146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:09,927] Trial 5 finished with value: 0.7597899399585702 and parameters: {'n_estimators': 801, 'learning_rate': 0.19081173565028606, 'max_depth': 11, 'num_leaves': 76, 'min_data_in_leaf': 53, 'feature_fraction': 0.7986684245952282, 'bagging_fraction': 0.8878926345238086, 'bagging_freq': 1, 'subsample': 0.5038646093758238, 'colsample_bytree': 0.8524397924325554, 'reg_alpha': 0.9622836792741242, 'reg_lambda': 0.04372471350165028}. Best is trial 1 with value: 0.7606598447792441.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012318 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:12,733] Trial 6 finished with value: 0.7588139438840285 and parameters: {'n_estimators': 1182, 'learning_rate': 0.09474990658305468, 'max_depth': 11, 'num_leaves': 146, 'min_data_in_leaf': 62, 'feature_fraction': 0.9257710810782032, 'bagging_fraction': 0.7439329628718101, 'bagging_freq': 9, 'subsample': 0.5834698029137817, 'colsample_bytree': 0.7183948862557954, 'reg_alpha': 0.8990083731531454, 'reg_lambda': 0.17971993097073405}. Best is trial 1 with value: 0.7606598447792441.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.038171 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:13,949] Trial 7 finished with value: 0.7568072406951246 and parameters: {'n_estimators': 651, 'learning_rate': 0.0658838926949342, 'max_depth': 4, 'num_leaves': 219, 'min_data_in_leaf': 95, 'feature_fraction': 0.802380134605831, 'bagging_fraction': 0.5262419572788599, 'bagging_freq': 7, 'subsample': 0.5134083246816141, 'colsample_bytree': 0.6054995706725668, 'reg_alpha': 0.004517844644408994, 'reg_lambda': 0.022158662412528576}. Best is trial 1 with value: 0.7606598447792441.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036964 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:18,582] Trial 8 finished with value: 0.7434672157862474 and parameters: {'n_estimators': 433, 'learning_rate': 0.002498342275359953, 'max_depth': 15, 'num_leaves': 206, 'min_data_in_leaf': 30, 'feature_fraction': 0.7081016854159294, 'bagging_fraction': 0.5922117237220366, 'bagging_freq': 9, 'subsample': 0.5598585642791045, 'colsample_bytree': 0.5690129181815, 'reg_alpha': 3.127303747845, 'reg_lambda': 7.424634654627988}. Best is trial 1 with value: 0.7606598447792441.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036798 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:22,324] Trial 9 finished with value: 0.7541858238131386 and parameters: {'n_estimators': 232, 'learning_rate': 0.03107128186368921, 'max_depth': 7, 'num_leaves': 220, 'min_data_in_leaf': 47, 'feature_fraction': 0.5490230109221548, 'bagging_fraction': 0.9783254150611098, 'bagging_freq': 10, 'subsample': 0.6970500098744261, 'colsample_bytree': 0.4375788949929257, 'reg_alpha': 0.0021908665487253114, 'reg_lambda': 1.0766802828305972}. Best is trial 1 with value: 0.7606598447792441.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031811 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:23,933] Trial 10 finished with value: 0.7284566250071203 and parameters: {'n_estimators': 1960, 'learning_rate': 0.005843891683803292, 'max_depth': 4, 'num_leaves': 398, 'min_data_in_leaf': 11, 'feature_fraction': 0.6694700400574913, 'bagging_fraction': 0.6907101952121303, 'bagging_freq': 7, 'subsample': 0.999085707843689, 'colsample_bytree': 0.9954114615878243, 'reg_alpha': 0.11907878268802298, 'reg_lambda': 0.0022940182816717676}. Best is trial 1 with value: 0.7606598447792441.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036452 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:25,295] Trial 11 finished with value: 0.7630046926882176 and parameters: {'n_estimators': 979, 'learning_rate': 0.18631077790401301, 'max_depth': 7, 'num_leaves': 24, 'min_data_in_leaf': 36, 'feature_fraction': 0.910779183243672, 'bagging_fraction': 0.8539556357024725, 'bagging_freq': 1, 'subsample': 0.7010833532703716, 'colsample_bytree': 0.8500380874272491, 'reg_alpha': 0.7457344575152605, 'reg_lambda': 0.006444627345025418}. Best is trial 11 with value: 0.7630046926882176.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037160 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:27,011] Trial 12 finished with value: 0.7642894010950828 and parameters: {'n_estimators': 1205, 'learning_rate': 0.18811373488178246, 'max_depth': 6, 'num_leaves': 127, 'min_data_in_leaf': 33, 'feature_fraction': 0.9758570564128638, 'bagging_fraction': 0.8006813393197532, 'bagging_freq': 3, 'subsample': 0.7262528347909926, 'colsample_bytree': 0.8451209083797222, 'reg_alpha': 9.815186483608784, 'reg_lambda': 0.005710103512072669}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032644 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:29,327] Trial 13 finished with value: 0.7551173791908603 and parameters: {'n_estimators': 1082, 'learning_rate': 0.04504180464447702, 'max_depth': 7, 'num_leaves': 23, 'min_data_in_leaf': 28, 'feature_fraction': 0.9971720983251221, 'bagging_fraction': 0.7984122105586752, 'bagging_freq': 3, 'subsample': 0.6670988782581776, 'colsample_bytree': 0.9620691282674676, 'reg_alpha': 7.46517362159641, 'reg_lambda': 0.004278969167625002}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.018793 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:33,047] Trial 14 finished with value: 0.732908052786072 and parameters: {'n_estimators': 1290, 'learning_rate': 0.0011656452921182034, 'max_depth': 7, 'num_leaves': 117, 'min_data_in_leaf': 12, 'feature_fraction': 0.9100945068584533, 'bagging_fraction': 0.7916571202513994, 'bagging_freq': 1, 'subsample': 0.6534914542357919, 'colsample_bytree': 0.8576660529542794, 'reg_alpha': 0.5556825653741287, 'reg_lambda': 0.005845004038440178}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.016795 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:36,089] Trial 15 finished with value: 0.763057594038741 and parameters: {'n_estimators': 913, 'learning_rate': 0.18303001691446322, 'max_depth': 6, 'num_leaves': 139, 'min_data_in_leaf': 34, 'feature_fraction': 0.9992533028499789, 'bagging_fraction': 0.9725907969489039, 'bagging_freq': 3, 'subsample': 0.7479947985702831, 'colsample_bytree': 0.9123052149951689, 'reg_alpha': 8.167595614052612, 'reg_lambda': 0.0010563573649908374}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037022 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:37,887] Trial 16 finished with value: 0.7455148614143141 and parameters: {'n_estimators': 986, 'learning_rate': 0.026878222998635324, 'max_depth': 5, 'num_leaves': 141, 'min_data_in_leaf': 18, 'feature_fraction': 0.9961061126072474, 'bagging_fraction': 0.9802639746414332, 'bagging_freq': 3, 'subsample': 0.8061302790352317, 'colsample_bytree': 0.9468138575698872, 'reg_alpha': 6.387901104342226, 'reg_lambda': 0.0016286956407091718}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032638 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:40,800] Trial 17 finished with value: 0.7619345892078324 and parameters: {'n_estimators': 1487, 'learning_rate': 0.10070401160568307, 'max_depth': 9, 'num_leaves': 166, 'min_data_in_leaf': 38, 'feature_fraction': 0.8708317845425291, 'bagging_fraction': 0.9439626702879379, 'bagging_freq': 3, 'subsample': 0.7463297867882068, 'colsample_bytree': 0.905687918978682, 'reg_alpha': 4.041355984860141, 'reg_lambda': 0.0011184454998403057}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036074 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:46,568] Trial 18 finished with value: 0.7356004625679841 and parameters: {'n_estimators': 168, 'learning_rate': 0.008001212094914218, 'max_depth': 6, 'num_leaves': 93, 'min_data_in_leaf': 22, 'feature_fraction': 0.965156501486461, 'bagging_fraction': 0.9274105398163174, 'bagging_freq': 4, 'subsample': 0.6210771424954431, 'colsample_bytree': 0.790055648330206, 'reg_alpha': 2.071279173449161, 'reg_lambda': 0.10222798930807765}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042651 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:48,982] Trial 19 finished with value: 0.7424731495365859 and parameters: {'n_estimators': 878, 'learning_rate': 0.03832713487315202, 'max_depth': 3, 'num_leaves': 173, 'min_data_in_leaf': 64, 'feature_fraction': 0.8654726404122635, 'bagging_fraction': 0.8038729265631823, 'bagging_freq': 6, 'subsample': 0.7418096549037609, 'colsample_bytree': 0.6696773636858147, 'reg_alpha': 9.966484481283855, 'reg_lambda': 0.004689215286535003}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036911 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:51,683] Trial 20 finished with value: 0.7600516680543918 and parameters: {'n_estimators': 457, 'learning_rate': 0.1064188497586732, 'max_depth': 8, 'num_leaves': 111, 'min_data_in_leaf': 40, 'feature_fraction': 0.6329752512351021, 'bagging_fraction': 0.7137232537115865, 'bagging_freq': 2, 'subsample': 0.904936012427731, 'colsample_bytree': 0.9282146498207334, 'reg_alpha': 0.27884437296684134, 'reg_lambda': 0.009998220710235107}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.038164 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:53,540] Trial 21 finished with value: 0.7632581881226885 and parameters: {'n_estimators': 1007, 'learning_rate': 0.19789583002870895, 'max_depth': 6, 'num_leaves': 49, 'min_data_in_leaf': 37, 'feature_fraction': 0.9366536297842202, 'bagging_fraction': 0.8418951115208194, 'bagging_freq': 2, 'subsample': 0.7027153971960634, 'colsample_bytree': 0.8197876196556861, 'reg_alpha': 1.6525596651587138, 'reg_lambda': 0.004249135050516468}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014782 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:55,593] Trial 22 finished with value: 0.7642444367283282 and parameters: {'n_estimators': 1184, 'learning_rate': 0.12767736591640094, 'max_depth': 5, 'num_leaves': 62, 'min_data_in_leaf': 26, 'feature_fraction': 0.9565549829255887, 'bagging_fraction': 0.8237656534975865, 'bagging_freq': 2, 'subsample': 0.7014046691125388, 'colsample_bytree': 0.8095060973276778, 'reg_alpha': 1.7098299333221518, 'reg_lambda': 0.0028499103988476326}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.015759 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:31:58,237] Trial 23 finished with value: 0.7569290099856019 and parameters: {'n_estimators': 1199, 'learning_rate': 0.05934578648398814, 'max_depth': 5, 'num_leaves': 63, 'min_data_in_leaf': 20, 'feature_fraction': 0.9453406062998396, 'bagging_fraction': 0.8241806241383421, 'bagging_freq': 2, 'subsample': 0.7047918702560807, 'colsample_bytree': 0.8219553379728607, 'reg_alpha': 3.169648317885902, 'reg_lambda': 0.0032931778301998416}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.019234 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:00,220] Trial 24 finished with value: 0.7572740728486151 and parameters: {'n_estimators': 1377, 'learning_rate': 0.12159182366230105, 'max_depth': 3, 'num_leaves': 55, 'min_data_in_leaf': 25, 'feature_fraction': 0.865856524834012, 'bagging_fraction': 0.7568461635291556, 'bagging_freq': 4, 'subsample': 0.6244625443030909, 'colsample_bytree': 0.7101056611258497, 'reg_alpha': 1.5977762147985024, 'reg_lambda': 0.010439070414930423}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:02,003] Trial 25 finished with value: 0.7592904372134532 and parameters: {'n_estimators': 1110, 'learning_rate': 0.07668234606773258, 'max_depth': 5, 'num_leaves': 94, 'min_data_in_leaf': 42, 'feature_fraction': 0.8779049451789505, 'bagging_fraction': 0.8344956528997598, 'bagging_freq': 2, 'subsample': 0.7978893562111998, 'colsample_bytree': 0.8027808061993429, 'reg_alpha': 0.3546162647748195, 'reg_lambda': 0.048431230405985896}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031297 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:03,534] Trial 26 finished with value: 0.763883581832529 and parameters: {'n_estimators': 1531, 'learning_rate': 0.13210581386246692, 'max_depth': 6, 'num_leaves': 35, 'min_data_in_leaf': 54, 'feature_fraction': 0.9589110722137294, 'bagging_fraction': 0.908425954199694, 'bagging_freq': 1, 'subsample': 0.666287566511219, 'colsample_bytree': 0.7486261615743082, 'reg_alpha': 1.3902322238930724, 'reg_lambda': 0.002544177123804563}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036879 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:05,618] Trial 27 finished with value: 0.7478677104299055 and parameters: {'n_estimators': 1770, 'learning_rate': 0.023816655575214504, 'max_depth': 8, 'num_leaves': 32, 'min_data_in_leaf': 72, 'feature_fraction': 0.9618326663611654, 'bagging_fraction': 0.9080744379263591, 'bagging_freq': 1, 'subsample': 0.616024134271659, 'colsample_bytree': 0.6714164502903405, 'reg_alpha': 0.24402239134691323, 'reg_lambda': 0.0026218518235954042}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.017243 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:08,848] Trial 28 finished with value: 0.7603828946315174 and parameters: {'n_estimators': 1572, 'learning_rate': 0.0457275898188599, 'max_depth': 8, 'num_leaves': 95, 'min_data_in_leaf': 59, 'feature_fraction': 0.9006205171815769, 'bagging_fraction': 0.7598449987220873, 'bagging_freq': 1, 'subsample': 0.6668573878812852, 'colsample_bytree': 0.7463642237647016, 'reg_alpha': 3.986409628310271, 'reg_lambda': 0.00942639106040337}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.034009 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:13,383] Trial 29 finished with value: 0.7561004608402295 and parameters: {'n_estimators': 1866, 'learning_rate': 0.07887017617024679, 'max_depth': 10, 'num_leaves': 258, 'min_data_in_leaf': 82, 'feature_fraction': 0.8300565308373116, 'bagging_fraction': 0.6636967365497, 'bagging_freq': 2, 'subsample': 0.8409180506665086, 'colsample_bytree': 0.5274528997860255, 'reg_alpha': 0.043285322303951886, 'reg_lambda': 0.041534730054856535}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041793 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:15,822] Trial 30 finished with value: 0.7620358854975383 and parameters: {'n_estimators': 1289, 'learning_rate': 0.13035593354667752, 'max_depth': 5, 'num_leaves': 322, 'min_data_in_leaf': 53, 'feature_fraction': 0.7654797832412217, 'bagging_fraction': 0.9319531757538413, 'bagging_freq': 4, 'subsample': 0.7825294228699189, 'colsample_bytree': 0.8918640782801015, 'reg_alpha': 1.587511023971391, 'reg_lambda': 0.002264910284156659}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.045178 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:18,414] Trial 31 finished with value: 0.7637381761473903 and parameters: {'n_estimators': 1278, 'learning_rate': 0.13812278738484246, 'max_depth': 6, 'num_leaves': 46, 'min_data_in_leaf': 45, 'feature_fraction': 0.9442028621150124, 'bagging_fraction': 0.8320788069439893, 'bagging_freq': 2, 'subsample': 0.7176869027197925, 'colsample_bytree': 0.8186309157917375, 'reg_alpha': 1.55580654245381, 'reg_lambda': 0.004125992806651312}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037905 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:20,246] Trial 32 finished with value: 0.7631353643675811 and parameters: {'n_estimators': 1531, 'learning_rate': 0.12677504336362508, 'max_depth': 6, 'num_leaves': 82, 'min_data_in_leaf': 45, 'feature_fraction': 0.968359540215817, 'bagging_fraction': 0.7884703999691477, 'bagging_freq': 2, 'subsample': 0.7246158793200254, 'colsample_bytree': 0.7628742609512139, 'reg_alpha': 0.4570525985075773, 'reg_lambda': 0.0142461610017383}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.038353 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:21,888] Trial 33 finished with value: 0.753744122472932 and parameters: {'n_estimators': 1318, 'learning_rate': 0.058911011519443356, 'max_depth': 4, 'num_leaves': 44, 'min_data_in_leaf': 50, 'feature_fraction': 0.8435269915889881, 'bagging_fraction': 0.8890915360994877, 'bagging_freq': 3, 'subsample': 0.7708259422677105, 'colsample_bytree': 0.6736005509071822, 'reg_alpha': 4.825686307905922, 'reg_lambda': 0.007537668472024036}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036814 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:23,636] Trial 34 finished with value: 0.7585742865166356 and parameters: {'n_estimators': 1700, 'learning_rate': 0.14332885029943182, 'max_depth': 3, 'num_leaves': 117, 'min_data_in_leaf': 56, 'feature_fraction': 0.9659749506647423, 'bagging_fraction': 0.7312865157576012, 'bagging_freq': 1, 'subsample': 0.6621097797520931, 'colsample_bytree': 0.7924649433915358, 'reg_alpha': 2.395443084656299, 'reg_lambda': 0.001737783279037073}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.020102 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:26,218] Trial 35 finished with value: 0.7345811639771941 and parameters: {'n_estimators': 1444, 'learning_rate': 0.010556879784849324, 'max_depth': 5, 'num_leaves': 67, 'min_data_in_leaf': 32, 'feature_fraction': 0.8930500472788994, 'bagging_fraction': 0.8198984879401136, 'bagging_freq': 5, 'subsample': 0.6358593656553759, 'colsample_bytree': 0.8813043188852809, 'reg_alpha': 1.1304027901858356, 'reg_lambda': 0.025483277269896104}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.020797 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:29,527] Trial 36 finished with value: 0.7617447998239614 and parameters: {'n_estimators': 1199, 'learning_rate': 0.08746475938353329, 'max_depth': 6, 'num_leaves': 41, 'min_data_in_leaf': 45, 'feature_fraction': 0.9372092147741748, 'bagging_fraction': 0.86031929038579, 'bagging_freq': 2, 'subsample': 0.8432882119412115, 'colsample_bytree': 0.7284924492836417, 'reg_alpha': 0.04040142026836446, 'reg_lambda': 0.0031804371786209665}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:31,675] Trial 37 finished with value: 0.7595170972355677 and parameters: {'n_estimators': 1572, 'learning_rate': 0.14729868454279904, 'max_depth': 8, 'num_leaves': 73, 'min_data_in_leaf': 16, 'feature_fraction': 0.9746519566447652, 'bagging_fraction': 0.7775977505483403, 'bagging_freq': 3, 'subsample': 0.5806939538707969, 'colsample_bytree': 0.7668188716696867, 'reg_alpha': 0.20333514905065664, 'reg_lambda': 0.014433104925287233}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.034237 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:33,094] Trial 38 finished with value: 0.7533886004607511 and parameters: {'n_estimators': 1676, 'learning_rate': 0.05459819723847869, 'max_depth': 4, 'num_leaves': 189, 'min_data_in_leaf': 69, 'feature_fraction': 0.6128250088439215, 'bagging_fraction': 0.8741742260139876, 'bagging_freq': 5, 'subsample': 0.6871868649951943, 'colsample_bytree': 0.8416778700954519, 'reg_alpha': 0.6491054814604925, 'reg_lambda': 0.5154243022477766}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031325 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:35,866] Trial 39 finished with value: 0.763396151994949 and parameters: {'n_estimators': 1135, 'learning_rate': 0.09220928621271604, 'max_depth': 10, 'num_leaves': 109, 'min_data_in_leaf': 24, 'feature_fraction': 0.7237773723859525, 'bagging_fraction': 0.8965770964224473, 'bagging_freq': 1, 'subsample': 0.7686196367223308, 'colsample_bytree': 0.823495590907413, 'reg_alpha': 0.018075032838291167, 'reg_lambda': 3.788423252493925}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031322 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:37,351] Trial 40 finished with value: 0.763800129506781 and parameters: {'n_estimators': 819, 'learning_rate': 0.1603457322501581, 'max_depth': 15, 'num_leaves': 21, 'min_data_in_leaf': 50, 'feature_fraction': 0.5050195932450507, 'bagging_fraction': 0.9518906645781817, 'bagging_freq': 4, 'subsample': 0.7233716199168241, 'colsample_bytree': 0.639976662446522, 'reg_alpha': 1.0869064460876277, 'reg_lambda': 0.09411103268082104}. Best is trial 12 with value: 0.7642894010950828.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:39,097] Trial 41 finished with value: 0.7652571573835126 and parameters: {'n_estimators': 766, 'learning_rate': 0.14661696258280738, 'max_depth': 15, 'num_leaves': 24, 'min_data_in_leaf': 51, 'feature_fraction': 0.9287838567079353, 'bagging_fraction': 0.9495916039524935, 'bagging_freq': 4, 'subsample': 0.7231683030135212, 'colsample_bytree': 0.6204910468966424, 'reg_alpha': 1.1168937615210697, 'reg_lambda': 0.10128872382319574}. Best is trial 41 with value: 0.7652571573835126.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.029977 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:41,201] Trial 42 finished with value: 0.7624792237515878 and parameters: {'n_estimators': 771, 'learning_rate': 0.17466905001031252, 'max_depth': 15, 'num_leaves': 20, 'min_data_in_leaf': 55, 'feature_fraction': 0.6777130376568589, 'bagging_fraction': 0.9546062879132984, 'bagging_freq': 6, 'subsample': 0.7347292896993906, 'colsample_bytree': 0.6381181274059095, 'reg_alpha': 1.030709744186971, 'reg_lambda': 0.20635693958132478}. Best is trial 41 with value: 0.7652571573835126.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013985 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:44,529] Trial 43 finished with value: 0.7614955899332398 and parameters: {'n_estimators': 672, 'learning_rate': 0.11027736297672688, 'max_depth': 14, 'num_leaves': 63, 'min_data_in_leaf': 50, 'feature_fraction': 0.5095892969796203, 'bagging_fraction': 0.9080956316963628, 'bagging_freq': 4, 'subsample': 0.6851418962885516, 'colsample_bytree': 0.5351731109224319, 'reg_alpha': 0.16270468123246562, 'reg_lambda': 0.10832295609635234}. Best is trial 41 with value: 0.7652571573835126.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036977 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:46,309] Trial 44 finished with value: 0.7629379301151428 and parameters: {'n_estimators': 461, 'learning_rate': 0.07464157503598753, 'max_depth': 13, 'num_leaves': 35, 'min_data_in_leaf': 60, 'feature_fraction': 0.5947281744095182, 'bagging_fraction': 0.995876058330604, 'bagging_freq': 5, 'subsample': 0.6444071644109786, 'colsample_bytree': 0.5794604249341831, 'reg_alpha': 0.06504978613262571, 'reg_lambda': 0.061591050553045305}. Best is trial 41 with value: 0.7652571573835126.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032523 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:48,316] Trial 45 finished with value: 0.7543923492606297 and parameters: {'n_estimators': 874, 'learning_rate': 0.15610204941607353, 'max_depth': 14, 'num_leaves': 81, 'min_data_in_leaf': 29, 'feature_fraction': 0.9276693376190261, 'bagging_fraction': 0.5845515134523959, 'bagging_freq': 4, 'subsample': 0.5981368179721271, 'colsample_bytree': 0.6175744845784831, 'reg_alpha': 0.008371279005986679, 'reg_lambda': 0.24906087362705048}. Best is trial 41 with value: 0.7652571573835126.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.016109 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:51,149] Trial 46 finished with value: 0.7429072968489283 and parameters: {'n_estimators': 599, 'learning_rate': 0.02039196943057147, 'max_depth': 12, 'num_leaves': 21, 'min_data_in_leaf': 76, 'feature_fraction': 0.7852595620779705, 'bagging_fraction': 0.923983948950049, 'bagging_freq': 3, 'subsample': 0.812626013596398, 'colsample_bytree': 0.7007319761428732, 'reg_alpha': 2.69618252890796, 'reg_lambda': 1.1089834995335461}. Best is trial 41 with value: 0.7652571573835126.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037812 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:54,800] Trial 47 finished with value: 0.7411267478241069 and parameters: {'n_estimators': 698, 'learning_rate': 0.0032954913547812215, 'max_depth': 14, 'num_leaves': 243, 'min_data_in_leaf': 42, 'feature_fraction': 0.8364234493123177, 'bagging_fraction': 0.9623317253392998, 'bagging_freq': 5, 'subsample': 0.6795775368300405, 'colsample_bytree': 0.6452850134643302, 'reg_alpha': 5.7179987962448875, 'reg_lambda': 0.7793178899205635}. Best is trial 41 with value: 0.7652571573835126.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.034623 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:32:57,071] Trial 48 finished with value: 0.7604923345230615 and parameters: {'n_estimators': 816, 'learning_rate': 0.18723791753372176, 'max_depth': 15, 'num_leaves': 55, 'min_data_in_leaf': 86, 'feature_fraction': 0.9825540059673618, 'bagging_fraction': 0.8672318424233271, 'bagging_freq': 3, 'subsample': 0.7624538042450668, 'colsample_bytree': 0.7327063259249915, 'reg_alpha': 0.48467624807483606, 'reg_lambda': 0.1302283837878539}. Best is trial 41 with value: 0.7652571573835126.


[LightGBM] [Info] Number of positive: 19860, number of negative: 226148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037472 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5333
[LightGBM] [Info] Number of data points in the train set: 246008, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080729 -> initscore=-2.432482
[LightGBM] [Info] Start training from score -2.432482


[I 2025-08-24 15:33:01,748] Trial 49 finished with value: 0.7577791578439861 and parameters: {'n_estimators': 1036, 'learning_rate': 0.0365749195890617, 'max_depth': 13, 'num_leaves': 128, 'min_data_in_leaf': 66, 'feature_fraction': 0.9152752214656884, 'bagging_fraction': 0.9467071562705948, 'bagging_freq': 4, 'subsample': 0.7293957576920673, 'colsample_bytree': 0.5892699602343289, 'reg_alpha': 0.8827072185155921, 'reg_lambda': 0.06622541297166086}. Best is trial 41 with value: 0.7652571573835126.




In [9]:
# Print best parameters
print("Best trial:")
trial = study.best_trial
print(f"  AUC: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

Best trial:
  AUC: 0.7652571573835126
  Params: 
    n_estimators: 766
    learning_rate: 0.14661696258280738
    max_depth: 15
    num_leaves: 24
    min_data_in_leaf: 51
    feature_fraction: 0.9287838567079353
    bagging_fraction: 0.9495916039524935
    bagging_freq: 4
    subsample: 0.7231683030135212
    colsample_bytree: 0.6204910468966424
    reg_alpha: 1.1168937615210697
    reg_lambda: 0.10128872382319574


In [10]:
# Save best parameters to a JSON file
best_params = study.best_params

with open('../config/best_lgbm_params.json', 'w') as f:
    json.dump(best_params, f, indent=4)

print("Best hyperparameters saved successfully.")

Best hyperparameters saved successfully.
