# LightGBM Modeling & Hyperparameter Tuning

In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import optuna
import mlflow, mlflow.lightgbm
from sklearn.metrics import mean_absolute_error

In [3]:
# Read the preprocessed train / validation splits written by the earlier pipeline stage
train_df, valid_df = (
    pd.read_parquet('../data/processed/transformed_train.parquet'),
    pd.read_parquet('../data/processed/transformed_valid.parquet'),
)

In [4]:
# Split each frame into the feature matrix and the modelling target.
# Both 'loss' and the target column are removed from the features.
target = 'log_loss'

X_train = train_df.drop(columns=['loss', target])
Y_train = train_df[target]

X_valid = valid_df.drop(columns=['loss', target])
Y_valid = valid_df[target]

# Quick sanity check on the resulting matrix sizes
print('Train shape : ', X_train.shape)
print('Valid shape : ', X_valid.shape)

Train shape :  (150655, 5548)
Valid shape :  (18831, 5548)


In [None]:
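# Prepare lightgbm dataset
# NOTE: intentionally disabled. The Datasets are built inside objective() instead,
# where each trial's params (including the tuned max_bin) are passed to lgb.Dataset.
#train_lgb = lgb.Dataset(X_train, label=Y_train)
#valid_lgb = lgb.Dataset(X_valid, label=Y_valid)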

In [5]:
# Define Optuna task (with MLflow)

def objective(trial):
    """Optuna objective: fit one LightGBM model and return its validation MAE.

    Samples a hyperparameter set from ``trial``, trains a GBDT regressor on the
    notebook-level ``X_train`` / ``Y_train`` with early stopping against
    ``X_valid`` / ``Y_valid``, and records the parameters and results in an
    MLflow run named after the trial number.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Mean absolute error on the validation set, computed after passing both
        the targets and the predictions through ``np.exp``.

    Notes
    -----
    The same validation set drives early stopping and produces the returned
    score, so the reported MAE is likely optimistic; confirm the final model
    on data that was not used here.
    """
    params = {
        # Fixed task settings
        'objective': 'regression',
        'metric': 'mae',

        # Tree shape / leaf size
        'num_leaves': trial.suggest_int('num_leaves', 64, 512, log=True),
        'max_depth': trial.suggest_int('max_depth', 6, 14),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 200, 3000, log=True),

        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.05, log=True),

        # Column / row subsampling
        'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 0.9),
        'feature_fraction_bynode': trial.suggest_float('feature_fraction_bynode', 0.5, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.6, 0.9),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),

        # Regularisation
        'lambda_l1': trial.suggest_float('lambda_l1', 0.0, 10.0),
        'lambda_l2': trial.suggest_float('lambda_l2', 0.0, 50.0),

        'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0.0, 5.0),

        'max_bin': trial.suggest_categorical('max_bin', [127, 255]),

        'boosting_type': 'gbdt',
        'verbosity': -1,
        'seed': 42,
    }

    # The Datasets are rebuilt for every trial and receive the trial's params,
    # so dataset-level settings such as the sampled max_bin are applied when
    # the data is binned (see the LightGBM parameter docs).
    train_lgb = lgb.Dataset(X_train, label=Y_train, params=params)
    valid_lgb = lgb.Dataset(X_valid, label=Y_valid, params=params, reference=train_lgb)

    # nested=True is kept so the run attaches to a parent run when the caller
    # has one active; the run name ties the MLflow run back to the Optuna trial.
    with mlflow.start_run(run_name=f'trial_{trial.number}', nested=True):
        # Logged once, before training, so the parameters are recorded even if
        # training fails part-way through.
        mlflow.log_params(params)

        model = lgb.train(
            params,
            train_lgb,
            valid_sets=[valid_lgb],
            num_boost_round=10000,  # upper bound; early stopping decides the real count
            callbacks=[
                lgb.early_stopping(stopping_rounds=200),
                lgb.log_evaluation(period=100),
            ],
        )

        y_pred = model.predict(X_valid, num_iteration=model.best_iteration)

        # Score on the original loss scale rather than the log scale that
        # LightGBM's own 'mae' metric is evaluated on during training.
        # NOTE(review): assumes the target was built as log(loss); if it was
        # log1p(loss), np.expm1 is the matching inverse - confirm against the
        # preprocessing step.
        mae = mean_absolute_error(np.exp(Y_valid), np.exp(y_pred))

        mlflow.log_metric('mae', mae)
        mlflow.log_metric('best_iteration', model.best_iteration)

    # study.best_params does not contain the number of boosting rounds, so keep
    # it on the trial for anyone refitting the winning configuration.
    trial.set_user_attr('best_iteration', model.best_iteration)

    return mae

In [6]:
# Run Optuna study

# Number of trials to run in this session
N_TRIALS = 20

# Use mlruns folder for MLflow results
mlflow.set_tracking_uri("../mlruns")

# Set and start experiment
mlflow.set_experiment('lgbm_optuna_allstate')

study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),  # seeded so the sampled trials are repeatable
)

# objective() opens its MLflow runs with nested=True, which only groups them
# when a parent run is active. Open that parent here so every trial of this
# study is collected under a single run.
with mlflow.start_run(run_name='optuna_study'):
    # Time-boxed alternative:
    #study.optimize(objective, timeout=3*60*60)  # Run for 3 hours
    study.optimize(objective, n_trials=N_TRIALS)

    # Summarise the study on the parent run
    mlflow.log_metric('best_mae', study.best_value)
    mlflow.log_params(study.best_params)

print('Best MAE: ', study.best_value)
print('Best params: ', study.best_params)

  return FileStore(store_uri, store_uri)
2025/12/16 20:52:12 INFO mlflow.tracking.fluent: Experiment with name 'lgbm_optuna_allstate' does not exist. Creating a new experiment.
[I 2025-12-16 20:52:12,812] A new study created in memory with name: no-name-0e5b2434-416c-4c68-96f4-0595f99e933c


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.47046
[200]	valid_0's l1: 0.441336
[300]	valid_0's l1: 0.433231
[400]	valid_0's l1: 0.429583
[500]	valid_0's l1: 0.427568
[600]	valid_0's l1: 0.426355
[700]	valid_0's l1: 0.425624
[800]	valid_0's l1: 0.425188
[900]	valid_0's l1: 0.424698
[1000]	valid_0's l1: 0.424349
[1100]	valid_0's l1: 0.424159
[1200]	valid_0's l1: 0.423939
[1300]	valid_0's l1: 0.423804
[1400]	valid_0's l1: 0.423721
[1500]	valid_0's l1: 0.423674
[1600]	valid_0's l1: 0.423556
[1700]	valid_0's l1: 0.423512
[1800]	valid_0's l1: 0.423343
[1900]	valid_0's l1: 0.423348
[2000]	valid_0's l1: 0.423301
[2100]	valid_0's l1: 0.423247
[2200]	valid_0's l1: 0.42325
[2300]	valid_0's l1: 0.423199
[2400]	valid_0's l1: 0.423221
Early stopping, best iteration is:
[2279]	valid_0's l1: 0.423174


[I 2025-12-16 20:57:43,779] Trial 0 finished with value: 1162.2211620581852 and parameters: {'num_leaves': 139, 'max_depth': 14, 'min_data_in_leaf': 1451, 'learning_rate': 0.019843966652221864, 'feature_fraction': 0.5624074561769746, 'feature_fraction_bynode': 0.5779972601681014, 'bagging_fraction': 0.6174250836504598, 'bagging_freq': 9, 'lambda_l1': 6.011150117432088, 'lambda_l2': 35.40362888980228, 'min_gain_to_split': 0.10292247147901223, 'max_bin': 127}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.51048
[200]	valid_0's l1: 0.465508
[300]	valid_0's l1: 0.447319
[400]	valid_0's l1: 0.438655
[500]	valid_0's l1: 0.433666
[600]	valid_0's l1: 0.430811
[700]	valid_0's l1: 0.428915
[800]	valid_0's l1: 0.427561
[900]	valid_0's l1: 0.426588
[1000]	valid_0's l1: 0.425851
[1100]	valid_0's l1: 0.425412
[1200]	valid_0's l1: 0.425138
[1300]	valid_0's l1: 0.424947
[1400]	valid_0's l1: 0.424815
[1500]	valid_0's l1: 0.424752
[1600]	valid_0's l1: 0.424752
Early stopping, best iteration is:
[1476]	valid_0's l1: 0.42475


[I 2025-12-16 20:59:58,686] Trial 1 finished with value: 1163.6001971997146 and parameters: {'num_leaves': 99, 'max_depth': 7, 'min_data_in_leaf': 328, 'learning_rate': 0.010074238942079334, 'feature_fraction': 0.7099025726528951, 'feature_fraction_bynode': 0.7159725093210578, 'bagging_fraction': 0.6873687420594126, 'bagging_freq': 7, 'lambda_l1': 1.3949386065204183, 'lambda_l2': 14.607232426760907, 'min_gain_to_split': 1.8318092164684585, 'max_bin': 255}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.557475
[200]	valid_0's l1: 0.505013
[300]	valid_0's l1: 0.47782
[400]	valid_0's l1: 0.462262
[500]	valid_0's l1: 0.451941
[600]	valid_0's l1: 0.445281
[700]	valid_0's l1: 0.440706
[800]	valid_0's l1: 0.437415
[900]	valid_0's l1: 0.434972
[1000]	valid_0's l1: 0.433108
[1100]	valid_0's l1: 0.431818
[1200]	valid_0's l1: 0.430731
[1300]	valid_0's l1: 0.429937
[1400]	valid_0's l1: 0.429211
[1500]	valid_0's l1: 0.42876
[1600]	valid_0's l1: 0.428332
[1700]	valid_0's l1: 0.427931
[1800]	valid_0's l1: 0.427642
[1900]	valid_0's l1: 0.427419
[2000]	valid_0's l1: 0.427224
[2100]	valid_0's l1: 0.427086
[2200]	valid_0's l1: 0.427006
[2300]	valid_0's l1: 0.426867
[2400]	valid_0's l1: 0.426792
[2500]	valid_0's l1: 0.426719
[2600]	valid_0's l1: 0.42665
[2700]	valid_0's l1: 0.426579
[2800]	valid_0's l1: 0.426508
[2900]	valid_0's l1: 0.426473
[3000]	valid_0's l1: 0.426429
[3100]	valid_0's l1: 0.426385
[3200]	valid_0's l1:

[I 2025-12-16 21:03:42,621] Trial 2 finished with value: 1171.9519187355224 and parameters: {'num_leaves': 96, 'max_depth': 10, 'min_data_in_leaf': 994, 'learning_rate': 0.005564426587452866, 'feature_fraction': 0.7430179407605754, 'feature_fraction_bynode': 0.5852620618436457, 'bagging_fraction': 0.6195154778955838, 'bagging_freq': 10, 'lambda_l1': 9.656320330745594, 'lambda_l2': 40.419867405823055, 'min_gain_to_split': 1.5230688458668533, 'max_bin': 255}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.55989
[200]	valid_0's l1: 0.507613
[300]	valid_0's l1: 0.480418
[400]	valid_0's l1: 0.464598
[500]	valid_0's l1: 0.454519
[600]	valid_0's l1: 0.447332
[700]	valid_0's l1: 0.442562
[800]	valid_0's l1: 0.439177
[900]	valid_0's l1: 0.436633
[1000]	valid_0's l1: 0.434624
[1100]	valid_0's l1: 0.433029
[1200]	valid_0's l1: 0.431777
[1300]	valid_0's l1: 0.430731
[1400]	valid_0's l1: 0.429915
[1500]	valid_0's l1: 0.429148
[1600]	valid_0's l1: 0.428508
[1700]	valid_0's l1: 0.427925
[1800]	valid_0's l1: 0.427411
[1900]	valid_0's l1: 0.427005
[2000]	valid_0's l1: 0.426599
[2100]	valid_0's l1: 0.426292
[2200]	valid_0's l1: 0.426007
[2300]	valid_0's l1: 0.425759
[2400]	valid_0's l1: 0.425583
[2500]	valid_0's l1: 0.425407
[2600]	valid_0's l1: 0.425247
[2700]	valid_0's l1: 0.425093
[2800]	valid_0's l1: 0.424952
[2900]	valid_0's l1: 0.424851
[3000]	valid_0's l1: 0.424775
[3100]	valid_0's l1: 0.424709
[3200]	valid_0's l

[I 2025-12-16 21:09:12,347] Trial 3 finished with value: 1164.5944928881702 and parameters: {'num_leaves': 159, 'max_depth': 7, 'min_data_in_leaf': 764, 'learning_rate': 0.00541200919075048, 'feature_fraction': 0.8637281608315128, 'feature_fraction_bynode': 0.6293899908000085, 'bagging_fraction': 0.7987566853061946, 'bagging_freq': 4, 'lambda_l1': 5.200680211778108, 'lambda_l2': 27.335513967163983, 'min_gain_to_split': 0.9242722776276352, 'max_bin': 127}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.437476
[200]	valid_0's l1: 0.430494
[300]	valid_0's l1: 0.429639
[400]	valid_0's l1: 0.429496
[500]	valid_0's l1: 0.429496
Early stopping, best iteration is:
[335]	valid_0's l1: 0.429496


[I 2025-12-16 21:10:44,450] Trial 4 finished with value: 1180.5735000973336 and parameters: {'num_leaves': 452, 'max_depth': 14, 'min_data_in_leaf': 1009, 'learning_rate': 0.0417680537765588, 'feature_fraction': 0.5353970008207678, 'feature_fraction_bynode': 0.5979914312095727, 'bagging_fraction': 0.6135681866731614, 'bagging_freq': 4, 'lambda_l1': 3.8867728968948203, 'lambda_l2': 13.567451588694796, 'min_gain_to_split': 4.143687545759647, 'max_bin': 127}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.558247
[200]	valid_0's l1: 0.50812
[300]	valid_0's l1: 0.482746
[400]	valid_0's l1: 0.467655
[500]	valid_0's l1: 0.45762
[600]	valid_0's l1: 0.450899
[700]	valid_0's l1: 0.446253
[800]	valid_0's l1: 0.442832
[900]	valid_0's l1: 0.440219
[1000]	valid_0's l1: 0.438252
[1100]	valid_0's l1: 0.436783
[1200]	valid_0's l1: 0.435589
[1300]	valid_0's l1: 0.434705
[1400]	valid_0's l1: 0.433952
[1500]	valid_0's l1: 0.433356
[1600]	valid_0's l1: 0.432894
[1700]	valid_0's l1: 0.432523
[1800]	valid_0's l1: 0.432234
[1900]	valid_0's l1: 0.432064
[2000]	valid_0's l1: 0.431913
[2100]	valid_0's l1: 0.431793
[2200]	valid_0's l1: 0.43169
[2300]	valid_0's l1: 0.4316
[2400]	valid_0's l1: 0.431535
[2500]	valid_0's l1: 0.431474
[2600]	valid_0's l1: 0.431424
[2700]	valid_0's l1: 0.431384
[2800]	valid_0's l1: 0.431351
[2900]	valid_0's l1: 0.431321
[3000]	valid_0's l1: 0.431299
[3100]	valid_0's l1: 0.43127
[3200]	valid_0's l1: 0.

[I 2025-12-16 21:16:14,751] Trial 5 finished with value: 1186.8398474487526 and parameters: {'num_leaves': 197, 'max_depth': 7, 'min_data_in_leaf': 1755, 'learning_rate': 0.00593636571266795, 'feature_fraction': 0.8947547746402069, 'feature_fraction_bynode': 0.8861223846483287, 'bagging_fraction': 0.6596147044602517, 'bagging_freq': 1, 'lambda_l1': 8.154614284548341, 'lambda_l2': 35.34286719238086, 'min_gain_to_split': 3.6450358402049368, 'max_bin': 127}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.474491
[200]	valid_0's l1: 0.446723
[300]	valid_0's l1: 0.43808
[400]	valid_0's l1: 0.434727
[500]	valid_0's l1: 0.43328
[600]	valid_0's l1: 0.432727
[700]	valid_0's l1: 0.432571
[800]	valid_0's l1: 0.432571
Early stopping, best iteration is:
[662]	valid_0's l1: 0.432571


[I 2025-12-16 21:17:30,511] Trial 6 finished with value: 1192.2644905485001 and parameters: {'num_leaves': 134, 'max_depth': 7, 'min_data_in_leaf': 2070, 'learning_rate': 0.021002361583510997, 'feature_fraction': 0.6323592099410597, 'feature_fraction_bynode': 0.5317791751430119, 'bagging_fraction': 0.6932946965146987, 'bagging_freq': 4, 'lambda_l1': 7.29606178338064, 'lambda_l2': 31.877873567760656, 'min_gain_to_split': 4.436063712881633, 'max_bin': 127}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.444059
[200]	valid_0's l1: 0.429925
[300]	valid_0's l1: 0.427709
[400]	valid_0's l1: 0.427227
[500]	valid_0's l1: 0.427027
[600]	valid_0's l1: 0.426912
[700]	valid_0's l1: 0.426843
[800]	valid_0's l1: 0.42679
[900]	valid_0's l1: 0.426751
[1000]	valid_0's l1: 0.426733
[1100]	valid_0's l1: 0.426714
[1200]	valid_0's l1: 0.42666
[1300]	valid_0's l1: 0.426644
[1400]	valid_0's l1: 0.426635
[1500]	valid_0's l1: 0.426624
[1600]	valid_0's l1: 0.426607
[1700]	valid_0's l1: 0.426612
Early stopping, best iteration is:
[1598]	valid_0's l1: 0.426603


[I 2025-12-16 21:19:11,890] Trial 7 finished with value: 1173.3753312945717 and parameters: {'num_leaves': 282, 'max_depth': 12, 'min_data_in_leaf': 913, 'learning_rate': 0.029507823990116946, 'feature_fraction': 0.6975182385457563, 'feature_fraction_bynode': 0.7613664146909971, 'bagging_fraction': 0.7282623055075649, 'bagging_freq': 1, 'lambda_l1': 1.0789142699330445, 'lambda_l2': 1.5714592843367126, 'min_gain_to_split': 3.182052056318902, 'max_bin': 255}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.451067
[200]	valid_0's l1: 0.433628
[300]	valid_0's l1: 0.429654
[400]	valid_0's l1: 0.428612
[500]	valid_0's l1: 0.428289
[600]	valid_0's l1: 0.428116
[700]	valid_0's l1: 0.428062
[800]	valid_0's l1: 0.428062
Early stopping, best iteration is:
[683]	valid_0's l1: 0.428062


[I 2025-12-16 21:21:05,632] Trial 8 finished with value: 1176.0913880421645 and parameters: {'num_leaves': 423, 'max_depth': 8, 'min_data_in_leaf': 607, 'learning_rate': 0.028478764407599222, 'feature_fraction': 0.591519266196649, 'feature_fraction_bynode': 0.5384899549143964, 'bagging_fraction': 0.6869254358741304, 'bagging_freq': 2, 'lambda_l1': 9.29697652342573, 'lambda_l2': 40.40601897822085, 'min_gain_to_split': 3.1670187825521174, 'max_bin': 127}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.444418
[200]	valid_0's l1: 0.43204
[300]	valid_0's l1: 0.430303
[400]	valid_0's l1: 0.430117
[500]	valid_0's l1: 0.430117
Early stopping, best iteration is:
[326]	valid_0's l1: 0.430117


[I 2025-12-16 21:22:08,008] Trial 9 finished with value: 1183.9655849222524 and parameters: {'num_leaves': 94, 'max_depth': 14, 'min_data_in_leaf': 861, 'learning_rate': 0.03209298842662205, 'feature_fraction': 0.8584365199693973, 'feature_fraction_bynode': 0.6590017374859319, 'bagging_fraction': 0.633015577358303, 'bagging_freq': 3, 'lambda_l1': 4.271077886262563, 'lambda_l2': 40.90073829612466, 'min_gain_to_split': 4.303652916281717, 'max_bin': 255}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.505772
[200]	valid_0's l1: 0.463899
[300]	valid_0's l1: 0.448055
[400]	valid_0's l1: 0.440529
[500]	valid_0's l1: 0.4362
[600]	valid_0's l1: 0.433596
[700]	valid_0's l1: 0.431699
[800]	valid_0's l1: 0.430386
[900]	valid_0's l1: 0.429427
[1000]	valid_0's l1: 0.428671
[1100]	valid_0's l1: 0.428129
[1200]	valid_0's l1: 0.427598
[1300]	valid_0's l1: 0.427168
[1400]	valid_0's l1: 0.426817
[1500]	valid_0's l1: 0.426549
[1600]	valid_0's l1: 0.426298
[1700]	valid_0's l1: 0.426062
[1800]	valid_0's l1: 0.425844
[1900]	valid_0's l1: 0.425637
[2000]	valid_0's l1: 0.425511
[2100]	valid_0's l1: 0.425371
[2200]	valid_0's l1: 0.425188
[2300]	valid_0's l1: 0.42508
[2400]	valid_0's l1: 0.424996
[2500]	valid_0's l1: 0.424914
[2600]	valid_0's l1: 0.424817
[2700]	valid_0's l1: 0.424708
[2800]	valid_0's l1: 0.42469
[2900]	valid_0's l1: 0.42461
[3000]	valid_0's l1: 0.424551
[3100]	valid_0's l1: 0.424501
[3200]	valid_0's l1: 0

[I 2025-12-16 21:30:29,133] Trial 10 finished with value: 1169.7600152285754 and parameters: {'num_leaves': 69, 'max_depth': 11, 'min_data_in_leaf': 2763, 'learning_rate': 0.011978157145787342, 'feature_fraction': 0.5061470949312418, 'feature_fraction_bynode': 0.9538323976412588, 'bagging_fraction': 0.8879312237836259, 'bagging_freq': 10, 'lambda_l1': 6.268836449065676, 'lambda_l2': 49.61813327301685, 'min_gain_to_split': 0.13495737918034806, 'max_bin': 127}. Best is trial 0 with value: 1162.2211620581852.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.494668
[200]	valid_0's l1: 0.452497
[300]	valid_0's l1: 0.43754
[400]	valid_0's l1: 0.431132
[500]	valid_0's l1: 0.42784
[600]	valid_0's l1: 0.425944
[700]	valid_0's l1: 0.424724
[800]	valid_0's l1: 0.423981
[900]	valid_0's l1: 0.423664
[1000]	valid_0's l1: 0.423466
[1100]	valid_0's l1: 0.423298
[1200]	valid_0's l1: 0.423249
[1300]	valid_0's l1: 0.423179
[1400]	valid_0's l1: 0.423169
[1500]	valid_0's l1: 0.423169
Early stopping, best iteration is:
[1316]	valid_0's l1: 0.423168


[I 2025-12-16 21:32:55,256] Trial 11 finished with value: 1159.769907611715 and parameters: {'num_leaves': 113, 'max_depth': 9, 'min_data_in_leaf': 219, 'learning_rate': 0.011385882170463265, 'feature_fraction': 0.7435809445445398, 'feature_fraction_bynode': 0.7435616837189413, 'bagging_fraction': 0.7865261460393744, 'bagging_freq': 7, 'lambda_l1': 0.973601564508952, 'lambda_l2': 17.284687262413335, 'min_gain_to_split': 1.7792832253762318, 'max_bin': 255}. Best is trial 11 with value: 1159.769907611715.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.471777
[200]	valid_0's l1: 0.439536
[300]	valid_0's l1: 0.430074
[400]	valid_0's l1: 0.426219
[500]	valid_0's l1: 0.424269
[600]	valid_0's l1: 0.422922
[700]	valid_0's l1: 0.422018
[800]	valid_0's l1: 0.421513
[900]	valid_0's l1: 0.421233
[1000]	valid_0's l1: 0.420862
[1100]	valid_0's l1: 0.420679
[1200]	valid_0's l1: 0.420474
[1300]	valid_0's l1: 0.420225
[1400]	valid_0's l1: 0.42007
[1500]	valid_0's l1: 0.419998
[1600]	valid_0's l1: 0.419951
[1700]	valid_0's l1: 0.419938
[1800]	valid_0's l1: 0.419888
[1900]	valid_0's l1: 0.419998
Early stopping, best iteration is:
[1780]	valid_0's l1: 0.419871


[I 2025-12-16 21:36:53,190] Trial 12 finished with value: 1152.6475239390618 and parameters: {'num_leaves': 236, 'max_depth': 9, 'min_data_in_leaf': 203, 'learning_rate': 0.015748462517233382, 'feature_fraction': 0.7604063212686378, 'feature_fraction_bynode': 0.7966713343930665, 'bagging_fraction': 0.8106707103422641, 'bagging_freq': 7, 'lambda_l1': 2.721190423876234, 'lambda_l2': 18.412028624412162, 'min_gain_to_split': 0.17783197825737135, 'max_bin': 255}. Best is trial 12 with value: 1152.6475239390618.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.495199
[200]	valid_0's l1: 0.452696
[300]	valid_0's l1: 0.437829
[400]	valid_0's l1: 0.431274
[500]	valid_0's l1: 0.427854
[600]	valid_0's l1: 0.426084
[700]	valid_0's l1: 0.425003
[800]	valid_0's l1: 0.424304
[900]	valid_0's l1: 0.423998
[1000]	valid_0's l1: 0.423808
[1100]	valid_0's l1: 0.42369
[1200]	valid_0's l1: 0.423651
[1300]	valid_0's l1: 0.423651
Early stopping, best iteration is:
[1169]	valid_0's l1: 0.423651


[I 2025-12-16 21:39:15,047] Trial 13 finished with value: 1161.659320869909 and parameters: {'num_leaves': 223, 'max_depth': 9, 'min_data_in_leaf': 207, 'learning_rate': 0.011197966972304333, 'feature_fraction': 0.7836571038453249, 'feature_fraction_bynode': 0.8225430545290794, 'bagging_fraction': 0.8071310846295144, 'bagging_freq': 7, 'lambda_l1': 2.381137453969918, 'lambda_l2': 17.388276728208318, 'min_gain_to_split': 2.2524362569123646, 'max_bin': 255}. Best is trial 12 with value: 1152.6475239390618.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.514829
[200]	valid_0's l1: 0.464823
[300]	valid_0's l1: 0.445489
[400]	valid_0's l1: 0.435987
[500]	valid_0's l1: 0.430821
[600]	valid_0's l1: 0.427942
[700]	valid_0's l1: 0.426122
[800]	valid_0's l1: 0.424695
[900]	valid_0's l1: 0.423814
[1000]	valid_0's l1: 0.423102
[1100]	valid_0's l1: 0.422605
[1200]	valid_0's l1: 0.422151
[1300]	valid_0's l1: 0.421867
[1400]	valid_0's l1: 0.421627
[1500]	valid_0's l1: 0.421458
[1600]	valid_0's l1: 0.421349
[1700]	valid_0's l1: 0.421282
[1800]	valid_0's l1: 0.42123
[1900]	valid_0's l1: 0.421199
[2000]	valid_0's l1: 0.42115
[2100]	valid_0's l1: 0.421102
[2200]	valid_0's l1: 0.421077
[2300]	valid_0's l1: 0.421063
[2400]	valid_0's l1: 0.421022
[2500]	valid_0's l1: 0.420989
[2600]	valid_0's l1: 0.420985
[2700]	valid_0's l1: 0.420985
Early stopping, best iteration is:
[2543]	valid_0's l1: 0.420979


[I 2025-12-16 21:43:12,868] Trial 14 finished with value: 1154.0012911591648 and parameters: {'num_leaves': 285, 'max_depth': 9, 'min_data_in_leaf': 221, 'learning_rate': 0.008505333929195983, 'feature_fraction': 0.7956113143850287, 'feature_fraction_bynode': 0.7997509191284597, 'bagging_fraction': 0.804337885475948, 'bagging_freq': 7, 'lambda_l1': 0.013154101388857864, 'lambda_l2': 4.953749377422664, 'min_gain_to_split': 0.9652749922550536, 'max_bin': 255}. Best is trial 12 with value: 1152.6475239390618.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.517349
[200]	valid_0's l1: 0.466465
[300]	valid_0's l1: 0.445681
[400]	valid_0's l1: 0.436081
[500]	valid_0's l1: 0.430841
[600]	valid_0's l1: 0.427878
[700]	valid_0's l1: 0.426083
[800]	valid_0's l1: 0.424846
[900]	valid_0's l1: 0.423964
[1000]	valid_0's l1: 0.42326
[1100]	valid_0's l1: 0.422782
[1200]	valid_0's l1: 0.422472
[1300]	valid_0's l1: 0.42229
[1400]	valid_0's l1: 0.422157
[1500]	valid_0's l1: 0.422087
[1600]	valid_0's l1: 0.42201
[1700]	valid_0's l1: 0.421945
[1800]	valid_0's l1: 0.421913
[1900]	valid_0's l1: 0.421879
[2000]	valid_0's l1: 0.42186
[2100]	valid_0's l1: 0.42186
Early stopping, best iteration is:
[1944]	valid_0's l1: 0.421859


[I 2025-12-16 21:47:04,059] Trial 15 finished with value: 1157.168801462053 and parameters: {'num_leaves': 295, 'max_depth': 11, 'min_data_in_leaf': 391, 'learning_rate': 0.008170588528931063, 'feature_fraction': 0.8065881129063617, 'feature_fraction_bynode': 0.8421731728173273, 'bagging_fraction': 0.8590211913856, 'bagging_freq': 6, 'lambda_l1': 2.9478382054796044, 'lambda_l2': 4.1052452231967145, 'min_gain_to_split': 1.0351499195586866, 'max_bin': 255}. Best is trial 12 with value: 1152.6475239390618.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.472084
[200]	valid_0's l1: 0.439884
[300]	valid_0's l1: 0.430472
[400]	valid_0's l1: 0.426626
[500]	valid_0's l1: 0.424685
[600]	valid_0's l1: 0.423325
[700]	valid_0's l1: 0.422536
[800]	valid_0's l1: 0.422181
[900]	valid_0's l1: 0.42188
[1000]	valid_0's l1: 0.42173
[1100]	valid_0's l1: 0.421655
[1200]	valid_0's l1: 0.421596
[1300]	valid_0's l1: 0.421533
[1400]	valid_0's l1: 0.421457
[1500]	valid_0's l1: 0.421419
[1600]	valid_0's l1: 0.421353
[1700]	valid_0's l1: 0.421353
Early stopping, best iteration is:
[1583]	valid_0's l1: 0.421353


[I 2025-12-16 21:49:47,016] Trial 16 finished with value: 1154.9390736939624 and parameters: {'num_leaves': 292, 'max_depth': 9, 'min_data_in_leaf': 328, 'learning_rate': 0.015471247486832018, 'feature_fraction': 0.8151511236524632, 'feature_fraction_bynode': 0.9707275717501355, 'bagging_fraction': 0.8456270514246387, 'bagging_freq': 8, 'lambda_l1': 0.1428298676742746, 'lambda_l2': 7.630397023050882, 'min_gain_to_split': 0.7096577573449048, 'max_bin': 255}. Best is trial 12 with value: 1152.6475239390618.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.534942
[200]	valid_0's l1: 0.48624
[300]	valid_0's l1: 0.464101
[400]	valid_0's l1: 0.451422
[500]	valid_0's l1: 0.443851
[600]	valid_0's l1: 0.439076
[700]	valid_0's l1: 0.435826
[800]	valid_0's l1: 0.433432
[900]	valid_0's l1: 0.43172
[1000]	valid_0's l1: 0.430422
[1100]	valid_0's l1: 0.429313
[1200]	valid_0's l1: 0.428481
[1300]	valid_0's l1: 0.427725
[1400]	valid_0's l1: 0.427094
[1500]	valid_0's l1: 0.42646
[1600]	valid_0's l1: 0.426
[1700]	valid_0's l1: 0.425606
[1800]	valid_0's l1: 0.425275
[1900]	valid_0's l1: 0.424933
[2000]	valid_0's l1: 0.424648
[2100]	valid_0's l1: 0.424418
[2200]	valid_0's l1: 0.424224
[2300]	valid_0's l1: 0.424079
[2400]	valid_0's l1: 0.423958
[2500]	valid_0's l1: 0.423809
[2600]	valid_0's l1: 0.423669
[2700]	valid_0's l1: 0.423562
[2800]	valid_0's l1: 0.42343
[2900]	valid_0's l1: 0.423351
[3000]	valid_0's l1: 0.423268
[3100]	valid_0's l1: 0.423186
[3200]	valid_0's l1: 0.4

[I 2025-12-16 21:55:54,937] Trial 17 finished with value: 1158.2783193949426 and parameters: {'num_leaves': 357, 'max_depth': 6, 'min_data_in_leaf': 477, 'learning_rate': 0.007874082416926177, 'feature_fraction': 0.6672616329128297, 'feature_fraction_bynode': 0.7947120850904492, 'bagging_fraction': 0.7428997089918629, 'bagging_freq': 6, 'lambda_l1': 2.6702193253752666, 'lambda_l2': 22.341681796875367, 'min_gain_to_split': 0.5378014060984336, 'max_bin': 255}. Best is trial 12 with value: 1152.6475239390618.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.519777
[200]	valid_0's l1: 0.467155
[300]	valid_0's l1: 0.445639
[400]	valid_0's l1: 0.435673
[500]	valid_0's l1: 0.430281
[600]	valid_0's l1: 0.427286
[700]	valid_0's l1: 0.425576
[800]	valid_0's l1: 0.424407
[900]	valid_0's l1: 0.423637
[1000]	valid_0's l1: 0.423102
[1100]	valid_0's l1: 0.422716
[1200]	valid_0's l1: 0.422463
[1300]	valid_0's l1: 0.42235
[1400]	valid_0's l1: 0.422227
[1500]	valid_0's l1: 0.422157
[1600]	valid_0's l1: 0.422096
[1700]	valid_0's l1: 0.422067
[1800]	valid_0's l1: 0.422058
[1900]	valid_0's l1: 0.422058
Early stopping, best iteration is:
[1760]	valid_0's l1: 0.422053


[I 2025-12-16 21:59:13,469] Trial 18 finished with value: 1157.8246963258237 and parameters: {'num_leaves': 213, 'max_depth': 12, 'min_data_in_leaf': 262, 'learning_rate': 0.007769969817539154, 'feature_fraction': 0.7615645284400153, 'feature_fraction_bynode': 0.8943720345909655, 'bagging_fraction': 0.7721047063967424, 'bagging_freq': 8, 'lambda_l1': 0.14140474515899223, 'lambda_l2': 9.506593017393062, 'min_gain_to_split': 1.3878041045756073, 'max_bin': 255}. Best is trial 12 with value: 1152.6475239390618.


Training until validation scores don't improve for 200 rounds
[100]	valid_0's l1: 0.472496
[200]	valid_0's l1: 0.440257
[300]	valid_0's l1: 0.431065
[400]	valid_0's l1: 0.427633
[500]	valid_0's l1: 0.425942
[600]	valid_0's l1: 0.425219
[700]	valid_0's l1: 0.424954
[800]	valid_0's l1: 0.424877
[900]	valid_0's l1: 0.424852
[1000]	valid_0's l1: 0.424852
Early stopping, best iteration is:
[844]	valid_0's l1: 0.424852


[I 2025-12-16 22:01:01,543] Trial 19 finished with value: 1166.372416866214 and parameters: {'num_leaves': 251, 'max_depth': 10, 'min_data_in_leaf': 500, 'learning_rate': 0.015753242561424064, 'feature_fraction': 0.8281848300150375, 'feature_fraction_bynode': 0.685840191091291, 'bagging_fraction': 0.8303921550345197, 'bagging_freq': 5, 'lambda_l1': 3.606445241280038, 'lambda_l2': 22.71013606420069, 'min_gain_to_split': 2.2686329296463446, 'max_bin': 255}. Best is trial 12 with value: 1152.6475239390618.


Best MAE:  1152.6475239390618
Best params:  {'num_leaves': 236, 'max_depth': 9, 'min_data_in_leaf': 203, 'learning_rate': 0.015748462517233382, 'feature_fraction': 0.7604063212686378, 'feature_fraction_bynode': 0.7966713343930665, 'bagging_fraction': 0.8106707103422641, 'bagging_freq': 7, 'lambda_l1': 2.721190423876234, 'lambda_l2': 18.412028624412162, 'min_gain_to_split': 0.17783197825737135, 'max_bin': 255}


In [None]:
'''
Run 20 trials
Best MAE:  1152.6475239390618
Best params:  
{'num_leaves': 236, 
 'max_depth': 9, 
 'min_data_in_leaf': 203, 
 'learning_rate': 0.015748462517233382, 
 'feature_fraction': 0.7604063212686378, 
 'feature_fraction_bynode': 0.7966713343930665, 
 'bagging_fraction': 0.8106707103422641, 
 'bagging_freq': 7, 
 'lambda_l1': 2.721190423876234, 
 'lambda_l2': 18.412028624412162, 
 'min_gain_to_split': 0.17783197825737135, 
 'max_bin': 255}
'''


'''
# Run 40 trials
Early stopping, best iteration is:
[1400]	valid_0's l1: 0.416135
[I 2025-12-13 01:03:05,485] Trial 36 finished with value: 1140.1649585398006 and parameters: {'num_leaves': 404, 'max_depth': 8, 'min_data_in_leaf': 202, 'learning_rate': 0.02361306858113298, 'feature_fraction': 0.7211579800007945, 'feature_fraction_bynode': 0.9081132434036039, 'bagging_fraction': 0.8727967232517579, 'bagging_freq': 7, 'lambda_l1': 5.763583420124947, 'lambda_l2': 14.460054311753241, 'min_gain_to_split': 0.032912218537174215, 'max_bin': 127}. Best is trial 36 with value: 1140.1649585398006.

# Best params 
# MAE: 1140.1649585398006 
# trial 36 (40 trials)
{'num_leaves': 404, 
 'max_depth': 8, 
 'min_data_in_leaf': 202, 
 'learning_rate': 0.02361306858113298, 
 'feature_fraction': 0.7211579800007945, 
 'feature_fraction_bynode': 0.9081132434036039, 
 'bagging_fraction': 0.8727967232517579, 
 'bagging_freq': 7, 
 'lambda_l1': 5.763583420124947, 
 'lambda_l2': 14.460054311753241, 
 'min_gain_to_split': 0.032912218537174215, 
 'max_bin': 127}
'''
