In [1]:
import pandas as pd
import warnings
# Suppress every warning routed through the `warnings` module for the rest of the session.
# NOTE(review): a blanket 'ignore' may also hide warnings raised while models are being
# tuned (e.g. solver/convergence or deprecation warnings) — consider narrowing by category.
warnings.filterwarnings('ignore')

# Project-local helpers from the tuning and data-loading modules.
from src.models.tuning import tune_model, tune_and_evaluate, build_best_pipeline
from src.data_loader import load_data, prepare_data


# BASELINE

In [2]:
# Baseline load: every optional step exposed by load_data is switched off.
# NOTE(review): the dataset path is relative to the notebook's working directory.
df = load_data(
    '../home-credit-default-risk',
    handle_outliers=False,
    add_features=False,
    merge_bureau=False,
    merge_previous=False,
)

# Share of each TARGET class (proportions rather than raw counts).
df['TARGET'].value_counts(normalize=True)

df shape: (307511, 122)


TARGET
0    0.919271
1    0.080729
Name: proportion, dtype: float64

In [4]:
# Two feature/target pairs built from the same frame:
#   X1, y1 (encode=True)  -> passed to the logistic, random_forest and gradient_boosting runs
#   X2, y2 (encode=False) -> passed to the xgboost and lightgbm runs
# NOTE(review): presumably `encode` toggles categorical encoding inside prepare_data — confirm in src/data_loader.
X1, y1 = prepare_data(df, encode=True)
X2, y2 = prepare_data(df, encode=False)

## Logistic Regression


In [5]:
# Logistic regression search on the encode=True features: 10 trials with n_folds=3.
# Per the recorded output, the call logs each trial, then the best params and train/test ROC-AUC.
results_lr = tune_and_evaluate(X1, y1,
                               model_type='logistic',
                               n_trials=10,
                               n_folds=3)

[32m[I 2026-02-08 18:24:39,692][0m A new study created in memory with name: no-name-e54a8bd7-1701-4362-8261-9ff70e1ceade[0m
Best trial: 0. Best value: 0.743422:  10%|█         | 1/10 [00:12<01:55, 12.80s/it]

[32m[I 2026-02-08 18:24:52,494][0m Trial 0 finished with value: 0.7434221280117628 and parameters: {'C': 0.0074593432857265485, 'solver': 'lbfgs', 'class_weight': None}. Best is trial 0 with value: 0.7434221280117628.[0m


Best trial: 0. Best value: 0.743422:  20%|██        | 2/10 [01:20<06:01, 45.21s/it]

[32m[I 2026-02-08 18:26:00,392][0m Trial 1 finished with value: 0.7426671757199689 and parameters: {'C': 0.000602521573620386, 'solver': 'saga', 'class_weight': 'balanced'}. Best is trial 0 with value: 0.7434221280117628.[0m


Best trial: 0. Best value: 0.743422:  30%|███       | 3/10 [01:31<03:25, 29.38s/it]

[32m[I 2026-02-08 18:26:10,931][0m Trial 2 finished with value: 0.7396760800241832 and parameters: {'C': 0.00012674255898937226, 'solver': 'lbfgs', 'class_weight': None}. Best is trial 0 with value: 0.7434221280117628.[0m


Best trial: 0. Best value: 0.743422:  40%|████      | 4/10 [02:53<05:01, 50.17s/it]

[32m[I 2026-02-08 18:27:32,985][0m Trial 3 finished with value: 0.7421311355786319 and parameters: {'C': 0.0008260808399079611, 'solver': 'saga', 'class_weight': None}. Best is trial 0 with value: 0.7434221280117628.[0m


Best trial: 4. Best value: 0.744255:  50%|█████     | 5/10 [08:08<12:08, 145.63s/it]

[32m[I 2026-02-08 18:32:47,871][0m Trial 4 finished with value: 0.7442553457855022 and parameters: {'C': 0.11462107403425033, 'solver': 'saga', 'class_weight': 'balanced'}. Best is trial 4 with value: 0.7442553457855022.[0m


Best trial: 4. Best value: 0.744255:  60%|██████    | 6/10 [13:49<14:08, 212.13s/it]

[32m[I 2026-02-08 18:38:29,096][0m Trial 5 finished with value: 0.7439157486719118 and parameters: {'C': 0.8431013932082461, 'solver': 'saga', 'class_weight': None}. Best is trial 4 with value: 0.7442553457855022.[0m


Best trial: 6. Best value: 0.74468:  70%|███████   | 7/10 [14:11<07:29, 149.91s/it] 

[32m[I 2026-02-08 18:38:50,889][0m Trial 6 finished with value: 0.7446802447124606 and parameters: {'C': 0.10907475835157694, 'solver': 'lbfgs', 'class_weight': 'balanced'}. Best is trial 6 with value: 0.7446802447124606.[0m


Best trial: 6. Best value: 0.74468:  80%|████████  | 8/10 [14:23<03:32, 106.25s/it]

[32m[I 2026-02-08 18:39:03,656][0m Trial 7 finished with value: 0.7440255207531398 and parameters: {'C': 1.1015056790269626, 'solver': 'lbfgs', 'class_weight': None}. Best is trial 6 with value: 0.7446802447124606.[0m


Best trial: 6. Best value: 0.74468:  90%|█████████ | 9/10 [14:34<01:16, 76.35s/it] 

[32m[I 2026-02-08 18:39:14,262][0m Trial 8 finished with value: 0.7417566370934523 and parameters: {'C': 0.0004075596440072873, 'solver': 'lbfgs', 'class_weight': None}. Best is trial 6 with value: 0.7446802447124606.[0m


Best trial: 6. Best value: 0.74468: 100%|██████████| 10/10 [19:44<00:00, 118.42s/it]


[32m[I 2026-02-08 18:44:23,913][0m Trial 9 finished with value: 0.7439048876891917 and parameters: {'C': 0.20540519425388448, 'solver': 'saga', 'class_weight': None}. Best is trial 6 with value: 0.7446802447124606.[0m
Best params: {'C': 0.10907475835157694, 'solver': 'lbfgs', 'class_weight': 'balanced'}
Results:
Train ROC-AUC: 0.7489
Test ROC-AUC:  0.7486


## Random Forest

In [8]:
# Random forest search on the encode=True features: 10 trials with n_folds=3.
# Per the recorded output, the call logs each trial, then the best params and train/test ROC-AUC.
results_rf = tune_and_evaluate(X1, y1,
                               model_type='random_forest',
                               n_trials=10,
                               n_folds=3)

[32m[I 2026-02-08 18:53:56,412][0m A new study created in memory with name: no-name-512b625c-4ddb-4bdf-b933-bf42849b6e0e[0m
Best trial: 0. Best value: 0.733714:  10%|█         | 1/10 [00:52<07:53, 52.59s/it]

[32m[I 2026-02-08 18:54:49,003][0m Trial 0 finished with value: 0.7337138784248226 and parameters: {'n_estimators': 144, 'max_depth': 20, 'min_samples_split': 15, 'min_samples_leaf': 6, 'class_weight': None}. Best is trial 0 with value: 0.7337138784248226.[0m


Best trial: 0. Best value: 0.733714:  20%|██        | 2/10 [01:18<04:57, 37.17s/it]

[32m[I 2026-02-08 18:55:15,383][0m Trial 1 finished with value: 0.7210728047557916 and parameters: {'n_estimators': 64, 'max_depth': 18, 'min_samples_split': 13, 'min_samples_leaf': 8, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.7337138784248226.[0m


Best trial: 0. Best value: 0.733714:  30%|███       | 3/10 [01:59<04:29, 38.52s/it]

[32m[I 2026-02-08 18:55:55,503][0m Trial 2 finished with value: 0.7220203271505139 and parameters: {'n_estimators': 258, 'max_depth': 6, 'min_samples_split': 5, 'min_samples_leaf': 2, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.7337138784248226.[0m


Best trial: 0. Best value: 0.733714:  40%|████      | 4/10 [02:33<03:41, 36.87s/it]

[32m[I 2026-02-08 18:56:29,836][0m Trial 3 finished with value: 0.7251203865534587 and parameters: {'n_estimators': 158, 'max_depth': 8, 'min_samples_split': 13, 'min_samples_leaf': 2, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.7337138784248226.[0m


Best trial: 4. Best value: 0.735803:  50%|█████     | 5/10 [03:25<03:31, 42.26s/it]

[32m[I 2026-02-08 18:57:21,650][0m Trial 4 finished with value: 0.7358026096391806 and parameters: {'n_estimators': 164, 'max_depth': 17, 'min_samples_split': 5, 'min_samples_leaf': 6, 'class_weight': None}. Best is trial 4 with value: 0.7358026096391806.[0m


Best trial: 4. Best value: 0.735803:  60%|██████    | 6/10 [03:55<02:32, 38.09s/it]

[32m[I 2026-02-08 18:57:51,664][0m Trial 5 finished with value: 0.7239187739794124 and parameters: {'n_estimators': 202, 'max_depth': 6, 'min_samples_split': 3, 'min_samples_leaf': 10, 'class_weight': None}. Best is trial 4 with value: 0.7358026096391806.[0m


Best trial: 4. Best value: 0.735803:  70%|███████   | 7/10 [04:12<01:34, 31.38s/it]

[32m[I 2026-02-08 18:58:09,210][0m Trial 6 finished with value: 0.7146369945377583 and parameters: {'n_estimators': 126, 'max_depth': 4, 'min_samples_split': 15, 'min_samples_leaf': 5, 'class_weight': 'balanced'}. Best is trial 4 with value: 0.7358026096391806.[0m


Best trial: 4. Best value: 0.735803:  80%|████████  | 8/10 [04:38<00:59, 29.67s/it]

[32m[I 2026-02-08 18:58:35,212][0m Trial 7 finished with value: 0.7203285135226526 and parameters: {'n_estimators': 58, 'max_depth': 19, 'min_samples_split': 6, 'min_samples_leaf': 7, 'class_weight': 'balanced'}. Best is trial 4 with value: 0.7358026096391806.[0m


Best trial: 4. Best value: 0.735803:  90%|█████████ | 9/10 [05:12<00:30, 30.90s/it]

[32m[I 2026-02-08 18:59:08,835][0m Trial 8 finished with value: 0.7240372051907237 and parameters: {'n_estimators': 187, 'max_depth': 6, 'min_samples_split': 20, 'min_samples_leaf': 8, 'class_weight': None}. Best is trial 4 with value: 0.7358026096391806.[0m


Best trial: 4. Best value: 0.735803: 100%|██████████| 10/10 [06:07<00:00, 36.78s/it]


[32m[I 2026-02-08 19:00:04,201][0m Trial 9 finished with value: 0.7125143758411966 and parameters: {'n_estimators': 200, 'max_depth': 19, 'min_samples_split': 3, 'min_samples_leaf': 2, 'class_weight': 'balanced'}. Best is trial 4 with value: 0.7358026096391806.[0m
Best params: {'n_estimators': 164, 'max_depth': 17, 'min_samples_split': 5, 'min_samples_leaf': 6, 'class_weight': None}
Results:
Train ROC-AUC: 0.9296
Test ROC-AUC:  0.7392


## Gradient Boosting

In [9]:
# Gradient boosting search on the encode=True features: 10 trials with n_folds=3.
# timeout=18000 caps the study; the progress bar reports it in seconds ("/18000 seconds").
results_gb = tune_and_evaluate(X1, y1,
                               model_type='gradient_boosting',
                               n_trials=10,
                               n_folds=3,
                               timeout=18000)

[32m[I 2026-02-08 19:27:17,782][0m A new study created in memory with name: no-name-ab5f8f90-5dbc-4a4f-9425-b7cc6bab3599[0m
Best trial: 0. Best value: 0.745027:  10%|█         | 1/10 [05:31<49:40, 331.13s/it, 331.13/18000 seconds]

[32m[I 2026-02-08 19:32:48,909][0m Trial 0 finished with value: 0.7450271972306598 and parameters: {'n_estimators': 106, 'max_depth': 8, 'learning_rate': 0.1205712628744377, 'subsample': 0.8394633936788146, 'min_samples_split': 4}. Best is trial 0 with value: 0.7450271972306598.[0m


Best trial: 1. Best value: 0.748183:  20%|██        | 2/10 [06:34<23:09, 173.67s/it, 394.57/18000 seconds]

[32m[I 2026-02-08 19:33:52,355][0m Trial 1 finished with value: 0.7481832448120578 and parameters: {'n_estimators': 73, 'max_depth': 2, 'learning_rate': 0.19030368381735815, 'subsample': 0.8404460046972835, 'min_samples_split': 15}. Best is trial 1 with value: 0.7481832448120578.[0m


Best trial: 1. Best value: 0.748183:  30%|███       | 3/10 [09:15<19:33, 167.64s/it, 555.04/18000 seconds]

[32m[I 2026-02-08 19:36:32,818][0m Trial 2 finished with value: 0.7423737467236086 and parameters: {'n_estimators': 53, 'max_depth': 8, 'learning_rate': 0.16967533607196555, 'subsample': 0.6849356442713105, 'min_samples_split': 5}. Best is trial 1 with value: 0.7481832448120578.[0m


Best trial: 1. Best value: 0.748183:  40%|████      | 4/10 [11:12<14:47, 147.90s/it, 672.69/18000 seconds]

[32m[I 2026-02-08 19:38:30,468][0m Trial 3 finished with value: 0.7448621014173283 and parameters: {'n_estimators': 77, 'max_depth': 4, 'learning_rate': 0.05958389350068958, 'subsample': 0.7727780074568463, 'min_samples_split': 7}. Best is trial 1 with value: 0.7481832448120578.[0m


Best trial: 1. Best value: 0.748183:  50%|█████     | 5/10 [13:05<11:16, 135.21s/it, 785.40/18000 seconds]

[32m[I 2026-02-08 19:40:23,180][0m Trial 4 finished with value: 0.7311054955839662 and parameters: {'n_estimators': 142, 'max_depth': 2, 'learning_rate': 0.027010527749605478, 'subsample': 0.7465447373174767, 'min_samples_split': 10}. Best is trial 1 with value: 0.7481832448120578.[0m


Best trial: 5. Best value: 0.749902:  60%|██████    | 6/10 [16:13<10:12, 153.12s/it, 973.29/18000 seconds]

[32m[I 2026-02-08 19:43:31,074][0m Trial 5 finished with value: 0.7499016310135539 and parameters: {'n_estimators': 168, 'max_depth': 3, 'learning_rate': 0.05748924681991978, 'subsample': 0.836965827544817, 'min_samples_split': 2}. Best is trial 5 with value: 0.7499016310135539.[0m


Best trial: 5. Best value: 0.749902:  70%|███████   | 7/10 [19:23<08:15, 165.19s/it, 1163.33/18000 seconds]

[32m[I 2026-02-08 19:46:41,107][0m Trial 6 finished with value: 0.7229859933086384 and parameters: {'n_estimators': 141, 'max_depth': 3, 'learning_rate': 0.012476394272569451, 'subsample': 0.9795542149013333, 'min_samples_split': 20}. Best is trial 5 with value: 0.7499016310135539.[0m


Best trial: 5. Best value: 0.749902:  80%|████████  | 8/10 [23:51<06:36, 198.06s/it, 1431.77/18000 seconds]

[32m[I 2026-02-08 19:51:09,556][0m Trial 7 finished with value: 0.7351708110741525 and parameters: {'n_estimators': 172, 'max_depth': 4, 'learning_rate': 0.013940346079873234, 'subsample': 0.8736932106048627, 'min_samples_split': 10}. Best is trial 5 with value: 0.7499016310135539.[0m


Best trial: 5. Best value: 0.749902:  90%|█████████ | 9/10 [26:22<03:03, 183.19s/it, 1582.25/18000 seconds]

[32m[I 2026-02-08 19:53:40,030][0m Trial 8 finished with value: 0.7228715668728096 and parameters: {'n_estimators': 68, 'max_depth': 5, 'learning_rate': 0.011240768803005551, 'subsample': 0.9637281608315128, 'min_samples_split': 6}. Best is trial 5 with value: 0.7499016310135539.[0m


Best trial: 9. Best value: 0.751437: 100%|██████████| 10/10 [29:57<00:00, 179.76s/it, 1797.59/18000 seconds]


[32m[I 2026-02-08 19:57:15,369][0m Trial 9 finished with value: 0.7514372772956811 and parameters: {'n_estimators': 150, 'max_depth': 4, 'learning_rate': 0.05864129169696527, 'subsample': 0.8186841117373118, 'min_samples_split': 5}. Best is trial 9 with value: 0.7514372772956811.[0m
Best params: {'n_estimators': 150, 'max_depth': 4, 'learning_rate': 0.05864129169696527, 'subsample': 0.8186841117373118, 'min_samples_split': 5}
Results:
Train ROC-AUC: 0.7675
Test ROC-AUC:  0.7558


## XGBoost

In [4]:
# XGBoost search on the encode=False features: 30 trials with n_folds=3.
# NOTE(review): use_gpu=True presumably requires a GPU-enabled environment — confirm
# how tune_and_evaluate behaves when no GPU is available.
results_xgb = tune_and_evaluate(X2, y2,
                                model_type='xgboost',
                                n_trials=30,
                                n_folds=3,
                                use_gpu=True)


[32m[I 2026-02-08 19:17:11,489][0m A new study created in memory with name: no-name-13656814-7ca4-4d82-9112-ea54b3735153[0m
Best trial: 0. Best value: 0.691968:   3%|▎         | 1/30 [00:12<05:54, 12.23s/it]

[32m[I 2026-02-08 19:17:23,721][0m Trial 0 finished with value: 0.6919678021003062 and parameters: {'n_estimators': 144, 'max_depth': 10, 'learning_rate': 0.1205712628744377, 'subsample': 0.8394633936788146, 'colsample_bytree': 0.6624074561769746, 'reg_alpha': 2.5348407664333426e-07, 'reg_lambda': 3.3323645788192616e-08, 'scale_pos_weight': 13.126466040849092}. Best is trial 0 with value: 0.6919678021003062.[0m


Best trial: 1. Best value: 0.741974:   7%|▋         | 2/30 [00:22<05:06, 10.94s/it]

[32m[I 2026-02-08 19:17:33,757][0m Trial 1 finished with value: 0.7419738683374254 and parameters: {'n_estimators': 200, 'max_depth': 8, 'learning_rate': 0.010725209743171996, 'subsample': 0.9879639408647978, 'colsample_bytree': 0.9329770563201687, 'reg_alpha': 8.148018307012941e-07, 'reg_lambda': 4.329370014459266e-07, 'scale_pos_weight': 3.5676631379480734}. Best is trial 1 with value: 0.7419738683374254.[0m


Best trial: 2. Best value: 0.748108:  10%|█         | 3/30 [00:26<03:38,  8.10s/it]

[32m[I 2026-02-08 19:17:38,483][0m Trial 2 finished with value: 0.74810782522624 and parameters: {'n_estimators': 126, 'max_depth': 6, 'learning_rate': 0.04345454109729477, 'subsample': 0.7164916560792167, 'colsample_bytree': 0.8447411578889518, 'reg_alpha': 1.8007140198129195e-07, 'reg_lambda': 4.258943089524393e-06, 'scale_pos_weight': 6.129065806111684}. Best is trial 2 with value: 0.74810782522624.[0m


Best trial: 2. Best value: 0.748108:  13%|█▎        | 4/30 [00:38<04:01,  9.28s/it]

[32m[I 2026-02-08 19:17:49,578][0m Trial 3 finished with value: 0.7453212217085277 and parameters: {'n_estimators': 164, 'max_depth': 9, 'learning_rate': 0.019721610970574007, 'subsample': 0.8056937753654446, 'colsample_bytree': 0.836965827544817, 'reg_alpha': 2.6185068507773707e-08, 'reg_lambda': 0.0029369981104377003, 'scale_pos_weight': 3.3873377316220816}. Best is trial 2 with value: 0.74810782522624.[0m


Best trial: 2. Best value: 0.748108:  17%|█▋        | 5/30 [00:44<03:25,  8.23s/it]

[32m[I 2026-02-08 19:17:55,947][0m Trial 4 finished with value: 0.6845716755170833 and parameters: {'n_estimators': 66, 'max_depth': 10, 'learning_rate': 0.26690431824362526, 'subsample': 0.9233589392465844, 'colsample_bytree': 0.7218455076693483, 'reg_alpha': 7.569183361880229e-08, 'reg_lambda': 0.014391207615728067, 'scale_pos_weight': 7.162134912354418}. Best is trial 2 with value: 0.74810782522624.[0m


Best trial: 2. Best value: 0.748108:  20%|██        | 6/30 [00:48<02:40,  6.70s/it]

[32m[I 2026-02-08 19:17:59,671][0m Trial 5 finished with value: 0.7372323260620363 and parameters: {'n_estimators': 80, 'max_depth': 6, 'learning_rate': 0.011240768803005551, 'subsample': 0.9637281608315128, 'colsample_bytree': 0.7035119926400067, 'reg_alpha': 0.009176996354542699, 'reg_lambda': 6.388511557344611e-06, 'scale_pos_weight': 8.280952296489351}. Best is trial 2 with value: 0.74810782522624.[0m


Best trial: 2. Best value: 0.748108:  23%|██▎       | 7/30 [00:52<02:12,  5.76s/it]

[32m[I 2026-02-08 19:18:03,504][0m Trial 6 finished with value: 0.7464379249480354 and parameters: {'n_estimators': 187, 'max_depth': 3, 'learning_rate': 0.27051668818999286, 'subsample': 0.9100531293444458, 'colsample_bytree': 0.9757995766256756, 'reg_alpha': 1.1309571585271483, 'reg_lambda': 0.002404915432737351, 'scale_pos_weight': 13.906239290323636}. Best is trial 2 with value: 0.74810782522624.[0m


Best trial: 2. Best value: 0.748108:  27%|██▋       | 8/30 [00:54<01:45,  4.80s/it]

[32m[I 2026-02-08 19:18:06,230][0m Trial 7 finished with value: 0.7227015651534051 and parameters: {'n_estimators': 72, 'max_depth': 3, 'learning_rate': 0.011662890273931383, 'subsample': 0.7301321323053057, 'colsample_bytree': 0.7554709158757928, 'reg_alpha': 2.7678419414850017e-06, 'reg_lambda': 0.28749982347407854, 'scale_pos_weight': 5.99454657371025}. Best is trial 2 with value: 0.74810782522624.[0m


Best trial: 2. Best value: 0.748108:  30%|███       | 9/30 [00:59<01:39,  4.73s/it]

[32m[I 2026-02-08 19:18:10,821][0m Trial 8 finished with value: 0.7450289144328561 and parameters: {'n_estimators': 120, 'max_depth': 6, 'learning_rate': 0.016149614799999188, 'subsample': 0.9208787923016158, 'colsample_bytree': 0.6298202574719083, 'reg_alpha': 7.620481786158549, 'reg_lambda': 0.08916674715636537, 'scale_pos_weight': 3.7820195414784137}. Best is trial 2 with value: 0.74810782522624.[0m


Best trial: 2. Best value: 0.748108:  33%|███▎      | 10/30 [01:04<01:34,  4.72s/it]

[32m[I 2026-02-08 19:18:15,516][0m Trial 9 finished with value: 0.7387981462709319 and parameters: {'n_estimators': 51, 'max_depth': 9, 'learning_rate': 0.11069143219393454, 'subsample': 0.8916028672163949, 'colsample_bytree': 0.9085081386743783, 'reg_alpha': 4.638759594322625e-08, 'reg_lambda': 1.683416412018213e-05, 'scale_pos_weight': 2.622166833351816}. Best is trial 2 with value: 0.74810782522624.[0m


Best trial: 10. Best value: 0.751874:  37%|███▋      | 11/30 [01:10<01:38,  5.20s/it]

[32m[I 2026-02-08 19:18:21,798][0m Trial 10 finished with value: 0.7518740868073986 and parameters: {'n_estimators': 267, 'max_depth': 5, 'learning_rate': 0.03504750508385009, 'subsample': 0.6071847502459279, 'colsample_bytree': 0.8277250010609204, 'reg_alpha': 9.721161790477586e-05, 'reg_lambda': 4.3444691085504035, 'scale_pos_weight': 10.336015851156098}. Best is trial 10 with value: 0.7518740868073986.[0m


Best trial: 10. Best value: 0.751874:  40%|████      | 12/30 [01:16<01:41,  5.63s/it]

[32m[I 2026-02-08 19:18:28,405][0m Trial 11 finished with value: 0.7518141680265519 and parameters: {'n_estimators': 287, 'max_depth': 5, 'learning_rate': 0.03529010699062179, 'subsample': 0.604153251963001, 'colsample_bytree': 0.83445433467569, 'reg_alpha': 8.113466471626533e-05, 'reg_lambda': 4.188416507348491, 'scale_pos_weight': 10.217724595455632}. Best is trial 10 with value: 0.7518740868073986.[0m


Best trial: 12. Best value: 0.752197:  43%|████▎     | 13/30 [01:23<01:41,  5.94s/it]

[32m[I 2026-02-08 19:18:35,069][0m Trial 12 finished with value: 0.7521970565263775 and parameters: {'n_estimators': 294, 'max_depth': 5, 'learning_rate': 0.03913347433912368, 'subsample': 0.6014245292403246, 'colsample_bytree': 0.7957611591204394, 'reg_alpha': 0.00018660551793946956, 'reg_lambda': 8.964592852736427, 'scale_pos_weight': 10.597772834842873}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  47%|████▋     | 14/30 [01:29<01:34,  5.88s/it]

[32m[I 2026-02-08 19:18:40,811][0m Trial 13 finished with value: 0.7515897918427186 and parameters: {'n_estimators': 298, 'max_depth': 4, 'learning_rate': 0.026538079082973228, 'subsample': 0.6038854234694345, 'colsample_bytree': 0.7806930580648809, 'reg_alpha': 0.00043125458575778174, 'reg_lambda': 9.868745464853728, 'scale_pos_weight': 11.019060309495242}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  50%|█████     | 15/30 [01:33<01:18,  5.26s/it]

[32m[I 2026-02-08 19:18:44,638][0m Trial 14 finished with value: 0.7510098070735368 and parameters: {'n_estimators': 244, 'max_depth': 2, 'learning_rate': 0.07172884676045523, 'subsample': 0.6638123593227387, 'colsample_bytree': 0.8919339318126805, 'reg_alpha': 2.6369000442220666e-05, 'reg_lambda': 0.6255688317185157, 'scale_pos_weight': 11.063681116565661}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  53%|█████▎    | 16/30 [01:41<01:27,  6.22s/it]

[32m[I 2026-02-08 19:18:53,078][0m Trial 15 finished with value: 0.7359062415298814 and parameters: {'n_estimators': 249, 'max_depth': 7, 'learning_rate': 0.06621129530220976, 'subsample': 0.6732963685669237, 'colsample_bytree': 0.7977779145544659, 'reg_alpha': 0.0027351317703316587, 'reg_lambda': 0.00016097275579289827, 'scale_pos_weight': 9.020121876280399}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  57%|█████▋    | 17/30 [01:46<01:16,  5.90s/it]

[32m[I 2026-02-08 19:18:58,253][0m Trial 16 finished with value: 0.7509706885568855 and parameters: {'n_estimators': 251, 'max_depth': 4, 'learning_rate': 0.02974282541944059, 'subsample': 0.6509446781447307, 'colsample_bytree': 0.8682180570419545, 'reg_alpha': 0.04438659029937832, 'reg_lambda': 1.4318951998429494, 'scale_pos_weight': 12.589021632405565}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  60%|██████    | 18/30 [01:52<01:09,  5.77s/it]

[32m[I 2026-02-08 19:19:03,698][0m Trial 17 finished with value: 0.7501026895276549 and parameters: {'n_estimators': 221, 'max_depth': 5, 'learning_rate': 0.05001497891412072, 'subsample': 0.7542495483658851, 'colsample_bytree': 0.737408532052827, 'reg_alpha': 1.1856345719368987e-05, 'reg_lambda': 0.03838632001218306, 'scale_pos_weight': 14.501943841302138}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  63%|██████▎   | 19/30 [02:01<01:14,  6.82s/it]

[32m[I 2026-02-08 19:19:12,960][0m Trial 18 finished with value: 0.7307940681686452 and parameters: {'n_estimators': 276, 'max_depth': 7, 'learning_rate': 0.0951514173164598, 'subsample': 0.6411743739750013, 'colsample_bytree': 0.9948521342166293, 'reg_alpha': 0.00038524739333817054, 'reg_lambda': 9.86383315576634, 'scale_pos_weight': 9.79258713403182}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  67%|██████▋   | 20/30 [02:07<01:06,  6.69s/it]

[32m[I 2026-02-08 19:19:19,354][0m Trial 19 finished with value: 0.7500087917676651 and parameters: {'n_estimators': 269, 'max_depth': 5, 'learning_rate': 0.0213028335809897, 'subsample': 0.7170053519884164, 'colsample_bytree': 0.6899007624651141, 'reg_alpha': 0.11041381200290849, 'reg_lambda': 0.23378225464218577, 'scale_pos_weight': 11.60779447050669}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  70%|███████   | 21/30 [02:12<00:54,  6.09s/it]

[32m[I 2026-02-08 19:19:24,055][0m Trial 20 finished with value: 0.7452785073001552 and parameters: {'n_estimators': 213, 'max_depth': 4, 'learning_rate': 0.16522729299476804, 'subsample': 0.7708945864650332, 'colsample_bytree': 0.7727431143998708, 'reg_alpha': 0.00200934417921391, 'reg_lambda': 0.000421628945103269, 'scale_pos_weight': 7.710944483463397}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  73%|███████▎  | 22/30 [02:19<00:50,  6.31s/it]

[32m[I 2026-02-08 19:19:30,869][0m Trial 21 finished with value: 0.751773663666838 and parameters: {'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.03743112490762556, 'subsample': 0.6010004902112124, 'colsample_bytree': 0.8133558461852735, 'reg_alpha': 3.776024071123684e-05, 'reg_lambda': 1.473121668307284, 'scale_pos_weight': 9.909177051171401}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  77%|███████▋  | 23/30 [02:25<00:44,  6.34s/it]

[32m[I 2026-02-08 19:19:37,294][0m Trial 22 finished with value: 0.7513145494741659 and parameters: {'n_estimators': 281, 'max_depth': 5, 'learning_rate': 0.03337909106639233, 'subsample': 0.6316111560388971, 'colsample_bytree': 0.8351640913720344, 'reg_alpha': 0.00017081041887644172, 'reg_lambda': 2.3690864673229264, 'scale_pos_weight': 12.077242245980063}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  80%|████████  | 24/30 [02:33<00:40,  6.80s/it]

[32m[I 2026-02-08 19:19:45,154][0m Trial 23 finished with value: 0.7437383791292942 and parameters: {'n_estimators': 230, 'max_depth': 7, 'learning_rate': 0.057301259205196305, 'subsample': 0.6956509797458579, 'colsample_bytree': 0.8635159097607085, 'reg_alpha': 3.745057422867207e-06, 'reg_lambda': 9.926104950646856, 'scale_pos_weight': 10.289210532615442}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  83%|████████▎ | 25/30 [02:38<00:30,  6.13s/it]

[32m[I 2026-02-08 19:19:49,740][0m Trial 24 finished with value: 0.7484140402049855 and parameters: {'n_estimators': 266, 'max_depth': 3, 'learning_rate': 0.02602708037584956, 'subsample': 0.6265920588930738, 'colsample_bytree': 0.8171070516400449, 'reg_alpha': 0.0002313107347745387, 'reg_lambda': 0.02288040554490327, 'scale_pos_weight': 9.069477731824861}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  87%|████████▋ | 26/30 [02:43<00:23,  5.95s/it]

[32m[I 2026-02-08 19:19:55,245][0m Trial 25 finished with value: 0.7517376452542344 and parameters: {'n_estimators': 286, 'max_depth': 4, 'learning_rate': 0.041585192799241895, 'subsample': 0.6827194859189651, 'colsample_bytree': 0.9259308962023541, 'reg_alpha': 9.055527068146866e-05, 'reg_lambda': 0.15809714716013953, 'scale_pos_weight': 13.216916306770374}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  90%|█████████ | 27/30 [02:51<00:19,  6.37s/it]

[32m[I 2026-02-08 19:20:02,592][0m Trial 26 finished with value: 0.7484931014299194 and parameters: {'n_estimators': 258, 'max_depth': 6, 'learning_rate': 0.015488286042220063, 'subsample': 0.6039243966604406, 'colsample_bytree': 0.8768189353475673, 'reg_alpha': 0.0019294110565580363, 'reg_lambda': 1.8898606990715678, 'scale_pos_weight': 6.666066354183508}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  93%|█████████▎| 28/30 [02:54<00:11,  5.60s/it]

[32m[I 2026-02-08 19:20:06,407][0m Trial 27 finished with value: 0.7518540593251051 and parameters: {'n_estimators': 238, 'max_depth': 2, 'learning_rate': 0.08335430730290301, 'subsample': 0.6314807297199991, 'colsample_bytree': 0.7885553567345974, 'reg_alpha': 8.058360986958853e-06, 'reg_lambda': 0.007480795461140197, 'scale_pos_weight': 4.780482359689908}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197:  97%|█████████▋| 29/30 [02:58<00:05,  5.07s/it]

[32m[I 2026-02-08 19:20:10,230][0m Trial 28 finished with value: 0.7518827992356011 and parameters: {'n_estimators': 236, 'max_depth': 2, 'learning_rate': 0.08004618375644801, 'subsample': 0.6478250388224, 'colsample_bytree': 0.7600457533490707, 'reg_alpha': 7.247150704332303e-06, 'reg_lambda': 0.002386841296024583, 'scale_pos_weight': 1.3546003339046369}. Best is trial 12 with value: 0.7521970565263775.[0m


Best trial: 12. Best value: 0.752197: 100%|██████████| 30/30 [03:02<00:00,  6.07s/it]


[32m[I 2026-02-08 19:20:13,536][0m Trial 29 finished with value: 0.7459590179025568 and parameters: {'n_estimators': 172, 'max_depth': 2, 'learning_rate': 0.0537986140995045, 'subsample': 0.8522957041892277, 'colsample_bytree': 0.6477639824328912, 'reg_alpha': 1.4536897449346058e-06, 'reg_lambda': 3.0439381964747143e-07, 'scale_pos_weight': 1.802494490396012}. Best is trial 12 with value: 0.7521970565263775.[0m
Best params: {'n_estimators': 294, 'max_depth': 5, 'learning_rate': 0.03913347433912368, 'subsample': 0.6014245292403246, 'colsample_bytree': 0.7957611591204394, 'reg_alpha': 0.00018660551793946956, 'reg_lambda': 8.964592852736427, 'scale_pos_weight': 10.597772834842873}
Results:
Train ROC-AUC: 0.7973
Test ROC-AUC:  0.7574


## LightGBM

In [5]:
# LightGBM search on the encode=False features: 30 trials with n_folds=3.
# NOTE(review): use_gpu=True presumably requires a GPU-enabled environment — confirm
# how tune_and_evaluate behaves when no GPU is available.
results_lgbm = tune_and_evaluate(X2, y2,
                                 model_type='lightgbm',
                                 n_trials=30,
                                 n_folds=3,
                                 use_gpu=True)


[32m[I 2026-02-08 19:20:31,207][0m A new study created in memory with name: no-name-03c044b4-bab9-44d6-8485-efa5dde577ff[0m
Best trial: 0. Best value: 0.74067:   3%|▎         | 1/30 [00:18<08:43, 18.04s/it]

[32m[I 2026-02-08 19:20:49,245][0m Trial 0 finished with value: 0.7406697619908954 and parameters: {'n_estimators': 144, 'max_depth': 15, 'learning_rate': 0.1205712628744377, 'num_leaves': 98, 'subsample': 0.6624074561769746, 'colsample_bytree': 0.662397808134481, 'reg_alpha': 3.3323645788192616e-08, 'reg_lambda': 0.6245760287469893, 'scale_pos_weight': 9.415610164404923}. Best is trial 0 with value: 0.7406697619908954.[0m


Best trial: 1. Best value: 0.754408:   7%|▋         | 2/30 [00:21<04:24,  9.44s/it]

[32m[I 2026-02-08 19:20:52,661][0m Trial 1 finished with value: 0.7544079462896601 and parameters: {'n_estimators': 227, 'max_depth': 2, 'learning_rate': 0.2708160864249968, 'num_leaves': 129, 'subsample': 0.6849356442713105, 'colsample_bytree': 0.6727299868828402, 'reg_alpha': 4.4734294104626844e-07, 'reg_lambda': 5.472429642032198e-06, 'scale_pos_weight': 8.346590042851329}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  10%|█         | 3/30 [00:28<03:40,  8.15s/it]

[32m[I 2026-02-08 19:20:59,285][0m Trial 2 finished with value: 0.7531123321422913 and parameters: {'n_estimators': 158, 'max_depth': 6, 'learning_rate': 0.08012737503998542, 'num_leaves': 38, 'subsample': 0.7168578594140873, 'colsample_bytree': 0.7465447373174767, 'reg_alpha': 0.00012724181576752517, 'reg_lambda': 0.1165691561324743, 'scale_pos_weight': 3.7954329502170365}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  13%|█▎        | 4/30 [00:43<04:50, 11.19s/it]

[32m[I 2026-02-08 19:21:15,128][0m Trial 3 finished with value: 0.7479443973217793 and parameters: {'n_estimators': 179, 'max_depth': 10, 'learning_rate': 0.011711509955524094, 'num_leaves': 99, 'subsample': 0.6682096494749166, 'colsample_bytree': 0.6260206371941118, 'reg_alpha': 3.4671276804481113, 'reg_lambda': 4.905556676028774, 'scale_pos_weight': 12.317562873630456}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  17%|█▋        | 5/30 [00:46<03:26,  8.25s/it]

[32m[I 2026-02-08 19:21:18,164][0m Trial 4 finished with value: 0.7529393886121145 and parameters: {'n_estimators': 126, 'max_depth': 3, 'learning_rate': 0.1024932221692416, 'num_leaves': 77, 'subsample': 0.6488152939379115, 'colsample_bytree': 0.798070764044508, 'reg_alpha': 2.039373116525212e-08, 'reg_lambda': 1.527156759251193, 'scale_pos_weight': 4.622919742400237}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  20%|██        | 6/30 [00:56<03:29,  8.72s/it]

[32m[I 2026-02-08 19:21:27,801][0m Trial 5 finished with value: 0.7492897968614205 and parameters: {'n_estimators': 216, 'max_depth': 6, 'learning_rate': 0.05864129169696527, 'num_leaves': 91, 'subsample': 0.6739417822102108, 'colsample_bytree': 0.9878338511058234, 'reg_alpha': 0.09466630153726856, 'reg_lambda': 2.854239907497756, 'scale_pos_weight': 13.527582905987083}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  23%|██▎       | 7/30 [01:07<03:35,  9.37s/it]

[32m[I 2026-02-08 19:21:38,512][0m Trial 6 finished with value: 0.7483935245537899 and parameters: {'n_estimators': 200, 'max_depth': 14, 'learning_rate': 0.01351182947645082, 'num_leaves': 45, 'subsample': 0.6180909155642152, 'colsample_bytree': 0.7301321323053057, 'reg_alpha': 3.148441347423712e-05, 'reg_lambda': 2.7678419414850017e-06, 'scale_pos_weight': 12.60232512812701}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  27%|██▋       | 8/30 [01:12<02:56,  8.03s/it]

[32m[I 2026-02-08 19:21:43,675][0m Trial 7 finished with value: 0.7534862657744767 and parameters: {'n_estimators': 139, 'max_depth': 5, 'learning_rate': 0.06333268775321843, 'num_leaves': 38, 'subsample': 0.9208787923016158, 'colsample_bytree': 0.6298202574719083, 'reg_alpha': 7.620481786158549, 'reg_lambda': 0.08916674715636537, 'scale_pos_weight': 3.7820195414784137}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  30%|███       | 9/30 [01:19<02:38,  7.57s/it]

[32m[I 2026-02-08 19:21:50,228][0m Trial 8 finished with value: 0.7464976167647398 and parameters: {'n_estimators': 51, 'max_depth': 13, 'learning_rate': 0.11069143219393454, 'num_leaves': 115, 'subsample': 0.9085081386743783, 'colsample_bytree': 0.6296178606936361, 'reg_alpha': 1.683416412018213e-05, 'reg_lambda': 1.1036250149900698e-07, 'scale_pos_weight': 13.08344796225831}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  33%|███▎      | 10/30 [01:29<02:47,  8.39s/it]

[32m[I 2026-02-08 19:22:00,464][0m Trial 9 finished with value: 0.7469406524339149 and parameters: {'n_estimators': 206, 'max_depth': 6, 'learning_rate': 0.012413189635294229, 'num_leaves': 60, 'subsample': 0.7300733288106989, 'colsample_bytree': 0.8918424713352255, 'reg_alpha': 0.005470376807480391, 'reg_lambda': 0.9658611176861268, 'scale_pos_weight': 7.61100895226729}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  37%|███▋      | 11/30 [01:33<02:12,  6.98s/it]

[32m[I 2026-02-08 19:22:04,248][0m Trial 10 finished with value: 0.7537169630563244 and parameters: {'n_estimators': 292, 'max_depth': 2, 'learning_rate': 0.2704729722717776, 'num_leaves': 144, 'subsample': 0.8182873120328862, 'colsample_bytree': 0.876098829427658, 'reg_alpha': 1.990632676927382e-06, 'reg_lambda': 4.32747580263185e-05, 'scale_pos_weight': 7.710132418303267}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  40%|████      | 12/30 [01:36<01:47,  5.99s/it]

[32m[I 2026-02-08 19:22:07,953][0m Trial 11 finished with value: 0.75395782575076 and parameters: {'n_estimators': 300, 'max_depth': 2, 'learning_rate': 0.24950292438859467, 'num_leaves': 147, 'subsample': 0.8153994220705195, 'colsample_bytree': 0.8810796931031994, 'reg_alpha': 6.301704190409315e-07, 'reg_lambda': 0.00014870538574514983, 'scale_pos_weight': 7.767865589092113}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  43%|████▎     | 13/30 [01:40<01:29,  5.27s/it]

[32m[I 2026-02-08 19:22:11,575][0m Trial 12 finished with value: 0.7528919079967008 and parameters: {'n_estimators': 291, 'max_depth': 2, 'learning_rate': 0.28300055804742463, 'num_leaves': 150, 'subsample': 0.8376702913106802, 'colsample_bytree': 0.887098392707996, 'reg_alpha': 4.3126863890374954e-07, 'reg_lambda': 0.0014706657514448518, 'scale_pos_weight': 9.937391476921219}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  47%|████▋     | 14/30 [02:02<02:44, 10.28s/it]

[32m[I 2026-02-08 19:22:33,422][0m Trial 13 finished with value: 0.719211753879072 and parameters: {'n_estimators': 248, 'max_depth': 9, 'learning_rate': 0.16412663139094985, 'num_leaves': 126, 'subsample': 0.7652864965313572, 'colsample_bytree': 0.9523656395806502, 'reg_alpha': 1.4312210113644849e-06, 'reg_lambda': 0.000580928281055351, 'scale_pos_weight': 5.941088304279658}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  50%|█████     | 15/30 [02:07<02:12,  8.82s/it]

[32m[I 2026-02-08 19:22:38,880][0m Trial 14 finished with value: 0.7530579544438153 and parameters: {'n_estimators': 250, 'max_depth': 4, 'learning_rate': 0.03714200138875004, 'num_leaves': 130, 'subsample': 0.9978752684152138, 'colsample_bytree': 0.8124496121405841, 'reg_alpha': 0.0011023387234258472, 'reg_lambda': 9.18949237690121e-06, 'scale_pos_weight': 10.132658593342377}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  53%|█████▎    | 16/30 [02:32<03:09, 13.57s/it]

[32m[I 2026-02-08 19:23:03,470][0m Trial 15 finished with value: 0.7536250581927622 and parameters: {'n_estimators': 250, 'max_depth': 11, 'learning_rate': 0.026624223212611594, 'num_leaves': 119, 'subsample': 0.864218174049301, 'colsample_bytree': 0.7039986977581242, 'reg_alpha': 3.2527122073557805e-07, 'reg_lambda': 1.2319462809318151e-08, 'scale_pos_weight': 6.259915332611578}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  57%|█████▋    | 17/30 [02:53<03:26, 15.87s/it]

[32m[I 2026-02-08 19:23:24,675][0m Trial 16 finished with value: 0.7210068109594584 and parameters: {'n_estimators': 268, 'max_depth': 8, 'learning_rate': 0.19408516667353212, 'num_leaves': 134, 'subsample': 0.7741590943076262, 'colsample_bytree': 0.8139972293011739, 'reg_alpha': 1.0215314651177562e-08, 'reg_lambda': 0.008191351418516124, 'scale_pos_weight': 1.1845090013329296}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  60%|██████    | 18/30 [02:56<02:23, 11.95s/it]

[32m[I 2026-02-08 19:23:27,498][0m Trial 17 finished with value: 0.7521123068971267 and parameters: {'n_estimators': 103, 'max_depth': 3, 'learning_rate': 0.16307830042004035, 'num_leaves': 110, 'subsample': 0.725714612500483, 'colsample_bytree': 0.774439933428912, 'reg_alpha': 7.639371274654855e-06, 'reg_lambda': 1.4309033237222284e-06, 'scale_pos_weight': 10.59187303938706}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  63%|██████▎   | 19/30 [03:01<01:48,  9.82s/it]

[32m[I 2026-02-08 19:23:32,374][0m Trial 18 finished with value: 0.7444207211497483 and parameters: {'n_estimators': 228, 'max_depth': 4, 'learning_rate': 0.21623470239935275, 'num_leaves': 21, 'subsample': 0.8767725283917833, 'colsample_bytree': 0.8624175337686145, 'reg_alpha': 1.5867711819773276e-07, 'reg_lambda': 3.723385683431751e-05, 'scale_pos_weight': 8.27857759470262}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  67%|██████▋   | 20/30 [03:26<02:24, 14.41s/it]

[32m[I 2026-02-08 19:23:57,483][0m Trial 19 finished with value: 0.7453093258597914 and parameters: {'n_estimators': 300, 'max_depth': 8, 'learning_rate': 0.03910891361220486, 'num_leaves': 140, 'subsample': 0.774205059186484, 'colsample_bytree': 0.6846840582961851, 'reg_alpha': 0.00030468089368308873, 'reg_lambda': 3.35918377898784e-07, 'scale_pos_weight': 11.39061768018856}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 1. Best value: 0.754408:  70%|███████   | 21/30 [03:29<01:40, 11.14s/it]

[32m[I 2026-02-08 19:24:01,004][0m Trial 20 finished with value: 0.7523834196068263 and parameters: {'n_estimators': 265, 'max_depth': 2, 'learning_rate': 0.296173153691593, 'num_leaves': 74, 'subsample': 0.6060084846502748, 'colsample_bytree': 0.9354348266823662, 'reg_alpha': 0.019180934401115693, 'reg_lambda': 7.903356822601354e-05, 'scale_pos_weight': 14.913798990695922}. Best is trial 1 with value: 0.7544079462896601.[0m


Best trial: 21. Best value: 0.755253:  73%|███████▎  | 22/30 [03:33<01:10,  8.87s/it]

[32m[I 2026-02-08 19:24:04,569][0m Trial 21 finished with value: 0.7552526038673806 and parameters: {'n_estimators': 279, 'max_depth': 2, 'learning_rate': 0.2237383255592188, 'num_leaves': 149, 'subsample': 0.8247381409512514, 'colsample_bytree': 0.8510806854619423, 'reg_alpha': 3.235268233590935e-06, 'reg_lambda': 8.798531894043094e-05, 'scale_pos_weight': 8.044403637533952}. Best is trial 21 with value: 0.7552526038673806.[0m


Best trial: 21. Best value: 0.755253:  77%|███████▋  | 23/30 [03:38<00:55,  7.88s/it]

[32m[I 2026-02-08 19:24:10,125][0m Trial 22 finished with value: 0.7484526010533036 and parameters: {'n_estimators': 274, 'max_depth': 4, 'learning_rate': 0.15625251198247042, 'num_leaves': 150, 'subsample': 0.8007852355243368, 'colsample_bytree': 0.8305560676959912, 'reg_alpha': 2.3324072344029133e-06, 'reg_lambda': 0.00390493341725392, 'scale_pos_weight': 8.88461147887751}. Best is trial 21 with value: 0.7552526038673806.[0m


Best trial: 21. Best value: 0.755253:  80%|████████  | 24/30 [03:42<00:40,  6.67s/it]

[32m[I 2026-02-08 19:24:13,983][0m Trial 23 finished with value: 0.7513107547884638 and parameters: {'n_estimators': 232, 'max_depth': 3, 'learning_rate': 0.21739568328849712, 'num_leaves': 136, 'subsample': 0.8366849379399733, 'colsample_bytree': 0.8502946719247292, 'reg_alpha': 1.1039483896617751e-07, 'reg_lambda': 0.00020399217423215974, 'scale_pos_weight': 6.576222059527494}. Best is trial 21 with value: 0.7552526038673806.[0m


Best trial: 21. Best value: 0.755253:  83%|████████▎ | 25/30 [03:48<00:32,  6.41s/it]

[32m[I 2026-02-08 19:24:19,806][0m Trial 24 finished with value: 0.7491715577719771 and parameters: {'n_estimators': 187, 'max_depth': 5, 'learning_rate': 0.1339812460043646, 'num_leaves': 126, 'subsample': 0.9339776954058359, 'colsample_bytree': 0.9158457392078533, 'reg_alpha': 3.987273258901355e-05, 'reg_lambda': 2.0039531418866732e-05, 'scale_pos_weight': 5.186858699912094}. Best is trial 21 with value: 0.7552526038673806.[0m


Best trial: 21. Best value: 0.755253:  87%|████████▋ | 26/30 [03:52<00:22,  5.57s/it]

[32m[I 2026-02-08 19:24:23,405][0m Trial 25 finished with value: 0.7534868958387356 and parameters: {'n_estimators': 279, 'max_depth': 2, 'learning_rate': 0.08753294044427287, 'num_leaves': 108, 'subsample': 0.8665825986779486, 'colsample_bytree': 0.7625377118162863, 'reg_alpha': 4.309454075796565e-06, 'reg_lambda': 3.4883831316707584e-06, 'scale_pos_weight': 7.232840363197239}. Best is trial 21 with value: 0.7552526038673806.[0m


Best trial: 21. Best value: 0.755253:  90%|█████████ | 27/30 [04:06<00:24,  8.31s/it]

[32m[I 2026-02-08 19:24:38,109][0m Trial 26 finished with value: 0.7153849056850458 and parameters: {'n_estimators': 243, 'max_depth': 7, 'learning_rate': 0.2070648971575244, 'num_leaves': 142, 'subsample': 0.7487326289175654, 'colsample_bytree': 0.9760286328992359, 'reg_alpha': 5.916632088102311e-07, 'reg_lambda': 0.0002803391168360981, 'scale_pos_weight': 8.459282637101849}. Best is trial 21 with value: 0.7552526038673806.[0m


Best trial: 21. Best value: 0.755253:  93%|█████████▎| 28/30 [04:12<00:14,  7.44s/it]

[32m[I 2026-02-08 19:24:43,519][0m Trial 27 finished with value: 0.7437165680856674 and parameters: {'n_estimators': 269, 'max_depth': 4, 'learning_rate': 0.23098868253142088, 'num_leaves': 122, 'subsample': 0.802440972113789, 'colsample_bytree': 0.9104702877718115, 'reg_alpha': 5.090850119300536e-08, 'reg_lambda': 5.302626356370004e-07, 'scale_pos_weight': 2.8331069722887685}. Best is trial 21 with value: 0.7552526038673806.[0m


Best trial: 21. Best value: 0.755253:  97%|█████████▋| 29/30 [04:16<00:06,  6.36s/it]

[32m[I 2026-02-08 19:24:47,351][0m Trial 28 finished with value: 0.7536115043369076 and parameters: {'n_estimators': 224, 'max_depth': 3, 'learning_rate': 0.1459307166543298, 'num_leaves': 136, 'subsample': 0.6916618853509834, 'colsample_bytree': 0.8377620851897641, 'reg_alpha': 9.01504182137855e-05, 'reg_lambda': 0.022094810952513966, 'scale_pos_weight': 11.452033267527753}. Best is trial 21 with value: 0.7552526038673806.[0m


Best trial: 21. Best value: 0.755253: 100%|██████████| 30/30 [04:24<00:00,  8.82s/it]


[32m[I 2026-02-08 19:24:55,958][0m Trial 29 finished with value: 0.7515750442086269 and parameters: {'n_estimators': 283, 'max_depth': 5, 'learning_rate': 0.01893124600345292, 'num_leaves': 149, 'subsample': 0.9774479587772135, 'colsample_bytree': 0.6715490596318728, 'reg_alpha': 5.607767482178011e-08, 'reg_lambda': 5.08340406004038e-08, 'scale_pos_weight': 9.289906675354608}. Best is trial 21 with value: 0.7552526038673806.[0m
Best params: {'n_estimators': 279, 'max_depth': 2, 'learning_rate': 0.2237383255592188, 'num_leaves': 149, 'subsample': 0.8247381409512514, 'colsample_bytree': 0.8510806854619423, 'reg_alpha': 3.235268233590935e-06, 'reg_lambda': 8.798531894043094e-05, 'scale_pos_weight': 8.044403637533952}
Results:
Train ROC-AUC: 0.7759
Test ROC-AUC:  0.7612


## Results Comparison

In [10]:
# Gather whichever tuning results exist in the kernel namespace. A model whose
# cell was skipped or interrupted has no `results_*` variable and maps to None.
results = {
    'Logistic': globals().get('results_lr'),
    'Random Forest': globals().get('results_rf'),
    'Gradient Boosting': globals().get('results_gb'),
    'XGBoost': globals().get('results_xgb'),
    'LightGBM': globals().get('results_lgbm'),
}

# Report missing models instead of crashing on `None['cv_score']`.
missing = [name for name, r in results.items() if r is None]
if missing:
    print(f"Skipping models without results: {', '.join(missing)}")

score_columns = ['Model', 'CV Score', 'Train Score', 'Test Score']
rows = [
    {
        'Model': name,
        'CV Score': r['cv_score'],
        'Train Score': r['train_score'],
        'Test Score': r['test_score'],
    }
    for name, r in results.items()
    if r is not None
]

# Explicit columns keep the sort and the header relabelling valid even when
# no model has been evaluated yet (empty `rows`).
comp = pd.DataFrame(rows, columns=score_columns).sort_values('Test Score', ascending=False)
comp.columns = pd.MultiIndex.from_tuples([
    ('BASELINE', col) for col in comp.columns
])

display(comp)

Unnamed: 0_level_0,BASELINE,BASELINE,BASELINE,BASELINE
Unnamed: 0_level_1,Model,CV Score,Train Score,Test Score
4,LightGBM,0.755253,0.775897,0.761222
3,XGBoost,0.752197,0.797267,0.757433
2,Gradient Boosting,0.751437,0.767483,0.755834
0,Logistic,0.74468,0.748914,0.748642
1,Random Forest,0.735803,0.929627,0.739173


**TODO:** maybe add Optuna plots here - look at the "warsztat AI" materials for examples.

# NO OUTLIERS, ADD FEATURES, MERGE BUREAU, MERGE PREVIOUS_APP

In [None]:
# Same source directory as the BASELINE section, but with all four optional
# loader flags switched on.
df_full = load_data(
    '../home-credit-default-risk',
    handle_outliers=True,
    add_features=True,
    merge_bureau=True,
    merge_previous=True,
)

# Class balance of the target, shown as proportions.
df_full['TARGET'].value_counts(normalize=True)

In [None]:
# Two views of the full frame: X1/y1 with encode=True, X2/y2 with encode=False.
# The cells below pass X1 to the logistic / random forest / gradient boosting
# runs and X2 to the XGBoost and LightGBM runs.
# NOTE(review): these names are shared with the BASELINE section, so this cell
# must be re-run before any tuning cell in this section - otherwise the
# tuning silently uses the baseline matrices.
encoded_xy = prepare_data(df_full, encode=True)
X1, y1 = encoded_xy
unencoded_xy = prepare_data(df_full, encode=False)
X2, y2 = unencoded_xy

## Logistic Regression

In [None]:
# Logistic regression on the encoded features: 10 trials, 3 CV folds.
results_lr = tune_and_evaluate(X1, y1, model_type='logistic', n_trials=10, n_folds=3)

## Random Forest

In [None]:
# Random forest on the encoded features: 20 trials, 3 CV folds.
results_rf = tune_and_evaluate(X1, y1, model_type='random_forest', n_trials=20, n_folds=3)

## Gradient Boosting

In [None]:
# Gradient boosting on the encoded features: only 2 trials with 3 CV folds,
# additionally capped by timeout=1800 (presumably seconds - TODO confirm in
# src.models.tuning).
results_gb = tune_and_evaluate(
    X1, y1, model_type='gradient_boosting', n_trials=2, n_folds=3, timeout=1800
)

## XGBoost

In [None]:
# XGBoost on the unencoded features (X2): 30 trials, 3 CV folds, GPU requested.
results_xgb = tune_and_evaluate(
    X2, y2, model_type='xgboost', n_trials=30, n_folds=3, use_gpu=True
)


## LightGBM

In [5]:
# LightGBM on the unencoded features (X2): 30 trials, 3 CV folds, GPU requested.
# With use_gpu=True the tuning objective runs cross-validation with n_jobs=1.
results_lgbm = tune_and_evaluate(
    X2, y2, model_type='lightgbm', n_trials=30, n_folds=3, use_gpu=True
)


[32m[I 2026-02-10 02:54:45,760][0m A new study created in memory with name: no-name-76f7c78d-5979-4592-aac4-9dd68c205e19[0m
Best trial: 0. Best value: 0.74067:   3%|▎         | 1/30 [00:07<03:46,  7.82s/it]

[32m[I 2026-02-10 02:54:53,579][0m Trial 0 finished with value: 0.7406701726077142 and parameters: {'n_estimators': 144, 'max_depth': 15, 'learning_rate': 0.1205712628744377, 'num_leaves': 98, 'subsample': 0.6624074561769746, 'colsample_bytree': 0.662397808134481, 'reg_alpha': 3.3323645788192616e-08, 'reg_lambda': 0.6245760287469893, 'scale_pos_weight': 9.415610164404923}. Best is trial 0 with value: 0.7406701726077142.[0m


Best trial: 1. Best value: 0.754408:   7%|▋         | 2/30 [00:10<02:09,  4.64s/it]

[32m[I 2026-02-10 02:54:55,997][0m Trial 1 finished with value: 0.7544079462896954 and parameters: {'n_estimators': 227, 'max_depth': 2, 'learning_rate': 0.2708160864249968, 'num_leaves': 129, 'subsample': 0.6849356442713105, 'colsample_bytree': 0.6727299868828402, 'reg_alpha': 4.4734294104626844e-07, 'reg_lambda': 5.472429642032198e-06, 'scale_pos_weight': 8.346590042851329}. Best is trial 1 with value: 0.7544079462896954.[0m


Best trial: 1. Best value: 0.754408:  10%|█         | 3/30 [00:13<01:50,  4.10s/it]

[32m[I 2026-02-10 02:54:59,448][0m Trial 2 finished with value: 0.7530996028975893 and parameters: {'n_estimators': 158, 'max_depth': 6, 'learning_rate': 0.08012737503998542, 'num_leaves': 38, 'subsample': 0.7168578594140873, 'colsample_bytree': 0.7465447373174767, 'reg_alpha': 0.00012724181576752517, 'reg_lambda': 0.1165691561324743, 'scale_pos_weight': 3.7954329502170365}. Best is trial 1 with value: 0.7544079462896954.[0m


Best trial: 1. Best value: 0.754408:  10%|█         | 3/30 [00:16<02:31,  5.62s/it]


[33m[W 2026-02-10 02:55:02,614][0m Trial 3 failed with parameters: {'n_estimators': 179, 'max_depth': 10, 'learning_rate': 0.011711509955524094, 'num_leaves': 99, 'subsample': 0.6682096494749166, 'colsample_bytree': 0.6260206371941118, 'reg_alpha': 3.4671276804481113, 'reg_lambda': 4.905556676028774, 'scale_pos_weight': 12.317562873630456} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "/home/michal/.cache/pypoetry/virtualenvs/home-credit-default-risk-project-qBBrbwAb-py3.12/lib/python3.12/site-packages/optuna/study/_optimize.py", line 206, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/home/michal/PycharmProjects/home-credit-default-risk-project/src/models/tuning.py", line 103, in objective
    scores = cross_val_score(pipeline, X, y, cv=cv, scoring='roc_auc', n_jobs=1 if use_gpu else -1)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

KeyboardInterrupt: 

In [None]:
# Gather whichever tuning results exist in the kernel namespace. A model whose
# cell was skipped or interrupted has no `results_*` variable and maps to None.
# NOTE(review): the `results_*` names are shared with the BASELINE section, so
# if both sections ran in one kernel, a FULL cell that was skipped would show
# its baseline result here - confirm every FULL tuning cell completed.
results = {
    'Logistic': globals().get('results_lr'),
    'Random Forest': globals().get('results_rf'),
    'Gradient Boosting': globals().get('results_gb'),
    'XGBoost': globals().get('results_xgb'),
    'LightGBM': globals().get('results_lgbm'),
}

# Report missing models instead of crashing on `None['cv_score']`.
missing = [name for name, r in results.items() if r is None]
if missing:
    print(f"Skipping models without results: {', '.join(missing)}")

score_columns = ['Model', 'CV Score', 'Train Score', 'Test Score']
rows = [
    {
        'Model': name,
        'CV Score': r['cv_score'],
        'Train Score': r['train_score'],
        'Test Score': r['test_score'],
    }
    for name, r in results.items()
    if r is not None
]

# Explicit columns keep the sort and the header relabelling valid even when
# no model has been evaluated yet (empty `rows`).
comp = pd.DataFrame(rows, columns=score_columns).sort_values('Test Score', ascending=False)
comp.columns = pd.MultiIndex.from_tuples([
    ('FULL', col) for col in comp.columns
])

display(comp)