In [130]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix
from sklearn.calibration import calibration_curve
import shap
import matplotlib.pyplot as plt
import joblib
import warnings
import openpyxl
warnings.filterwarnings('ignore')

from lightgbm import LGBMClassifier

from sklearn.metrics import (
    roc_auc_score, roc_curve,
    f1_score, precision_score, recall_score,
    accuracy_score, matthews_corrcoef
)
from sklearn.model_selection import TimeSeriesSplit
from mlflow.models.signature import infer_signature

import lightgbm as lgb
import optuna
import mlflow
import mlflow.lightgbm

In [74]:
df = pd.read_csv('base_modelo.csv', parse_dates=['safra'], dayfirst=True)


### Substituir nulos nas variáveis
### Identificar colunas que contêm valores negativos
### Definir as outras colunas sem valores negativos
### Para as demais, penalizar ausência com -1
### Para colunas com valores negativos, criar flag de missing e imputar com mediana

In [75]:
var_cols = [col for col in df.columns if col.startswith('VAR_')]

global_neg_cols = [col for col in var_cols if (df[col] < 0).any()]
other_cols = [col for col in var_cols if col not in global_neg_cols]
df[other_cols] = df[other_cols].fillna(-1)

for col in global_neg_cols:
    df[f"{col}_missing"] = df[col].isna().astype(int)
    median_val = df[col].median()
    df[col] = df[col].fillna(median_val)

## Análise de completude das features

In [76]:
counts = {}
for col in var_cols:
    counts[col] = {
        0: int((df[col] == 0).sum()),
        -1: int((df[col] == -1).sum())
    }
counts_df = pd.DataFrame.from_dict(counts, orient='index').rename(columns={0: 'count_0', -1: 'count_-1'})
total = len(df)
modes = {col: df[col].mode().iloc[0] if not df[col].mode().empty else np.nan for col in var_cols}
mode_counts = {col: int((df[col] == mode_val).sum()) for col, mode_val in modes.items()}
modes_df = pd.DataFrame({'mode': pd.Series(modes), 'count_mode': pd.Series(mode_counts)})
completeness_df = counts_df.merge(modes_df, left_index=True, right_index=True)
completeness_df['total'] = total
completeness_df['pc_0'] = (completeness_df['count_0'] / total).round(2)
completeness_df['pc_-1'] = (completeness_df['count_-1'] / total).round(2)
completeness_df['pc_moda'] = (completeness_df['count_mode'] / total).round(2)
completeness_df
#completeness_df.to_excel("completude.xlsx")

Unnamed: 0,count_0,count_-1,mode,count_mode,total,pc_0,pc_-1,pc_moda
VAR_1,3376,416,0.00,3376,10738,0.31,0.04,0.31
VAR_2,9125,416,0.00,9125,10738,0.85,0.04,0.85
VAR_3,8528,416,0.00,8528,10738,0.79,0.04,0.79
VAR_4,7163,416,0.00,7163,10738,0.67,0.04,0.67
VAR_5,0,4060,-1.00,4060,10738,0.00,0.38,0.38
...,...,...,...,...,...,...,...,...
VAR_74,3932,5541,-1.00,5541,10738,0.37,0.52,0.52
VAR_75,0,7024,-1.00,7024,10738,0.00,0.65,0.65
VAR_76,112,0,318.28,4552,10738,0.01,0.00,0.42
VAR_77,2677,5565,-1.00,5565,10738,0.25,0.52,0.52


In [113]:
# 4. Separação em DEV e OOT (corrigido)

# Garante que 'safra' seja string no formato 'YYYYMM'
df["safra"] = df["safra"].astype(str)

# Inicializa a coluna com dtype object
df["period_label"] = pd.Series(index=df.index, dtype="object")

# Máscaras para DEV e OOT
mask_dev = (df["safra"] >= "201401") & (df["safra"] <= "201408")
mask_oot = (df["safra"] >= "201409") & (df["safra"] <= "201412")

# Atribui os rótulos
df.loc[mask_dev, "period_label"] = "DEV"
df.loc[mask_oot, "period_label"] = "OOT"

# Agora extrai os subsets
dev = df[df["period_label"] == "DEV"].copy()
oot = df[df["period_label"] == "OOT"].copy()

## PSI E IV

In [115]:
# 5. Cálculo de PSI
def psi(expected, actual, bins=10):
    breaks = np.unique(np.percentile(expected, np.linspace(0, 100, bins + 1)))
    exp_perc = np.histogram(expected, bins=breaks)[0] / len(expected)
    act_perc = np.histogram(actual,   bins=breaks)[0] / len(actual)
    exp_perc = np.where(exp_perc == 0, 1e-8, exp_perc)
    act_perc = np.where(act_perc == 0, 1e-8, act_perc)
    return np.sum((exp_perc - act_perc) * np.log(exp_perc / act_perc))

psi_results = []
for c in var_cols:
    psi_results.append({
        "variable": c,
        "psi_dev": 0.0,
        "psi_oot": psi(dev[c].values, oot[c].values, bins=10)
    })
psi_df = pd.DataFrame(psi_results).sort_values("psi_oot", ascending=False)
psi_df.to_excel("psi_df.xlsx")
psi_df

Unnamed: 0,variable,psi_dev,psi_oot
52,VAR_53,0.0,1.253914
53,VAR_54,0.0,0.397371
29,VAR_30,0.0,0.324975
0,VAR_1,0.0,0.135028
1,VAR_2,0.0,0.092166
...,...,...,...
76,VAR_77,0.0,0.001646
34,VAR_35,0.0,0.001551
11,VAR_12,0.0,0.001459
10,VAR_11,0.0,0.000593


In [116]:
excl_psi = ['VAR_53'
'VAR_54'
'VAR_30'
'VAR_1'
'VAR_2'
'VAR_17'
'VAR_3'
'VAR_27'
'VAR_4'
'VAR_15'
'VAR_52'
'VAR_31'
'VAR_38'
'VAR_6'
'VAR_9'
'VAR_20'
'VAR_26'
'VAR_50'
'VAR_7'
'VAR_29'
'VAR_56'
'VAR_46'
'VAR_37'
'VAR_67'
'VAR_78'
'VAR_74'
'VAR_71'
'VAR_73'
'VAR_42'
]

In [117]:
# 6. Cálculo de IV
def calc_iv(series, target, bins=10):
    df2 = pd.DataFrame({"x": series, "y": target})
    df2["bin"] = pd.qcut(df2["x"], q=bins, duplicates="drop")
    grp = df2.groupby("bin")["y"].agg(["count","sum"]).rename(columns={"sum":"bad","count":"total"})
    grp["good"] = grp["total"] - grp["bad"]
    dist_good = grp["good"] / grp["good"].sum()
    dist_bad  = grp["bad"]  / grp["bad"].sum()
    dist_good = dist_good.replace(0, 1e-8)
    dist_bad  = dist_bad.replace(0, 1e-8)
    woe = np.log(dist_good / dist_bad)
    return ((dist_good - dist_bad) * woe).sum()

iv_results = []
for label, subset in [("dev", dev), ("oot", oot)]:
    for c in var_cols:
        iv_results.append({
            "variable": c,
            f"iv_{label}": calc_iv(subset[c], subset["y"], bins=10)
        })
iv_df = (pd.DataFrame(iv_results)
         .pivot_table(index="variable", values=["iv_dev","iv_oot"])
         .reset_index()
         .sort_values("iv_dev", ascending=False))
iv_df

Unnamed: 0,variable,iv_dev,iv_oot
0,VAR_1,0.799464,0.117865
11,VAR_2,0.626170,0.031866
22,VAR_3,0.625740,0.046753
33,VAR_4,0.522272,0.086753
55,VAR_6,0.165381,0.203964
...,...,...,...
28,VAR_35,0.005031,0.019980
47,VAR_52,0.004448,0.010476
29,VAR_36,0.001844,0.000866
57,VAR_61,0.000691,0.003487


## Feature Selection com Boruta

In [118]:
from sklearn.ensemble import RandomForestClassifier
from boruta import BorutaPy

# Preparar dados
X = df[var_cols].values
y = df['y'].values

# Estimador base
rf = RandomForestClassifier(max_depth=2, random_state=42, n_jobs=-1)

# Configurar Boruta
selector = BorutaPy(
    estimator=rf,
    n_estimators='auto',
    alpha=0.01,
    verbose=2,
    random_state=42
)

# Ajustar e selecionar
selector.fit(X, y)

# Recuperar features selecionadas
selected_features = [var_cols[i] for i, keep in enumerate(selector.support_) if keep]
print("Features selecionadas pelo Boruta:", selected_features)

Iteration: 	1 / 100
Confirmed: 	0
Tentative: 	78
Rejected: 	0
Iteration: 	2 / 100
Confirmed: 	0
Tentative: 	78
Rejected: 	0
Iteration: 	3 / 100
Confirmed: 	0
Tentative: 	78
Rejected: 	0
Iteration: 	4 / 100
Confirmed: 	0
Tentative: 	78
Rejected: 	0
Iteration: 	5 / 100
Confirmed: 	0
Tentative: 	78
Rejected: 	0
Iteration: 	6 / 100
Confirmed: 	0
Tentative: 	78
Rejected: 	0
Iteration: 	7 / 100
Confirmed: 	0
Tentative: 	78
Rejected: 	0
Iteration: 	8 / 100
Confirmed: 	0
Tentative: 	78
Rejected: 	0
Iteration: 	9 / 100
Confirmed: 	0
Tentative: 	78
Rejected: 	0
Iteration: 	10 / 100
Confirmed: 	54
Tentative: 	24
Rejected: 	0
Iteration: 	11 / 100
Confirmed: 	54
Tentative: 	24
Rejected: 	0
Iteration: 	12 / 100
Confirmed: 	54
Tentative: 	22
Rejected: 	2
Iteration: 	13 / 100
Confirmed: 	54
Tentative: 	22
Rejected: 	2
Iteration: 	14 / 100
Confirmed: 	54
Tentative: 	22
Rejected: 	2
Iteration: 	15 / 100
Confirmed: 	55
Tentative: 	21
Rejected: 	2
Iteration: 	16 / 100
Confirmed: 	55
Tentative: 	18
Rejecte

In [119]:
cols_boruta = ['VAR_1', 'VAR_2', 'VAR_3', 'VAR_4', 'VAR_6', 'VAR_9', 'VAR_10', 'VAR_12', 'VAR_14', 'VAR_16', 'VAR_17', 'VAR_18', 'VAR_19', 'VAR_20', 
               'VAR_21', 'VAR_22', 'VAR_23', 'VAR_25', 'VAR_26', 'VAR_27', 'VAR_28', 'VAR_29', 'VAR_30', 'VAR_31', 'VAR_32', 'VAR_33', 'VAR_34', 
               'VAR_37', 'VAR_38', 'VAR_40', 'VAR_41', 'VAR_42', 'VAR_43', 'VAR_44', 'VAR_46', 'VAR_47', 'VAR_48', 'VAR_49', 'VAR_51', 'VAR_53', 
               'VAR_57', 'VAR_59', 'VAR_60', 'VAR_62', 'VAR_64', 'VAR_65', 'VAR_66', 'VAR_67', 'VAR_68', 'VAR_69', 'VAR_70', 'VAR_71', 'VAR_72', 
               'VAR_73', 'VAR_74', 'VAR_76', 'VAR_77', 'VAR_78']

cols_boruta = [col for col in cols_boruta if col not in excl_psi]


In [None]:
mask_dev = (df["safra"] >= "201401") & (df["safra"] <= "201409")
mask_oot = (df["safra"] >= "201410") & (df["safra"] <= "201412")
df.loc[mask_dev, "period_label"] = "DEV"
df.loc[mask_oot, "period_label"] = "OOT"

dev = df[df["period_label"] == "DEV"]
oot = df[df["period_label"] == "OOT"]


In [127]:
table_dev_X_train = dev[cols_boruta]
table_dev_y_train = dev["y"]
table_oot_X       = oot[cols_boruta]
table_oot_y       = oot["y"]
features = cols_boruta  # para logging de importância

In [129]:
coefval = lambda x: np.std(x, ddof=1) / np.mean(x)

def ks_score(y_true, y_proba):
    fpr, tpr, _ = roc_curve(y_true, y_proba)
    return np.max(np.abs(tpr - fpr)) * 100

def metricas_models(X_df, y_df, model):
    # predict returns raw score, para LGBM usar predict
    y_pred_proba = model.predict(X_df)
    # thresholds
    thr_05 = 0.5
    thr_mean = np.mean(y_pred_proba)
    y_pred_05   = (y_pred_proba > thr_05).astype(int)
    y_pred_mean = (y_pred_proba > thr_mean).astype(int)

    f1      = f1_score(y_df, y_pred_05) * 100
    precis  = precision_score(y_df, y_pred_05) * 100
    recall  = recall_score(y_df, y_pred_05) * 100
    ks      = ks_score(y_df, y_pred_proba)
    gini    = (2 * roc_auc_score(y_df, y_pred_proba) - 1) * 100
    acc     = accuracy_score(y_df, y_pred_05) * 100
    precis_c = precision_score(y_df, y_pred_mean) * 100
    recall_c = recall_score(y_df, y_pred_mean) * 100
    f1_c     = f1_score(y_df, y_pred_mean) * 100
    acc_c    = accuracy_score(y_df, y_pred_mean) * 100
    mcc      = matthews_corrcoef(y_df, y_pred_05) * 100

    return (
        f1, precis, recall, ks, gini, acc,
        precis_c, recall_c, f1_c, acc_c, mcc
    )

In [135]:
mlflow.set_experiment("lgbm_full_with_custom_metrics")

def objective(trial):
    # Espaço de busca
    params = {
        "objective": "binary",
        "metric": "binary_error",
        "is_unbalance": False,
        "boosting_type": trial.suggest_categorical("boosting_type", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 50, 250),
        "max_depth": trial.suggest_int("max_depth", 2, 10),
        "min_child_samples": trial.suggest_int("min_child_samples", 2000, 9000, step=10),
        "learning_rate": trial.suggest_float("learning_rate", 0.1, 0.9),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.1, 0.9),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.1, 0.9),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.1, 0.9),
        "subsample": trial.suggest_float("subsample", 0.1, 0.7),
        "verbosity": -1
    }
    num_boost_round = trial.suggest_int("num_boost_round", 350, 500, step=10)

    # Preparar datasets
    lgbm_train = lgb.Dataset(table_dev_X_train, label=table_dev_y_train)
    lgbm_oot   = lgb.Dataset(table_oot_X, label=table_oot_y, reference=lgbm_train)

    with mlflow.start_run(run_name=f"run_{trial.number}"):
        # Treino com early stopping
        model = lgb.train(
            params,
            lgbm_train,
            num_boost_round=num_boost_round,
            valid_sets=[lgbm_train, lgbm_oot],
            valid_names=["train", "oot"],
            callbacks=[lgb.early_stopping(stopping_rounds=50)]
        )

        # Métricas DEV vs OOT
        mt_train = metricas_models(table_dev_X_train, table_dev_y_train, model)
        mt_oot   = metricas_models(table_oot_X, table_oot_y, model)

        # Shifts absolutos em pontos percentuais
        shift_gini  = (mt_oot[4] - mt_train[4]) * 1.0        # já em %
        shift_ks    = (mt_oot[3] - mt_train[3]) * 1.0        # já em %
        shift_f1    = (mt_oot[0] - mt_train[0]) * 1.0        # já em %
        shift_f1c   = (mt_oot[8] - mt_train[8]) * 1.0        # já em %

        # CV AUC
        cv_res = lgb.cv(
            params=params,
            train_set=lgbm_train,
            nfold=5,
            stratified=True,
            metrics="auc",
            num_boost_round=num_boost_round,
            callbacks=[lgb.early_stopping(stopping_rounds=50)]
        )
        # encontrar chave contendo "auc" e "mean"
        auc_key = next(k for k in cv_res if "auc" in k.lower() and "mean" in k.lower())
        aucs = cv_res[auc_key]
        auc_mean_cv  = np.mean(aucs) * 100
        auc_min_cv   = np.min(aucs) * 100
        auc_max_cv   = np.max(aucs) * 100
        auc_ampl_cv  = auc_max_cv - auc_min_cv
        auc_coefv_cv = coefval(aucs) * 100
        gini_cv      = (np.mean(aucs) * 2 - 1) * 100

        # Importâncias
        imp_split = {f: int(i) for f, i in zip(features, model.feature_importance("split"))}
        imp_gain  = {f: i for f, i in zip(features, model.feature_importance("gain"))}

        # Log no MLflow
        mlflow.log_params(params)
        mlflow.log_param("num_boost_round", num_boost_round)
        mlflow.log_dict(imp_split, "feat_imp_split.json")
        mlflow.log_dict(imp_gain, "feat_imp_gain.json")

        mlflow.log_metrics({
            # Train/OOT metrics
            "gini_train":   round(mt_train[4],  2),
            "gini_oot":     round(mt_oot[4],    2),
            "ks_train":     round(mt_train[3],  2),
            "ks_oot":       round(mt_oot[3],    2),
            "f1_train":     round(mt_train[0],  2),
            "f1_oot":       round(mt_oot[0],    2),
            # Shifts
            "shift_gini":  round(shift_gini,    2),
            "shift_ks":    round(shift_ks,      2),
            "shift_f1":    round(shift_f1,      2),
            "shift_f1_c":  round(shift_f1c,     2),
            # CV AUC/Gini
            "auc_mean_cv":  round(auc_mean_cv,   2),
            "auc_min_cv":   round(auc_min_cv,    2),
            "auc_max_cv":   round(auc_max_cv,    2),
            "auc_ampl_cv":  round(auc_ampl_cv,   2),
            "auc_coefv_cv": round(auc_coefv_cv,  2),
            "gini_cv":      round(gini_cv,       2),
            # binary_error final
            "train_binary_error": model.best_score["train"]["binary_error"],
            "oot_binary_error":   model.best_score["oot"]["binary_error"],
        })

        # Log do modelo
        signature = infer_signature(table_dev_X_train, model.predict(table_dev_X_train))
        mlflow.lightgbm.log_model(model, "model", signature=signature)

    # Objetivo: gini_cv penalizado pelo absolute shift de gini
    return gini_cv - abs(shift_gini)

# Executa o tuning
study = optuna.create_study(direction="maximize", study_name="lgbm_custom_metrics")
study.optimize(objective, n_trials=150)

[I 2025-05-25 19:14:47,736] A new study created in memory with name: lgbm_custom_metrics


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[39]	train's binary_error: 0.218366	oot's binary_error: 0.305105
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[174]	valid's auc: 0.758213 + 0.0160053


[I 2025-05-25 19:15:02,045] Trial 0 finished with value: 33.1542400759369 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 123, 'max_depth': 8, 'min_child_samples': 2700, 'learning_rate': 0.31605871113901574, 'feature_fraction': 0.5109031204811888, 'reg_alpha': 0.7283868933547999, 'reg_lambda': 0.34635413518101643, 'subsample': 0.15615023152241364, 'num_boost_round': 490}. Best is trial 0 with value: 33.1542400759369.
[I 2025-05-25 19:15:11,248] Trial 1 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'num_leaves': 210, 'max_depth': 8, 'min_child_samples': 7350, 'learning_rate': 0.10066553680769755, 'feature_fraction': 0.3405638627083527, 'reg_alpha': 0.449896367739142, 'reg_lambda': 0.2358161274079147, 'subsample': 0.3007710883001645, 'num_boost_round': 500}. Best is trial 0 with value: 33.1542400759369.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[158]	train's binary_error: 0.200585	oot's binary_error: 0.292046
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[76]	valid's auc: 0.776452 + 0.0134284


[I 2025-05-25 19:15:19,098] Trial 2 finished with value: 30.98148075344352 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 163, 'max_depth': 10, 'min_child_samples': 2100, 'learning_rate': 0.6922288178530231, 'feature_fraction': 0.5520770299990397, 'reg_alpha': 0.7834953103302204, 'reg_lambda': 0.26931068669603675, 'subsample': 0.467083948415537, 'num_boost_round': 420}. Best is trial 0 with value: 33.1542400759369.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	train's binary_error: 0.282061	oot's binary_error: 0.32133
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid's auc: 0.5 + 0


[I 2025-05-25 19:15:25,285] Trial 3 finished with value: 0.0 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 114, 'max_depth': 5, 'min_child_samples': 5460, 'learning_rate': 0.7009458043713234, 'feature_fraction': 0.8313495285276801, 'reg_alpha': 0.5092137806155309, 'reg_lambda': 0.1331605222645066, 'subsample': 0.22597038257984678, 'num_boost_round': 500}. Best is trial 0 with value: 33.1542400759369.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	train's binary_error: 0.282061	oot's binary_error: 0.32133
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid's auc: 0.5 + 0


[I 2025-05-25 19:15:31,232] Trial 4 finished with value: 0.0 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 95, 'max_depth': 7, 'min_child_samples': 5190, 'learning_rate': 0.13780314836939925, 'feature_fraction': 0.1368601463982449, 'reg_alpha': 0.23329837362161251, 'reg_lambda': 0.3669770120973611, 'subsample': 0.318289107753009, 'num_boost_round': 410}. Best is trial 0 with value: 33.1542400759369.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	train's binary_error: 0.282061	oot's binary_error: 0.32133
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid's auc: 0.5 + 0


[I 2025-05-25 19:15:37,387] Trial 5 finished with value: 0.0 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 105, 'max_depth': 4, 'min_child_samples': 6180, 'learning_rate': 0.6921759249848629, 'feature_fraction': 0.7985853562993349, 'reg_alpha': 0.5520175231322978, 'reg_lambda': 0.17074448946110943, 'subsample': 0.355364955886331, 'num_boost_round': 470}. Best is trial 0 with value: 33.1542400759369.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	train's binary_error: 0.282061	oot's binary_error: 0.32133
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid's auc: 0.5 + 0


[I 2025-05-25 19:15:43,365] Trial 6 finished with value: 0.0 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 150, 'max_depth': 6, 'min_child_samples': 7200, 'learning_rate': 0.8220319007469101, 'feature_fraction': 0.855064856221793, 'reg_alpha': 0.6783978023834416, 'reg_lambda': 0.8738910067986173, 'subsample': 0.23195946934264453, 'num_boost_round': 470}. Best is trial 0 with value: 33.1542400759369.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	train's binary_error: 0.282061	oot's binary_error: 0.32133
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid's auc: 0.5 + 0


[I 2025-05-25 19:15:49,363] Trial 7 finished with value: 0.0 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 231, 'max_depth': 9, 'min_child_samples': 4760, 'learning_rate': 0.33472242451129325, 'feature_fraction': 0.5537227070791756, 'reg_alpha': 0.6136002228496075, 'reg_lambda': 0.668336274543826, 'subsample': 0.5224216242601697, 'num_boost_round': 380}. Best is trial 0 with value: 33.1542400759369.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	train's binary_error: 0.282061	oot's binary_error: 0.32133
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid's auc: 0.5 + 0


[I 2025-05-25 19:15:55,602] Trial 8 finished with value: 0.0 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 82, 'max_depth': 4, 'min_child_samples': 7780, 'learning_rate': 0.6973406012650494, 'feature_fraction': 0.10336446276158728, 'reg_alpha': 0.838618522306977, 'reg_lambda': 0.4730863444364619, 'subsample': 0.6031905750608654, 'num_boost_round': 420}. Best is trial 0 with value: 33.1542400759369.
[I 2025-05-25 19:16:02,672] Trial 9 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'num_leaves': 89, 'max_depth': 6, 'min_child_samples': 8380, 'learning_rate': 0.8979953838012066, 'feature_fraction': 0.43525226983122245, 'reg_alpha': 0.5190553273942154, 'reg_lambda': 0.1608469663763592, 'subsample': 0.32539288813944833, 'num_boost_round': 380}. Best is trial 0 with value: 33.1542400759369.
[I 2025-05-25 19:16:16,462] Trial 10 finished with value: 35.16592680968762 and parameters: {'boosting_type': 'dart', 'num_leaves': 157, 'max_depth': 2, 'min_child_samples': 

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[58]	train's binary_error: 0.25953	oot's binary_error: 0.307083
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid's auc: 0.5 + 0


[I 2025-05-25 19:20:59,209] Trial 37 finished with value: -11.012810297264863 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 124, 'max_depth': 10, 'min_child_samples': 3910, 'learning_rate': 0.28595283234275093, 'feature_fraction': 0.3909306058373219, 'reg_alpha': 0.4989617545773439, 'reg_lambda': 0.31889563015818045, 'subsample': 0.36705851218387053, 'num_boost_round': 500}. Best is trial 14 with value: 36.332961491579006.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	train's binary_error: 0.282061	oot's binary_error: 0.32133
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid's auc: 0.5 + 0


[I 2025-05-25 19:21:06,040] Trial 38 finished with value: 0.0 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 130, 'max_depth': 4, 'min_child_samples': 4910, 'learning_rate': 0.34212990215911687, 'feature_fraction': 0.5806585151019611, 'reg_alpha': 0.6227061506182789, 'reg_lambda': 0.8910907593321717, 'subsample': 0.302368813814606, 'num_boost_round': 450}. Best is trial 14 with value: 36.332961491579006.
[I 2025-05-25 19:21:13,756] Trial 39 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'num_leaves': 114, 'max_depth': 7, 'min_child_samples': 5810, 'learning_rate': 0.6231755102243127, 'feature_fraction': 0.2689115984491883, 'reg_alpha': 0.6511683309637238, 'reg_lambda': 0.23174990039033322, 'subsample': 0.2291298796504291, 'num_boost_round': 420}. Best is trial 14 with value: 36.332961491579006.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[57]	train's binary_error: 0.228596	oot's binary_error: 0.303522
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[14]	valid's auc: 0.643078 + 0.0241157


[I 2025-05-25 19:21:21,594] Trial 40 finished with value: 7.090251993284344 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 91, 'max_depth': 9, 'min_child_samples': 3260, 'learning_rate': 0.17476015609685908, 'feature_fraction': 0.5052868050207958, 'reg_alpha': 0.7385785258711678, 'reg_lambda': 0.7393523329535627, 'subsample': 0.44336714717858183, 'num_boost_round': 430}. Best is trial 14 with value: 36.332961491579006.
[I 2025-05-25 19:21:36,557] Trial 41 finished with value: 35.7135438272111 and parameters: {'boosting_type': 'dart', 'num_leaves': 157, 'max_depth': 3, 'min_child_samples': 2060, 'learning_rate': 0.29913091650113144, 'feature_fraction': 0.5505123714030502, 'reg_alpha': 0.38826920376605667, 'reg_lambda': 0.6658095575500409, 'subsample': 0.17032130842494778, 'num_boost_round': 460}. Best is trial 14 with value: 36.332961491579006.
[I 2025-05-25 19:21:53,085] Trial 42 finished with value: 33.83342195999573 and parameters: {'boosting_type': 'dart', 'num_leaves': 145

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[120]	train's binary_error: 0.213738	oot's binary_error: 0.304313
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[181]	valid's auc: 0.758277 + 0.0156716


[I 2025-05-25 19:22:49,558] Trial 47 finished with value: 32.75492174706878 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 82, 'max_depth': 6, 'min_child_samples': 2660, 'learning_rate': 0.2500972634661694, 'feature_fraction': 0.4768537115794827, 'reg_alpha': 0.5543592478177128, 'reg_lambda': 0.5668110969057849, 'subsample': 0.5032743347308586, 'num_boost_round': 490}. Best is trial 45 with value: 36.426602054214904.
[I 2025-05-25 19:22:57,163] Trial 48 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'num_leaves': 116, 'max_depth': 7, 'min_child_samples': 8670, 'learning_rate': 0.2016115200191998, 'feature_fraction': 0.7173682192731416, 'reg_alpha': 0.7230047643832627, 'reg_lambda': 0.8030169790693177, 'subsample': 0.3365763580124198, 'num_boost_round': 470}. Best is trial 45 with value: 36.426602054214904.
[I 2025-05-25 19:23:06,459] Trial 49 finished with value: 0.0 and parameters: {'boosting_type': 'dart', 'num_leaves': 145, 'max_depth': 8, 'min_child_sam

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[40]	train's binary_error: 0.209962	oot's binary_error: 0.303918
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[85]	valid's auc: 0.769561 + 0.0168176


[I 2025-05-25 19:24:44,198] Trial 57 finished with value: 33.17536347543992 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 171, 'max_depth': 5, 'min_child_samples': 2350, 'learning_rate': 0.7537699015332482, 'feature_fraction': 0.4168411869325891, 'reg_alpha': 0.5528865185088703, 'reg_lambda': 0.8453144819143605, 'subsample': 0.3831063461878386, 'num_boost_round': 450}. Best is trial 51 with value: 36.84840790905042.
[I 2025-05-25 19:24:57,306] Trial 58 finished with value: 28.67854692505493 and parameters: {'boosting_type': 'dart', 'num_leaves': 147, 'max_depth': 5, 'min_child_samples': 2990, 'learning_rate': 0.22139596565147868, 'feature_fraction': 0.4854236237791385, 'reg_alpha': 0.4874759733570332, 'reg_lambda': 0.6901398826125248, 'subsample': 0.2761052026313853, 'num_boost_round': 470}. Best is trial 51 with value: 36.84840790905042.
[I 2025-05-25 19:25:09,587] Trial 59 finished with value: 29.074947065454637 and parameters: {'boosting_type': 'dart', 'num_leaves': 62, 'm

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[98]	train's binary_error: 0.214468	oot's binary_error: 0.303918
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[317]	valid's auc: 0.757328 + 0.0163046


[I 2025-05-25 19:28:56,076] Trial 77 finished with value: 33.10563695804423 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 213, 'max_depth': 3, 'min_child_samples': 2690, 'learning_rate': 0.18093319489210125, 'feature_fraction': 0.6741144868966764, 'reg_alpha': 0.3780914053345659, 'reg_lambda': 0.8161407308413431, 'subsample': 0.6295245954288261, 'num_boost_round': 490}. Best is trial 75 with value: 36.85278826480547.
[I 2025-05-25 19:29:11,522] Trial 78 finished with value: 33.098961773982374 and parameters: {'boosting_type': 'dart', 'num_leaves': 250, 'max_depth': 3, 'min_child_samples': 2190, 'learning_rate': 0.5002455720451129, 'feature_fraction': 0.611530302074938, 'reg_alpha': 0.28417809666387966, 'reg_lambda': 0.7172634788656965, 'subsample': 0.651553787029176, 'num_boost_round': 480}. Best is trial 75 with value: 36.85278826480547.
[I 2025-05-25 19:29:25,176] Trial 79 finished with value: 28.93117059515926 and parameters: {'boosting_type': 'dart', 'num_leaves': 223, 'm

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[47]	train's binary_error: 0.21252	oot's binary_error: 0.304709
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[342]	valid's auc: 0.763683 + 0.0172994


[I 2025-05-25 19:31:28,033] Trial 89 finished with value: 34.64333289620396 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 72, 'max_depth': 2, 'min_child_samples': 2500, 'learning_rate': 0.2572980737522253, 'feature_fraction': 0.6738761347138971, 'reg_alpha': 0.35467108908242573, 'reg_lambda': 0.7717178857050225, 'subsample': 0.6425993053586487, 'num_boost_round': 440}. Best is trial 75 with value: 36.85278826480547.
[I 2025-05-25 19:31:40,151] Trial 90 finished with value: 35.48620239876266 and parameters: {'boosting_type': 'dart', 'num_leaves': 232, 'max_depth': 4, 'min_child_samples': 2160, 'learning_rate': 0.23480346823342957, 'feature_fraction': 0.6582514983496661, 'reg_alpha': 0.4503399925838377, 'reg_lambda': 0.7444321150712434, 'subsample': 0.6767984853815303, 'num_boost_round': 460}. Best is trial 75 with value: 36.85278826480547.
[I 2025-05-25 19:31:53,844] Trial 91 finished with value: 36.163470684414904 and parameters: {'boosting_type': 'dart', 'num_leaves': 247, '

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[112]	train's binary_error: 0.211545	oot's binary_error: 0.305896
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[416]	valid's auc: 0.765374 + 0.0177482


[I 2025-05-25 19:35:06,460] Trial 107 finished with value: 34.45192063262701 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 114, 'max_depth': 9, 'min_child_samples': 2420, 'learning_rate': 0.15204213360958752, 'feature_fraction': 0.7384785910925781, 'reg_alpha': 0.3491183926647873, 'reg_lambda': 0.7958105303113892, 'subsample': 0.6623602442841597, 'num_boost_round': 430}. Best is trial 103 with value: 37.13973975867.
[I 2025-05-25 19:35:18,851] Trial 108 finished with value: 37.36196555117266 and parameters: {'boosting_type': 'dart', 'num_leaves': 65, 'max_depth': 8, 'min_child_samples': 2000, 'learning_rate': 0.16258828432686942, 'feature_fraction': 0.790195731251833, 'reg_alpha': 0.4706291965625791, 'reg_lambda': 0.843544850928665, 'subsample': 0.2201996565688782, 'num_boost_round': 410}. Best is trial 108 with value: 37.36196555117266.
[I 2025-05-25 19:35:28,642] Trial 109 finished with value: 28.599863385768984 and parameters: {'boosting_type': 'dart', 'num_leaves': 67, 'm

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[63]	train's binary_error: 0.21252	oot's binary_error: 0.306688
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[406]	valid's auc: 0.766025 + 0.0175785


[I 2025-05-25 19:38:55,675] Trial 128 finished with value: 35.00513732614962 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 111, 'max_depth': 7, 'min_child_samples': 2400, 'learning_rate': 0.15317529707005517, 'feature_fraction': 0.7077201594791873, 'reg_alpha': 0.5692403386525124, 'reg_lambda': 0.28912843915123565, 'subsample': 0.6421253475247228, 'num_boost_round': 420}. Best is trial 108 with value: 37.36196555117266.
[I 2025-05-25 19:39:11,793] Trial 129 finished with value: 37.09803336215509 and parameters: {'boosting_type': 'dart', 'num_leaves': 137, 'max_depth': 7, 'min_child_samples': 2010, 'learning_rate': 0.1740211007722674, 'feature_fraction': 0.7472425013255145, 'reg_alpha': 0.6396902742880101, 'reg_lambda': 0.19864117090363678, 'subsample': 0.6799963572917042, 'num_boost_round': 430}. Best is trial 108 with value: 37.36196555117266.
[I 2025-05-25 19:39:26,417] Trial 130 finished with value: 34.71063563481467 and parameters: {'boosting_type': 'dart', 'num_leaves': 