# Tuning de Hiperparâmetros de Modelos

## Configurações

In [1]:
# Settings
import importlib #used when project modules need to be re-imported

import sys
# Extend the import path with the parent directory and the project template
# directory so the local modules imported below can be found.
sys.path.append("../")
sys.path.append("../ml-project-template")

# NOTE(review): star import hides where names come from; RANDOM_STATE (used by
# ngboost_param_space) is presumably defined in config — confirm.
from config import *
import utils
import data_manager as data_mgr
import model_tuner as mod_tuner

import optuna
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from ngboost import NGBClassifier

from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score


import seaborn as sns
import matplotlib.pyplot as plt

## Carregar databases

Carrega os conjuntos de treino e teste de cada feature set, usados para treinar e avaliar os modelos.

In [2]:
# Re-import the model_tuner module so that edits made to it are picked up
importlib.reload(mod_tuner)

<module 'model_tuner' from 'D:\\mba\\Data Science e Analytics (USP-Esalq)\\99 - TCC\\Projeto\\notebooks\\../ml-project-template\\model_tuner.py'>

In [3]:
# Create the helper used to tune the models
model_tuner = mod_tuner.ModelTuner()

In [4]:
# Create the helper used to import and export data
data_manager = data_mgr.DataManager()

In [5]:
# Load the processed train and test sets for the "v0_basico" feature set
train_v0, test_v0 = data_manager.load_processed_data(feature_set = "v0_basico")

 Carregando features: v0_basico
 FeatureSet info: 11 features


In [6]:
# Split the v0 training set into features (X) and target (y)
train_X_v0, train_y_v0 = data_manager.split_features_target(train_v0)

 Features: (8000, 10), Target: (8000,)


In [7]:
# Split the v0 test set into features (X) and target (y)
test_X_v0, test_y_v0 = data_manager.split_features_target(test_v0)

 Features: (2000, 10), Target: (2000,)


In [8]:
# Load the processed train and test sets for the "v1_one-hot_encoding" feature set
train_v1, test_v1 = data_manager.load_processed_data(feature_set = "v1_one-hot_encoding")

 Carregando features: v1_one-hot_encoding
 FeatureSet info: 13 features


In [9]:
# Split the v1 training set into features (X) and target (y)
train_X_v1, train_y_v1 = data_manager.split_features_target(train_v1)

 Features: (8000, 12), Target: (8000,)


In [10]:
# Split the v1 test set into features (X) and target (y)
test_X_v1, test_y_v1 = data_manager.split_features_target(test_v1)

 Features: (2000, 12), Target: (2000,)


In [11]:
# Load the processed train and test sets for the "v2_one-hot_encoding_plus_normalizacao" feature set
train_v2, test_v2 = data_manager.load_processed_data(feature_set = "v2_one-hot_encoding_plus_normalizacao")

 Carregando features: v2_one-hot_encoding_plus_normalizacao
 FeatureSet info: 13 features


In [12]:
# Split the v2 training set into features (X) and target (y)
train_X_v2, train_y_v2 = data_manager.split_features_target(train_v2)

 Features: (8000, 12), Target: (8000,)


In [13]:
# Split the v2 test set into features (X) and target (y)
test_X_v2, test_y_v2 = data_manager.split_features_target(test_v2)

 Features: (2000, 12), Target: (2000,)


In [14]:
# Load the processed train and test sets for the "v3_one-hot_encoding_plus_normalizacao_plus_poly" feature set
train_v3, test_v3 = data_manager.load_processed_data(feature_set = "v3_one-hot_encoding_plus_normalizacao_plus_poly")

 Carregando features: v3_one-hot_encoding_plus_normalizacao_plus_poly
 FeatureSet info: 34 features


In [15]:
# Split the v3 training set into features (X) and target (y)
train_X_v3, train_y_v3 = data_manager.split_features_target(train_v3)

 Features: (8000, 33), Target: (8000,)


In [16]:
# Split the v3 test set into features (X) and target (y)
test_X_v3, test_y_v3 = data_manager.split_features_target(test_v3)

 Features: (2000, 33), Target: (2000,)


In [17]:
# Load the processed train and test sets for the "v4_normalizacao_plus_poly" feature set
train_v4, test_v4 = data_manager.load_processed_data(feature_set = "v4_normalizacao_plus_poly")

 Carregando features: v4_normalizacao_plus_poly
 FeatureSet info: 32 features


In [18]:
# Split the v4 training set into features (X) and target (y)
train_X_v4, train_y_v4 = data_manager.split_features_target(train_v4)

 Features: (8000, 31), Target: (8000,)


In [19]:
# Split the v4 test set into features (X) and target (y)
test_X_v4, test_y_v4 = data_manager.split_features_target(test_v4)

 Features: (2000, 31), Target: (2000,)


In [20]:
# Load the processed train and test sets for the "v5_one-hot_encoding_plus_poly" feature set
train_v5, test_v5 = data_manager.load_processed_data(feature_set = "v5_one-hot_encoding_plus_poly")

 Carregando features: v5_one-hot_encoding_plus_poly
 FeatureSet info: 34 features


In [21]:
# Split the v5 training set into features (X) and target (y)
train_X_v5, train_y_v5 = data_manager.split_features_target(train_v5)

 Features: (8000, 33), Target: (8000,)


In [22]:
# Split the v5 test set into features (X) and target (y)
test_X_v5, test_y_v5 = data_manager.split_features_target(test_v5)

 Features: (2000, 33), Target: (2000,)


In [23]:
# Load the processed train and test sets for the "v6_one-hot_encoding_plus_top3_poly" feature set
train_v6, test_v6 = data_manager.load_processed_data(feature_set = "v6_one-hot_encoding_plus_top3_poly")

 Carregando features: v6_one-hot_encoding_plus_top3_poly
 FeatureSet info: 16 features


In [24]:
# Split the v6 training set into features (X) and target (y)
train_X_v6, train_y_v6 = data_manager.split_features_target(train_v6)

 Features: (8000, 15), Target: (8000,)


In [25]:
# Split the v6 test set into features (X) and target (y)
test_X_v6, test_y_v6 = data_manager.split_features_target(test_v6)

 Features: (2000, 15), Target: (2000,)


In [26]:
# Load the processed train and test sets for the "v7_plus_top3_poly" feature set
train_v7, test_v7 = data_manager.load_processed_data(feature_set = "v7_plus_top3_poly")

 Carregando features: v7_plus_top3_poly
 FeatureSet info: 14 features


In [27]:
# Split the v7 training set into features (X) and target (y)
train_X_v7, train_y_v7 = data_manager.split_features_target(train_v7)

 Features: (8000, 13), Target: (8000,)


In [28]:
# Split the v7 test set into features (X) and target (y)
test_X_v7, test_y_v7 = data_manager.split_features_target(test_v7)

 Features: (2000, 13), Target: (2000,)


In [29]:
# Load the processed train and test sets for the "v8_numerical_to_categorical" feature set
train_v8, test_v8 = data_manager.load_processed_data(feature_set = "v8_numerical_to_categorical")

 Carregando features: v8_numerical_to_categorical
 FeatureSet info: 11 features


In [30]:
# Split the v8 training set into features (X) and target (y)
train_X_v8, train_y_v8 = data_manager.split_features_target(train_v8)

 Features: (8000, 10), Target: (8000,)


In [31]:
# Split the v8 test set into features (X) and target (y)
test_X_v8, test_y_v8 = data_manager.split_features_target(test_v8)

 Features: (2000, 10), Target: (2000,)


In [32]:
# Load the processed train and test sets for the "v9_numerical_to_categorical_plus_one_hot-encoding" feature set
train_v9, test_v9 = data_manager.load_processed_data(feature_set = "v9_numerical_to_categorical_plus_one_hot-encoding")

 Carregando features: v9_numerical_to_categorical_plus_one_hot-encoding
 FeatureSet info: 35 features


In [33]:
# Split the v9 training set into features (X) and target (y)
train_X_v9, train_y_v9 = data_manager.split_features_target(train_v9)

 Features: (8000, 34), Target: (8000,)


In [34]:
# Split the v9 test set into features (X) and target (y)
test_X_v9, test_y_v9 = data_manager.split_features_target(test_v9)

 Features: (2000, 34), Target: (2000,)


In [35]:
# Load the processed train and test sets for the "v10_one_hot-encoding_plus_poly_all_features" feature set
train_v10, test_v10 = data_manager.load_processed_data(feature_set = "v10_one_hot-encoding_plus_poly_all_features")

 Carregando features: v10_one_hot-encoding_plus_poly_all_features
 FeatureSet info: 85 features


In [36]:
# Split the v10 training set into features (X) and target (y)
train_X_v10, train_y_v10 = data_manager.split_features_target(train_v10)

 Features: (8000, 84), Target: (8000,)


In [37]:
# Split the v10 test set into features (X) and target (y)
test_X_v10, test_y_v10 = data_manager.split_features_target(test_v10)

 Features: (2000, 84), Target: (2000,)


## Optuna - parâmetros gerais de busca por tipo de modelo

In [38]:
def random_forest_param_space(trial):
    """Build one random-forest configuration from an Optuna trial.

    Args:
        trial: object exposing ``suggest_int``, ``suggest_float`` and
            ``suggest_categorical`` (an Optuna trial).

    Returns:
        dict: constructor keyword arguments mixing the values drawn from
        ``trial`` with two fixed settings (``bootstrap=True``, ``n_jobs=-1``).
    """
    # The entries are evaluated top to bottom, so the suggest-calls happen in
    # the same order in which the keys appear in the returned dict.
    return {
        # Forest size and tree shape.
        "n_estimators": trial.suggest_int("n_estimators", 300, 1500),
        "max_depth": trial.suggest_int("max_depth", 4, 15),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 50),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 50),
        # Fraction of the features considered at each split.
        "max_features": trial.suggest_float("max_features", 0.4, 1.0),
        # Not searched: always bootstrap the samples.
        "bootstrap": True,
        "class_weight": trial.suggest_categorical(
            "class_weight", [None, "balanced"]
        ),
        # Not searched: use every available core.
        "n_jobs": -1,
    }


In [39]:
def xgboost_param_space(trial):
    """Build one XGBoost configuration from an Optuna trial.

    Args:
        trial: object exposing ``suggest_int`` and ``suggest_float``
            (an Optuna trial).

    Returns:
        dict: keyword arguments combining the fixed booster settings
        (``gbtree`` / ``hist`` / ``depthwise``, ``eval_metric="auc"``) with
        the values drawn from ``trial``.
    """

    def log_uniform(name, low, high):
        # Log-scale draw for parameters searched across orders of magnitude.
        return trial.suggest_float(name, low, high, log=True)

    # The entries are evaluated top to bottom, so the suggest-calls happen in
    # the same order in which the keys appear in the returned dict.
    return {
        # Fixed booster configuration (not searched).
        "booster": "gbtree",
        "tree_method": "hist",
        "grow_policy": "depthwise",
        # Learning schedule and tree size.
        "learning_rate": log_uniform("learning_rate", 1e-3, 0.3),
        "max_depth": trial.suggest_int("max_depth", 4, 10),
        "n_estimators": trial.suggest_int("n_estimators", 500, 3000),
        # Regularisation.
        "reg_lambda": log_uniform("reg_lambda", 1e-2, 100.0),
        "reg_alpha": log_uniform("reg_alpha", 1e-3, 10.0),
        "gamma": trial.suggest_float("gamma", 0.0, 5.0),
        "min_child_weight": log_uniform("min_child_weight", 1e-2, 10.0),
        # Row / column sampling.
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        # Weight applied to the positive class.
        "scale_pos_weight": log_uniform("scale_pos_weight", 0.5, 10.0),
        "eval_metric": "auc",
        "early_stopping_rounds": trial.suggest_int(
            "early_stopping_rounds", 50, 100
        ),
    }


In [40]:
def lightgbm_param_space(trial):
    """Build one LightGBM configuration from an Optuna trial.

    Args:
        trial: object exposing ``suggest_int`` and ``suggest_float``
            (an Optuna trial).

    Returns:
        dict: keyword arguments combining the fixed settings (``gbdt``
        boosting, ``binary`` objective, ``auc`` metric, ``verbosity=-1``)
        with the values drawn from ``trial``.

    NOTE(review): LightGBM only performs row subsampling when
    ``subsample_freq`` (bagging_freq) is greater than 0; it is not set here,
    so the tuned ``subsample`` may have no effect — confirm whether
    model_tuner sets it.
    NOTE(review): a tree of depth d has at most 2**d leaves, so the upper
    part of the ``num_leaves`` range is redundant for small ``max_depth``.
    """

    def log_uniform(name, low, high):
        # Log-scale draw for parameters searched across orders of magnitude.
        return trial.suggest_float(name, low, high, log=True)

    # The entries are evaluated top to bottom, so the suggest-calls happen in
    # the same order in which the keys appear in the returned dict.
    return {
        # Fixed task definition (not searched).
        "boosting_type": "gbdt",
        "objective": "binary",
        # Learning schedule and tree size.
        "learning_rate": log_uniform("learning_rate", 1e-3, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 16, 256),
        "max_depth": trial.suggest_int("max_depth", 4, 10),
        "n_estimators": trial.suggest_int("n_estimators", 500, 3000),
        # Leaf-level constraints.
        "min_child_samples": trial.suggest_int("min_child_samples", 10, 200),
        "min_child_weight": log_uniform("min_child_weight", 1e-3, 10.0),
        # Row / column sampling.
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        # Regularisation.
        "reg_lambda": log_uniform("reg_lambda", 1e-2, 100.0),
        "reg_alpha": log_uniform("reg_alpha", 1e-3, 10.0),
        # Weight applied to the positive class.
        "scale_pos_weight": log_uniform("scale_pos_weight", 0.5, 10.0),
        "early_stopping_rounds": trial.suggest_int(
            "early_stopping_rounds", 50, 100
        ),
        "metric": "auc",
        # Silence LightGBM's own logging.
        "verbosity": -1,
    }


In [41]:
def catboost_param_space(trial):
    """Build one CatBoost configuration from an Optuna trial.

    Args:
        trial: object exposing ``suggest_int``, ``suggest_float`` and
            ``suggest_categorical`` (an Optuna trial).

    Returns:
        dict: keyword arguments combining the fixed settings (``Plain``
        boosting, ``SymmetricTree`` growth, ``Logloss`` loss, ``AUC`` eval
        metric, ``verbose=False``) with the values drawn from ``trial``.
        ``subsample`` is only present when the drawn ``bagging_temperature``
        is exactly ``0.0``.
    """
    space = {
        # Fixed tree-building strategy (not searched).
        "boosting_type": "Plain",
        "grow_policy": "SymmetricTree",
        # Learning schedule and tree size.
        "learning_rate": trial.suggest_float(
            "learning_rate", 1e-3, 0.3, log=True
        ),
        "depth": trial.suggest_int("depth", 4, 10),
        "iterations": trial.suggest_int("iterations", 500, 3000),
        # Regularisation and split-score randomness.
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-2, 100, log=True),
        "random_strength": trial.suggest_float("random_strength", 0, 5),
    }

    temperature = trial.suggest_float("bagging_temperature", 0.0, 5.0)
    space["bagging_temperature"] = temperature

    # NOTE(review): a continuous draw from [0.0, 5.0] will almost never be
    # exactly 0.0, so this branch is effectively unreachable and `subsample`
    # is practically never tuned — confirm the intended condition.
    if temperature == 0.0:
        space["subsample"] = trial.suggest_float("subsample", 0.6, 1.0)

    space.update({
        # The first choice is the string "None", not the Python None object.
        "auto_class_weights": trial.suggest_categorical(
            "auto_class_weights", ["None", "Balanced"]
        ),
        "loss_function": "Logloss",
        "eval_metric": "AUC",
        "early_stopping_rounds": trial.suggest_int(
            "early_stopping_rounds", 50, 100
        ),
        "verbose": False,
    })

    return space



In [42]:
from sklearn.tree import DecisionTreeRegressor

def ngboost_param_space(trial):
    """Build one NGBoost configuration from an Optuna trial.

    The search has two levels: the boosting parameters themselves and the
    parameters of the decision-tree base learner, whose trial names carry a
    ``base_`` prefix.

    Args:
        trial: object exposing ``suggest_int`` and ``suggest_float``
            (an Optuna trial).

    Returns:
        dict: keyword arguments with the drawn boosting parameters, the fixed
        settings ``natural_gradient=True`` and ``verbose=0``, and ``"Base"``
        holding a ``DecisionTreeRegressor`` built from the drawn base-learner
        parameters with ``random_state=RANDOM_STATE``.
    """
    # Level 1: the boosting ensemble.
    booster_args = {
        "n_estimators": trial.suggest_int("n_estimators", 300, 1500),
        "learning_rate": trial.suggest_float(
            "learning_rate", 1e-3, 0.1, log=True
        ),
        "minibatch_frac": trial.suggest_float("minibatch_frac", 0.5, 1.0),
        "natural_gradient": True,
        "verbose": 0,
    }

    # Level 2: the tree used as base learner.
    tree_args = dict(
        max_depth=trial.suggest_int("base_max_depth", 4, 10),
        min_samples_leaf=trial.suggest_int("base_min_samples_leaf", 1, 50),
        min_samples_split=trial.suggest_int("base_min_samples_split", 2, 50),
        max_features=trial.suggest_float("base_max_features", 0.4, 1.0),
    )

    booster_args["Base"] = DecisionTreeRegressor(
        random_state=RANDOM_STATE, **tree_args
    )

    return booster_args

## A - Feature sets: v0 e v1 (numéricas sem transformações e categóricas com one-hot encoding, exceto CatBoost)

### Xgboost - Optuna - feature set: v1_one-hot_encoding

In [29]:
# Optuna search for XGBClassifier on the v1 training data, scored by ROC AUC
# (150 trials requested); unpacks the model, best parameters, best score and
# the Optuna results.
xgb_opt_fv1_model, xgb_opt_fv1_best_params, xgb_opt_fv1_best_score, xgb_opt_fv1_optuna_results = model_tuner.tune_optuna(
    model_class = XGBClassifier, 
    X = train_X_v1, 
    y = train_y_v1, 
    param_space_func = xgboost_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # 6-hour limit
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-30 23:42:59,330] A new study created in memory with name: no-name-46692b7c-1619-4fd4-b29c-428ce337080e
[I 2025-12-30 23:43:00,892] Trial 0 finished with value: 0.8581334090974757 and parameters: {'learning_rate': 0.0029813770307052724, 'max_depth': 8, 'n_estimators': 1594, 'reg_lambda': 13.849507661133075, 'reg_alpha': 1.3179630432958698, 'gamma': 1.3629630264132082, 'min_child_weight': 0.06751383304677265, 'subsample': 0.9207488710140077, 'colsample_bytree': 0.983255741473482, 'scale_pos_weight': 6.895799670211798, 'early_stopping_rounds': 68}. Best is trial 0 with value: 0.8581334090974757.
[I 2025-12-30 23:43:02,056] Trial 1 finished with value: 0.8622285251996032 and parameters: {'learning_rate': 0.017419098458624965, 'max_depth': 8, 'n_estimators': 2282, 'reg_lambda': 0.3026934470902619, 'reg_alpha': 0.1757052524413466, 'gamma': 2.5154158265390487, 'min_child_weight': 0.010997788128340768, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'scale_p

In [30]:
# Save the tuned model with its parameters, score and Optuna results under 'xgb_opt_fv1'
model_tuner.save_model_and_metric(filename = 'xgb_opt_fv1', 
                                  model = xgb_opt_fv1_model, 
                                  params = xgb_opt_fv1_best_params, 
                                  score = xgb_opt_fv1_best_score,
                                  dataframe_info = xgb_opt_fv1_optuna_results)

{'model': 'XGBClassifier', 'params': {'learning_rate': 0.06605345010031059, 'max_depth': 8, 'n_estimators': 558, 'reg_lambda': 0.010522134830906853, 'reg_alpha': 0.40056609823866235, 'gamma': 3.4286351277640126, 'min_child_weight': 0.016302844626512235, 'subsample': 0.6722969142430106, 'colsample_bytree': 0.6364698221789004, 'scale_pos_weight': 0.6858426394328141, 'early_stopping_rounds': 78}, 'score': 0.8712925330585278, 'timestamp': '2025-12-30T23:46:18.287967'}


In [42]:
# Reload the saved model by name and display it
xgb_opt_fv1_model = utils.load_model('xgb_opt_fv1')
xgb_opt_fv1_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6364698221789004
,device,
,early_stopping_rounds,
,enable_categorical,False


In [32]:
# Compute the listed sklearn metrics for the model on the v1 test set
# ('xgb_opt_fv1_test' is presumably the name the results are stored under — confirm)
model_tuner.calculate_metrics(xgb_opt_fv1_model, 
                              test_X_v1, 
                              test_y_v1, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'xgb_opt_fv1_test')

{'roc_auc_score': 0.8623546504902437, 'accuracy_score': 0.8675, 'precision_score': 0.8317757009345794, 'recall_score': 0.43734643734643736, 'f1_score': 0.573268921095008}


### Xgboost - Optuna - feature set: v1_one-hot_encoding (300 trials)

In [39]:
# Same XGBClassifier search on the v1 training data, scored by ROC AUC, with
# 300 trials requested; unpacks the model, best parameters, best score and
# the Optuna results.
xgb_opt_fv1_300trials_model, xgb_opt_fv1_300trials_best_params, xgb_opt_fv1_300trials_best_score, xgb_opt_fv1_300trials_optuna_results = model_tuner.tune_optuna(
    model_class = XGBClassifier, 
    X = train_X_v1, 
    y = train_y_v1, 
    param_space_func = xgboost_param_space,
    scoring='roc_auc',
    n_trials = 300,
    timeout = 21600 # 6-hour limit
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-02-22 15:15:34,081] A new study created in memory with name: no-name-27bb45f6-f3a6-4931-898a-8effae2c2090
[I 2026-02-22 15:15:35,627] Trial 0 finished with value: 0.8581334090974757 and parameters: {'learning_rate': 0.0029813770307052724, 'max_depth': 8, 'n_estimators': 1594, 'reg_lambda': 13.849507661133075, 'reg_alpha': 1.3179630432958698, 'gamma': 1.3629630264132082, 'min_child_weight': 0.06751383304677265, 'subsample': 0.9207488710140077, 'colsample_bytree': 0.983255741473482, 'scale_pos_weight': 6.895799670211798, 'early_stopping_rounds': 68}. Best is trial 0 with value: 0.8581334090974757.
[I 2026-02-22 15:15:36,685] Trial 1 finished with value: 0.8622285251996032 and parameters: {'learning_rate': 0.017419098458624965, 'max_depth': 8, 'n_estimators': 2282, 'reg_lambda': 0.3026934470902619, 'reg_alpha': 0.1757052524413466, 'gamma': 2.5154158265390487, 'min_child_weight': 0.010997788128340768, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'scale_p

In [40]:
# Save the tuned model with its parameters, score and Optuna results under 'xgb_opt_fv1_300trials'
model_tuner.save_model_and_metric(filename = 'xgb_opt_fv1_300trials', 
                                  model = xgb_opt_fv1_300trials_model, 
                                  params = xgb_opt_fv1_300trials_best_params, 
                                  score = xgb_opt_fv1_300trials_best_score,
                                  dataframe_info = xgb_opt_fv1_300trials_optuna_results)

{'model': 'XGBClassifier', 'params': {'learning_rate': 0.023246783510672178, 'max_depth': 4, 'n_estimators': 2244, 'reg_lambda': 0.23517342175047715, 'reg_alpha': 0.003404275127778491, 'gamma': 1.4426444278753785, 'min_child_weight': 2.004108347467593, 'subsample': 0.8410539389234349, 'colsample_bytree': 0.6811756309319335, 'scale_pos_weight': 1.1400712265545252, 'early_stopping_rounds': 96}, 'score': 0.8701827970451985, 'timestamp': '2026-02-22T15:24:09.447674'}


In [41]:
# Reload the saved model by name and display it
xgb_opt_fv1_300trials_model = utils.load_model('xgb_opt_fv1_300trials')
xgb_opt_fv1_300trials_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6811756309319335
,device,
,early_stopping_rounds,
,enable_categorical,False


In [42]:
# Compute the listed sklearn metrics for the model on the v1 test set
# ('xgb_opt_fv1_300trials_test' is presumably the name the results are stored under — confirm)
model_tuner.calculate_metrics(xgb_opt_fv1_300trials_model, 
                              test_X_v1, 
                              test_y_v1, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'xgb_opt_fv1_300trials_test')

{'roc_auc_score': 0.858046027537553, 'accuracy_score': 0.8635, 'precision_score': 0.7310344827586207, 'recall_score': 0.5208845208845209, 'f1_score': 0.6083213773314203}


### Catboost - Optuna - feature set: v0_basico

In [22]:
# Optuna search for CatBoostClassifier on the v0 training data, scored by
# ROC AUC (150 trials requested); unpacks the model, best parameters, best
# score and the Optuna results.
cat_opt_fv0_model, cat_opt_fv0_best_params, cat_opt_fv0_best_score, cat_opt_fv0_optuna_results = model_tuner.tune_optuna(
    model_class = CatBoostClassifier, 
    X = train_X_v0, 
    y = train_y_v0, 
    param_space_func = catboost_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # 6-hour limit
)

Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-30 22:03:48,962] A new study created in memory with name: no-name-bc0e618f-f119-485c-92b5-ba4c711ab99a
[I 2025-12-30 22:06:29,080] Trial 0 finished with value: 0.8575878109620442 and parameters: {'learning_rate': 0.0029813770307052724, 'depth': 8, 'iterations': 1594, 'l2_leaf_reg': 13.849507661133075, 'random_strength': 3.8998790405940174, 'bagging_temperature': 1.3629630264132082, 'auto_class_weights': 'Balanced', 'early_stopping_rounds': 98}. Best is trial 0 with value: 0.8575878109620442.
[I 2025-12-30 22:06:47,603] Trial 1 finished with value: 0.8730383989367339 and parameters: {'learning_rate': 0.14783979348659035, 'depth': 6, 'iterations': 1752, 'l2_leaf_reg': 5.4181589388967515, 'random_strength': 3.563510134914501, 'bagging_temperature': 1.8512537739519748, 'auto_class_weights': 'None', 'early_stopping_rounds': 50}. Best is trial 1 with value: 0.8730383989367339.
[I 2025-12-30 22:07:16,583] Trial 2 finished with value: 0.8664986853637162 and parameters: {'learning_ra

0:	learn: 0.6732605	total: 19.6ms	remaining: 20.1s
1:	learn: 0.6571867	total: 42.6ms	remaining: 21.9s
2:	learn: 0.6395447	total: 62ms	remaining: 21.2s
3:	learn: 0.6236943	total: 84.6ms	remaining: 21.7s
4:	learn: 0.6078605	total: 108ms	remaining: 22s
5:	learn: 0.5939374	total: 130ms	remaining: 22.2s
6:	learn: 0.5830122	total: 151ms	remaining: 22s
7:	learn: 0.5715154	total: 169ms	remaining: 21.6s
8:	learn: 0.5614321	total: 190ms	remaining: 21.5s
9:	learn: 0.5496549	total: 210ms	remaining: 21.5s
10:	learn: 0.5388896	total: 235ms	remaining: 21.7s
11:	learn: 0.5284157	total: 258ms	remaining: 21.9s
12:	learn: 0.5209232	total: 279ms	remaining: 21.8s
13:	learn: 0.5118405	total: 299ms	remaining: 21.7s
14:	learn: 0.5033104	total: 320ms	remaining: 21.7s
15:	learn: 0.4962098	total: 342ms	remaining: 21.7s
16:	learn: 0.4885384	total: 363ms	remaining: 21.6s
17:	learn: 0.4828346	total: 385ms	remaining: 21.6s
18:	learn: 0.4771741	total: 406ms	remaining: 21.6s
19:	learn: 0.4698504	total: 427ms	remaining

In [23]:
# Save the tuned model with its parameters, score and Optuna results under 'cat_opt_fv0'
model_tuner.save_model_and_metric(filename = 'cat_opt_fv0', 
                                  model = cat_opt_fv0_model, 
                                  params = cat_opt_fv0_best_params, 
                                  score = cat_opt_fv0_best_score,
                                  dataframe_info = cat_opt_fv0_optuna_results)

{'model': 'CatBoostClassifier', 'params': {'learning_rate': 0.024501031804540142, 'depth': 4, 'iterations': 1030, 'l2_leaf_reg': 0.5144148041361079, 'random_strength': 0.9740652138500033, 'bagging_temperature': 4.316675820927967, 'auto_class_weights': 'None', 'early_stopping_rounds': 64}, 'score': 0.873923491057584, 'timestamp': '2025-12-30T23:40:54.735016'}


In [24]:
# Reload the saved model by name and display it
cat_opt_fv0_model = utils.load_model('cat_opt_fv0')
cat_opt_fv0_model

<catboost.core.CatBoostClassifier at 0x2b3d48dd640>

In [25]:
# Compute the listed sklearn metrics for the model on the v0 test set
# ('cat_opt_fv0_test' is presumably the name the results are stored under — confirm)
model_tuner.calculate_metrics(cat_opt_fv0_model, 
                              test_X_v0, 
                              test_y_v0, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'cat_opt_fv0_test')

{'roc_auc_score': 0.8642432879721015, 'accuracy_score': 0.869, 'precision_score': 0.7655677655677655, 'recall_score': 0.5135135135135135, 'f1_score': 0.6147058823529412}


### Catboost - Optuna - feature set: v0_basico (300 trials)

In [43]:
# Same CatBoostClassifier search on the v0 training data, scored by ROC AUC,
# with 300 trials requested; unpacks the model, best parameters, best score
# and the Optuna results.
cat_opt_fv0_300trials_model, cat_opt_fv0_300trials_best_params, cat_opt_fv0_300trials_best_score, cat_opt_fv0_300trials_optuna_results = model_tuner.tune_optuna(
    model_class = CatBoostClassifier, 
    X = train_X_v0, 
    y = train_y_v0, 
    param_space_func = catboost_param_space,
    scoring='roc_auc',
    n_trials = 300,
    timeout = 21600 # 6-hour limit
)

Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-02-22 15:26:05,956] A new study created in memory with name: no-name-87296161-3d49-4bb2-964f-584ec0419087
[I 2026-02-22 15:29:30,618] Trial 0 finished with value: 0.8575878109620442 and parameters: {'learning_rate': 0.0029813770307052724, 'depth': 8, 'iterations': 1594, 'l2_leaf_reg': 13.849507661133075, 'random_strength': 3.8998790405940174, 'bagging_temperature': 1.3629630264132082, 'auto_class_weights': 'Balanced', 'early_stopping_rounds': 98}. Best is trial 0 with value: 0.8575878109620442.
[I 2026-02-22 15:29:54,129] Trial 1 finished with value: 0.8730383989367339 and parameters: {'learning_rate': 0.14783979348659035, 'depth': 6, 'iterations': 1752, 'l2_leaf_reg': 5.4181589388967515, 'random_strength': 3.563510134914501, 'bagging_temperature': 1.8512537739519748, 'auto_class_weights': 'None', 'early_stopping_rounds': 50}. Best is trial 1 with value: 0.8730383989367339.
[I 2026-02-22 15:30:24,252] Trial 2 pruned. 
[I 2026-02-22 15:31:04,453] Trial 3 finished with value: 0.8

0:	learn: 0.6811961	total: 27.7ms	remaining: 57.9s
1:	learn: 0.6700313	total: 55.4ms	remaining: 57.8s
2:	learn: 0.6590688	total: 88.4ms	remaining: 1m 1s
3:	learn: 0.6484134	total: 117ms	remaining: 1m 1s
4:	learn: 0.6376087	total: 155ms	remaining: 1m 4s
5:	learn: 0.6276333	total: 189ms	remaining: 1m 5s
6:	learn: 0.6173726	total: 223ms	remaining: 1m 6s
7:	learn: 0.6082258	total: 254ms	remaining: 1m 6s
8:	learn: 0.5991837	total: 286ms	remaining: 1m 6s
9:	learn: 0.5905024	total: 319ms	remaining: 1m 6s
10:	learn: 0.5824883	total: 351ms	remaining: 1m 6s
11:	learn: 0.5767392	total: 372ms	remaining: 1m 4s
12:	learn: 0.5688415	total: 405ms	remaining: 1m 4s
13:	learn: 0.5623384	total: 441ms	remaining: 1m 5s
14:	learn: 0.5552094	total: 474ms	remaining: 1m 5s
15:	learn: 0.5490855	total: 506ms	remaining: 1m 5s
16:	learn: 0.5423315	total: 537ms	remaining: 1m 5s
17:	learn: 0.5366244	total: 569ms	remaining: 1m 5s
18:	learn: 0.5309690	total: 600ms	remaining: 1m 5s
19:	learn: 0.5251073	total: 633ms	rema

In [46]:
# Save the tuned model with its parameters, score and Optuna results under 'cat_opt_fv0_300trials'
model_tuner.save_model_and_metric(filename = 'cat_opt_fv0_300trials', 
                                  model = cat_opt_fv0_300trials_model, 
                                  params = cat_opt_fv0_300trials_best_params, 
                                  score = cat_opt_fv0_300trials_best_score,
                                  dataframe_info = cat_opt_fv0_300trials_optuna_results)

{'model': 'CatBoostClassifier', 'params': {'learning_rate': 0.014581881369351258, 'depth': 5, 'iterations': 2092, 'l2_leaf_reg': 0.539330438747958, 'random_strength': 1.0495540276176767, 'bagging_temperature': 3.9347070373113664, 'auto_class_weights': 'None', 'early_stopping_rounds': 82}, 'score': 0.8738791882963662, 'timestamp': '2026-02-22T21:11:51.024182'}


In [47]:
# Reload the saved model by name and display it
cat_opt_fv0_300trials_model = utils.load_model('cat_opt_fv0_300trials')
cat_opt_fv0_300trials_model

<catboost.core.CatBoostClassifier at 0x2538ddb7b00>

In [48]:
# Compute the listed sklearn metrics for the model on the v0 test set
# ('cat_opt_fv0_300trials_test' is presumably the name the results are stored under — confirm)
model_tuner.calculate_metrics(cat_opt_fv0_300trials_model, 
                              test_X_v0, 
                              test_y_v0, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'cat_opt_fv0_300trials_test')

{'roc_auc_score': 0.8598860802250634, 'accuracy_score': 0.864, 'precision_score': 0.7509293680297398, 'recall_score': 0.4963144963144963, 'f1_score': 0.5976331360946746}


### LightGBM - Optuna - feature set: v1_one-hot_encoding

In [40]:
# Optuna search for LGBMClassifier on the v1 training data, scored by ROC AUC
# (150 trials requested); unpacks the model, best parameters, best score and
# the Optuna results.
lgb_opt_fv1_model, lgb_opt_fv1_best_params, lgb_opt_fv1_best_score, lgb_opt_fv1_optuna_results = model_tuner.tune_optuna(
    model_class = LGBMClassifier, 
    X = train_X_v1, 
    y = train_y_v1, 
    param_space_func = lightgbm_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # 6-hour limit
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-30 23:59:55,110] A new study created in memory with name: no-name-6e45a8f0-8ef5-4025-8773-4804b85f8dcc
[I 2025-12-30 23:59:57,447] Trial 0 finished with value: 0.8167062341689861 and parameters: {'learning_rate': 0.0029813770307052724, 'num_leaves': 165, 'max_depth': 7, 'n_estimators': 2464, 'min_child_samples': 158, 'min_child_weight': 0.012313185468743897, 'subsample': 0.7105857020572387, 'colsample_bytree': 0.9207488710140077, 'reg_lambda': 68.00759466734245, 'reg_alpha': 3.189558257433924, 'scale_pos_weight': 1.4604981309148681, 'early_stopping_rounds': 75}. Best is trial 0 with value: 0.8167062341689861.
[I 2025-12-30 23:59:58,980] Trial 1 finished with value: 0.8664664213963075 and parameters: {'learning_rate': 0.04931971559288606, 'num_leaves': 187, 'max_depth': 6, 'n_estimators': 1903, 'min_child_samples': 106, 'min_child_weight': 0.0011352037010541887, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'reg_lambda': 0.288100449320714, 'reg_alph

In [42]:
# Save the tuned model with its parameters, score and Optuna results under 'lgb_opt_fv1'
model_tuner.save_model_and_metric(filename = 'lgb_opt_fv1', 
                                  model = lgb_opt_fv1_model, 
                                  params = lgb_opt_fv1_best_params, 
                                  score = lgb_opt_fv1_best_score,
                                  dataframe_info = lgb_opt_fv1_optuna_results)

{'model': 'LGBMClassifier', 'params': {'learning_rate': 0.018908840487666216, 'num_leaves': 235, 'max_depth': 4, 'n_estimators': 2795, 'min_child_samples': 22, 'min_child_weight': 0.004628418664177039, 'subsample': 0.8467281043051259, 'colsample_bytree': 0.6232825403423599, 'reg_lambda': 0.01914246217477908, 'reg_alpha': 0.01066360987366781, 'scale_pos_weight': 0.9139906907024911, 'early_stopping_rounds': 92}, 'score': 0.8694701004516954, 'timestamp': '2025-12-31T00:06:39.118573'}


In [43]:
# Reload the saved model by name and display it
lgb_opt_fv1_model = utils.load_model('lgb_opt_fv1')
lgb_opt_fv1_model

0,1,2
,boosting_type,'gbdt'
,num_leaves,235
,max_depth,4
,learning_rate,0.018908840487666216
,n_estimators,2795
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.004628418664177039


In [44]:
# Compute the listed sklearn metrics for the model on the v1 test set
# ('lgb_opt_fv1_test' is presumably the name the results are stored under — confirm)
model_tuner.calculate_metrics(lgb_opt_fv1_model, 
                              test_X_v1, 
                              test_y_v1, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'lgb_opt_fv1_test')

{'roc_auc_score': 0.8440644033864373, 'accuracy_score': 0.8575, 'precision_score': 0.7293233082706767, 'recall_score': 0.47665847665847666, 'f1_score': 0.5765230312035661}


### LightGBM - Optuna - feature set: v1_one-hot_encoding (300 trials)

In [50]:
# Same LGBMClassifier search on the v1 training data, scored by ROC AUC, with
# 300 trials requested; unpacks the model, best parameters, best score and
# the Optuna results.
lgb_opt_fv1_300trials_model, lgb_opt_fv1_300trials_best_params, lgb_opt_fv1_300trials_best_score, lgb_opt_fv1_300trials_optuna_results = model_tuner.tune_optuna(
    model_class = LGBMClassifier, 
    X = train_X_v1, 
    y = train_y_v1, 
    param_space_func = lightgbm_param_space,
    scoring='roc_auc',
    n_trials = 300,
    timeout = 21600 # 6-hour limit
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-02-22 21:15:58,960] A new study created in memory with name: no-name-9e8c8329-9dc4-405f-b80a-4cbf7e1945d7
[I 2026-02-22 21:15:59,924] Trial 0 finished with value: 0.8167062341689861 and parameters: {'learning_rate': 0.0029813770307052724, 'num_leaves': 165, 'max_depth': 7, 'n_estimators': 2464, 'min_child_samples': 158, 'min_child_weight': 0.012313185468743897, 'subsample': 0.7105857020572387, 'colsample_bytree': 0.9207488710140077, 'reg_lambda': 68.00759466734245, 'reg_alpha': 3.189558257433924, 'scale_pos_weight': 1.4604981309148681, 'early_stopping_rounds': 75}. Best is trial 0 with value: 0.8167062341689861.
[I 2026-02-22 21:16:01,929] Trial 1 finished with value: 0.8664664213963075 and parameters: {'learning_rate': 0.04931971559288606, 'num_leaves': 187, 'max_depth': 6, 'n_estimators': 1903, 'min_child_samples': 106, 'min_child_weight': 0.0011352037010541887, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'reg_lambda': 0.288100449320714, 'reg_alph

In [51]:
# Save the tuned model with its parameters, score and Optuna results under 'lgb_opt_fv1_300trials'
model_tuner.save_model_and_metric(filename = 'lgb_opt_fv1_300trials', 
                                  model = lgb_opt_fv1_300trials_model, 
                                  params = lgb_opt_fv1_300trials_best_params, 
                                  score = lgb_opt_fv1_300trials_best_score,
                                  dataframe_info = lgb_opt_fv1_300trials_optuna_results)

{'model': 'LGBMClassifier', 'params': {'learning_rate': 0.029044810711714372, 'num_leaves': 48, 'max_depth': 4, 'n_estimators': 2853, 'min_child_samples': 25, 'min_child_weight': 0.007969926003845778, 'subsample': 0.6431020671741129, 'colsample_bytree': 0.6534961651460207, 'reg_lambda': 3.956226849270416, 'reg_alpha': 0.003664509608306226, 'scale_pos_weight': 6.681937474060882, 'early_stopping_rounds': 71}, 'score': 0.8695553351118643, 'timestamp': '2026-02-22T21:27:43.338688'}


In [52]:
# Reload the saved model by name and display it
lgb_opt_fv1_300trials_model = utils.load_model('lgb_opt_fv1_300trials')
lgb_opt_fv1_300trials_model

0,1,2
,boosting_type,'gbdt'
,num_leaves,48
,max_depth,4
,learning_rate,0.029044810711714372
,n_estimators,2853
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.007969926003845778


In [53]:
# Compute the listed sklearn metrics for the model on the v1 test set
# ('lgb_opt_fv1_300trials_test' is presumably the name the results are stored under — confirm)
model_tuner.calculate_metrics(lgb_opt_fv1_300trials_model, 
                              test_X_v1, 
                              test_y_v1, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'lgb_opt_fv1_300trials_test')

{'roc_auc_score': 0.8398691449538906, 'accuracy_score': 0.776, 'precision_score': 0.46709470304975925, 'recall_score': 0.714987714987715, 'f1_score': 0.5650485436893203}


### Random Forest - Optuna - feature set: v1_one-hot_encoding

In [52]:
# Optuna search for RandomForestClassifier on the v1 training data, scored by
# ROC AUC (150 trials requested); unpacks the model, best parameters, best
# score and the Optuna results.
rf_opt_fv1_model, rf_opt_fv1_best_params, rf_opt_fv1_best_score, rf_opt_fv1_optuna_results = model_tuner.tune_optuna(
    model_class = RandomForestClassifier, 
    X = train_X_v1, 
    y = train_y_v1, 
    param_space_func = random_forest_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # 6-hour limit
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-31 00:20:07,083] A new study created in memory with name: no-name-2d6be182-64c0-4151-8cd1-add1c0901706
[I 2025-12-31 00:20:16,009] Trial 0 finished with value: 0.8556457127447488 and parameters: {'n_estimators': 530, 'max_depth': 11, 'min_samples_split': 23, 'min_samples_leaf': 40, 'max_features': 0.8679854848712821, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.8556457127447488.
[I 2025-12-31 00:20:37,427] Trial 1 finished with value: 0.861520644123624 and parameters: {'n_estimators': 1263, 'max_depth': 15, 'min_samples_split': 44, 'min_samples_leaf': 18, 'max_features': 0.7005970753140752, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.861520644123624.
[I 2025-12-31 00:20:53,530] Trial 2 finished with value: 0.8599050379944332 and parameters: {'n_estimators': 744, 'max_depth': 10, 'min_samples_split': 26, 'min_samples_leaf': 1, 'max_features': 0.8636959729674244, 'class_weight': None}. Best is trial 1 with value: 0.861520644123624.
[I 2025-12-31 00

In [53]:
# Hand the tuned Random Forest model, its best parameters, best score and
# the Optuna results to save_model_and_metric under the name 'rf_opt_fv1'.
model_tuner.save_model_and_metric(
    filename='rf_opt_fv1',
    model=rf_opt_fv1_model,
    params=rf_opt_fv1_best_params,
    score=rf_opt_fv1_best_score,
    dataframe_info=rf_opt_fv1_optuna_results,
)

{'model': 'RandomForestClassifier', 'params': {'n_estimators': 1339, 'max_depth': 9, 'min_samples_split': 23, 'min_samples_leaf': 6, 'max_features': 0.48726312627554236, 'class_weight': None}, 'score': 0.8657626335102233, 'timestamp': '2025-12-31T00:56:59.693614'}


In [54]:
# Reload the model stored under 'rf_opt_fv1' via utils.load_model and show it as the cell output.
rf_opt_fv1_model = utils.load_model('rf_opt_fv1')
rf_opt_fv1_model

0,1,2
,n_estimators,1339
,criterion,'gini'
,max_depth,9
,min_samples_split,23
,min_samples_leaf,6
,min_weight_fraction_leaf,0.0
,max_features,0.48726312627554236
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [55]:
# Compute ROC AUC, accuracy, precision, recall and F1 for the model on the v1 test split.
# The trailing string is presumably the label under which the results are recorded --
# confirm in ModelTuner.calculate_metrics.
# NOTE(review): confirm calculate_metrics feeds predicted probabilities (not hard labels) to roc_auc_score.
model_tuner.calculate_metrics(
    rf_opt_fv1_model,
    test_X_v1,
    test_y_v1,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'rf_opt_fv1_test',
)

{'roc_auc_score': 0.8531505311166327, 'accuracy_score': 0.8665, 'precision_score': 0.7916666666666666, 'recall_score': 0.4668304668304668, 'f1_score': 0.5873261205564142}


### Random Forest - Optuna - feature set: v1_one-hot_encoding (300 trials)

In [54]:
# Optuna search for RandomForestClassifier on the v1 training split (train_X_v1 / train_y_v1),
# scored by ROC AUC and capped at 300 trials with a 6-hour timeout.
(
    rf_opt_fv1_300trials_model,
    rf_opt_fv1_300trials_best_params,
    rf_opt_fv1_300trials_best_score,
    rf_opt_fv1_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=RandomForestClassifier,
    X=train_X_v1,
    y=train_y_v1,
    param_space_func=random_forest_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=6 * 60 * 60,  # 21600 s = 6 hours
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-02-22 21:29:41,043] A new study created in memory with name: no-name-fe20a8d5-ef75-4751-b44f-57e41efab49e
[I 2026-02-22 21:29:50,459] Trial 0 finished with value: 0.8556457127447488 and parameters: {'n_estimators': 530, 'max_depth': 11, 'min_samples_split': 23, 'min_samples_leaf': 40, 'max_features': 0.8679854848712821, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.8556457127447488.
[I 2026-02-22 21:30:13,398] Trial 1 finished with value: 0.861520644123624 and parameters: {'n_estimators': 1263, 'max_depth': 15, 'min_samples_split': 44, 'min_samples_leaf': 18, 'max_features': 0.7005970753140752, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.861520644123624.
[I 2026-02-22 21:30:30,341] Trial 2 finished with value: 0.8599050379944332 and parameters: {'n_estimators': 744, 'max_depth': 10, 'min_samples_split': 26, 'min_samples_leaf': 1, 'max_features': 0.8636959729674244, 'class_weight': None}. Best is trial 1 with value: 0.861520644123624.
[I 2026-02-22 21

In [55]:
# Hand the tuned Random Forest model (300-trial run), its best parameters, best score and
# the Optuna results to save_model_and_metric under the name 'rf_opt_fv1_300trials'.
model_tuner.save_model_and_metric(
    filename='rf_opt_fv1_300trials',
    model=rf_opt_fv1_300trials_model,
    params=rf_opt_fv1_300trials_best_params,
    score=rf_opt_fv1_300trials_best_score,
    dataframe_info=rf_opt_fv1_300trials_optuna_results,
)

{'model': 'RandomForestClassifier', 'params': {'n_estimators': 774, 'max_depth': 9, 'min_samples_split': 12, 'min_samples_leaf': 5, 'max_features': 0.42232499813959085, 'class_weight': None}, 'score': 0.8657698567865089, 'timestamp': '2026-02-22T22:15:29.705650'}


In [56]:
# Reload the model stored under 'rf_opt_fv1_300trials' via utils.load_model and show it as the cell output.
rf_opt_fv1_300trials_model = utils.load_model('rf_opt_fv1_300trials')
rf_opt_fv1_300trials_model

0,1,2
,n_estimators,774
,criterion,'gini'
,max_depth,9
,min_samples_split,12
,min_samples_leaf,5
,min_weight_fraction_leaf,0.0
,max_features,0.42232499813959085
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [58]:
# Compute ROC AUC, accuracy, precision, recall and F1 for the model on the v1 test split.
# The trailing string is presumably the label under which the results are recorded --
# confirm in ModelTuner.calculate_metrics.
# NOTE(review): confirm calculate_metrics feeds predicted probabilities (not hard labels) to roc_auc_score.
model_tuner.calculate_metrics(
    rf_opt_fv1_300trials_model,
    test_X_v1,
    test_y_v1,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'rf_opt_fv1_300trials_test',
)

{'roc_auc_score': 0.8530286835371581, 'accuracy_score': 0.8665, 'precision_score': 0.7941176470588235, 'recall_score': 0.4643734643734644, 'f1_score': 0.586046511627907}


### NGBoost - Optuna - feature set: v1_one-hot_encoding

In [47]:
# Optuna search for NGBClassifier on the v1 training split (train_X_v1 / train_y_v1),
# scored by ROC AUC and capped at 150 trials with a 6-hour timeout.
(
    ngb_opt_fv1_model,
    ngb_opt_fv1_best_params,
    ngb_opt_fv1_best_score,
    ngb_opt_fv1_optuna_results,
) = model_tuner.tune_optuna(
    model_class=NGBClassifier,
    X=train_X_v1,
    y=train_y_v1,
    param_space_func=ngboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=6 * 60 * 60,  # 21600 s = 6 hours
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-31 15:25:14,306] A new study created in memory with name: no-name-04bf8cbd-6ed4-4789-9b91-8117af2e36f5
[I 2025-12-31 15:26:31,211] Trial 0 finished with value: 0.859309840028508 and parameters: {'n_estimators': 1207, 'learning_rate': 0.009166080569548896, 'minibatch_frac': 0.6335652954919282, 'base_max_depth': 7, 'base_min_samples_leaf': 15, 'base_min_samples_split': 17, 'base_max_features': 0.8001254913892248}. Best is trial 0 with value: 0.859309840028508.
[I 2025-12-31 15:27:14,956] Trial 1 finished with value: 0.8559567951767777 and parameters: {'n_estimators': 541, 'learning_rate': 0.050252461227493404, 'minibatch_frac': 0.705798108585104, 'base_max_depth': 7, 'base_min_samples_leaf': 14, 'base_min_samples_split': 2, 'base_max_features': 0.9314209942959957}. Best is trial 0 with value: 0.859309840028508.
[I 2025-12-31 15:28:37,503] Trial 2 finished with value: 0.8659658483497221 and parameters: {'n_estimators': 1418, 'learning_rate': 0.002504617745700087, 'minibatch_fra

[iter 0] loss=0.5064 val_loss=0.0000 scale=2.0000 norm=4.0043
[iter 100] loss=0.4126 val_loss=0.0000 scale=2.0000 norm=3.4598
[iter 200] loss=0.3795 val_loss=0.0000 scale=2.0000 norm=3.3819
[iter 300] loss=0.3475 val_loss=0.0000 scale=2.0000 norm=3.3059
[iter 400] loss=0.3282 val_loss=0.0000 scale=2.0000 norm=3.2760
[iter 500] loss=0.3185 val_loss=0.0000 scale=2.0000 norm=3.2846
[iter 600] loss=0.3099 val_loss=0.0000 scale=1.0000 norm=1.6660
[iter 700] loss=0.3026 val_loss=0.0000 scale=1.0000 norm=1.6631
[iter 800] loss=0.2892 val_loss=0.0000 scale=2.0000 norm=3.2380
[iter 900] loss=0.2985 val_loss=0.0000 scale=1.0000 norm=1.6594


In [50]:
# Hand the tuned NGBoost model, its best parameters, best score and
# the Optuna results to save_model_and_metric under the name 'ngb_opt_fv1'.
model_tuner.save_model_and_metric(
    filename='ngb_opt_fv1',
    model=ngb_opt_fv1_model,
    params=ngb_opt_fv1_best_params,
    score=ngb_opt_fv1_best_score,
    dataframe_info=ngb_opt_fv1_optuna_results,
)

{'model': 'NGBClassifier', 'params': {'n_estimators': 919, 'learning_rate': 0.0019391638801433634, 'minibatch_frac': 0.5585780611073167, 'base_max_depth': 6, 'base_min_samples_leaf': 3, 'base_min_samples_split': 42, 'base_max_features': 0.6296122913657486}, 'score': 0.8682329939998652, 'timestamp': '2025-12-31T16:58:59.155036'}


In [51]:
# Reload the model stored under 'ngb_opt_fv1' via utils.load_model and show it as the cell output.
ngb_opt_fv1_model = utils.load_model('ngb_opt_fv1')
ngb_opt_fv1_model

0,1,2
,Dist,<class 'ngboo....Categorical'>
,Score,<class 'ngboo...res.LogScore'>
,Base,DecisionTreeR...om_state=1234)
,natural_gradient,True
,n_estimators,919
,learning_rate,0.0019391638801433634
,minibatch_frac,0.5585780611073167
,col_sample,1.0
,verbose,True
,random_state,RandomState(M... 0x26F8A0FEB40

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,6
,min_samples_split,42
,min_samples_leaf,3
,min_weight_fraction_leaf,0.0
,max_features,0.6296122913657486
,random_state,1234
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [52]:
# Compute ROC AUC, accuracy, precision, recall and F1 for the model on the v1 test split.
# The trailing string is presumably the label under which the results are recorded --
# confirm in ModelTuner.calculate_metrics.
# NOTE(review): confirm calculate_metrics feeds predicted probabilities (not hard labels) to roc_auc_score.
model_tuner.calculate_metrics(
    ngb_opt_fv1_model,
    test_X_v1,
    test_y_v1,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'ngb_opt_fv1_test',
)

{'roc_auc_score': 0.8565237039813312, 'accuracy_score': 0.867, 'precision_score': 0.8025751072961373, 'recall_score': 0.4594594594594595, 'f1_score': 0.584375}


### NGBoost - Optuna - feature set: v1_one-hot_encoding (300 trials)

In [39]:
# Optuna search for NGBClassifier on the v1 training split (train_X_v1 / train_y_v1),
# scored by ROC AUC and capped at 300 trials with a 6-hour timeout.
(
    ngb_opt_fv1_300trials_model,
    ngb_opt_fv1_300trials_best_params,
    ngb_opt_fv1_300trials_best_score,
    ngb_opt_fv1_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=NGBClassifier,
    X=train_X_v1,
    y=train_y_v1,
    param_space_func=ngboost_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=6 * 60 * 60,  # 21600 s = 6 hours
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-02-23 20:27:59,935] A new study created in memory with name: no-name-c33f0c23-df9e-402f-b19c-6e6fe860014e
[I 2026-02-23 20:29:23,824] Trial 0 finished with value: 0.8648366094904218 and parameters: {'n_estimators': 1319, 'learning_rate': 0.005011971183406987, 'minibatch_frac': 0.6352982313850575, 'base_max_depth': 6, 'base_min_samples_leaf': 15, 'base_min_samples_split': 20, 'base_max_features': 0.5869508941279964}. Best is trial 0 with value: 0.8648366094904218.
[I 2026-02-23 20:30:20,004] Trial 1 finished with value: 0.8642390037657348 and parameters: {'n_estimators': 1206, 'learning_rate': 0.005339276944039102, 'minibatch_frac': 0.5716432089093513, 'base_max_depth': 4, 'base_min_samples_leaf': 27, 'base_min_samples_split': 35, 'base_max_features': 0.7281207923045068}. Best is trial 0 with value: 0.8648366094904218.
[I 2026-02-23 20:30:43,095] Trial 2 finished with value: 0.8611642958268725 and parameters: {'n_estimators': 388, 'learning_rate': 0.0010209218864063129, 'minibat

[iter 0] loss=0.5060 val_loss=0.0000 scale=2.0000 norm=4.0022
[iter 100] loss=0.4342 val_loss=0.0000 scale=2.0000 norm=3.5519
[iter 200] loss=0.4063 val_loss=0.0000 scale=2.0000 norm=3.4550
[iter 300] loss=0.3718 val_loss=0.0000 scale=2.0000 norm=3.3377
[iter 400] loss=0.3522 val_loss=0.0000 scale=2.0000 norm=3.2843
[iter 500] loss=0.3404 val_loss=0.0000 scale=2.0000 norm=3.2683
[iter 600] loss=0.3298 val_loss=0.0000 scale=2.0000 norm=3.3057
[iter 700] loss=0.3185 val_loss=0.0000 scale=2.0000 norm=3.2837
[iter 800] loss=0.3020 val_loss=0.0000 scale=2.0000 norm=3.1960
[iter 900] loss=0.3142 val_loss=0.0000 scale=1.0000 norm=1.6556
[iter 1000] loss=0.2999 val_loss=0.0000 scale=1.0000 norm=1.6380
[iter 1100] loss=0.2912 val_loss=0.0000 scale=1.0000 norm=1.5987
[iter 1200] loss=0.2986 val_loss=0.0000 scale=1.0000 norm=1.6660
[iter 1300] loss=0.2935 val_loss=0.0000 scale=1.0000 norm=1.6422


In [40]:
# Hand the tuned NGBoost model (300-trial run), its best parameters, best score and
# the Optuna results to save_model_and_metric under the name 'ngb_opt_fv1_300trials'.
model_tuner.save_model_and_metric(
    filename='ngb_opt_fv1_300trials',
    model=ngb_opt_fv1_300trials_model,
    params=ngb_opt_fv1_300trials_best_params,
    score=ngb_opt_fv1_300trials_best_score,
    dataframe_info=ngb_opt_fv1_300trials_optuna_results,
)

{'model': 'NGBClassifier', 'params': {'n_estimators': 1385, 'learning_rate': 0.0012656589601070013, 'minibatch_frac': 0.5372883081587808, 'base_max_depth': 6, 'base_min_samples_leaf': 5, 'base_min_samples_split': 10, 'base_max_features': 0.627008043808254}, 'score': 0.867970066743073, 'timestamp': '2026-02-23T23:17:45.591711'}


In [41]:
# Reload the model stored under 'ngb_opt_fv1_300trials' via utils.load_model and show it as the cell output.
ngb_opt_fv1_300trials_model = utils.load_model('ngb_opt_fv1_300trials')
ngb_opt_fv1_300trials_model

0,1,2
,Dist,<class 'ngboo....Categorical'>
,Score,<class 'ngboo...res.LogScore'>
,Base,DecisionTreeR...om_state=1234)
,natural_gradient,True
,n_estimators,1385
,learning_rate,0.0012656589601070013
,minibatch_frac,0.5372883081587808
,col_sample,1.0
,verbose,True
,random_state,RandomState(M... 0x1E03D600740

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,6
,min_samples_split,10
,min_samples_leaf,5
,min_weight_fraction_leaf,0.0
,max_features,0.627008043808254
,random_state,1234
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [42]:
# Compute ROC AUC, accuracy, precision, recall and F1 for the model on the v1 test split.
# The trailing string is presumably the label under which the results are recorded --
# confirm in ModelTuner.calculate_metrics.
# NOTE(review): confirm calculate_metrics feeds predicted probabilities (not hard labels) to roc_auc_score.
model_tuner.calculate_metrics(
    ngb_opt_fv1_300trials_model,
    test_X_v1,
    test_y_v1,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'ngb_opt_fv1_300trials_test',
)

{'roc_auc_score': 0.8570882130204165, 'accuracy_score': 0.865, 'precision_score': 0.7991266375545851, 'recall_score': 0.44963144963144963, 'f1_score': 0.5754716981132075}


## B - Feature sets: v3 e v4 (Numéricas com normalização e produtos entre variáveis e Categóricas com one-hot encoding (exceto Catboost))

### XGBoost - Optuna - feature set: v3_one-hot_encoding_plus_normalizacao_plus_poly

In [68]:
# Optuna search for XGBClassifier on the v3 training split (train_X_v3 / train_y_v3),
# scored by ROC AUC and capped at 150 trials with a 6-hour timeout.
(
    xgb_opt_fv3_model,
    xgb_opt_fv3_best_params,
    xgb_opt_fv3_best_score,
    xgb_opt_fv3_optuna_results,
) = model_tuner.tune_optuna(
    model_class=XGBClassifier,
    X=train_X_v3,
    y=train_y_v3,
    param_space_func=xgboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=6 * 60 * 60,  # 21600 s = 6 hours
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-31 17:54:10,149] A new study created in memory with name: no-name-9c926822-efd3-41be-b402-5f175e5927c7
[I 2025-12-31 17:54:37,255] Trial 0 finished with value: 0.8492579287496028 and parameters: {'learning_rate': 0.0029813770307052724, 'max_depth': 8, 'n_estimators': 1594, 'reg_lambda': 13.849507661133075, 'reg_alpha': 1.3179630432958698, 'gamma': 1.3629630264132082, 'min_child_weight': 0.06751383304677265, 'subsample': 0.9207488710140077, 'colsample_bytree': 0.983255741473482, 'scale_pos_weight': 6.895799670211798, 'early_stopping_rounds': 68}. Best is trial 0 with value: 0.8492579287496028.
[I 2025-12-31 17:54:45,131] Trial 1 finished with value: 0.8544969710394776 and parameters: {'learning_rate': 0.017419098458624965, 'max_depth': 8, 'n_estimators': 2282, 'reg_lambda': 0.3026934470902619, 'reg_alpha': 0.1757052524413466, 'gamma': 2.5154158265390487, 'min_child_weight': 0.010997788128340768, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'scale_p

In [69]:
# Hand the tuned XGBoost model, its best parameters, best score and
# the Optuna results to save_model_and_metric under the name 'xgb_opt_fv3'.
model_tuner.save_model_and_metric(
    filename='xgb_opt_fv3',
    model=xgb_opt_fv3_model,
    params=xgb_opt_fv3_best_params,
    score=xgb_opt_fv3_best_score,
    dataframe_info=xgb_opt_fv3_optuna_results,
)

{'model': 'XGBClassifier', 'params': {'learning_rate': 0.028914701172905902, 'max_depth': 5, 'n_estimators': 2941, 'reg_lambda': 0.9973732870012123, 'reg_alpha': 0.003272456658044637, 'gamma': 4.991390367590288, 'min_child_weight': 0.6837286170194612, 'subsample': 0.6002134766115784, 'colsample_bytree': 0.6009784065217589, 'scale_pos_weight': 0.6746868968434129, 'early_stopping_rounds': 82}, 'score': 0.8684328379770975, 'timestamp': '2025-12-31T18:02:34.632275'}


In [70]:
# Reload the model stored under 'xgb_opt_fv3' via utils.load_model and show it as the cell output.
xgb_opt_fv3_model = utils.load_model('xgb_opt_fv3')
xgb_opt_fv3_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6009784065217589
,device,
,early_stopping_rounds,
,enable_categorical,False


In [71]:
# Compute ROC AUC, accuracy, precision, recall and F1 for the model on the v3 test split.
# The trailing string is presumably the label under which the results are recorded --
# confirm in ModelTuner.calculate_metrics.
# NOTE(review): confirm calculate_metrics feeds predicted probabilities (not hard labels) to roc_auc_score.
model_tuner.calculate_metrics(
    xgb_opt_fv3_model,
    test_X_v3,
    test_y_v3,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'xgb_opt_fv3_test',
)

{'roc_auc_score': 0.8584054007782822, 'accuracy_score': 0.8665, 'precision_score': 0.8240740740740741, 'recall_score': 0.43734643734643736, 'f1_score': 0.5714285714285714}


### Catboost - Optuna - feature set: v4_normalizacao_plus_poly

In [74]:
# Optuna search for CatBoostClassifier on the v4 training split (train_X_v4 / train_y_v4),
# scored by ROC AUC and capped at 150 trials with a 6-hour timeout.
(
    cat_opt_fv4_model,
    cat_opt_fv4_best_params,
    cat_opt_fv4_best_score,
    cat_opt_fv4_optuna_results,
) = model_tuner.tune_optuna(
    model_class=CatBoostClassifier,
    X=train_X_v4,
    y=train_y_v4,
    param_space_func=catboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=6 * 60 * 60,  # 21600 s = 6 hours
)

Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-31 18:13:53,451] A new study created in memory with name: no-name-95a3e096-acf9-4db0-af01-c81119dbcfc6
[I 2025-12-31 18:18:15,145] Trial 0 finished with value: 0.86193285242365 and parameters: {'learning_rate': 0.0029813770307052724, 'depth': 8, 'iterations': 1594, 'l2_leaf_reg': 13.849507661133075, 'random_strength': 3.8998790405940174, 'bagging_temperature': 1.3629630264132082, 'auto_class_weights': 'Balanced', 'early_stopping_rounds': 98}. Best is trial 0 with value: 0.86193285242365.
[I 2025-12-31 18:18:33,874] Trial 1 finished with value: 0.867371497914881 and parameters: {'learning_rate': 0.14783979348659035, 'depth': 6, 'iterations': 1752, 'l2_leaf_reg': 5.4181589388967515, 'random_strength': 3.563510134914501, 'bagging_temperature': 1.8512537739519748, 'auto_class_weights': 'None', 'early_stopping_rounds': 50}. Best is trial 1 with value: 0.867371497914881.
[I 2025-12-31 18:19:14,252] Trial 2 finished with value: 0.8664006895821093 and parameters: {'learning_rate': 0

0:	learn: 0.6616636	total: 18.5ms	remaining: 27.2s
1:	learn: 0.6343374	total: 38.4ms	remaining: 28.2s
2:	learn: 0.6071810	total: 60.5ms	remaining: 29.6s
3:	learn: 0.5823601	total: 82.7ms	remaining: 30.3s
4:	learn: 0.5620282	total: 101ms	remaining: 29.6s
5:	learn: 0.5454701	total: 122ms	remaining: 29.8s
6:	learn: 0.5280074	total: 144ms	remaining: 30.1s
7:	learn: 0.5117499	total: 165ms	remaining: 30.3s
8:	learn: 0.4989425	total: 188ms	remaining: 30.6s
9:	learn: 0.4857603	total: 210ms	remaining: 30.7s
10:	learn: 0.4757841	total: 234ms	remaining: 31.1s
11:	learn: 0.4649903	total: 255ms	remaining: 31.1s
12:	learn: 0.4566461	total: 278ms	remaining: 31.2s
13:	learn: 0.4479526	total: 300ms	remaining: 31.2s
14:	learn: 0.4389745	total: 322ms	remaining: 31.3s
15:	learn: 0.4308474	total: 343ms	remaining: 31.2s
16:	learn: 0.4239515	total: 365ms	remaining: 31.2s
17:	learn: 0.4189776	total: 386ms	remaining: 31.1s
18:	learn: 0.4138552	total: 409ms	remaining: 31.2s
19:	learn: 0.4076773	total: 434ms	rem

In [75]:
# Hand the tuned CatBoost model, its best parameters, best score and
# the Optuna results to save_model_and_metric under the name 'cat_opt_fv4'.
model_tuner.save_model_and_metric(
    filename='cat_opt_fv4',
    model=cat_opt_fv4_model,
    params=cat_opt_fv4_best_params,
    score=cat_opt_fv4_best_score,
    dataframe_info=cat_opt_fv4_optuna_results,
)

{'model': 'CatBoostClassifier', 'params': {'learning_rate': 0.04155689845625358, 'depth': 4, 'iterations': 1471, 'l2_leaf_reg': 0.04510928406554372, 'random_strength': 0.7298172152062612, 'bagging_temperature': 3.6804166566407863, 'auto_class_weights': 'None', 'early_stopping_rounds': 82}, 'score': 0.8728397588388825, 'timestamp': '2025-12-31T20:42:53.171226'}


In [76]:
# Reload the model stored under 'cat_opt_fv4' via utils.load_model and show it as the cell output.
cat_opt_fv4_model = utils.load_model('cat_opt_fv4')
cat_opt_fv4_model

<catboost.core.CatBoostClassifier at 0x26f9e0a1040>

In [77]:
# Compute ROC AUC, accuracy, precision, recall and F1 for the model on the v4 test split.
# The trailing string is presumably the label under which the results are recorded --
# confirm in ModelTuner.calculate_metrics.
# NOTE(review): confirm calculate_metrics feeds predicted probabilities (not hard labels) to roc_auc_score.
model_tuner.calculate_metrics(
    cat_opt_fv4_model,
    test_X_v4,
    test_y_v4,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'cat_opt_fv4_test',
)

{'roc_auc_score': 0.8545093629839392, 'accuracy_score': 0.8625, 'precision_score': 0.7391304347826086, 'recall_score': 0.5012285012285013, 'f1_score': 0.5973645680819912}


### LightGBM - Optuna - feature set: v3_one-hot_encoding_plus_normalizacao_plus_poly

In [85]:
# Optuna search for LGBMClassifier on the v3 training split (train_X_v3 / train_y_v3),
# scored by ROC AUC and capped at 150 trials with a 6-hour timeout.
(
    lgb_opt_fv3_model,
    lgb_opt_fv3_best_params,
    lgb_opt_fv3_best_score,
    lgb_opt_fv3_optuna_results,
) = model_tuner.tune_optuna(
    model_class=LGBMClassifier,
    X=train_X_v3,
    y=train_y_v3,
    param_space_func=lightgbm_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=6 * 60 * 60,  # 21600 s = 6 hours
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-31 20:51:55,235] A new study created in memory with name: no-name-dd7e11bc-b586-4604-b140-51535884f87f
[I 2025-12-31 20:52:06,181] Trial 0 finished with value: 0.8538776954859338 and parameters: {'learning_rate': 0.0029813770307052724, 'num_leaves': 165, 'max_depth': 7, 'n_estimators': 2464, 'min_child_samples': 158, 'min_child_weight': 0.012313185468743897, 'subsample': 0.7105857020572387, 'colsample_bytree': 0.9207488710140077, 'reg_lambda': 68.00759466734245, 'reg_alpha': 3.189558257433924, 'scale_pos_weight': 1.4604981309148681, 'early_stopping_rounds': 75}. Best is trial 0 with value: 0.8538776954859338.
[I 2025-12-31 20:52:07,746] Trial 1 finished with value: 0.8619080525084032 and parameters: {'learning_rate': 0.04931971559288606, 'num_leaves': 187, 'max_depth': 6, 'n_estimators': 1903, 'min_child_samples': 106, 'min_child_weight': 0.0011352037010541887, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'reg_lambda': 0.288100449320714, 'reg_alph

In [86]:
# Hand the tuned LightGBM model, its best parameters, best score and
# the Optuna results to save_model_and_metric under the name 'lgb_opt_fv3'.
model_tuner.save_model_and_metric(
    filename='lgb_opt_fv3',
    model=lgb_opt_fv3_model,
    params=lgb_opt_fv3_best_params,
    score=lgb_opt_fv3_best_score,
    dataframe_info=lgb_opt_fv3_optuna_results,
)

{'model': 'LGBMClassifier', 'params': {'learning_rate': 0.07536878912888248, 'num_leaves': 82, 'max_depth': 5, 'n_estimators': 652, 'min_child_samples': 59, 'min_child_weight': 0.004579615133130554, 'subsample': 0.7999212004193511, 'colsample_bytree': 0.751933847977377, 'reg_lambda': 84.97107096585827, 'reg_alpha': 0.0024935323644191703, 'scale_pos_weight': 3.0742724365966536, 'early_stopping_rounds': 73}, 'score': 0.8670281515154434, 'timestamp': '2025-12-31T21:17:14.146386'}


In [87]:
# Reload the model stored under 'lgb_opt_fv3' via utils.load_model and show it as the cell output.
lgb_opt_fv3_model = utils.load_model('lgb_opt_fv3')
lgb_opt_fv3_model

0,1,2
,boosting_type,'gbdt'
,num_leaves,82
,max_depth,5
,learning_rate,0.07536878912888248
,n_estimators,652
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.004579615133130554


In [88]:
# Compute ROC AUC, accuracy, precision, recall and F1 for the model on the v3 test split.
# The trailing string is presumably the label under which the results are recorded --
# confirm in ModelTuner.calculate_metrics.
# NOTE(review): confirm calculate_metrics feeds predicted probabilities (not hard labels) to roc_auc_score.
model_tuner.calculate_metrics(
    lgb_opt_fv3_model,
    test_X_v3,
    test_y_v3,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'lgb_opt_fv3_test',
)

{'roc_auc_score': 0.8443744206456072, 'accuracy_score': 0.8225, 'precision_score': 0.5548523206751055, 'recall_score': 0.6461916461916462, 'f1_score': 0.5970488081725313}


### Random Forest - Optuna - feature set: v3_one-hot_encoding_plus_normalizacao_plus_poly

In [24]:
# Optuna search for RandomForestClassifier on the v3 training split (train_X_v3 / train_y_v3),
# scored by ROC AUC and capped at 150 trials with a 6-hour timeout.
(
    rf_opt_fv3_model,
    rf_opt_fv3_best_params,
    rf_opt_fv3_best_score,
    rf_opt_fv3_optuna_results,
) = model_tuner.tune_optuna(
    model_class=RandomForestClassifier,
    X=train_X_v3,
    y=train_y_v3,
    param_space_func=random_forest_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=6 * 60 * 60,  # 21600 s = 6 hours
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-01 10:41:08,761] A new study created in memory with name: no-name-ac69a554-2182-40a7-8774-b9694cc7b16f
[I 2026-01-01 10:41:53,542] Trial 0 finished with value: 0.8545280311275052 and parameters: {'n_estimators': 530, 'max_depth': 11, 'min_samples_split': 23, 'min_samples_leaf': 40, 'max_features': 0.8679854848712821, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.8545280311275052.
[I 2026-01-01 10:43:27,940] Trial 1 finished with value: 0.8548771561479713 and parameters: {'n_estimators': 1263, 'max_depth': 15, 'min_samples_split': 44, 'min_samples_leaf': 18, 'max_features': 0.7005970753140752, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.8548771561479713.
[I 2026-01-01 10:44:47,977] Trial 2 finished with value: 0.8512770752472768 and parameters: {'n_estimators': 744, 'max_depth': 10, 'min_samples_split': 26, 'min_samples_leaf': 1, 'max_features': 0.8636959729674244, 'class_weight': None}. Best is trial 1 with value: 0.8548771561479713.
[I 2026-01-01

In [25]:
# Hand the tuned Random Forest model, its best parameters, best score and
# the Optuna results to save_model_and_metric under the name 'rf_opt_fv3'.
model_tuner.save_model_and_metric(
    filename='rf_opt_fv3',
    model=rf_opt_fv3_model,
    params=rf_opt_fv3_best_params,
    score=rf_opt_fv3_best_score,
    dataframe_info=rf_opt_fv3_optuna_results,
)

{'model': 'RandomForestClassifier', 'params': {'n_estimators': 857, 'max_depth': 7, 'min_samples_split': 19, 'min_samples_leaf': 19, 'max_features': 0.45026650531253715, 'class_weight': None}, 'score': 0.8600928431778565, 'timestamp': '2026-01-01T12:12:03.339250'}


In [26]:
# Reload the model stored under 'rf_opt_fv3' via utils.load_model and show it as the cell output.
rf_opt_fv3_model = utils.load_model('rf_opt_fv3')
rf_opt_fv3_model

0,1,2
,n_estimators,857
,criterion,'gini'
,max_depth,7
,min_samples_split,19
,min_samples_leaf,19
,min_weight_fraction_leaf,0.0
,max_features,0.45026650531253715
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [27]:
# Compute ROC AUC, accuracy, precision, recall and F1 for the model on the v3 test split.
# The trailing string is presumably the label under which the results are recorded --
# confirm in ModelTuner.calculate_metrics.
# NOTE(review): confirm calculate_metrics feeds predicted probabilities (not hard labels) to roc_auc_score.
model_tuner.calculate_metrics(
    rf_opt_fv3_model,
    test_X_v3,
    test_y_v3,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'rf_opt_fv3_test',
)

{'roc_auc_score': 0.8462561174425581, 'accuracy_score': 0.864, 'precision_score': 0.766798418972332, 'recall_score': 0.47665847665847666, 'f1_score': 0.5878787878787879}


### NGBoost - Optuna - feature set: v3_one-hot_encoding_plus_normalizacao_plus_poly

In [28]:
# Optuna search for NGBClassifier on the v3 training split (train_X_v3 / train_y_v3),
# scored by ROC AUC and capped at 150 trials with a 6-hour timeout.
(
    ngb_opt_fv3_model,
    ngb_opt_fv3_best_params,
    ngb_opt_fv3_best_score,
    ngb_opt_fv3_optuna_results,
) = model_tuner.tune_optuna(
    model_class=NGBClassifier,
    X=train_X_v3,
    y=train_y_v3,
    param_space_func=ngboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=6 * 60 * 60,  # 21600 s = 6 hours
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-01 12:14:04,759] A new study created in memory with name: no-name-6ba365f4-3f26-405b-9a22-774379edb80d
[I 2026-01-01 12:19:18,990] Trial 0 finished with value: 0.8463194999566603 and parameters: {'n_estimators': 1472, 'learning_rate': 0.03925879663657639, 'minibatch_frac': 0.8657241282022083, 'base_max_depth': 5, 'base_min_samples_leaf': 12, 'base_min_samples_split': 35, 'base_max_features': 0.5560145299407964}. Best is trial 0 with value: 0.8463194999566603.
[I 2026-01-01 12:23:21,138] Trial 1 finished with value: 0.8515448180215929 and parameters: {'n_estimators': 1078, 'learning_rate': 0.011433986439392906, 'minibatch_frac': 0.9496870062973557, 'base_max_depth': 8, 'base_min_samples_leaf': 38, 'base_min_samples_split': 20, 'base_max_features': 0.40038950108918625}. Best is trial 1 with value: 0.8515448180215929.
[I 2026-01-01 12:26:09,293] Trial 2 finished with value: 0.8556091148115688 and parameters: {'n_estimators': 726, 'learning_rate': 0.0016695019675157065, 'minibat

[iter 0] loss=0.5041 val_loss=0.0000 scale=2.0000 norm=3.9923
[iter 100] loss=0.4248 val_loss=0.0000 scale=2.0000 norm=3.5263
[iter 200] loss=0.3993 val_loss=0.0000 scale=2.0000 norm=3.4661
[iter 300] loss=0.3683 val_loss=0.0000 scale=2.0000 norm=3.3909
[iter 400] loss=0.3551 val_loss=0.0000 scale=2.0000 norm=3.3944
[iter 500] loss=0.3438 val_loss=0.0000 scale=2.0000 norm=3.3775
[iter 600] loss=0.3375 val_loss=0.0000 scale=1.0000 norm=1.7322
[iter 700] loss=0.3323 val_loss=0.0000 scale=1.0000 norm=1.7429
[iter 800] loss=0.3179 val_loss=0.0000 scale=1.0000 norm=1.6793
[iter 900] loss=0.3346 val_loss=0.0000 scale=1.0000 norm=1.7642
[iter 1000] loss=0.3248 val_loss=0.0000 scale=1.0000 norm=1.7673
[iter 1100] loss=0.3122 val_loss=0.0000 scale=1.0000 norm=1.6908
[iter 1200] loss=0.3245 val_loss=0.0000 scale=1.0000 norm=1.7901
[iter 1300] loss=0.3165 val_loss=0.0000 scale=2.0000 norm=3.5089
[iter 1400] loss=0.3146 val_loss=0.0000 scale=1.0000 norm=1.7696


In [29]:
# Hand the tuned NGBoost model, its best parameters, best score and
# the Optuna results to save_model_and_metric under the name 'ngb_opt_fv3'.
model_tuner.save_model_and_metric(
    filename='ngb_opt_fv3',
    model=ngb_opt_fv3_model,
    params=ngb_opt_fv3_best_params,
    score=ngb_opt_fv3_best_score,
    dataframe_info=ngb_opt_fv3_optuna_results,
)

{'model': 'NGBClassifier', 'params': {'n_estimators': 1402, 'learning_rate': 0.001881601915025182, 'minibatch_frac': 0.5206244938896017, 'base_max_depth': 4, 'base_min_samples_leaf': 34, 'base_min_samples_split': 39, 'base_max_features': 0.7937172680487597}, 'score': 0.8635267887239841, 'timestamp': '2026-01-01T17:54:28.710413'}


In [30]:
# Reload the model stored under 'ngb_opt_fv3' via utils.load_model and show it as the cell output.
ngb_opt_fv3_model = utils.load_model('ngb_opt_fv3')
ngb_opt_fv3_model

0,1,2
,Dist,<class 'ngboo....Categorical'>
,Score,<class 'ngboo...res.LogScore'>
,Base,DecisionTreeR...om_state=1234)
,natural_gradient,True
,n_estimators,1402
,learning_rate,0.001881601915025182
,minibatch_frac,0.5206244938896017
,col_sample,1.0
,verbose,True
,random_state,RandomState(M... 0x213B4DD2840

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,4
,min_samples_split,39
,min_samples_leaf,34
,min_weight_fraction_leaf,0.0
,max_features,0.7937172680487597
,random_state,1234
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [31]:
# Compute ROC AUC, accuracy, precision, recall and F1 for the model on the v3 test split.
# The trailing string is presumably the label under which the results are recorded --
# confirm in ModelTuner.calculate_metrics.
# NOTE(review): confirm calculate_metrics feeds predicted probabilities (not hard labels) to roc_auc_score.
model_tuner.calculate_metrics(
    ngb_opt_fv3_model,
    test_X_v3,
    test_y_v3,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'ngb_opt_fv3_test',
)

{'roc_auc_score': 0.8536641417997352, 'accuracy_score': 0.8655, 'precision_score': 0.7804878048780488, 'recall_score': 0.47174447174447176, 'f1_score': 0.5880551301684533}


## C - Feature sets: v5 (Numéricas com produtos entre variáveis e Categóricas com one-hot encoding (exceto Catboost))

### XGBoost - Optuna - feature set: v5_one-hot_encoding_plus_poly

In [37]:
# Optuna search for XGBClassifier on the v5 training split (train_X_v5 / train_y_v5),
# scored by ROC AUC and capped at 150 trials with a 6-hour timeout.
(
    xgb_opt_fv5_model,
    xgb_opt_fv5_best_params,
    xgb_opt_fv5_best_score,
    xgb_opt_fv5_optuna_results,
) = model_tuner.tune_optuna(
    model_class=XGBClassifier,
    X=train_X_v5,
    y=train_y_v5,
    param_space_func=xgboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=6 * 60 * 60,  # 21600 s = 6 hours
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-01 18:01:33,509] A new study created in memory with name: no-name-4959bd1b-c474-4667-90e0-25caa6fd1dcd
[I 2026-01-01 18:01:55,928] Trial 0 finished with value: 0.8543450414616057 and parameters: {'learning_rate': 0.0029813770307052724, 'max_depth': 8, 'n_estimators': 1594, 'reg_lambda': 13.849507661133075, 'reg_alpha': 1.3179630432958698, 'gamma': 1.3629630264132082, 'min_child_weight': 0.06751383304677265, 'subsample': 0.9207488710140077, 'colsample_bytree': 0.983255741473482, 'scale_pos_weight': 6.895799670211798, 'early_stopping_rounds': 68}. Best is trial 0 with value: 0.8543450414616057.
[I 2026-01-01 18:02:02,180] Trial 1 finished with value: 0.8576614883801561 and parameters: {'learning_rate': 0.017419098458624965, 'max_depth': 8, 'n_estimators': 2282, 'reg_lambda': 0.3026934470902619, 'reg_alpha': 0.1757052524413466, 'gamma': 2.5154158265390487, 'min_child_weight': 0.010997788128340768, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'scale_p

In [38]:
# Store the tuned model together with its best parameters, best score and
# Optuna results under the name 'xgb_opt_fv5'. The call is the last
# expression so its result is shown as the cell output.
model_tuner.save_model_and_metric(
    filename='xgb_opt_fv5',
    model=xgb_opt_fv5_model,
    params=xgb_opt_fv5_best_params,
    score=xgb_opt_fv5_best_score,
    dataframe_info=xgb_opt_fv5_optuna_results,
)

{'model': 'XGBClassifier', 'params': {'learning_rate': 0.03242469376722362, 'max_depth': 4, 'n_estimators': 1304, 'reg_lambda': 0.0605745136269652, 'reg_alpha': 0.004603575867916167, 'gamma': 3.118719664867648, 'min_child_weight': 2.722766837769076, 'subsample': 0.6131356241657105, 'colsample_bytree': 0.6704585423214244, 'scale_pos_weight': 1.4172857771493428, 'early_stopping_rounds': 94}, 'score': 0.8679522493282352, 'timestamp': '2026-01-01T18:09:53.744997'}


In [39]:
# Reload the model by the same name used when saving it, rebinding the
# variable to the loaded copy; the bare expression displays the estimator.
xgb_opt_fv5_model = utils.load_model('xgb_opt_fv5')
xgb_opt_fv5_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6704585423214244
,device,
,early_stopping_rounds,
,enable_categorical,False


In [40]:
# Evaluate the reloaded XGBoost model on the v5 test split with the five
# sklearn metrics listed below. The last argument labels this evaluation
# (presumably the name under which the metrics are stored - confirm in
# model_tuner).
model_tuner.calculate_metrics(
    xgb_opt_fv5_model,
    test_X_v5,
    test_y_v5,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'xgb_opt_fv5_test',
)

{'roc_auc_score': 0.8517469703910382, 'accuracy_score': 0.8565, 'precision_score': 0.6818181818181818, 'recall_score': 0.5528255528255528, 'f1_score': 0.6105834464043419}


## D - Feature sets: v6 e v7 (Numéricas com produtos entre as 3 variáveis mais importantes e Categóricas com one-hot encoding (exceto Catboost))

### XGBoost - Optuna - feature set: v6_one-hot_encoding_plus_top3_poly

In [10]:
# Optuna search for XGBClassifier on the v6 training split, scored by ROC AUC.
# The four returned values are unpacked, in order, into the model, the best
# parameters, the best score and the Optuna results used by the save cell.
(
    xgb_opt_fv6_model,
    xgb_opt_fv6_best_params,
    xgb_opt_fv6_best_score,
    xgb_opt_fv6_optuna_results,
) = model_tuner.tune_optuna(
    model_class=XGBClassifier,
    X=train_X_v6,
    y=train_y_v6,
    param_space_func=xgboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=21600,  # 6-hour limit, in seconds
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-04 11:48:36,960] A new study created in memory with name: no-name-7842d446-dfd3-4774-887a-536e4343be77
[I 2026-01-04 11:48:44,310] Trial 0 finished with value: 0.8590681010488197 and parameters: {'learning_rate': 0.0029813770307052724, 'max_depth': 8, 'n_estimators': 1594, 'reg_lambda': 13.849507661133075, 'reg_alpha': 1.3179630432958698, 'gamma': 1.3629630264132082, 'min_child_weight': 0.06751383304677265, 'subsample': 0.9207488710140077, 'colsample_bytree': 0.983255741473482, 'scale_pos_weight': 6.895799670211798, 'early_stopping_rounds': 68}. Best is trial 0 with value: 0.8590681010488197.
[I 2026-01-04 11:48:46,607] Trial 1 finished with value: 0.8606904489025435 and parameters: {'learning_rate': 0.017419098458624965, 'max_depth': 8, 'n_estimators': 2282, 'reg_lambda': 0.3026934470902619, 'reg_alpha': 0.1757052524413466, 'gamma': 2.5154158265390487, 'min_child_weight': 0.010997788128340768, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'scale_p

In [11]:
# Store the tuned model together with its best parameters, best score and
# Optuna results under the name 'xgb_opt_fv6'. The call is the last
# expression so its result is shown as the cell output.
model_tuner.save_model_and_metric(
    filename='xgb_opt_fv6',
    model=xgb_opt_fv6_model,
    params=xgb_opt_fv6_best_params,
    score=xgb_opt_fv6_best_score,
    dataframe_info=xgb_opt_fv6_optuna_results,
)

{'model': 'XGBClassifier', 'params': {'learning_rate': 0.10407588529262211, 'max_depth': 4, 'n_estimators': 1501, 'reg_lambda': 0.4395315335350399, 'reg_alpha': 0.015085931306582402, 'gamma': 1.346001473085891, 'min_child_weight': 0.010026731753896958, 'subsample': 0.6235858953012439, 'colsample_bytree': 0.727021007031989, 'scale_pos_weight': 2.2363586997580804, 'early_stopping_rounds': 85}, 'score': 0.8695170517475512, 'timestamp': '2026-01-04T11:56:47.152161'}


In [12]:
# Reload the model by the same name used when saving it, rebinding the
# variable to the loaded copy; the bare expression displays the estimator.
xgb_opt_fv6_model = utils.load_model('xgb_opt_fv6')
xgb_opt_fv6_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.727021007031989
,device,
,early_stopping_rounds,
,enable_categorical,False


In [13]:
# Evaluate the reloaded XGBoost model on the v6 test split with the five
# sklearn metrics listed below. The last argument labels this evaluation
# (presumably the name under which the metrics are stored - confirm in
# model_tuner).
model_tuner.calculate_metrics(
    xgb_opt_fv6_model,
    test_X_v6,
    test_y_v6,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'xgb_opt_fv6_test',
)

{'roc_auc_score': 0.8242310106716886, 'accuracy_score': 0.839, 'precision_score': 0.609254498714653, 'recall_score': 0.5823095823095823, 'f1_score': 0.5954773869346733}


### LightGBM - Optuna - feature set: v6_one-hot_encoding_plus_top3_poly

In [22]:
# Optuna search for LGBMClassifier on the v6 training split, scored by ROC AUC.
# The four returned values are unpacked, in order, into the model, the best
# parameters, the best score and the Optuna results used by the save cell.
(
    lgb_opt_fv6_model,
    lgb_opt_fv6_best_params,
    lgb_opt_fv6_best_score,
    lgb_opt_fv6_optuna_results,
) = model_tuner.tune_optuna(
    model_class=LGBMClassifier,
    X=train_X_v6,
    y=train_y_v6,
    param_space_func=lightgbm_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=21600,  # 6-hour limit, in seconds
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-04 12:02:46,357] A new study created in memory with name: no-name-363cd714-b7fa-4dc9-8ac9-49ed87eecd9f
[I 2026-01-04 12:03:02,180] Trial 0 finished with value: 0.8638614671918792 and parameters: {'learning_rate': 0.0029813770307052724, 'num_leaves': 165, 'max_depth': 7, 'n_estimators': 2464, 'min_child_samples': 158, 'min_child_weight': 0.012313185468743897, 'subsample': 0.7105857020572387, 'colsample_bytree': 0.9207488710140077, 'reg_lambda': 68.00759466734245, 'reg_alpha': 3.189558257433924, 'scale_pos_weight': 1.4604981309148681, 'early_stopping_rounds': 75}. Best is trial 0 with value: 0.8638614671918792.
[I 2026-01-04 12:03:03,444] Trial 1 finished with value: 0.8644260866215292 and parameters: {'learning_rate': 0.04931971559288606, 'num_leaves': 187, 'max_depth': 6, 'n_estimators': 1903, 'min_child_samples': 106, 'min_child_weight': 0.0011352037010541887, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'reg_lambda': 0.288100449320714, 'reg_alph

In [23]:
# Store the tuned model together with its best parameters, best score and
# Optuna results under the name 'lgb_opt_fv6'. The call is the last
# expression so its result is shown as the cell output.
model_tuner.save_model_and_metric(
    filename='lgb_opt_fv6',
    model=lgb_opt_fv6_model,
    params=lgb_opt_fv6_best_params,
    score=lgb_opt_fv6_best_score,
    dataframe_info=lgb_opt_fv6_optuna_results,
)

{'model': 'LGBMClassifier', 'params': {'learning_rate': 0.20391446156741475, 'num_leaves': 215, 'max_depth': 4, 'n_estimators': 2377, 'min_child_samples': 81, 'min_child_weight': 0.001450448624740765, 'subsample': 0.6744446871022076, 'colsample_bytree': 0.6039912129877285, 'reg_lambda': 2.6312000215361495, 'reg_alpha': 0.3919174548497089, 'scale_pos_weight': 1.2161961453539707, 'early_stopping_rounds': 99}, 'score': 0.8694977896774567, 'timestamp': '2026-01-04T12:09:30.941828'}


In [24]:
# Reload the model by the same name used when saving it, rebinding the
# variable to the loaded copy; the bare expression displays the estimator.
lgb_opt_fv6_model = utils.load_model('lgb_opt_fv6')
lgb_opt_fv6_model

0,1,2
,boosting_type,'gbdt'
,num_leaves,215
,max_depth,4
,learning_rate,0.20391446156741475
,n_estimators,2377
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001450448624740765


In [25]:
# Evaluate the reloaded LightGBM model on the v6 test split with the five
# sklearn metrics listed below. The last argument labels this evaluation
# (presumably the name under which the metrics are stored - confirm in
# model_tuner).
model_tuner.calculate_metrics(
    lgb_opt_fv6_model,
    test_X_v6,
    test_y_v6,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'lgb_opt_fv6_test',
)

{'roc_auc_score': 0.8107491158338616, 'accuracy_score': 0.838, 'precision_score': 0.6209912536443148, 'recall_score': 0.5233415233415234, 'f1_score': 0.568}


### Random Forest - Optuna - feature set: v6_one-hot_encoding_plus_top3_poly

In [26]:
# Optuna search for RandomForestClassifier on the v6 training split, scored
# by ROC AUC. The four returned values are unpacked, in order, into the
# model, the best parameters, the best score and the Optuna results used by
# the save cell.
(
    rf_opt_fv6_model,
    rf_opt_fv6_best_params,
    rf_opt_fv6_best_score,
    rf_opt_fv6_optuna_results,
) = model_tuner.tune_optuna(
    model_class=RandomForestClassifier,
    X=train_X_v6,
    y=train_y_v6,
    param_space_func=random_forest_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=21600,  # 6-hour limit, in seconds
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-04 12:14:46,968] A new study created in memory with name: no-name-bf0ab23e-8852-49f8-a508-69bae0dfcfeb
[I 2026-01-04 12:14:59,232] Trial 0 finished with value: 0.8584021149752964 and parameters: {'n_estimators': 530, 'max_depth': 11, 'min_samples_split': 23, 'min_samples_leaf': 40, 'max_features': 0.8679854848712821, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.8584021149752964.
[I 2026-01-04 12:15:24,969] Trial 1 finished with value: 0.8609938265065346 and parameters: {'n_estimators': 1263, 'max_depth': 15, 'min_samples_split': 44, 'min_samples_leaf': 18, 'max_features': 0.7005970753140752, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.8609938265065346.
[I 2026-01-04 12:15:44,093] Trial 2 finished with value: 0.8597374579846095 and parameters: {'n_estimators': 744, 'max_depth': 10, 'min_samples_split': 26, 'min_samples_leaf': 1, 'max_features': 0.8636959729674244, 'class_weight': None}. Best is trial 1 with value: 0.8609938265065346.
[I 2026-01-04

In [27]:
# Store the tuned model together with its best parameters, best score and
# Optuna results under the name 'rf_opt_fv6'. The call is the last
# expression so its result is shown as the cell output.
model_tuner.save_model_and_metric(
    filename='rf_opt_fv6',
    model=rf_opt_fv6_model,
    params=rf_opt_fv6_best_params,
    score=rf_opt_fv6_best_score,
    dataframe_info=rf_opt_fv6_optuna_results,
)

{'model': 'RandomForestClassifier', 'params': {'n_estimators': 1062, 'max_depth': 8, 'min_samples_split': 42, 'min_samples_leaf': 1, 'max_features': 0.4354679082516301, 'class_weight': None}, 'score': 0.8649329198408953, 'timestamp': '2026-01-04T12:41:45.716180'}


In [28]:
# Reload the model by the same name used when saving it, rebinding the
# variable to the loaded copy; the bare expression displays the estimator.
rf_opt_fv6_model = utils.load_model('rf_opt_fv6')
rf_opt_fv6_model

0,1,2
,n_estimators,1062
,criterion,'gini'
,max_depth,8
,min_samples_split,42
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,0.4354679082516301
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [29]:
# Evaluate the reloaded Random Forest model on the v6 test split with the
# five sklearn metrics listed below. The last argument labels this
# evaluation (presumably the name under which the metrics are stored -
# confirm in model_tuner).
model_tuner.calculate_metrics(
    rf_opt_fv6_model,
    test_X_v6,
    test_y_v6,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'rf_opt_fv6_test',
)

{'roc_auc_score': 0.8488889505838659, 'accuracy_score': 0.867, 'precision_score': 0.7722007722007722, 'recall_score': 0.4914004914004914, 'f1_score': 0.6006006006006006}


### NGBoost - Optuna - feature set: v6_one-hot_encoding_plus_top3_poly

In [30]:
# Optuna search for NGBClassifier on the v6 training split, scored by ROC AUC.
# The four returned values are unpacked, in order, into the model, the best
# parameters, the best score and the Optuna results used by the save cell.
(
    ngb_opt_fv6_model,
    ngb_opt_fv6_best_params,
    ngb_opt_fv6_best_score,
    ngb_opt_fv6_optuna_results,
) = model_tuner.tune_optuna(
    model_class=NGBClassifier,
    X=train_X_v6,
    y=train_y_v6,
    param_space_func=ngboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=21600,  # 6-hour limit, in seconds
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-04 12:45:08,843] A new study created in memory with name: no-name-0ebcc9ba-ef30-4b20-9597-221af52cbbdc
[I 2026-01-04 12:46:17,731] Trial 0 finished with value: 0.8508889445348691 and parameters: {'n_estimators': 1136, 'learning_rate': 0.0291712316039178, 'minibatch_frac': 0.5211007369688627, 'base_max_depth': 8, 'base_min_samples_leaf': 46, 'base_min_samples_split': 5, 'base_max_features': 0.4595896622865756}. Best is trial 0 with value: 0.8508889445348691.
[I 2026-01-04 12:47:43,670] Trial 1 finished with value: 0.8529634694840654 and parameters: {'n_estimators': 747, 'learning_rate': 0.024894520532258, 'minibatch_frac': 0.7251285591944875, 'base_max_depth': 10, 'base_min_samples_leaf': 22, 'base_min_samples_split': 39, 'base_max_features': 0.7767374353691181}. Best is trial 1 with value: 0.8529634694840654.
[I 2026-01-04 12:49:06,820] Trial 2 finished with value: 0.860553206653119 and parameters: {'n_estimators': 910, 'learning_rate': 0.0010201046406077088, 'minibatch_frac

[iter 0] loss=0.5088 val_loss=0.0000 scale=2.0000 norm=4.0173
[iter 100] loss=0.4337 val_loss=0.0000 scale=2.0000 norm=3.5619
[iter 200] loss=0.4010 val_loss=0.0000 scale=2.0000 norm=3.4455
[iter 300] loss=0.3740 val_loss=0.0000 scale=2.0000 norm=3.3790
[iter 400] loss=0.3554 val_loss=0.0000 scale=2.0000 norm=3.3458
[iter 500] loss=0.3453 val_loss=0.0000 scale=2.0000 norm=3.3440
[iter 600] loss=0.3318 val_loss=0.0000 scale=2.0000 norm=3.3694
[iter 700] loss=0.3290 val_loss=0.0000 scale=2.0000 norm=3.4049
[iter 800] loss=0.3118 val_loss=0.0000 scale=2.0000 norm=3.3081
[iter 900] loss=0.3244 val_loss=0.0000 scale=1.0000 norm=1.7183
[iter 1000] loss=0.3134 val_loss=0.0000 scale=2.0000 norm=3.4222
[iter 1100] loss=0.3077 val_loss=0.0000 scale=1.0000 norm=1.6719
[iter 1200] loss=0.3164 val_loss=0.0000 scale=1.0000 norm=1.7414


In [31]:
# Store the tuned model together with its best parameters, best score and
# Optuna results under the name 'ngb_opt_fv6'. The call is the last
# expression so its result is shown as the cell output.
model_tuner.save_model_and_metric(
    filename='ngb_opt_fv6',
    model=ngb_opt_fv6_model,
    params=ngb_opt_fv6_best_params,
    score=ngb_opt_fv6_best_score,
    dataframe_info=ngb_opt_fv6_optuna_results,
)


{'model': 'NGBClassifier', 'params': {'n_estimators': 1205, 'learning_rate': 0.001490972463180535, 'minibatch_frac': 0.5925073195879553, 'base_max_depth': 5, 'base_min_samples_leaf': 7, 'base_min_samples_split': 33, 'base_max_features': 0.491013723565043}, 'score': 0.866283190954532, 'timestamp': '2026-01-04T15:19:49.147909'}


In [32]:
# Reload the model by the same name used when saving it, rebinding the
# variable to the loaded copy; the bare expression displays the estimator.
ngb_opt_fv6_model = utils.load_model('ngb_opt_fv6')
ngb_opt_fv6_model

0,1,2
,Dist,<class 'ngboo....Categorical'>
,Score,<class 'ngboo...res.LogScore'>
,Base,DecisionTreeR...om_state=1234)
,natural_gradient,True
,n_estimators,1205
,learning_rate,0.001490972463180535
,minibatch_frac,0.5925073195879553
,col_sample,1.0
,verbose,True
,random_state,RandomState(M... 0x1B7CEF9CF40

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,5
,min_samples_split,33
,min_samples_leaf,7
,min_weight_fraction_leaf,0.0
,max_features,0.491013723565043
,random_state,1234
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [33]:
# Evaluate the reloaded NGBoost model on the v6 test split with the five
# sklearn metrics listed below. The last argument labels this evaluation
# (presumably the name under which the metrics are stored - confirm in
# model_tuner).
model_tuner.calculate_metrics(
    ngb_opt_fv6_model,
    test_X_v6,
    test_y_v6,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'ngb_opt_fv6_test',
)

{'roc_auc_score': 0.8564650937532293, 'accuracy_score': 0.8625, 'precision_score': 0.7773109243697479, 'recall_score': 0.45454545454545453, 'f1_score': 0.5736434108527132}


### Catboost - Optuna - feature set: v7_plus_top3_poly

# Optuna search for CatBoostClassifier on the v7 training split, scored by
# ROC AUC. The four returned values are unpacked, in order, into the model,
# the best parameters, the best score and the Optuna results used by the
# save cell.
# NOTE(review): this cell shows no execution count or captured output in the
# saved notebook, yet the following cells use its results - confirm it runs
# cleanly from a fresh kernel.
# NOTE(review): unlike the v8 CatBoost runs, no categorical_features is
# passed here - confirm tune_optuna handles the v7 categorical columns itself.
(
    cat_opt_fv7_model,
    cat_opt_fv7_best_params,
    cat_opt_fv7_best_score,
    cat_opt_fv7_optuna_results,
) = model_tuner.tune_optuna(
    model_class=CatBoostClassifier,
    X=train_X_v7,
    y=train_y_v7,
    param_space_func=catboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=21600,  # 6-hour limit, in seconds
)

In [38]:
# Store the tuned model together with its best parameters, best score and
# Optuna results under the name 'cat_opt_fv7'. The call is the last
# expression so its result is shown as the cell output.
model_tuner.save_model_and_metric(
    filename='cat_opt_fv7',
    model=cat_opt_fv7_model,
    params=cat_opt_fv7_best_params,
    score=cat_opt_fv7_best_score,
    dataframe_info=cat_opt_fv7_optuna_results,
)


{'model': 'CatBoostClassifier', 'params': {'learning_rate': 0.05466163029314155, 'depth': 4, 'iterations': 2022, 'l2_leaf_reg': 0.1178622759058269, 'random_strength': 1.3486102044211736, 'bagging_temperature': 3.411004040359085, 'auto_class_weights': 'None', 'early_stopping_rounds': 94}, 'score': 0.874234573489613, 'timestamp': '2026-01-04T17:26:14.473772'}


In [39]:
# Reload the model by the same name used when saving it, rebinding the
# variable to the loaded copy; the bare expression displays the estimator.
cat_opt_fv7_model = utils.load_model('cat_opt_fv7')
cat_opt_fv7_model

<catboost.core.CatBoostClassifier at 0x1b7e26fdca0>

In [40]:
# Evaluate the reloaded CatBoost model on the v7 test split with the five
# sklearn metrics listed below. The last argument labels this evaluation
# (presumably the name under which the metrics are stored - confirm in
# model_tuner).
model_tuner.calculate_metrics(
    cat_opt_fv7_model,
    test_X_v7,
    test_y_v7,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'cat_opt_fv7_test',
)

{'roc_auc_score': 0.849694070033053, 'accuracy_score': 0.8625, 'precision_score': 0.7291666666666666, 'recall_score': 0.515970515970516, 'f1_score': 0.60431654676259}


## E - Feature sets: v8 e v9 (Numéricas transformadas em categóricas e com one-hot encoding (exceto Catboost))

### Catboost - Optuna - feature set: v8_numerical_to_categorical

In [54]:
# Collect every column label of the v8 feature matrix; the list is later
# passed to the CatBoost tuning call as categorical_features. The bare
# expression displays it.
new_categorical = [*train_X_v8.columns]
new_categorical

['Geography',
 'Gender',
 'HasCrCard',
 'IsActiveMember',
 'CreditScore_cat',
 'Age_cat',
 'Tenure_cat',
 'Balance_cat',
 'NumOfProducts_cat',
 'EstimatedSalary_cat']

In [62]:
# Optuna search for CatBoostClassifier on the v8 training split, scored by
# ROC AUC, with the columns in new_categorical declared as categorical.
# The four returned values are unpacked, in order, into the model, the best
# parameters, the best score and the Optuna results used by the save cell.
(
    cat_opt_fv8_model,
    cat_opt_fv8_best_params,
    cat_opt_fv8_best_score,
    cat_opt_fv8_optuna_results,
) = model_tuner.tune_optuna(
    model_class=CatBoostClassifier,
    X=train_X_v8,
    y=train_y_v8,
    param_space_func=catboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=21600,  # 6-hour limit, in seconds
    categorical_features=new_categorical,
)

Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-04 19:00:57,139] A new study created in memory with name: no-name-877f9a33-8fac-4774-8299-12a112864ba5
[I 2026-01-04 19:05:11,791] Trial 0 finished with value: 0.8512549238666679 and parameters: {'learning_rate': 0.0029813770307052724, 'depth': 8, 'iterations': 1594, 'l2_leaf_reg': 13.849507661133075, 'random_strength': 3.8998790405940174, 'bagging_temperature': 1.3629630264132082, 'auto_class_weights': 'Balanced', 'early_stopping_rounds': 98}. Best is trial 0 with value: 0.8512549238666679.
[I 2026-01-04 19:05:38,924] Trial 1 finished with value: 0.8626226753089157 and parameters: {'learning_rate': 0.14783979348659035, 'depth': 6, 'iterations': 1752, 'l2_leaf_reg': 5.4181589388967515, 'random_strength': 3.563510134914501, 'bagging_temperature': 1.8512537739519748, 'auto_class_weights': 'None', 'early_stopping_rounds': 50}. Best is trial 1 with value: 0.8626226753089157.
[I 2026-01-04 19:05:55,097] Trial 2 pruned. 
[I 2026-01-04 19:06:53,999] Trial 3 finished with value: 0.8

0:	learn: 0.6513040	total: 33.6ms	remaining: 1m 14s
1:	learn: 0.6164313	total: 75.3ms	remaining: 1m 23s
2:	learn: 0.5872899	total: 104ms	remaining: 1m 16s
3:	learn: 0.5612405	total: 142ms	remaining: 1m 18s
4:	learn: 0.5317034	total: 181ms	remaining: 1m 20s
5:	learn: 0.5062410	total: 220ms	remaining: 1m 21s
6:	learn: 0.4851795	total: 261ms	remaining: 1m 22s
7:	learn: 0.4679550	total: 300ms	remaining: 1m 23s
8:	learn: 0.4533892	total: 339ms	remaining: 1m 23s
9:	learn: 0.4387164	total: 379ms	remaining: 1m 23s
10:	learn: 0.4278282	total: 417ms	remaining: 1m 23s
11:	learn: 0.4175045	total: 454ms	remaining: 1m 23s
12:	learn: 0.4102066	total: 487ms	remaining: 1m 22s
13:	learn: 0.4023297	total: 526ms	remaining: 1m 22s
14:	learn: 0.3949901	total: 568ms	remaining: 1m 23s
15:	learn: 0.3898616	total: 610ms	remaining: 1m 24s
16:	learn: 0.3844115	total: 651ms	remaining: 1m 24s
17:	learn: 0.3795492	total: 688ms	remaining: 1m 24s
18:	learn: 0.3751089	total: 730ms	remaining: 1m 24s
19:	learn: 0.3709846

In [63]:
# Store the tuned model together with its best parameters, best score and
# Optuna results under the name 'cat_opt_fv8'. The call is the last
# expression so its result is shown as the cell output.
model_tuner.save_model_and_metric(
    filename='cat_opt_fv8',
    model=cat_opt_fv8_model,
    params=cat_opt_fv8_best_params,
    score=cat_opt_fv8_best_score,
    dataframe_info=cat_opt_fv8_optuna_results,
)


{'model': 'CatBoostClassifier', 'params': {'learning_rate': 0.06444404134163935, 'depth': 6, 'iterations': 2222, 'l2_leaf_reg': 0.13386818501263967, 'random_strength': 0.445591075749733, 'bagging_temperature': 3.830315947131502, 'auto_class_weights': 'None', 'early_stopping_rounds': 61}, 'score': 0.8650641426934153, 'timestamp': '2026-01-04T20:48:58.034475'}


In [64]:
# Reload the model by the same name used when saving it, rebinding the
# variable to the loaded copy; the bare expression displays the estimator.
cat_opt_fv8_model = utils.load_model('cat_opt_fv8')
cat_opt_fv8_model

<catboost.core.CatBoostClassifier at 0x1b7e2877e00>

In [65]:
# Evaluate the reloaded CatBoost model on the v8 test split with the five
# sklearn metrics listed below. The last argument labels this evaluation
# (presumably the name under which the metrics are stored - confirm in
# model_tuner).
model_tuner.calculate_metrics(
    cat_opt_fv8_model,
    test_X_v8,
    test_y_v8,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'cat_opt_fv8_test',
)

{'roc_auc_score': 0.8291928291928292, 'accuracy_score': 0.851, 'precision_score': 0.6847457627118644, 'recall_score': 0.4963144963144963, 'f1_score': 0.5754985754985755}


### Catboost - Optuna - feature set: v8_numerical_to_categorical (300 trials)

In [44]:
# Rebuild the list of every column label of the v8 feature matrix for the
# 300-trial CatBoost run, which passes it as categorical_features. The bare
# expression displays it.
new_categorical = [*train_X_v8.columns]
new_categorical

['Geography',
 'Gender',
 'HasCrCard',
 'IsActiveMember',
 'CreditScore_cat',
 'Age_cat',
 'Tenure_cat',
 'Balance_cat',
 'NumOfProducts_cat',
 'EstimatedSalary_cat']

In [46]:
# Repeat the CatBoost search on the v8 training split with the trial budget
# raised to 300 (same 6-hour timeout), scored by ROC AUC, with the columns in
# new_categorical declared as categorical. The four returned values are
# unpacked, in order, into the model, the best parameters, the best score and
# the Optuna results used by the save cell.
(
    cat_opt_fv8_300trials_model,
    cat_opt_fv8_300trials_best_params,
    cat_opt_fv8_300trials_best_score,
    cat_opt_fv8_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=CatBoostClassifier,
    X=train_X_v8,
    y=train_y_v8,
    param_space_func=catboost_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=21600,  # 6-hour limit, in seconds
    categorical_features=new_categorical,
)

Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-03-01 12:01:17,825] A new study created in memory with name: no-name-06c3f7a8-f303-4ec9-b2b9-f3d571dd1608
[I 2026-03-01 12:04:22,990] Trial 0 finished with value: 0.8512549238666679 and parameters: {'learning_rate': 0.0029813770307052724, 'depth': 8, 'iterations': 1594, 'l2_leaf_reg': 13.849507661133075, 'random_strength': 3.8998790405940174, 'bagging_temperature': 1.3629630264132082, 'auto_class_weights': 'Balanced', 'early_stopping_rounds': 98}. Best is trial 0 with value: 0.8512549238666679.
[I 2026-03-01 12:04:43,682] Trial 1 finished with value: 0.8626226753089157 and parameters: {'learning_rate': 0.14783979348659035, 'depth': 6, 'iterations': 1752, 'l2_leaf_reg': 5.4181589388967515, 'random_strength': 3.563510134914501, 'bagging_temperature': 1.8512537739519748, 'auto_class_weights': 'None', 'early_stopping_rounds': 50}. Best is trial 1 with value: 0.8626226753089157.
[I 2026-03-01 12:05:10,425] Trial 2 finished with value: 0.8581728963411699 and parameters: {'learning_ra

0:	learn: 0.6013197	total: 22.2ms	remaining: 18.4s
1:	learn: 0.5269534	total: 46.7ms	remaining: 19.3s
2:	learn: 0.4735150	total: 69ms	remaining: 19s
3:	learn: 0.4394586	total: 93.6ms	remaining: 19.3s
4:	learn: 0.4163151	total: 117ms	remaining: 19.3s
5:	learn: 0.4069368	total: 130ms	remaining: 17.8s
6:	learn: 0.3921366	total: 154ms	remaining: 18s
7:	learn: 0.3832908	total: 178ms	remaining: 18.2s
8:	learn: 0.3716210	total: 202ms	remaining: 18.3s
9:	learn: 0.3670632	total: 226ms	remaining: 18.5s
10:	learn: 0.3607068	total: 250ms	remaining: 18.6s
11:	learn: 0.3544764	total: 274ms	remaining: 18.6s
12:	learn: 0.3514244	total: 297ms	remaining: 18.6s
13:	learn: 0.3485049	total: 320ms	remaining: 18.6s
14:	learn: 0.3469384	total: 345ms	remaining: 18.7s
15:	learn: 0.3456803	total: 369ms	remaining: 18.7s
16:	learn: 0.3450224	total: 387ms	remaining: 18.5s
17:	learn: 0.3430751	total: 411ms	remaining: 18.5s
18:	learn: 0.3418350	total: 435ms	remaining: 18.5s
19:	learn: 0.3414632	total: 458ms	remaining

In [47]:
# Store the tuned model together with its best parameters, best score and
# Optuna results under the name 'cat_opt_fv8_300trials'. The call is the
# last expression so its result is shown as the cell output.
model_tuner.save_model_and_metric(
    filename='cat_opt_fv8_300trials',
    model=cat_opt_fv8_300trials_model,
    params=cat_opt_fv8_300trials_best_params,
    score=cat_opt_fv8_300trials_best_score,
    dataframe_info=cat_opt_fv8_300trials_optuna_results,
)


{'model': 'CatBoostClassifier', 'params': {'learning_rate': 0.15149287621823193, 'depth': 5, 'iterations': 828, 'l2_leaf_reg': 0.4215002588090173, 'random_strength': 0.7082379152612603, 'bagging_temperature': 2.7798164803398815, 'auto_class_weights': 'None', 'early_stopping_rounds': 96}, 'score': 0.8661871213799348, 'timestamp': '2026-03-01T13:47:59.478935'}


In [48]:
# Reload the model by the same name used when saving it, rebinding the
# variable to the loaded copy; the bare expression displays the estimator.
cat_opt_fv8_300trials_model = utils.load_model('cat_opt_fv8_300trials')
cat_opt_fv8_300trials_model

<catboost.core.CatBoostClassifier at 0x184c8670d10>

In [49]:
# Evaluate the reloaded 300-trial CatBoost model on the v8 test split with
# the five sklearn metrics listed below. The last argument labels this
# evaluation (presumably the name under which the metrics are stored -
# confirm in model_tuner).
model_tuner.calculate_metrics(
    cat_opt_fv8_300trials_model,
    test_X_v8,
    test_y_v8,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'cat_opt_fv8_300trials_test',
)

{'roc_auc_score': 0.8464412023734058, 'accuracy_score': 0.8575, 'precision_score': 0.6930379746835443, 'recall_score': 0.538083538083538, 'f1_score': 0.6058091286307054}


### XGBoost - Optuna - feature set: v9_numerical_to_categorical_plus_one_hot-encoding

In [73]:
# Optuna search for XGBClassifier on the v9 training split, scored by ROC AUC.
# The four returned values are unpacked, in order, into the model, the best
# parameters, the best score and the Optuna results used by the save cell.
(
    xgb_opt_fv9_model,
    xgb_opt_fv9_best_params,
    xgb_opt_fv9_best_score,
    xgb_opt_fv9_optuna_results,
) = model_tuner.tune_optuna(
    model_class=XGBClassifier,
    X=train_X_v9,
    y=train_y_v9,
    param_space_func=xgboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=21600,  # 6-hour limit, in seconds
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-04 21:11:00,044] A new study created in memory with name: no-name-62cf018b-c7f2-4b77-bcc2-b8bab9a6159a
[I 2026-01-04 21:11:05,739] Trial 0 finished with value: 0.8513664030973409 and parameters: {'learning_rate': 0.0029813770307052724, 'max_depth': 8, 'n_estimators': 1594, 'reg_lambda': 13.849507661133075, 'reg_alpha': 1.3179630432958698, 'gamma': 1.3629630264132082, 'min_child_weight': 0.06751383304677265, 'subsample': 0.9207488710140077, 'colsample_bytree': 0.983255741473482, 'scale_pos_weight': 6.895799670211798, 'early_stopping_rounds': 68}. Best is trial 0 with value: 0.8513664030973409.
[I 2026-01-04 21:11:09,110] Trial 1 finished with value: 0.8559144186225695 and parameters: {'learning_rate': 0.017419098458624965, 'max_depth': 8, 'n_estimators': 2282, 'reg_lambda': 0.3026934470902619, 'reg_alpha': 0.1757052524413466, 'gamma': 2.5154158265390487, 'min_child_weight': 0.010997788128340768, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'scale_p

In [75]:
# Store the tuned model together with its best parameters, best score and
# Optuna results under the name 'xgb_opt_fv9'. The call is the last
# expression so its result is shown as the cell output.
model_tuner.save_model_and_metric(
    filename='xgb_opt_fv9',
    model=xgb_opt_fv9_model,
    params=xgb_opt_fv9_best_params,
    score=xgb_opt_fv9_best_score,
    dataframe_info=xgb_opt_fv9_optuna_results,
)

{'model': 'XGBClassifier', 'params': {'learning_rate': 0.13032826559926483, 'max_depth': 4, 'n_estimators': 2730, 'reg_lambda': 0.15244568393342947, 'reg_alpha': 2.2956547450664595, 'gamma': 0.3002081987305092, 'min_child_weight': 0.8837286461712252, 'subsample': 0.746294373223446, 'colsample_bytree': 0.6962593591810898, 'scale_pos_weight': 0.9419687083253394, 'early_stopping_rounds': 76}, 'score': 0.8617214512043609, 'timestamp': '2026-01-04T21:16:05.475504'}


In [76]:
# Reload the model by the same name used when saving it, rebinding the
# variable to the loaded copy; the bare expression displays the estimator.
xgb_opt_fv9_model = utils.load_model('xgb_opt_fv9')
xgb_opt_fv9_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6962593591810898
,device,
,early_stopping_rounds,
,enable_categorical,False


In [77]:
# Evaluate the reloaded XGBoost model on the v9 test split with the five
# sklearn metrics listed below. The last argument labels this evaluation
# (presumably the name under which the metrics are stored - confirm in
# model_tuner).
model_tuner.calculate_metrics(
    xgb_opt_fv9_model,
    test_X_v9,
    test_y_v9,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'xgb_opt_fv9_test',
)

{'roc_auc_score': 0.8290416764993035, 'accuracy_score': 0.8555, 'precision_score': 0.7062937062937062, 'recall_score': 0.4963144963144963, 'f1_score': 0.5829725829725829}


### XGBoost - Optuna - feature set: v9_numerical_to_categorical_plus_one_hot-encoding (300 trials)

In [50]:
# Repeat the XGBoost search on the v9 training split with the trial budget
# raised to 300 (same 6-hour timeout), scored by ROC AUC. The four returned
# values are unpacked, in order, into the model, the best parameters, the
# best score and the Optuna results used by the save cell.
(
    xgb_opt_fv9_300trials_model,
    xgb_opt_fv9_300trials_best_params,
    xgb_opt_fv9_300trials_best_score,
    xgb_opt_fv9_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=XGBClassifier,
    X=train_X_v9,
    y=train_y_v9,
    param_space_func=xgboost_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=21600,  # 6-hour limit, in seconds
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-03-01 13:52:28,604] A new study created in memory with name: no-name-0dfd2eb7-0169-4e4c-8a22-08ed6b4ae972
[I 2026-03-01 13:52:33,189] Trial 0 finished with value: 0.8513664030973409 and parameters: {'learning_rate': 0.0029813770307052724, 'max_depth': 8, 'n_estimators': 1594, 'reg_lambda': 13.849507661133075, 'reg_alpha': 1.3179630432958698, 'gamma': 1.3629630264132082, 'min_child_weight': 0.06751383304677265, 'subsample': 0.9207488710140077, 'colsample_bytree': 0.983255741473482, 'scale_pos_weight': 6.895799670211798, 'early_stopping_rounds': 68}. Best is trial 0 with value: 0.8513664030973409.
[I 2026-03-01 13:52:35,984] Trial 1 finished with value: 0.8559144186225695 and parameters: {'learning_rate': 0.017419098458624965, 'max_depth': 8, 'n_estimators': 2282, 'reg_lambda': 0.3026934470902619, 'reg_alpha': 0.1757052524413466, 'gamma': 2.5154158265390487, 'min_child_weight': 0.010997788128340768, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'scale_p

In [51]:
# Store the tuned model together with its best parameters, best score and
# Optuna results under the name 'xgb_opt_fv9_300trials'. The call is the
# last expression so its result is shown as the cell output.
model_tuner.save_model_and_metric(
    filename='xgb_opt_fv9_300trials',
    model=xgb_opt_fv9_300trials_model,
    params=xgb_opt_fv9_300trials_best_params,
    score=xgb_opt_fv9_300trials_best_score,
    dataframe_info=xgb_opt_fv9_300trials_optuna_results,
)

{'model': 'XGBClassifier', 'params': {'learning_rate': 0.03575336921014208, 'max_depth': 5, 'n_estimators': 1659, 'reg_lambda': 0.04216025483841688, 'reg_alpha': 0.8629406960683204, 'gamma': 1.7322620675157616, 'min_child_weight': 0.47076313635213407, 'subsample': 0.8139284552607952, 'colsample_bytree': 0.7101615612166823, 'scale_pos_weight': 0.6030611264469572, 'early_stopping_rounds': 79}, 'score': 0.8619193689745839, 'timestamp': '2026-03-01T14:02:32.346676'}


In [52]:
# Reload the model by the same name used when saving it, rebinding the
# variable to the loaded copy; the bare expression displays the estimator.
xgb_opt_fv9_300trials_model = utils.load_model('xgb_opt_fv9_300trials')
xgb_opt_fv9_300trials_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.7101615612166823
,device,
,early_stopping_rounds,
,enable_categorical,False


In [53]:
# Evaluate the reloaded 300-trial XGBoost model on the v9 test split with
# the five sklearn metrics listed below. The last argument labels this
# evaluation (presumably the name under which the metrics are stored -
# confirm in model_tuner).
model_tuner.calculate_metrics(
    xgb_opt_fv9_300trials_model,
    test_X_v9,
    test_y_v9,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'xgb_opt_fv9_300trials_test',
)

{'roc_auc_score': 0.8566262718805092, 'accuracy_score': 0.856, 'precision_score': 0.8287292817679558, 'recall_score': 0.36855036855036855, 'f1_score': 0.5102040816326531}


### LightGBM - Optuna - feature set: v9_numerical_to_categorical_plus_one_hot-encoding

In [40]:
# Optuna search for LGBMClassifier on the v9 training split, scored by ROC AUC.
# The four returned values are unpacked, in order, into the model, the best
# parameters, the best score and the Optuna results used by the save cell.
(
    lgb_opt_fv9_model,
    lgb_opt_fv9_best_params,
    lgb_opt_fv9_best_score,
    lgb_opt_fv9_optuna_results,
) = model_tuner.tune_optuna(
    model_class=LGBMClassifier,
    X=train_X_v9,
    y=train_y_v9,
    param_space_func=lightgbm_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=21600,  # 6-hour limit, in seconds
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-13 16:03:21,288] A new study created in memory with name: no-name-0e5459b2-38f5-4896-bc7e-8b6565a647ff
[I 2026-01-13 16:03:33,441] Trial 0 finished with value: 0.840995945334245 and parameters: {'learning_rate': 0.0029813770307052724, 'num_leaves': 165, 'max_depth': 7, 'n_estimators': 2464, 'min_child_samples': 158, 'min_child_weight': 0.012313185468743897, 'subsample': 0.7105857020572387, 'colsample_bytree': 0.9207488710140077, 'reg_lambda': 68.00759466734245, 'reg_alpha': 3.189558257433924, 'scale_pos_weight': 1.4604981309148681, 'early_stopping_rounds': 75}. Best is trial 0 with value: 0.840995945334245.
[I 2026-01-13 16:03:35,874] Trial 1 finished with value: 0.8565221369340563 and parameters: {'learning_rate': 0.04931971559288606, 'num_leaves': 187, 'max_depth': 6, 'n_estimators': 1903, 'min_child_samples': 106, 'min_child_weight': 0.0011352037010541887, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'reg_lambda': 0.288100449320714, 'reg_alpha'

In [41]:
# Store the tuned model together with its best params, best score and Optuna results
# under the name 'lgb_opt_fv9'; the value returned by the call is the cell output.
model_tuner.save_model_and_metric(
    filename='lgb_opt_fv9',
    model=lgb_opt_fv9_model,
    params=lgb_opt_fv9_best_params,
    score=lgb_opt_fv9_best_score,
    dataframe_info=lgb_opt_fv9_optuna_results,
)

{'model': 'LGBMClassifier', 'params': {'learning_rate': 0.14942144034539045, 'num_leaves': 165, 'max_depth': 4, 'n_estimators': 991, 'min_child_samples': 22, 'min_child_weight': 4.523243995965258, 'subsample': 0.743178324975948, 'colsample_bytree': 0.6260661709056415, 'reg_lambda': 64.87520261884568, 'reg_alpha': 0.0017025602861625827, 'scale_pos_weight': 0.6372589997151129, 'early_stopping_rounds': 61}, 'score': 0.8614036270477989, 'timestamp': '2026-01-13T16:17:07.076455'}


In [42]:
# Load the model stored under the name 'lgb_opt_fv9' and show it as the cell output.
# NOTE(review): this rebinds lgb_opt_fv9_model, so later cells use the loaded object.
lgb_opt_fv9_model = utils.load_model('lgb_opt_fv9')
lgb_opt_fv9_model

0,1,2
,boosting_type,'gbdt'
,num_leaves,165
,max_depth,4
,learning_rate,0.14942144034539045
,n_estimators,991
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,4.523243995965258


In [43]:
# Score lgb_opt_fv9_model on the v9 test split with five sklearn metrics.
# The trailing string is a run label (presumably the name the results are recorded under -- confirm).
model_tuner.calculate_metrics(
    lgb_opt_fv9_model,
    test_X_v9,
    test_y_v9,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'lgb_opt_fv9_test',
)

{'roc_auc_score': 0.8460525240186256, 'accuracy_score': 0.8615, 'precision_score': 0.8095238095238095, 'recall_score': 0.4176904176904177, 'f1_score': 0.5510534846029174}


### LightGBM - Optuna - feature set: v9_numerical_to_categorical_plus_one_hot-encoding (300 trials)

In [54]:
# Optuna search for LGBMClassifier on the v9 training split, scored with 'roc_auc'.
# Same setup as the 150-trial LightGBM run on v9, with the trial budget doubled to 300.
(
    lgb_opt_fv9_300trials_model,
    lgb_opt_fv9_300trials_best_params,
    lgb_opt_fv9_300trials_best_score,
    lgb_opt_fv9_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=LGBMClassifier,
    X=train_X_v9,
    y=train_y_v9,
    param_space_func=lightgbm_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=6 * 60 * 60,  # 6-hour limit (21600, in seconds)
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-03-01 14:10:28,281] A new study created in memory with name: no-name-b043ef80-9f79-4ba0-9266-65809bb390c9
[I 2026-03-01 14:10:38,898] Trial 0 finished with value: 0.840995945334245 and parameters: {'learning_rate': 0.0029813770307052724, 'num_leaves': 165, 'max_depth': 7, 'n_estimators': 2464, 'min_child_samples': 158, 'min_child_weight': 0.012313185468743897, 'subsample': 0.7105857020572387, 'colsample_bytree': 0.9207488710140077, 'reg_lambda': 68.00759466734245, 'reg_alpha': 3.189558257433924, 'scale_pos_weight': 1.4604981309148681, 'early_stopping_rounds': 75}. Best is trial 0 with value: 0.840995945334245.
[I 2026-03-01 14:10:40,522] Trial 1 finished with value: 0.8565221369340563 and parameters: {'learning_rate': 0.04931971559288606, 'num_leaves': 187, 'max_depth': 6, 'n_estimators': 1903, 'min_child_samples': 106, 'min_child_weight': 0.0011352037010541887, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'reg_lambda': 0.288100449320714, 'reg_alpha'

In [55]:
# Store the tuned model together with its best params, best score and Optuna results
# under the name 'lgb_opt_fv9_300trials'; the value returned by the call is the cell output.
model_tuner.save_model_and_metric(
    filename='lgb_opt_fv9_300trials',
    model=lgb_opt_fv9_300trials_model,
    params=lgb_opt_fv9_300trials_best_params,
    score=lgb_opt_fv9_300trials_best_score,
    dataframe_info=lgb_opt_fv9_300trials_optuna_results,
)

{'model': 'LGBMClassifier', 'params': {'learning_rate': 0.2659207130794154, 'num_leaves': 33, 'max_depth': 4, 'n_estimators': 2039, 'min_child_samples': 10, 'min_child_weight': 0.00631336525229831, 'subsample': 0.6885208864731935, 'colsample_bytree': 0.6090778807477083, 'reg_lambda': 75.37246516253727, 'reg_alpha': 0.007568065994357847, 'scale_pos_weight': 1.1244545693471926, 'early_stopping_rounds': 57}, 'score': 0.8614647841203494, 'timestamp': '2026-03-01T14:21:44.241738'}


In [56]:
# Load the model stored under the name 'lgb_opt_fv9_300trials' and show it as the cell output.
# NOTE(review): this rebinds lgb_opt_fv9_300trials_model, so later cells use the loaded object.
lgb_opt_fv9_300trials_model = utils.load_model('lgb_opt_fv9_300trials')
lgb_opt_fv9_300trials_model

0,1,2
,boosting_type,'gbdt'
,num_leaves,33
,max_depth,4
,learning_rate,0.2659207130794154
,n_estimators,2039
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.00631336525229831


In [57]:
# Score lgb_opt_fv9_300trials_model on the v9 test split with five sklearn metrics.
# The trailing string is a run label (presumably the name the results are recorded under -- confirm).
model_tuner.calculate_metrics(
    lgb_opt_fv9_300trials_model,
    test_X_v9,
    test_y_v9,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'lgb_opt_fv9_300trials_test',
)

{'roc_auc_score': 0.828890523805778, 'accuracy_score': 0.85, 'precision_score': 0.6731391585760518, 'recall_score': 0.5110565110565111, 'f1_score': 0.5810055865921788}


### Random Forest - Optuna - feature set: v9_numerical_to_categorical_plus_one_hot-encoding

In [44]:
# Optuna search for RandomForestClassifier on the v9 training split, scored with 'roc_auc'.
# The run is bounded by a trial budget and by a timeout.
(
    rf_opt_fv9_model,
    rf_opt_fv9_best_params,
    rf_opt_fv9_best_score,
    rf_opt_fv9_optuna_results,
) = model_tuner.tune_optuna(
    model_class=RandomForestClassifier,
    X=train_X_v9,
    y=train_y_v9,
    param_space_func=random_forest_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=6 * 60 * 60,  # 6-hour limit (21600, in seconds)
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-13 16:18:47,404] A new study created in memory with name: no-name-db4ac371-9fde-47bc-bf13-19afae7e221e
[I 2026-01-13 16:18:59,586] Trial 0 finished with value: 0.8487749323419788 and parameters: {'n_estimators': 530, 'max_depth': 11, 'min_samples_split': 23, 'min_samples_leaf': 40, 'max_features': 0.8679854848712821, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.8487749323419788.
[I 2026-01-13 16:19:27,345] Trial 1 finished with value: 0.8500784929356359 and parameters: {'n_estimators': 1263, 'max_depth': 15, 'min_samples_split': 44, 'min_samples_leaf': 18, 'max_features': 0.7005970753140752, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.8500784929356359.
[I 2026-01-13 16:19:46,421] Trial 2 finished with value: 0.8507300324565881 and parameters: {'n_estimators': 744, 'max_depth': 10, 'min_samples_split': 26, 'min_samples_leaf': 1, 'max_features': 0.8636959729674244, 'class_weight': None}. Best is trial 2 with value: 0.8507300324565881.
[I 2026-01-13

In [45]:
# Store the tuned model together with its best params, best score and Optuna results
# under the name 'rf_opt_fv9'; the value returned by the call is the cell output.
model_tuner.save_model_and_metric(
    filename='rf_opt_fv9',
    model=rf_opt_fv9_model,
    params=rf_opt_fv9_best_params,
    score=rf_opt_fv9_best_score,
    dataframe_info=rf_opt_fv9_optuna_results,
)

{'model': 'RandomForestClassifier', 'params': {'n_estimators': 1334, 'max_depth': 8, 'min_samples_split': 19, 'min_samples_leaf': 3, 'max_features': 0.43238468412039505, 'class_weight': None}, 'score': 0.8556033361905404, 'timestamp': '2026-01-13T17:02:13.945225'}


In [46]:
# Load the model stored under the name 'rf_opt_fv9' and show it as the cell output.
# NOTE(review): this rebinds rf_opt_fv9_model, so later cells use the loaded object.
rf_opt_fv9_model = utils.load_model('rf_opt_fv9')
rf_opt_fv9_model

0,1,2
,n_estimators,1334
,criterion,'gini'
,max_depth,8
,min_samples_split,19
,min_samples_leaf,3
,min_weight_fraction_leaf,0.0
,max_features,0.43238468412039505
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [47]:
# Score rf_opt_fv9_model on the v9 test split with five sklearn metrics.
# The trailing string is a run label (presumably the name the results are recorded under -- confirm).
model_tuner.calculate_metrics(
    rf_opt_fv9_model,
    test_X_v9,
    test_y_v9,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'rf_opt_fv9_test',
)

{'roc_auc_score': 0.8503202740490876, 'accuracy_score': 0.8585, 'precision_score': 0.7296296296296296, 'recall_score': 0.48402948402948404, 'f1_score': 0.5819793205317577}


### Random Forest - Optuna - feature set: v9_numerical_to_categorical_plus_one_hot-encoding (300 trials)

In [58]:
# Optuna search for RandomForestClassifier on the v9 training split, scored with 'roc_auc'.
# Same setup as the 150-trial Random Forest run on v9, with the trial budget doubled to 300.
(
    rf_opt_fv9_300trials_model,
    rf_opt_fv9_300trials_best_params,
    rf_opt_fv9_300trials_best_score,
    rf_opt_fv9_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=RandomForestClassifier,
    X=train_X_v9,
    y=train_y_v9,
    param_space_func=random_forest_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=6 * 60 * 60,  # 6-hour limit (21600, in seconds)
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-03-01 14:27:19,835] A new study created in memory with name: no-name-89863021-b37b-4531-95b4-78cf51103454
[I 2026-03-01 14:27:27,318] Trial 0 finished with value: 0.8487749323419788 and parameters: {'n_estimators': 530, 'max_depth': 11, 'min_samples_split': 23, 'min_samples_leaf': 40, 'max_features': 0.8679854848712821, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.8487749323419788.
[I 2026-03-01 14:27:46,817] Trial 1 finished with value: 0.8500784929356359 and parameters: {'n_estimators': 1263, 'max_depth': 15, 'min_samples_split': 44, 'min_samples_leaf': 18, 'max_features': 0.7005970753140752, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.8500784929356359.
[I 2026-03-01 14:28:00,392] Trial 2 finished with value: 0.8507300324565881 and parameters: {'n_estimators': 744, 'max_depth': 10, 'min_samples_split': 26, 'min_samples_leaf': 1, 'max_features': 0.8636959729674244, 'class_weight': None}. Best is trial 2 with value: 0.8507300324565881.
[I 2026-03-01

In [59]:
# Store the tuned model together with its best params, best score and Optuna results
# under the name 'rf_opt_fv9_300trials'; the value returned by the call is the cell output.
model_tuner.save_model_and_metric(
    filename='rf_opt_fv9_300trials',
    model=rf_opt_fv9_300trials_model,
    params=rf_opt_fv9_300trials_best_params,
    score=rf_opt_fv9_300trials_best_score,
    dataframe_info=rf_opt_fv9_300trials_optuna_results,
)

{'model': 'RandomForestClassifier', 'params': {'n_estimators': 1199, 'max_depth': 8, 'min_samples_split': 25, 'min_samples_leaf': 6, 'max_features': 0.4069300960528903, 'class_weight': None}, 'score': 0.8556577515385578, 'timestamp': '2026-03-01T15:21:31.037092'}


In [60]:
# Load the model stored under the name 'rf_opt_fv9_300trials' and show it as the cell output.
# NOTE(review): this rebinds rf_opt_fv9_300trials_model, so later cells use the loaded object.
rf_opt_fv9_300trials_model = utils.load_model('rf_opt_fv9_300trials')
rf_opt_fv9_300trials_model

0,1,2
,n_estimators,1199
,criterion,'gini'
,max_depth,8
,min_samples_split,25
,min_samples_leaf,6
,min_weight_fraction_leaf,0.0
,max_features,0.4069300960528903
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [61]:
# Score rf_opt_fv9_300trials_model on the v9 test split with five sklearn metrics.
# The trailing string is a run label (presumably the name the results are recorded under -- confirm).
model_tuner.calculate_metrics(
    rf_opt_fv9_300trials_model,
    test_X_v9,
    test_y_v9,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'rf_opt_fv9_300trials_test',
)

{'roc_auc_score': 0.8502446977023248, 'accuracy_score': 0.8555, 'precision_score': 0.7251908396946565, 'recall_score': 0.4668304668304668, 'f1_score': 0.5680119581464873}


### NGBoost - Optuna - feature set: v9_numerical_to_categorical_plus_one_hot-encoding

In [48]:
# Optuna search for NGBClassifier on the v9 training split, scored with 'roc_auc'.
# The run is bounded by a trial budget and by a timeout.
(
    ngb_opt_fv9_model,
    ngb_opt_fv9_best_params,
    ngb_opt_fv9_best_score,
    ngb_opt_fv9_optuna_results,
) = model_tuner.tune_optuna(
    model_class=NGBClassifier,
    X=train_X_v9,
    y=train_y_v9,
    param_space_func=ngboost_param_space,
    scoring='roc_auc',
    n_trials=150,
    timeout=6 * 60 * 60,  # 6-hour limit (21600, in seconds)
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2026-01-13 17:14:19,282] A new study created in memory with name: no-name-0f354867-702a-4fd3-81c8-b3053bd6a157
[I 2026-01-13 17:14:56,445] Trial 0 finished with value: 0.835684429505639 and parameters: {'n_estimators': 355, 'learning_rate': 0.07761792832150195, 'minibatch_frac': 0.8742526710404513, 'base_max_depth': 7, 'base_min_samples_leaf': 6, 'base_min_samples_split': 32, 'base_max_features': 0.8717176727685809}. Best is trial 0 with value: 0.835684429505639.
[I 2026-01-13 17:18:02,238] Trial 1 finished with value: 0.8293513016343865 and parameters: {'n_estimators': 1262, 'learning_rate': 0.043294087107980356, 'minibatch_frac': 0.5102535761276225, 'base_max_depth': 10, 'base_min_samples_leaf': 26, 'base_min_samples_split': 36, 'base_max_features': 0.8414847847067064}. Best is trial 0 with value: 0.835684429505639.
[I 2026-01-13 17:19:26,374] Trial 2 finished with value: 0.8385010257052325 and parameters: {'n_estimators': 685, 'learning_rate': 0.004580164333349663, 'minibatch_fra

[iter 0] loss=0.5104 val_loss=0.0000 scale=2.0000 norm=4.0258
[iter 100] loss=0.4068 val_loss=0.0000 scale=2.0000 norm=3.4933
[iter 200] loss=0.3717 val_loss=0.0000 scale=2.0000 norm=3.4579
[iter 300] loss=0.3571 val_loss=0.0000 scale=2.0000 norm=3.5128
[iter 400] loss=0.3450 val_loss=0.0000 scale=1.0000 norm=1.7787
[iter 500] loss=0.3373 val_loss=0.0000 scale=2.0000 norm=3.5716
[iter 600] loss=0.3359 val_loss=0.0000 scale=1.0000 norm=1.8314
[iter 700] loss=0.3353 val_loss=0.0000 scale=1.0000 norm=1.8454
[iter 800] loss=0.3303 val_loss=0.0000 scale=1.0000 norm=1.8259
[iter 900] loss=0.3337 val_loss=0.0000 scale=1.0000 norm=1.8480
[iter 1000] loss=0.3292 val_loss=0.0000 scale=1.0000 norm=1.8731
[iter 1100] loss=0.3259 val_loss=0.0000 scale=1.0000 norm=1.8492
[iter 1200] loss=0.3292 val_loss=0.0000 scale=0.5000 norm=0.9264


In [49]:
# Store the tuned model together with its best params, best score and Optuna results
# under the name 'ngb_opt_fv9'; the value returned by the call is the cell output.
model_tuner.save_model_and_metric(
    filename='ngb_opt_fv9',
    model=ngb_opt_fv9_model,
    params=ngb_opt_fv9_best_params,
    score=ngb_opt_fv9_best_score,
    dataframe_info=ngb_opt_fv9_optuna_results,
)


{'model': 'NGBClassifier', 'params': {'n_estimators': 1227, 'learning_rate': 0.003482552318536083, 'minibatch_frac': 0.8032782112904578, 'base_max_depth': 4, 'base_min_samples_leaf': 41, 'base_min_samples_split': 18, 'base_max_features': 0.6713205356049008}, 'score': 0.8572223131819976, 'timestamp': '2026-01-13T19:51:36.636083'}


In [50]:
# Load the model stored under the name 'ngb_opt_fv9' and show it as the cell output.
# NOTE(review): this rebinds ngb_opt_fv9_model, so later cells use the loaded object.
ngb_opt_fv9_model = utils.load_model('ngb_opt_fv9')
ngb_opt_fv9_model

0,1,2
,Dist,<class 'ngboo....Categorical'>
,Score,<class 'ngboo...res.LogScore'>
,Base,DecisionTreeR...om_state=1234)
,natural_gradient,True
,n_estimators,1227
,learning_rate,0.003482552318536083
,minibatch_frac,0.8032782112904578
,col_sample,1.0
,verbose,True
,random_state,RandomState(M... 0x1D37A970740

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,4
,min_samples_split,18
,min_samples_leaf,41
,min_weight_fraction_leaf,0.0
,max_features,0.6713205356049008
,random_state,1234
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [51]:
# Score ngb_opt_fv9_model on the v9 test split with five sklearn metrics.
# The trailing string is a run label (presumably the name the results are recorded under -- confirm).
model_tuner.calculate_metrics(
    ngb_opt_fv9_model,
    test_X_v9,
    test_y_v9,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'ngb_opt_fv9_test',
)

{'roc_auc_score': 0.8528142934922596, 'accuracy_score': 0.8575, 'precision_score': 0.7479674796747967, 'recall_score': 0.4520884520884521, 'f1_score': 0.5635528330781011}


### NGBoost - Optuna - feature set: v9_numerical_to_categorical_plus_one_hot-encoding (300 trials)

In [62]:
# Optuna search for NGBClassifier on the v9 training split, scored with 'roc_auc'.
# Same setup as the 150-trial NGBoost run on v9, with the trial budget doubled to 300.
(
    ngb_opt_fv9_300trials_model,
    ngb_opt_fv9_300trials_best_params,
    ngb_opt_fv9_300trials_best_score,
    ngb_opt_fv9_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=NGBClassifier,
    X=train_X_v9,
    y=train_y_v9,
    param_space_func=ngboost_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=6 * 60 * 60,  # 6-hour limit (21600, in seconds)
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-03-01 15:22:51,957] A new study created in memory with name: no-name-a1a39109-eb4a-49ff-8f85-20c28dabcb3e
[I 2026-03-01 15:23:59,567] Trial 0 finished with value: 0.8366427174928489 and parameters: {'n_estimators': 850, 'learning_rate': 0.03330849868291622, 'minibatch_frac': 0.7733858528084853, 'base_max_depth': 9, 'base_min_samples_leaf': 50, 'base_min_samples_split': 30, 'base_max_features': 0.7834974255792079}. Best is trial 0 with value: 0.8366427174928489.
[I 2026-03-01 15:25:03,938] Trial 1 finished with value: 0.8493590546175998 and parameters: {'n_estimators': 864, 'learning_rate': 0.003697006358220936, 'minibatch_frac': 0.9147595994001568, 'base_max_depth': 9, 'base_min_samples_leaf': 7, 'base_min_samples_split': 49, 'base_max_features': 0.40530310711146844}. Best is trial 1 with value: 0.8493590546175998.
[I 2026-03-01 15:25:31,502] Trial 2 finished with value: 0.8433945546127843 and parameters: {'n_estimators': 450, 'learning_rate': 0.029536013038098198, 'minibatch_f

[iter 0] loss=0.5102 val_loss=0.0000 scale=2.0000 norm=4.0248
[iter 100] loss=0.3577 val_loss=0.0000 scale=2.0000 norm=3.4669
[iter 200] loss=0.3371 val_loss=0.0000 scale=2.0000 norm=3.6006
[iter 300] loss=0.3336 val_loss=0.0000 scale=1.0000 norm=1.8380
[iter 400] loss=0.3313 val_loss=0.0000 scale=1.0000 norm=1.8828


In [63]:
# Store the tuned model together with its best params, best score and Optuna results
# under the name 'ngb_opt_fv9_300trials'; the value returned by the call is the cell output.
model_tuner.save_model_and_metric(
    filename='ngb_opt_fv9_300trials',
    model=ngb_opt_fv9_300trials_model,
    params=ngb_opt_fv9_300trials_best_params,
    score=ngb_opt_fv9_300trials_best_score,
    dataframe_info=ngb_opt_fv9_300trials_optuna_results,
)


{'model': 'NGBClassifier', 'params': {'n_estimators': 406, 'learning_rate': 0.009323621216102574, 'minibatch_frac': 0.7175485751591998, 'base_max_depth': 4, 'base_min_samples_leaf': 31, 'base_min_samples_split': 42, 'base_max_features': 0.753087822898729}, 'score': 0.8575796245822538, 'timestamp': '2026-03-01T17:01:56.407197'}


In [64]:
# Load the model stored under the name 'ngb_opt_fv9_300trials' and show it as the cell output.
# NOTE(review): this rebinds ngb_opt_fv9_300trials_model, so later cells use the loaded object.
ngb_opt_fv9_300trials_model = utils.load_model('ngb_opt_fv9_300trials')
ngb_opt_fv9_300trials_model

0,1,2
,Dist,<class 'ngboo....Categorical'>
,Score,<class 'ngboo...res.LogScore'>
,Base,DecisionTreeR...om_state=1234)
,natural_gradient,True
,n_estimators,406
,learning_rate,0.009323621216102574
,minibatch_frac,0.7175485751591998
,col_sample,1.0
,verbose,True
,random_state,RandomState(M... 0x184C55F4540

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,4
,min_samples_split,42
,min_samples_leaf,31
,min_weight_fraction_leaf,0.0
,max_features,0.753087822898729
,random_state,1234
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [66]:
# Score ngb_opt_fv9_300trials_model on the v9 test split with five sklearn metrics.
# The trailing string is a run label (presumably the name the results are recorded under -- confirm).
model_tuner.calculate_metrics(
    ngb_opt_fv9_300trials_model,
    test_X_v9,
    test_y_v9,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'ngb_opt_fv9_300trials_test',
)

{'roc_auc_score': 0.8541731253595661, 'accuracy_score': 0.857, 'precision_score': 0.7429718875502008, 'recall_score': 0.45454545454545453, 'f1_score': 0.5640243902439024}


## F - Feature sets: v10 (One-hot encoding e produto de todas as variáveis (TODOS os modelos))

### XGBoost - Optuna - feature set: v10_one_hot-encoding_plus_poly_all_features (300 trials)

In [43]:
# Optuna search for XGBClassifier on the v10 training split, scored with 'roc_auc'.
# The run is bounded by a 300-trial budget and by a timeout.
(
    xgb_opt_fv10_300trials_model,
    xgb_opt_fv10_300trials_best_params,
    xgb_opt_fv10_300trials_best_score,
    xgb_opt_fv10_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=XGBClassifier,
    X=train_X_v10,
    y=train_y_v10,
    param_space_func=xgboost_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=6 * 60 * 60,  # 6-hour limit (21600, in seconds)
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-02-24 20:39:28,831] A new study created in memory with name: no-name-94808b53-ad07-4648-8df3-4004c3972a9d
[I 2026-02-24 20:40:37,247] Trial 0 finished with value: 0.8611252901349307 and parameters: {'learning_rate': 0.0029813770307052724, 'max_depth': 8, 'n_estimators': 1594, 'reg_lambda': 13.849507661133075, 'reg_alpha': 1.3179630432958698, 'gamma': 1.3629630264132082, 'min_child_weight': 0.06751383304677265, 'subsample': 0.9207488710140077, 'colsample_bytree': 0.983255741473482, 'scale_pos_weight': 6.895799670211798, 'early_stopping_rounds': 68}. Best is trial 0 with value: 0.8611252901349307.
[I 2026-02-24 20:40:47,248] Trial 1 finished with value: 0.8611895772938718 and parameters: {'learning_rate': 0.017419098458624965, 'max_depth': 8, 'n_estimators': 2282, 'reg_lambda': 0.3026934470902619, 'reg_alpha': 0.1757052524413466, 'gamma': 2.5154158265390487, 'min_child_weight': 0.010997788128340768, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'scale_p

In [44]:
# Store the tuned model together with its best params, best score and Optuna results
# under the name 'xgb_opt_fv10_300trials'; the value returned by the call is the cell output.
model_tuner.save_model_and_metric(
    filename='xgb_opt_fv10_300trials',
    model=xgb_opt_fv10_300trials_model,
    params=xgb_opt_fv10_300trials_best_params,
    score=xgb_opt_fv10_300trials_best_score,
    dataframe_info=xgb_opt_fv10_300trials_optuna_results,
)

{'model': 'XGBClassifier', 'params': {'learning_rate': 0.06251121903594009, 'max_depth': 4, 'n_estimators': 1084, 'reg_lambda': 0.034605187597076086, 'reg_alpha': 0.2657157952539824, 'gamma': 3.4189927283546684, 'min_child_weight': 0.016840755668344375, 'subsample': 0.7634082525777606, 'colsample_bytree': 0.6402969551152415, 'scale_pos_weight': 1.685394631209658, 'early_stopping_rounds': 88}, 'score': 0.8697802197802199, 'timestamp': '2026-02-24T21:32:57.919080'}


In [45]:
# Load the model stored under the name 'xgb_opt_fv10_300trials' and show it as the cell output.
# NOTE(review): this rebinds xgb_opt_fv10_300trials_model, so later cells use the loaded object.
xgb_opt_fv10_300trials_model = utils.load_model('xgb_opt_fv10_300trials')
xgb_opt_fv10_300trials_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6402969551152415
,device,
,early_stopping_rounds,
,enable_categorical,False


In [46]:
# Score xgb_opt_fv10_300trials_model on the v10 test split with five sklearn metrics.
# The trailing string is a run label (presumably the name the results are recorded under -- confirm).
model_tuner.calculate_metrics(
    xgb_opt_fv10_300trials_model,
    test_X_v10,
    test_y_v10,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'xgb_opt_fv10_300trials_test',
)

{'roc_auc_score': 0.850724376148105, 'accuracy_score': 0.848, 'precision_score': 0.6450704225352113, 'recall_score': 0.5626535626535627, 'f1_score': 0.6010498687664042}


### LightGBM - Optuna - feature set: v10_one_hot-encoding_plus_poly_all_features (300 trials)

In [47]:
# Optuna search for LGBMClassifier on the v10 training split, scored with 'roc_auc'.
# The run is bounded by a 300-trial budget and by a timeout.
(
    lgb_opt_fv10_300trials_model,
    lgb_opt_fv10_300trials_best_params,
    lgb_opt_fv10_300trials_best_score,
    lgb_opt_fv10_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=LGBMClassifier,
    X=train_X_v10,
    y=train_y_v10,
    param_space_func=lightgbm_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=6 * 60 * 60,  # 6-hour limit (21600, in seconds)
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-02-24 22:01:09,647] A new study created in memory with name: no-name-3218ed0c-f5f5-417e-838f-1289f3b5f8a9
[I 2026-02-24 22:01:23,753] Trial 0 finished with value: 0.8441009910335063 and parameters: {'learning_rate': 0.0029813770307052724, 'num_leaves': 165, 'max_depth': 7, 'n_estimators': 2464, 'min_child_samples': 158, 'min_child_weight': 0.012313185468743897, 'subsample': 0.7105857020572387, 'colsample_bytree': 0.9207488710140077, 'reg_lambda': 68.00759466734245, 'reg_alpha': 3.189558257433924, 'scale_pos_weight': 1.4604981309148681, 'early_stopping_rounds': 75}. Best is trial 0 with value: 0.8441009910335063.
[I 2026-02-24 22:01:26,179] Trial 1 finished with value: 0.8663814275120147 and parameters: {'learning_rate': 0.04931971559288606, 'num_leaves': 187, 'max_depth': 6, 'n_estimators': 1903, 'min_child_samples': 106, 'min_child_weight': 0.0011352037010541887, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'reg_lambda': 0.288100449320714, 'reg_alph

In [48]:
# Store the tuned model together with its best params, best score and Optuna results
# under the name 'lgb_opt_fv10_300trials'; the value returned by the call is the cell output.
model_tuner.save_model_and_metric(
    filename='lgb_opt_fv10_300trials',
    model=lgb_opt_fv10_300trials_model,
    params=lgb_opt_fv10_300trials_best_params,
    score=lgb_opt_fv10_300trials_best_score,
    dataframe_info=lgb_opt_fv10_300trials_optuna_results,
)

{'model': 'LGBMClassifier', 'params': {'learning_rate': 0.03813781993900014, 'num_leaves': 96, 'max_depth': 4, 'n_estimators': 1309, 'min_child_samples': 46, 'min_child_weight': 5.308638922671325, 'subsample': 0.9100158128139129, 'colsample_bytree': 0.6156800164053609, 'reg_lambda': 0.779147747497137, 'reg_alpha': 0.7525985338137784, 'scale_pos_weight': 1.65333843010044, 'early_stopping_rounds': 68}, 'score': 0.8691790024173898, 'timestamp': '2026-02-24T22:18:58.032104'}


In [49]:
# Load the model stored under the name 'lgb_opt_fv10_300trials' and show it as the cell output.
# NOTE(review): this rebinds lgb_opt_fv10_300trials_model, so later cells use the loaded object.
lgb_opt_fv10_300trials_model = utils.load_model('lgb_opt_fv10_300trials')
lgb_opt_fv10_300trials_model

0,1,2
,boosting_type,'gbdt'
,num_leaves,96
,max_depth,4
,learning_rate,0.03813781993900014
,n_estimators,1309
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,5.308638922671325


In [50]:
# Score lgb_opt_fv10_300trials_model on the v10 test split with five sklearn metrics.
# The trailing string is a run label (presumably the name the results are recorded under -- confirm).
model_tuner.calculate_metrics(
    lgb_opt_fv10_300trials_model,
    test_X_v10,
    test_y_v10,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'lgb_opt_fv10_300trials_test',
)

{'roc_auc_score': 0.8441708272216746, 'accuracy_score': 0.8465, 'precision_score': 0.6396648044692738, 'recall_score': 0.5626535626535627, 'f1_score': 0.5986928104575163}


### CatBoost - Optuna - feature set: v10_one_hot-encoding_plus_poly_all_features (300 trials)

In [49]:
# Optuna search for CatBoostClassifier on the v10 training split, scored with 'roc_auc'.
# The run is bounded by a 300-trial budget and by a timeout.
(
    cat_opt_fv10_300trials_model,
    cat_opt_fv10_300trials_best_params,
    cat_opt_fv10_300trials_best_score,
    cat_opt_fv10_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=CatBoostClassifier,
    X=train_X_v10,
    y=train_y_v10,
    param_space_func=catboost_param_space,
    # Empty list: no column is declared categorical (presumably because the v10
    # feature set is already one-hot encoded -- confirm).
    categorical_features=[],
    scoring='roc_auc',
    n_trials=300,
    timeout=6 * 60 * 60,  # 6-hour limit (21600, in seconds)
)

Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-02-25 21:44:34,912] A new study created in memory with name: no-name-38313c91-2a8b-4c93-abff-a33a22699bb5
[I 2026-02-25 21:45:44,646] Trial 0 finished with value: 0.8602926871550887 and parameters: {'learning_rate': 0.0029813770307052724, 'depth': 8, 'iterations': 1594, 'l2_leaf_reg': 13.849507661133075, 'random_strength': 3.8998790405940174, 'bagging_temperature': 1.3629630264132082, 'auto_class_weights': 'Balanced', 'early_stopping_rounds': 98}. Best is trial 0 with value: 0.8602926871550887.
[I 2026-02-25 21:45:49,302] Trial 1 finished with value: 0.866787616415136 and parameters: {'learning_rate': 0.14783979348659035, 'depth': 6, 'iterations': 1752, 'l2_leaf_reg': 5.4181589388967515, 'random_strength': 3.563510134914501, 'bagging_temperature': 1.8512537739519748, 'auto_class_weights': 'None', 'early_stopping_rounds': 50}. Best is trial 1 with value: 0.866787616415136.
[I 2026-02-25 21:46:06,879] Trial 2 pruned. 
[I 2026-02-25 21:46:15,921] Trial 3 finished with value: 0.869

0:	learn: 0.6683376	total: 5.27ms	remaining: 6.04s
1:	learn: 0.6463820	total: 11.2ms	remaining: 6.42s
2:	learn: 0.6286077	total: 17.3ms	remaining: 6.61s
3:	learn: 0.6120401	total: 22.5ms	remaining: 6.44s
4:	learn: 0.5952292	total: 29.5ms	remaining: 6.75s
5:	learn: 0.5820649	total: 35ms	remaining: 6.67s
6:	learn: 0.5665477	total: 40.3ms	remaining: 6.56s
7:	learn: 0.5542094	total: 47.5ms	remaining: 6.77s
8:	learn: 0.5422810	total: 52.8ms	remaining: 6.68s
9:	learn: 0.5313044	total: 58.6ms	remaining: 6.67s
10:	learn: 0.5196340	total: 64.7ms	remaining: 6.69s
11:	learn: 0.5076208	total: 69.8ms	remaining: 6.61s
12:	learn: 0.4993182	total: 75.5ms	remaining: 6.59s
13:	learn: 0.4902640	total: 81ms	remaining: 6.56s
14:	learn: 0.4814484	total: 86.4ms	remaining: 6.53s
15:	learn: 0.4728643	total: 92.9ms	remaining: 6.58s
16:	learn: 0.4665582	total: 98.1ms	remaining: 6.53s
17:	learn: 0.4596163	total: 103ms	remaining: 6.49s
18:	learn: 0.4534114	total: 110ms	remaining: 6.55s
19:	learn: 0.4474849	total: 

In [50]:
# Store the tuned model together with its best params, best score and Optuna results
# under the name 'cat_opt_fv10_300trials'; the value returned by the call is the cell output.
model_tuner.save_model_and_metric(
    filename='cat_opt_fv10_300trials',
    model=cat_opt_fv10_300trials_model,
    params=cat_opt_fv10_300trials_best_params,
    score=cat_opt_fv10_300trials_best_score,
    dataframe_info=cat_opt_fv10_300trials_optuna_results,
)

{'model': 'CatBoostClassifier', 'params': {'learning_rate': 0.029910272747063187, 'depth': 4, 'iterations': 1148, 'l2_leaf_reg': 0.09235461279817862, 'random_strength': 0.8269795861993963, 'bagging_temperature': 3.4535388483949254, 'auto_class_weights': 'None', 'early_stopping_rounds': 98}, 'score': 0.8727807687492175, 'timestamp': '2026-02-25T22:39:45.491289'}


In [51]:
# Load the model stored under the name 'cat_opt_fv10_300trials' and show it as the cell output.
# NOTE(review): this rebinds cat_opt_fv10_300trials_model, so later cells use the loaded object.
cat_opt_fv10_300trials_model = utils.load_model('cat_opt_fv10_300trials')
cat_opt_fv10_300trials_model

<catboost.core.CatBoostClassifier at 0x24574c86f00>

In [52]:
# Score cat_opt_fv10_300trials_model on the v10 test split with five sklearn metrics.
# The trailing string is a run label (presumably the name the results are recorded under -- confirm).
model_tuner.calculate_metrics(
    cat_opt_fv10_300trials_model,
    test_X_v10,
    test_y_v10,
    [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score],
    'cat_opt_fv10_300trials_test',
)

{'roc_auc_score': 0.8592968931951983, 'accuracy_score': 0.869, 'precision_score': 0.7617328519855595, 'recall_score': 0.5184275184275184, 'f1_score': 0.6169590643274854}


### Random Forest - Optuna - feature set: v10_one_hot-encoding_plus_poly_all_features (300 trials)

In [43]:
# Optuna search for RandomForestClassifier on the v10 training split, scored with 'roc_auc'.
# The run is bounded by a 300-trial budget and by a timeout.
(
    rf_opt_fv10_300trials_model,
    rf_opt_fv10_300trials_best_params,
    rf_opt_fv10_300trials_best_score,
    rf_opt_fv10_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=RandomForestClassifier,
    X=train_X_v10,
    y=train_y_v10,
    param_space_func=random_forest_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=6 * 60 * 60,  # 6-hour limit (21600, in seconds)
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-02-27 09:00:09,035] A new study created in memory with name: no-name-777aac0c-e5df-44f4-839f-b5221021d90a
[I 2026-02-27 09:00:58,532] Trial 0 finished with value: 0.856105353892383 and parameters: {'n_estimators': 530, 'max_depth': 11, 'min_samples_split': 23, 'min_samples_leaf': 40, 'max_features': 0.8679854848712821, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.856105353892383.
[I 2026-02-27 09:02:53,355] Trial 1 finished with value: 0.8592005277807205 and parameters: {'n_estimators': 1263, 'max_depth': 15, 'min_samples_split': 44, 'min_samples_leaf': 18, 'max_features': 0.7005970753140752, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.8592005277807205.
[I 2026-02-27 09:04:30,111] Trial 2 finished with value: 0.8569593859252054 and parameters: {'n_estimators': 744, 'max_depth': 10, 'min_samples_split': 26, 'min_samples_leaf': 1, 'max_features': 0.8636959729674244, 'class_weight': None}. Best is trial 1 with value: 0.8592005277807205.
[I 2026-02-27 0

In [44]:
# Store the tuned model together with its best params, best score and Optuna results
# under the name 'rf_opt_fv10_300trials'; the value returned by the call is the cell output.
model_tuner.save_model_and_metric(
    filename='rf_opt_fv10_300trials',
    model=rf_opt_fv10_300trials_model,
    params=rf_opt_fv10_300trials_best_params,
    score=rf_opt_fv10_300trials_best_score,
    dataframe_info=rf_opt_fv10_300trials_optuna_results,
)



{'model': 'RandomForestClassifier', 'params': {'n_estimators': 1246, 'max_depth': 7, 'min_samples_split': 30, 'min_samples_leaf': 16, 'max_features': 0.4109963527906349, 'class_weight': None}, 'score': 0.8625266057343183, 'timestamp': '2026-02-27T12:51:50.999856'}


In [45]:
# Load the model stored under the name 'rf_opt_fv10_300trials' and show it as the cell output.
# NOTE(review): this rebinds rf_opt_fv10_300trials_model, so later cells use the loaded object.
rf_opt_fv10_300trials_model = utils.load_model('rf_opt_fv10_300trials')
rf_opt_fv10_300trials_model

0,1,2
,n_estimators,1246
,criterion,'gini'
,max_depth,7
,min_samples_split,30
,min_samples_leaf,16
,min_weight_fraction_leaf,0.0
,max_features,0.4109963527906349
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [46]:
# Evaluate the reloaded Random Forest on the v10 test split with the five
# sklearn metrics listed below.
# The final string is passed through unchanged; presumably it is the label the
# metrics are recorded under -- confirm in ModelTuner.calculate_metrics.
model_tuner.calculate_metrics(
    rf_opt_fv10_300trials_model,
    test_X_v10,
    test_y_v10,
    [
        roc_auc_score,
        accuracy_score,
        precision_score,
        recall_score,
        f1_score,
    ],
    'rf_opt_fv10_300trials_test',
)

{'roc_auc_score': 0.8477028646520172, 'accuracy_score': 0.866, 'precision_score': 0.7622641509433963, 'recall_score': 0.4963144963144963, 'f1_score': 0.6011904761904762}


### NGBoost - Optuna - feature set: v10_one-hot_encoding_plus_poly_all_features (300 trials)

In [47]:
# Optuna hyperparameter search for NGBClassifier on the v10 training split,
# scored with 'roc_auc'. The four values returned by tune_optuna are unpacked
# into the model / best-params / best-score / optuna-results variables.
# NOTE(review): in the recorded run roughly 6h43m elapsed between study
# creation and the subsequent save, so the 6-hour timeout probably ended the
# study before all 300 trials completed -- confirm from the Optuna results.
(
    ngb_opt_fv10_300trials_model,
    ngb_opt_fv10_300trials_best_params,
    ngb_opt_fv10_300trials_best_score,
    ngb_opt_fv10_300trials_optuna_results,
) = model_tuner.tune_optuna(
    model_class=NGBClassifier,
    X=train_X_v10,
    y=train_y_v10,
    param_space_func=ngboost_param_space,
    scoring='roc_auc',
    n_trials=300,
    timeout=6 * 60 * 60,  # 6-hour limit (= 21600)
)


Trials:   0%|          | 0/300 [00:00<?, ?it/s]

[I 2026-02-27 12:51:52,583] A new study created in memory with name: no-name-061c5674-922c-4b18-9b31-8898730a3315
[I 2026-02-27 12:58:34,783] Trial 0 finished with value: 0.8607944640810548 and parameters: {'n_estimators': 1173, 'learning_rate': 0.0070646182329743646, 'minibatch_frac': 0.7559052786778259, 'base_max_depth': 5, 'base_min_samples_leaf': 21, 'base_min_samples_split': 42, 'base_max_features': 0.7537846170618119}. Best is trial 0 with value: 0.8607944640810548.
[I 2026-02-27 13:08:42,859] Trial 1 finished with value: 0.8553437797960146 and parameters: {'n_estimators': 1007, 'learning_rate': 0.006037750601963626, 'minibatch_frac': 0.8243725719505532, 'base_max_depth': 7, 'base_min_samples_leaf': 1, 'base_min_samples_split': 9, 'base_max_features': 0.9872839764681407}. Best is trial 0 with value: 0.8607944640810548.
[I 2026-02-27 13:11:59,918] Trial 2 finished with value: 0.8567865088461056 and parameters: {'n_estimators': 699, 'learning_rate': 0.03600426320001392, 'minibatch_

[iter 0] loss=0.5064 val_loss=0.0000 scale=2.0000 norm=4.0043
[iter 100] loss=0.4010 val_loss=0.0000 scale=2.0000 norm=3.4478
[iter 200] loss=0.3670 val_loss=0.0000 scale=2.0000 norm=3.4067
[iter 300] loss=0.3451 val_loss=0.0000 scale=2.0000 norm=3.4148
[iter 400] loss=0.3294 val_loss=0.0000 scale=2.0000 norm=3.4031
[iter 500] loss=0.3258 val_loss=0.0000 scale=1.0000 norm=1.7124
[iter 600] loss=0.3184 val_loss=0.0000 scale=1.0000 norm=1.7421
[iter 700] loss=0.3165 val_loss=0.0000 scale=1.0000 norm=1.7537
[iter 800] loss=0.3031 val_loss=0.0000 scale=1.0000 norm=1.6929
[iter 900] loss=0.3156 val_loss=0.0000 scale=1.0000 norm=1.7577


In [48]:
# Persist the tuned NGBoost classifier for feature set v10 under the name
# 'ngb_opt_fv10_300trials', passing along the params / score / Optuna-results
# variables produced by the same tuning run.
# The call stays as the cell's last expression so its output is still shown.
model_tuner.save_model_and_metric(
    filename='ngb_opt_fv10_300trials',
    model=ngb_opt_fv10_300trials_model,
    params=ngb_opt_fv10_300trials_best_params,
    score=ngb_opt_fv10_300trials_best_score,
    dataframe_info=ngb_opt_fv10_300trials_optuna_results,
)

{'model': 'NGBClassifier', 'params': {'n_estimators': 976, 'learning_rate': 0.002858568647168835, 'minibatch_frac': 0.5756362933737438, 'base_max_depth': 4, 'base_min_samples_leaf': 8, 'base_min_samples_split': 47, 'base_max_features': 0.7042884754170645}, 'score': 0.8656740279877878, 'timestamp': '2026-02-27T19:35:06.598790'}


In [49]:
# Reload the model stored as 'ngb_opt_fv10_300trials' through utils.load_model
# and rebind the in-memory variable to the loaded object, so the following
# cells work with the persisted artifact.
ngb_opt_fv10_300trials_model = utils.load_model('ngb_opt_fv10_300trials')
# Bare trailing expression: renders the loaded estimator as the cell output.
ngb_opt_fv10_300trials_model

0,1,2
,Dist,<class 'ngboo....Categorical'>
,Score,<class 'ngboo...res.LogScore'>
,Base,DecisionTreeR...om_state=1234)
,natural_gradient,True
,n_estimators,976
,learning_rate,0.002858568647168835
,minibatch_frac,0.5756362933737438
,col_sample,1.0
,verbose,True
,random_state,RandomState(M... 0x27AD1E53440

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,4
,min_samples_split,47
,min_samples_leaf,8
,min_weight_fraction_leaf,0.0
,max_features,0.7042884754170645
,random_state,1234
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [50]:
# Evaluate the reloaded NGBoost classifier on the v10 test split with the five
# sklearn metrics listed below.
# The final string is passed through unchanged; presumably it is the label the
# metrics are recorded under -- confirm in ModelTuner.calculate_metrics.
model_tuner.calculate_metrics(
    ngb_opt_fv10_300trials_model,
    test_X_v10,
    test_y_v10,
    [
        roc_auc_score,
        accuracy_score,
        precision_score,
        recall_score,
        f1_score,
    ],
    'ngb_opt_fv10_300trials_test',
)

{'roc_auc_score': 0.85651907685806, 'accuracy_score': 0.8645, 'precision_score': 0.7615384615384615, 'recall_score': 0.4864864864864865, 'f1_score': 0.5937031484257871}
