# Tuning de Hiperparâmetrosde Modelos

## Configurações

In [42]:
# configurações
import importlib #quando necessário reimportar pacotes

import sys
sys.path.append("../")
sys.path.append("../ml-project-template")

from config import *
import utils
import data_manager as data_mgr
import model_tuner as mod_tuner

import optuna
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from ngboost import NGBClassifier

from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score


import seaborn as sns
import matplotlib.pyplot as plt

## Carregar databases

Faz load dos databases para treinar os modelos

In [82]:
importlib.reload(mod_tuner)

<module 'model_tuner' from 'D:\\mba\\Data Science e Analytics (USP-Esalq)\\99 - TCC\\Projeto\\notebooks\\../ml-project-template\\model_tuner.py'>

In [83]:
# inicia o auxiliar de tunig dos modelos
model_tuner = mod_tuner.ModelTuner()

In [3]:
# inicia o auxiliar de importação e exportação de dados
data_manager = data_mgr.DataManager()

In [4]:
train_v0, test_v0 = data_manager.load_processed_data(feature_set = "v0_basico")

 Carregando features: v0_basico
 FeatureSet info: 11 features


In [5]:
train_X_v0, train_y_v0 = data_manager.split_features_target(train_v0)

 Features: (8000, 10), Target: (8000,)


In [6]:
test_X_v0, test_y_v0 = data_manager.split_features_target(test_v0)

 Features: (2000, 10), Target: (2000,)


In [7]:
train_v1, test_v1 = data_manager.load_processed_data(feature_set = "v1_one-hot_encoding")

 Carregando features: v1_one-hot_encoding
 FeatureSet info: 13 features


In [8]:
train_X_v1, train_y_v1 = data_manager.split_features_target(train_v1)

 Features: (8000, 12), Target: (8000,)


In [9]:
test_X_v1, test_y_v1 = data_manager.split_features_target(test_v1)

 Features: (2000, 12), Target: (2000,)


In [10]:
train_v2, test_v2 = data_manager.load_processed_data(feature_set = "v2_one-hot_encoding_plus_normalizacao")

 Carregando features: v2_one-hot_encoding_plus_normalizacao
 FeatureSet info: 13 features


In [11]:
train_X_v2, train_y_v2 = data_manager.split_features_target(train_v2)

 Features: (8000, 12), Target: (8000,)


In [12]:
test_X_v2, test_y_v2 = data_manager.split_features_target(test_v2)

 Features: (2000, 12), Target: (2000,)


In [13]:
train_v3, test_v3 = data_manager.load_processed_data(feature_set = "v3_one-hot_encoding_plus_normalizacao_plus_poly")

 Carregando features: v3_one-hot_encoding_plus_normalizacao_plus_poly
 FeatureSet info: 34 features


In [14]:
train_X_v3, train_y_v3 = data_manager.split_features_target(train_v3)

 Features: (8000, 33), Target: (8000,)


In [15]:
test_X_v3, test_y_v3 = data_manager.split_features_target(test_v3)

 Features: (2000, 33), Target: (2000,)


In [16]:
train_v4, test_v4 = data_manager.load_processed_data(feature_set = "v4_normalizacao_plus_poly")

 Carregando features: v4_normalizacao_plus_poly
 FeatureSet info: 32 features


In [17]:
train_X_v4, train_y_v4 = data_manager.split_features_target(train_v4)

 Features: (8000, 31), Target: (8000,)


In [18]:
test_X_v4, test_y_v4 = data_manager.split_features_target(test_v4)

 Features: (2000, 31), Target: (2000,)


## Optuna - parâmetros gerais de busca por tipo de modelo

In [27]:
def random_forest_param_space(trial):

    params = {}

    params["n_estimators"] = trial.suggest_int(
        "n_estimators", 300, 1500
    )

    params["max_depth"] = trial.suggest_int(
        "max_depth", 4, 15
    )

    params["min_samples_split"] = trial.suggest_int(
        "min_samples_split", 2, 50
    )

    params["min_samples_leaf"] = trial.suggest_int(
        "min_samples_leaf", 1, 50
    )

    params["max_features"] = trial.suggest_float(
        "max_features", 0.4, 1.0
    )

    params["bootstrap"] = True

    params["class_weight"] = trial.suggest_categorical(
        "class_weight", [None, "balanced"]
    )

    params["n_jobs"] = -1

    return params


In [58]:
def xgboost_param_space(trial):

    params = {}

    params["booster"] = "gbtree"
    params["tree_method"] = "hist"
    params["grow_policy"] = "depthwise"

    params["learning_rate"] = trial.suggest_float(
        "learning_rate", 1e-3, 0.3, log=True
    )

    params["max_depth"] = trial.suggest_int(
        "max_depth", 4, 10
    )

    params["n_estimators"] = trial.suggest_int(
        "n_estimators", 500, 3000
    )

    params["reg_lambda"] = trial.suggest_float(
        "reg_lambda", 1e-2, 100.0, log=True
    )

    params["reg_alpha"] = trial.suggest_float(
        "reg_alpha", 1e-3, 10.0, log=True
    )

    params["gamma"] = trial.suggest_float(
        "gamma", 0.0, 5.0
    )

    params["min_child_weight"] = trial.suggest_float(
        "min_child_weight", 1e-2, 10.0, log=True
    )

    params["subsample"] = trial.suggest_float(
        "subsample", 0.6, 1.0
    )

    params["colsample_bytree"] = trial.suggest_float(
        "colsample_bytree", 0.6, 1.0
    )

    params["scale_pos_weight"] = trial.suggest_float(
        "scale_pos_weight", 0.5, 10.0, log=True
    )

    params["eval_metric"] = "auc"
    
    params["early_stopping_rounds"] = trial.suggest_int(
        "early_stopping_rounds", 50, 100
    )

    return params


In [80]:
def lightgbm_param_space(trial):

    params = {}

    params["boosting_type"] = "gbdt"
    params["objective"] = "binary"

    params["learning_rate"] = trial.suggest_float(
        "learning_rate", 1e-3, 0.3, log=True
    )

    params["num_leaves"] = trial.suggest_int(
        "num_leaves", 16, 256
    ) 

    params["max_depth"] = trial.suggest_int(
        "max_depth", 4, 10
    )

    params["n_estimators"] = trial.suggest_int(
        "n_estimators", 500, 3000
    )

    params["min_child_samples"] = trial.suggest_int(
        "min_child_samples", 10, 200
    )

    params["min_child_weight"] = trial.suggest_float(
        "min_child_weight", 1e-3, 10.0, log=True
    )

    params["subsample"] = trial.suggest_float(
        "subsample", 0.6, 1.0
    )

    params["colsample_bytree"] = trial.suggest_float(
        "colsample_bytree", 0.6, 1.0
    )

    params["reg_lambda"] = trial.suggest_float(
        "reg_lambda", 1e-2, 100.0, log=True
    )

    params["reg_alpha"] = trial.suggest_float(
        "reg_alpha", 1e-3, 10.0, log=True
    )

    params["scale_pos_weight"] = trial.suggest_float(
        "scale_pos_weight", 0.5, 10.0, log=True
    )

    params["early_stopping_rounds"] = trial.suggest_int(
        "early_stopping_rounds", 50, 100
    )

    params["metric"] = "auc"

    params["verbosity"] = -1

    return params


In [73]:
def catboost_param_space(trial):

    params = {}

    params["boosting_type"] = "Plain"
    
    params["grow_policy"] = "SymmetricTree"

    params["learning_rate"] = trial.suggest_float(
        "learning_rate", 1e-3, 0.3, log=True
    )

    params["depth"] = trial.suggest_int("depth", 4, 10)

    params["iterations"] = trial.suggest_int("iterations", 500, 3000)

    params["l2_leaf_reg"] = trial.suggest_float(
        "l2_leaf_reg", 1e-2, 100, log=True
    )

    params["random_strength"] = trial.suggest_float(
        "random_strength", 0, 5
    )

    params["bagging_temperature"] = trial.suggest_float(
        "bagging_temperature", 0.0, 5.0
    )

    if params["bagging_temperature"] == 0.0:
        params["subsample"] = trial.suggest_float("subsample", 0.6, 1.0)


    params["auto_class_weights"] = trial.suggest_categorical(
        "auto_class_weights", ["None", "Balanced"]
    )

    params["loss_function"] = "Logloss"
    
    params["eval_metric"] = "AUC"

    params["early_stopping_rounds"] = trial.suggest_int(
        "early_stopping_rounds", 50, 100
    )

    params["verbose"] = False

    return params



In [39]:
from sklearn.tree import DecisionTreeRegressor

def ngboost_param_space(trial):

    params = {}

    # NGBoost (nível 1)
    params["n_estimators"] = trial.suggest_int(
        "n_estimators", 300, 1500
    )

    params["learning_rate"] = trial.suggest_float(
        "learning_rate", 1e-3, 0.1, log=True
    )

    params["minibatch_frac"] = trial.suggest_float(
        "minibatch_frac", 0.5, 1.0
    )

    params["natural_gradient"] = True
    
    params["verbose"] = 0

    # Base learner (nível 2)
    base_params = {
        "max_depth": trial.suggest_int("base_max_depth", 4, 10),
        "min_samples_leaf": trial.suggest_int("base_min_samples_leaf", 1, 50),
        "min_samples_split": trial.suggest_int("base_min_samples_split", 2, 50),
        "max_features": trial.suggest_float("base_max_features", 0.4, 1.0),
    }

    base = DecisionTreeRegressor(
        **base_params,
        random_state = RANDOM_STATE
    )

    params["Base"] = base

    return params

### Xgboost - Optuna - feature set: v1_one-hot_encoding

In [29]:
xgb_opt_fv1_model, xgb_opt_fv1_best_params, xgb_opt_fv1_best_score, xgb_opt_fv1_optuna_results = model_tuner.tune_optuna(
    model_class = XGBClassifier, 
    X = train_X_v1, 
    y = train_y_v1, 
    param_space_func = xgboost_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # limite de 6 horas
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-30 23:42:59,330] A new study created in memory with name: no-name-46692b7c-1619-4fd4-b29c-428ce337080e
[I 2025-12-30 23:43:00,892] Trial 0 finished with value: 0.8581334090974757 and parameters: {'learning_rate': 0.0029813770307052724, 'max_depth': 8, 'n_estimators': 1594, 'reg_lambda': 13.849507661133075, 'reg_alpha': 1.3179630432958698, 'gamma': 1.3629630264132082, 'min_child_weight': 0.06751383304677265, 'subsample': 0.9207488710140077, 'colsample_bytree': 0.983255741473482, 'scale_pos_weight': 6.895799670211798, 'early_stopping_rounds': 68}. Best is trial 0 with value: 0.8581334090974757.
[I 2025-12-30 23:43:02,056] Trial 1 finished with value: 0.8622285251996032 and parameters: {'learning_rate': 0.017419098458624965, 'max_depth': 8, 'n_estimators': 2282, 'reg_lambda': 0.3026934470902619, 'reg_alpha': 0.1757052524413466, 'gamma': 2.5154158265390487, 'min_child_weight': 0.010997788128340768, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'scale_p

In [30]:
model_tuner.save_model_and_metric(filename = 'xgb_opt_fv1', 
                                  model = xgb_opt_fv1_model, 
                                  params = xgb_opt_fv1_best_params, 
                                  score = xgb_opt_fv1_best_score,
                                  dataframe_info = xgb_opt_fv1_optuna_results)

{'model': 'XGBClassifier', 'params': {'learning_rate': 0.06605345010031059, 'max_depth': 8, 'n_estimators': 558, 'reg_lambda': 0.010522134830906853, 'reg_alpha': 0.40056609823866235, 'gamma': 3.4286351277640126, 'min_child_weight': 0.016302844626512235, 'subsample': 0.6722969142430106, 'colsample_bytree': 0.6364698221789004, 'scale_pos_weight': 0.6858426394328141, 'early_stopping_rounds': 78}, 'score': 0.8712925330585278, 'timestamp': '2025-12-30T23:46:18.287967'}


In [31]:
xgb_opt_fv1_model = utils.load_model('xgb_opt_fv1')
xgb_opt_fv1_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6364698221789004
,device,
,early_stopping_rounds,
,enable_categorical,False


In [32]:
model_tuner.calculate_metrics(xgb_opt_fv1_model, 
                              test_X_v1, 
                              test_y_v1, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'xgb_opt_fv1_test')

{'roc_auc_score': 0.8623546504902437, 'accuracy_score': 0.8675, 'precision_score': 0.8317757009345794, 'recall_score': 0.43734643734643736, 'f1_score': 0.573268921095008}


### Catboost - Optuna - feature set: v0_basico

In [22]:
cat_opt_fv0_model, cat_opt_fv0_best_params, cat_opt_fv0_best_score, cat_opt_fv0_optuna_results = model_tuner.tune_optuna(
    model_class = CatBoostClassifier, 
    X = train_X_v0, 
    y = train_y_v0, 
    param_space_func = catboost_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # limite de 6 horas
)

Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-30 22:03:48,962] A new study created in memory with name: no-name-bc0e618f-f119-485c-92b5-ba4c711ab99a
[I 2025-12-30 22:06:29,080] Trial 0 finished with value: 0.8575878109620442 and parameters: {'learning_rate': 0.0029813770307052724, 'depth': 8, 'iterations': 1594, 'l2_leaf_reg': 13.849507661133075, 'random_strength': 3.8998790405940174, 'bagging_temperature': 1.3629630264132082, 'auto_class_weights': 'Balanced', 'early_stopping_rounds': 98}. Best is trial 0 with value: 0.8575878109620442.
[I 2025-12-30 22:06:47,603] Trial 1 finished with value: 0.8730383989367339 and parameters: {'learning_rate': 0.14783979348659035, 'depth': 6, 'iterations': 1752, 'l2_leaf_reg': 5.4181589388967515, 'random_strength': 3.563510134914501, 'bagging_temperature': 1.8512537739519748, 'auto_class_weights': 'None', 'early_stopping_rounds': 50}. Best is trial 1 with value: 0.8730383989367339.
[I 2025-12-30 22:07:16,583] Trial 2 finished with value: 0.8664986853637162 and parameters: {'learning_ra

0:	learn: 0.6732605	total: 19.6ms	remaining: 20.1s
1:	learn: 0.6571867	total: 42.6ms	remaining: 21.9s
2:	learn: 0.6395447	total: 62ms	remaining: 21.2s
3:	learn: 0.6236943	total: 84.6ms	remaining: 21.7s
4:	learn: 0.6078605	total: 108ms	remaining: 22s
5:	learn: 0.5939374	total: 130ms	remaining: 22.2s
6:	learn: 0.5830122	total: 151ms	remaining: 22s
7:	learn: 0.5715154	total: 169ms	remaining: 21.6s
8:	learn: 0.5614321	total: 190ms	remaining: 21.5s
9:	learn: 0.5496549	total: 210ms	remaining: 21.5s
10:	learn: 0.5388896	total: 235ms	remaining: 21.7s
11:	learn: 0.5284157	total: 258ms	remaining: 21.9s
12:	learn: 0.5209232	total: 279ms	remaining: 21.8s
13:	learn: 0.5118405	total: 299ms	remaining: 21.7s
14:	learn: 0.5033104	total: 320ms	remaining: 21.7s
15:	learn: 0.4962098	total: 342ms	remaining: 21.7s
16:	learn: 0.4885384	total: 363ms	remaining: 21.6s
17:	learn: 0.4828346	total: 385ms	remaining: 21.6s
18:	learn: 0.4771741	total: 406ms	remaining: 21.6s
19:	learn: 0.4698504	total: 427ms	remaining

In [23]:
model_tuner.save_model_and_metric(filename = 'cat_opt_fv0', 
                                  model = cat_opt_fv0_model, 
                                  params = cat_opt_fv0_best_params, 
                                  score = cat_opt_fv0_best_score,
                                  dataframe_info = cat_opt_fv0_optuna_results)

{'model': 'CatBoostClassifier', 'params': {'learning_rate': 0.024501031804540142, 'depth': 4, 'iterations': 1030, 'l2_leaf_reg': 0.5144148041361079, 'random_strength': 0.9740652138500033, 'bagging_temperature': 4.316675820927967, 'auto_class_weights': 'None', 'early_stopping_rounds': 64}, 'score': 0.873923491057584, 'timestamp': '2025-12-30T23:40:54.735016'}


In [24]:
cat_opt_fv0_model = utils.load_model('cat_opt_fv0')
cat_opt_fv0_model

<catboost.core.CatBoostClassifier at 0x2b3d48dd640>

In [25]:
model_tuner.calculate_metrics(cat_opt_fv0_model, 
                              test_X_v0, 
                              test_y_v0, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'cat_opt_fv0_test')

{'roc_auc_score': 0.8642432879721015, 'accuracy_score': 0.869, 'precision_score': 0.7655677655677655, 'recall_score': 0.5135135135135135, 'f1_score': 0.6147058823529412}


### LightGBM- Optuna - feature set: v1_one-hot_encoding

In [40]:
lgb_opt_fv1_model, lgb_opt_fv1_best_params, lgb_opt_fv1_best_score, lgb_opt_fv1_optuna_results = model_tuner.tune_optuna(
    model_class = LGBMClassifier, 
    X = train_X_v1, 
    y = train_y_v1, 
    param_space_func = lightgbm_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # limite de 6 horas
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-30 23:59:55,110] A new study created in memory with name: no-name-6e45a8f0-8ef5-4025-8773-4804b85f8dcc
[I 2025-12-30 23:59:57,447] Trial 0 finished with value: 0.8167062341689861 and parameters: {'learning_rate': 0.0029813770307052724, 'num_leaves': 165, 'max_depth': 7, 'n_estimators': 2464, 'min_child_samples': 158, 'min_child_weight': 0.012313185468743897, 'subsample': 0.7105857020572387, 'colsample_bytree': 0.9207488710140077, 'reg_lambda': 68.00759466734245, 'reg_alpha': 3.189558257433924, 'scale_pos_weight': 1.4604981309148681, 'early_stopping_rounds': 75}. Best is trial 0 with value: 0.8167062341689861.
[I 2025-12-30 23:59:58,980] Trial 1 finished with value: 0.8664664213963075 and parameters: {'learning_rate': 0.04931971559288606, 'num_leaves': 187, 'max_depth': 6, 'n_estimators': 1903, 'min_child_samples': 106, 'min_child_weight': 0.0011352037010541887, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'reg_lambda': 0.288100449320714, 'reg_alph

In [42]:
model_tuner.save_model_and_metric(filename = 'lgb_opt_fv1', 
                                  model = lgb_opt_fv1_model, 
                                  params = lgb_opt_fv1_best_params, 
                                  score = lgb_opt_fv1_best_score,
                                  dataframe_info = lgb_opt_fv1_optuna_results)

{'model': 'LGBMClassifier', 'params': {'learning_rate': 0.018908840487666216, 'num_leaves': 235, 'max_depth': 4, 'n_estimators': 2795, 'min_child_samples': 22, 'min_child_weight': 0.004628418664177039, 'subsample': 0.8467281043051259, 'colsample_bytree': 0.6232825403423599, 'reg_lambda': 0.01914246217477908, 'reg_alpha': 0.01066360987366781, 'scale_pos_weight': 0.9139906907024911, 'early_stopping_rounds': 92}, 'score': 0.8694701004516954, 'timestamp': '2025-12-31T00:06:39.118573'}


In [43]:
lgb_opt_fv1_model = utils.load_model('lgb_opt_fv1')
lgb_opt_fv1_model

0,1,2
,boosting_type,'gbdt'
,num_leaves,235
,max_depth,4
,learning_rate,0.018908840487666216
,n_estimators,2795
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.004628418664177039


In [44]:
model_tuner.calculate_metrics(lgb_opt_fv1_model, 
                              test_X_v1, 
                              test_y_v1, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'lgb_opt_fv1_test')

{'roc_auc_score': 0.8440644033864373, 'accuracy_score': 0.8575, 'precision_score': 0.7293233082706767, 'recall_score': 0.47665847665847666, 'f1_score': 0.5765230312035661}


### Random Forest - Optuna - feature set: v1_one-hot_encoding

In [52]:
rf_opt_fv1_model, rf_opt_fv1_best_params, rf_opt_fv1_best_score, rf_opt_fv1_optuna_results = model_tuner.tune_optuna(
    model_class = RandomForestClassifier, 
    X = train_X_v1, 
    y = train_y_v1, 
    param_space_func = random_forest_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # limite de 6 horas
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-31 00:20:07,083] A new study created in memory with name: no-name-2d6be182-64c0-4151-8cd1-add1c0901706
[I 2025-12-31 00:20:16,009] Trial 0 finished with value: 0.8556457127447488 and parameters: {'n_estimators': 530, 'max_depth': 11, 'min_samples_split': 23, 'min_samples_leaf': 40, 'max_features': 0.8679854848712821, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.8556457127447488.
[I 2025-12-31 00:20:37,427] Trial 1 finished with value: 0.861520644123624 and parameters: {'n_estimators': 1263, 'max_depth': 15, 'min_samples_split': 44, 'min_samples_leaf': 18, 'max_features': 0.7005970753140752, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.861520644123624.
[I 2025-12-31 00:20:53,530] Trial 2 finished with value: 0.8599050379944332 and parameters: {'n_estimators': 744, 'max_depth': 10, 'min_samples_split': 26, 'min_samples_leaf': 1, 'max_features': 0.8636959729674244, 'class_weight': None}. Best is trial 1 with value: 0.861520644123624.
[I 2025-12-31 00

In [53]:
model_tuner.save_model_and_metric(filename = 'rf_opt_fv1', 
                                  model = rf_opt_fv1_model, 
                                  params = rf_opt_fv1_best_params, 
                                  score = rf_opt_fv1_best_score,
                                  dataframe_info = rf_opt_fv1_optuna_results)

{'model': 'RandomForestClassifier', 'params': {'n_estimators': 1339, 'max_depth': 9, 'min_samples_split': 23, 'min_samples_leaf': 6, 'max_features': 0.48726312627554236, 'class_weight': None}, 'score': 0.8657626335102233, 'timestamp': '2025-12-31T00:56:59.693614'}


In [54]:
rf_opt_fv1_model = utils.load_model('rf_opt_fv1')
rf_opt_fv1_model

0,1,2
,n_estimators,1339
,criterion,'gini'
,max_depth,9
,min_samples_split,23
,min_samples_leaf,6
,min_weight_fraction_leaf,0.0
,max_features,0.48726312627554236
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [55]:
model_tuner.calculate_metrics(rf_opt_fv1_model, 
                              test_X_v1, 
                              test_y_v1, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'rf_opt_fv1_test')

{'roc_auc_score': 0.8531505311166327, 'accuracy_score': 0.8665, 'precision_score': 0.7916666666666666, 'recall_score': 0.4668304668304668, 'f1_score': 0.5873261205564142}


### NGBoost - Optuna - feature set: v1_one-hot_encoding

In [47]:
ngb_opt_fv1_model, ngb_opt_fv1_best_params, ngb_opt_fv1_best_score, ngb_opt_fv1_optuna_results = model_tuner.tune_optuna(
    model_class = NGBClassifier, 
    X = train_X_v1, 
    y = train_y_v1, 
    param_space_func = ngboost_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # limite de 6 horas
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-31 15:25:14,306] A new study created in memory with name: no-name-04bf8cbd-6ed4-4789-9b91-8117af2e36f5
[I 2025-12-31 15:26:31,211] Trial 0 finished with value: 0.859309840028508 and parameters: {'n_estimators': 1207, 'learning_rate': 0.009166080569548896, 'minibatch_frac': 0.6335652954919282, 'base_max_depth': 7, 'base_min_samples_leaf': 15, 'base_min_samples_split': 17, 'base_max_features': 0.8001254913892248}. Best is trial 0 with value: 0.859309840028508.
[I 2025-12-31 15:27:14,956] Trial 1 finished with value: 0.8559567951767777 and parameters: {'n_estimators': 541, 'learning_rate': 0.050252461227493404, 'minibatch_frac': 0.705798108585104, 'base_max_depth': 7, 'base_min_samples_leaf': 14, 'base_min_samples_split': 2, 'base_max_features': 0.9314209942959957}. Best is trial 0 with value: 0.859309840028508.
[I 2025-12-31 15:28:37,503] Trial 2 finished with value: 0.8659658483497221 and parameters: {'n_estimators': 1418, 'learning_rate': 0.002504617745700087, 'minibatch_fra

[iter 0] loss=0.5064 val_loss=0.0000 scale=2.0000 norm=4.0043
[iter 100] loss=0.4126 val_loss=0.0000 scale=2.0000 norm=3.4598
[iter 200] loss=0.3795 val_loss=0.0000 scale=2.0000 norm=3.3819
[iter 300] loss=0.3475 val_loss=0.0000 scale=2.0000 norm=3.3059
[iter 400] loss=0.3282 val_loss=0.0000 scale=2.0000 norm=3.2760
[iter 500] loss=0.3185 val_loss=0.0000 scale=2.0000 norm=3.2846
[iter 600] loss=0.3099 val_loss=0.0000 scale=1.0000 norm=1.6660
[iter 700] loss=0.3026 val_loss=0.0000 scale=1.0000 norm=1.6631
[iter 800] loss=0.2892 val_loss=0.0000 scale=2.0000 norm=3.2380
[iter 900] loss=0.2985 val_loss=0.0000 scale=1.0000 norm=1.6594


In [50]:
model_tuner.save_model_and_metric(filename = 'ngb_opt_fv1', 
                                  model = ngb_opt_fv1_model, 
                                  params = ngb_opt_fv1_best_params, 
                                  score = ngb_opt_fv1_best_score,
                                  dataframe_info = ngb_opt_fv1_optuna_results)

{'model': 'NGBClassifier', 'params': {'n_estimators': 919, 'learning_rate': 0.0019391638801433634, 'minibatch_frac': 0.5585780611073167, 'base_max_depth': 6, 'base_min_samples_leaf': 3, 'base_min_samples_split': 42, 'base_max_features': 0.6296122913657486}, 'score': 0.8682329939998652, 'timestamp': '2025-12-31T16:58:59.155036'}


In [51]:
ngb_opt_fv1_model = utils.load_model('ngb_opt_fv1')
ngb_opt_fv1_model

0,1,2
,Dist,<class 'ngboo....Categorical'>
,Score,<class 'ngboo...res.LogScore'>
,Base,DecisionTreeR...om_state=1234)
,natural_gradient,True
,n_estimators,919
,learning_rate,0.0019391638801433634
,minibatch_frac,0.5585780611073167
,col_sample,1.0
,verbose,True
,random_state,RandomState(M... 0x26F8A0FEB40

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,6
,min_samples_split,42
,min_samples_leaf,3
,min_weight_fraction_leaf,0.0
,max_features,0.6296122913657486
,random_state,1234
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [52]:
model_tuner.calculate_metrics(ngb_opt_fv1_model, 
                              test_X_v1, 
                              test_y_v1, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'ngb_opt_fv1_test')

{'roc_auc_score': 0.8565237039813312, 'accuracy_score': 0.867, 'precision_score': 0.8025751072961373, 'recall_score': 0.4594594594594595, 'f1_score': 0.584375}


### XGBoost - Optuna - feature set: v3_one-hot_encoding_plus_normalizacao_plus_poly

In [68]:
xgb_opt_fv3_model, xgb_opt_fv3_best_params, xgb_opt_fv3_best_score, xgb_opt_fv3_optuna_results = model_tuner.tune_optuna(
    model_class = XGBClassifier, 
    X = train_X_v3, 
    y = train_y_v3, 
    param_space_func = xgboost_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # limite de 6 horas
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-31 17:54:10,149] A new study created in memory with name: no-name-9c926822-efd3-41be-b402-5f175e5927c7
[I 2025-12-31 17:54:37,255] Trial 0 finished with value: 0.8492579287496028 and parameters: {'learning_rate': 0.0029813770307052724, 'max_depth': 8, 'n_estimators': 1594, 'reg_lambda': 13.849507661133075, 'reg_alpha': 1.3179630432958698, 'gamma': 1.3629630264132082, 'min_child_weight': 0.06751383304677265, 'subsample': 0.9207488710140077, 'colsample_bytree': 0.983255741473482, 'scale_pos_weight': 6.895799670211798, 'early_stopping_rounds': 68}. Best is trial 0 with value: 0.8492579287496028.
[I 2025-12-31 17:54:45,131] Trial 1 finished with value: 0.8544969710394776 and parameters: {'learning_rate': 0.017419098458624965, 'max_depth': 8, 'n_estimators': 2282, 'reg_lambda': 0.3026934470902619, 'reg_alpha': 0.1757052524413466, 'gamma': 2.5154158265390487, 'min_child_weight': 0.010997788128340768, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'scale_p

In [69]:
model_tuner.save_model_and_metric(filename = 'xgb_opt_fv3', 
                                  model = xgb_opt_fv3_model, 
                                  params = xgb_opt_fv3_best_params, 
                                  score = xgb_opt_fv3_best_score,
                                  dataframe_info = xgb_opt_fv3_optuna_results)

{'model': 'XGBClassifier', 'params': {'learning_rate': 0.028914701172905902, 'max_depth': 5, 'n_estimators': 2941, 'reg_lambda': 0.9973732870012123, 'reg_alpha': 0.003272456658044637, 'gamma': 4.991390367590288, 'min_child_weight': 0.6837286170194612, 'subsample': 0.6002134766115784, 'colsample_bytree': 0.6009784065217589, 'scale_pos_weight': 0.6746868968434129, 'early_stopping_rounds': 82}, 'score': 0.8684328379770975, 'timestamp': '2025-12-31T18:02:34.632275'}


In [70]:
xgb_opt_fv3_model = utils.load_model('xgb_opt_fv3')
xgb_opt_fv3_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6009784065217589
,device,
,early_stopping_rounds,
,enable_categorical,False


In [71]:
model_tuner.calculate_metrics(xgb_opt_fv3_model, 
                              test_X_v3, 
                              test_y_v3, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'xgb_opt_fv3_test')

{'roc_auc_score': 0.8584054007782822, 'accuracy_score': 0.8665, 'precision_score': 0.8240740740740741, 'recall_score': 0.43734643734643736, 'f1_score': 0.5714285714285714}


### Catboost - Optuna - feature set: v4_normalizacao_plus_poly

In [74]:
cat_opt_fv4_model, cat_opt_fv4_best_params, cat_opt_fv4_best_score, cat_opt_fv4_optuna_results = model_tuner.tune_optuna(
    model_class = CatBoostClassifier, 
    X = train_X_v4, 
    y = train_y_v4, 
    param_space_func = catboost_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # limite de 6 horas
)

Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-31 18:13:53,451] A new study created in memory with name: no-name-95a3e096-acf9-4db0-af01-c81119dbcfc6
[I 2025-12-31 18:18:15,145] Trial 0 finished with value: 0.86193285242365 and parameters: {'learning_rate': 0.0029813770307052724, 'depth': 8, 'iterations': 1594, 'l2_leaf_reg': 13.849507661133075, 'random_strength': 3.8998790405940174, 'bagging_temperature': 1.3629630264132082, 'auto_class_weights': 'Balanced', 'early_stopping_rounds': 98}. Best is trial 0 with value: 0.86193285242365.
[I 2025-12-31 18:18:33,874] Trial 1 finished with value: 0.867371497914881 and parameters: {'learning_rate': 0.14783979348659035, 'depth': 6, 'iterations': 1752, 'l2_leaf_reg': 5.4181589388967515, 'random_strength': 3.563510134914501, 'bagging_temperature': 1.8512537739519748, 'auto_class_weights': 'None', 'early_stopping_rounds': 50}. Best is trial 1 with value: 0.867371497914881.
[I 2025-12-31 18:19:14,252] Trial 2 finished with value: 0.8664006895821093 and parameters: {'learning_rate': 0

0:	learn: 0.6616636	total: 18.5ms	remaining: 27.2s
1:	learn: 0.6343374	total: 38.4ms	remaining: 28.2s
2:	learn: 0.6071810	total: 60.5ms	remaining: 29.6s
3:	learn: 0.5823601	total: 82.7ms	remaining: 30.3s
4:	learn: 0.5620282	total: 101ms	remaining: 29.6s
5:	learn: 0.5454701	total: 122ms	remaining: 29.8s
6:	learn: 0.5280074	total: 144ms	remaining: 30.1s
7:	learn: 0.5117499	total: 165ms	remaining: 30.3s
8:	learn: 0.4989425	total: 188ms	remaining: 30.6s
9:	learn: 0.4857603	total: 210ms	remaining: 30.7s
10:	learn: 0.4757841	total: 234ms	remaining: 31.1s
11:	learn: 0.4649903	total: 255ms	remaining: 31.1s
12:	learn: 0.4566461	total: 278ms	remaining: 31.2s
13:	learn: 0.4479526	total: 300ms	remaining: 31.2s
14:	learn: 0.4389745	total: 322ms	remaining: 31.3s
15:	learn: 0.4308474	total: 343ms	remaining: 31.2s
16:	learn: 0.4239515	total: 365ms	remaining: 31.2s
17:	learn: 0.4189776	total: 386ms	remaining: 31.1s
18:	learn: 0.4138552	total: 409ms	remaining: 31.2s
19:	learn: 0.4076773	total: 434ms	rem

In [75]:
model_tuner.save_model_and_metric(filename = 'cat_opt_fv4', 
                                  model = cat_opt_fv4_model, 
                                  params = cat_opt_fv4_best_params, 
                                  score = cat_opt_fv4_best_score,
                                  dataframe_info = cat_opt_fv4_optuna_results)

{'model': 'CatBoostClassifier', 'params': {'learning_rate': 0.04155689845625358, 'depth': 4, 'iterations': 1471, 'l2_leaf_reg': 0.04510928406554372, 'random_strength': 0.7298172152062612, 'bagging_temperature': 3.6804166566407863, 'auto_class_weights': 'None', 'early_stopping_rounds': 82}, 'score': 0.8728397588388825, 'timestamp': '2025-12-31T20:42:53.171226'}


In [76]:
cat_opt_fv4_model = utils.load_model('cat_opt_fv4')
cat_opt_fv4_model

<catboost.core.CatBoostClassifier at 0x26f9e0a1040>

In [77]:
model_tuner.calculate_metrics(cat_opt_fv4_model, 
                              test_X_v4, 
                              test_y_v4, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'cat_opt_fv4_test')

{'roc_auc_score': 0.8545093629839392, 'accuracy_score': 0.8625, 'precision_score': 0.7391304347826086, 'recall_score': 0.5012285012285013, 'f1_score': 0.5973645680819912}


### LightGBM- Optuna - feature set: v3_one-hot_encoding_plus_normalizacao_plus_poly

In [85]:
lgb_opt_fv3_model, lgb_opt_fv3_best_params, lgb_opt_fv3_best_score, lgb_opt_fv3_optuna_results = model_tuner.tune_optuna(
    model_class = LGBMClassifier, 
    X = train_X_v3, 
    y = train_y_v3, 
    param_space_func = lightgbm_param_space,
    scoring='roc_auc',
    n_trials = 150,
    timeout = 21600 # limite de 6 horas
)


Trials:   0%|          | 0/150 [00:00<?, ?it/s]

[I 2025-12-31 20:51:55,235] A new study created in memory with name: no-name-dd7e11bc-b586-4604-b140-51535884f87f
[I 2025-12-31 20:52:06,181] Trial 0 finished with value: 0.8538776954859338 and parameters: {'learning_rate': 0.0029813770307052724, 'num_leaves': 165, 'max_depth': 7, 'n_estimators': 2464, 'min_child_samples': 158, 'min_child_weight': 0.012313185468743897, 'subsample': 0.7105857020572387, 'colsample_bytree': 0.9207488710140077, 'reg_lambda': 68.00759466734245, 'reg_alpha': 3.189558257433924, 'scale_pos_weight': 1.4604981309148681, 'early_stopping_rounds': 75}. Best is trial 0 with value: 0.8538776954859338.
[I 2025-12-31 20:52:07,746] Trial 1 finished with value: 0.8619080525084032 and parameters: {'learning_rate': 0.04931971559288606, 'num_leaves': 187, 'max_depth': 6, 'n_estimators': 1903, 'min_child_samples': 106, 'min_child_weight': 0.0011352037010541887, 'subsample': 0.9091306486449496, 'colsample_bytree': 0.9530564762544467, 'reg_lambda': 0.288100449320714, 'reg_alph

In [86]:
model_tuner.save_model_and_metric(filename = 'lgb_opt_fv3', 
                                  model = lgb_opt_fv3_model, 
                                  params = lgb_opt_fv3_best_params, 
                                  score = lgb_opt_fv3_best_score,
                                  dataframe_info = lgb_opt_fv3_optuna_results)

{'model': 'LGBMClassifier', 'params': {'learning_rate': 0.07536878912888248, 'num_leaves': 82, 'max_depth': 5, 'n_estimators': 652, 'min_child_samples': 59, 'min_child_weight': 0.004579615133130554, 'subsample': 0.7999212004193511, 'colsample_bytree': 0.751933847977377, 'reg_lambda': 84.97107096585827, 'reg_alpha': 0.0024935323644191703, 'scale_pos_weight': 3.0742724365966536, 'early_stopping_rounds': 73}, 'score': 0.8670281515154434, 'timestamp': '2025-12-31T21:17:14.146386'}


In [87]:
lgb_opt_fv3_model = utils.load_model('lgb_opt_fv3')
lgb_opt_fv3_model

0,1,2
,boosting_type,'gbdt'
,num_leaves,82
,max_depth,5
,learning_rate,0.07536878912888248
,n_estimators,652
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.004579615133130554


In [88]:
model_tuner.calculate_metrics(lgb_opt_fv3_model, 
                              test_X_v3, 
                              test_y_v3, 
                              [roc_auc_score, accuracy_score, precision_score, recall_score, f1_score], 
                              'lgb_opt_fv3_test')

{'roc_auc_score': 0.8443744206456072, 'accuracy_score': 0.8225, 'precision_score': 0.5548523206751055, 'recall_score': 0.6461916461916462, 'f1_score': 0.5970488081725313}


### Random Forest - Optuna - feature set: v3_one-hot_encoding_plus_normalizacao_plus_poly