In [2]:
# 🚗 Montar Google Drive
from google.colab import drive
drive.mount('/content/drive')

# 📦 Instalar librerías necesarias
!pip install lightgbm optuna imbalanced-learn pyarrow --quiet

# 📚 Importar librerías
import pandas as pd
import numpy as np
import optuna
from lightgbm import LGBMClassifier
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from tqdm import tqdm

# 📂 Cargar base completa
ruta = "/content/drive/MyDrive/Datos/6_Base_Modelos_Predictivos.parquet"
df = pd.read_parquet(ruta)

# ➕ Calcular número de años por empresa
df["n_anios"] = df.groupby("NIT")["Año"].transform("count")

# 🧼 Limpieza de columnas categóricas
X = df.drop(columns=["RQ", "NIT"])
y = df["RQ"]
X = pd.get_dummies(X, columns=["DEP", "CIIU_Letra"], drop_first=True)
X = X.replace([np.inf, -np.inf], 1e6)

# ⚙️ Definir función objetivo para Optuna
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 500),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "random_state": 42
    }

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    f1_scores = []

    for train_idx, test_idx in skf.split(X, y):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        sm = SMOTE(random_state=42)
        X_res, y_res = sm.fit_resample(X_train, y_train)

        model = LGBMClassifier(**params)
        model.fit(X_res, y_res)
        y_pred = model.predict(X_test)
        f1_scores.append(f1_score(y_test, y_pred))

    return np.mean(f1_scores)

# 🚀 Ejecutar búsqueda con Optuna
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)

# 🏆 Mostrar mejores parámetros
print("\n✅ Mejores parámetros encontrados:")
print(study.best_params)
print(f"🔝 Mejor F1-score: {study.best_value:.4f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[I 2025-05-19 02:33:06,046] A new study created in memory with name: no-name-4f58a73d-300d-4bfb-99e2-bdf34f8cdf38


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.024509 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.032047 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:33:37,631] Trial 0 finished with value: 0.4740733412333613 and parameters: {'n_estimators': 331, 'max_depth': 7, 'learning_rate': 0.05655616343150092, 'subsample': 0.7079493567598145, 'colsample_bytree': 0.9396184882665024}. Best is trial 0 with value: 0.4740733412333613.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056458 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060747 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-th

[I 2025-05-19 02:34:12,549] Trial 1 finished with value: 0.5161875088994918 and parameters: {'n_estimators': 429, 'max_depth': 9, 'learning_rate': 0.19241269896238575, 'subsample': 0.6518239170936834, 'colsample_bytree': 0.8789471428121596}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.022739 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.022586 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:34:30,030] Trial 2 finished with value: 0.4304066546322615 and parameters: {'n_estimators': 295, 'max_depth': 3, 'learning_rate': 0.20397765622154093, 'subsample': 0.6430864003507382, 'colsample_bytree': 0.8254273073611935}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023212 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023305 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:34:57,470] Trial 3 finished with value: 0.48554776606553995 and parameters: {'n_estimators': 269, 'max_depth': 9, 'learning_rate': 0.06510251471057862, 'subsample': 0.7747938329610566, 'colsample_bytree': 0.8960457713071155}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.034619 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.063292 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of n

[I 2025-05-19 02:35:26,422] Trial 4 finished with value: 0.42073317514457276 and parameters: {'n_estimators': 446, 'max_depth': 4, 'learning_rate': 0.044178968232622455, 'subsample': 0.6266668463071343, 'colsample_bytree': 0.6209168271215016}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.024148 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023451 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:35:47,308] Trial 5 finished with value: 0.4743465412704918 and parameters: {'n_estimators': 301, 'max_depth': 4, 'learning_rate': 0.23493030634863174, 'subsample': 0.6654556591113171, 'colsample_bytree': 0.9875167793075199}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053798 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056620 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:36:03,982] Trial 6 finished with value: 0.44851073896479904 and parameters: {'n_estimators': 185, 'max_depth': 5, 'learning_rate': 0.11316330959940117, 'subsample': 0.9521306511240797, 'colsample_bytree': 0.6934996865722342}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.024303 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023648 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:36:24,381] Trial 7 finished with value: 0.4481579311554255 and parameters: {'n_estimators': 347, 'max_depth': 3, 'learning_rate': 0.24643544590042418, 'subsample': 0.797515148331283, 'colsample_bytree': 0.9087107401532216}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056281 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056974 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-th

[I 2025-05-19 02:36:47,968] Trial 8 finished with value: 0.5043906474480307 and parameters: {'n_estimators': 263, 'max_depth': 7, 'learning_rate': 0.2523854085608998, 'subsample': 0.7116328228194679, 'colsample_bytree': 0.8027580756889501}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023093 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023364 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:37:21,786] Trial 9 finished with value: 0.5082802528017055 and parameters: {'n_estimators': 427, 'max_depth': 12, 'learning_rate': 0.29475094300199417, 'subsample': 0.7111083819214805, 'colsample_bytree': 0.8940854208632238}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055434 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056757 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:38:00,094] Trial 10 finished with value: 0.5140700183336595 and parameters: {'n_estimators': 498, 'max_depth': 10, 'learning_rate': 0.15753017167375027, 'subsample': 0.9119624524690426, 'colsample_bytree': 0.7163691162139765}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053801 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.083218 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:38:39,370] Trial 11 finished with value: 0.5127699748945684 and parameters: {'n_estimators': 500, 'max_depth': 10, 'learning_rate': 0.15608843732518987, 'subsample': 0.9184897232768812, 'colsample_bytree': 0.7228406644942726}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056432 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056330 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:39:17,808] Trial 12 finished with value: 0.513270190137192 and parameters: {'n_estimators': 499, 'max_depth': 10, 'learning_rate': 0.16295161726780422, 'subsample': 0.8925679205692216, 'colsample_bytree': 0.7270262313622221}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.057313 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055929 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:39:51,014] Trial 13 finished with value: 0.5034460593391266 and parameters: {'n_estimators': 401, 'max_depth': 12, 'learning_rate': 0.11126515987307363, 'subsample': 0.8679590317925033, 'colsample_bytree': 0.6265401665855912}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.024421 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023157 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:40:05,432] Trial 14 finished with value: 0.49057811438581594 and parameters: {'n_estimators': 112, 'max_depth': 9, 'learning_rate': 0.19472634729578361, 'subsample': 0.9871514551408306, 'colsample_bytree': 0.8405158044433851}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.052266 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055931 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-th

[I 2025-05-19 02:40:38,698] Trial 15 finished with value: 0.5036238601682399 and parameters: {'n_estimators': 397, 'max_depth': 8, 'learning_rate': 0.12001740641244743, 'subsample': 0.8498683921757187, 'colsample_bytree': 0.7420999434211931}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.024576 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054656 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of n

[I 2025-05-19 02:41:14,323] Trial 16 finished with value: 0.5073764666256626 and parameters: {'n_estimators': 458, 'max_depth': 11, 'learning_rate': 0.19261158687124924, 'subsample': 0.7653867271597298, 'colsample_bytree': 0.7727300789391286}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053996 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055710 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:41:43,340] Trial 17 finished with value: 0.49576109822240033 and parameters: {'n_estimators': 367, 'max_depth': 6, 'learning_rate': 0.13839827150114256, 'subsample': 0.837098268149616, 'colsample_bytree': 0.662096528354542}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.038683 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054983 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of n

[I 2025-05-19 02:42:23,719] Trial 18 finished with value: 0.5045838832515506 and parameters: {'n_estimators': 471, 'max_depth': 9, 'learning_rate': 0.085728930194537, 'subsample': 0.6070025640662349, 'colsample_bytree': 0.7717186054389995}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023456 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.039065 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:42:56,017] Trial 19 finished with value: 0.5109016713266914 and parameters: {'n_estimators': 408, 'max_depth': 11, 'learning_rate': 0.17560318109316292, 'subsample': 0.9982400441085726, 'colsample_bytree': 0.8617448786403247}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.025522 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023563 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:43:22,073] Trial 20 finished with value: 0.4485736287547207 and parameters: {'n_estimators': 224, 'max_depth': 8, 'learning_rate': 0.017265175408468947, 'subsample': 0.9385638307141043, 'colsample_bytree': 0.9673312046963556}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.052665 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.088809 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:43:59,491] Trial 21 finished with value: 0.5105860732248146 and parameters: {'n_estimators': 498, 'max_depth': 10, 'learning_rate': 0.16269787303630687, 'subsample': 0.8828205514300063, 'colsample_bytree': 0.6791792310886008}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054331 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056071 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:44:35,625] Trial 22 finished with value: 0.5045884659591887 and parameters: {'n_estimators': 476, 'max_depth': 10, 'learning_rate': 0.219445287680059, 'subsample': 0.9053738926431703, 'colsample_bytree': 0.7343858262470944}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055330 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055652 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:45:12,113] Trial 23 finished with value: 0.5073941607962089 and parameters: {'n_estimators': 441, 'max_depth': 11, 'learning_rate': 0.14182331530172962, 'subsample': 0.8231767756026024, 'colsample_bytree': 0.7786844584058739}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054341 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054414 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:45:49,460] Trial 24 finished with value: 0.5068907591020888 and parameters: {'n_estimators': 497, 'max_depth': 9, 'learning_rate': 0.17784834587274814, 'subsample': 0.960412800023645, 'colsample_bytree': 0.7083350873635885}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054250 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.057905 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-th

[I 2025-05-19 02:46:19,355] Trial 25 finished with value: 0.5097634837136646 and parameters: {'n_estimators': 381, 'max_depth': 10, 'learning_rate': 0.269188338475398, 'subsample': 0.8947070594270695, 'colsample_bytree': 0.6541041238370878}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023405 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023025 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:46:52,758] Trial 26 finished with value: 0.514127922911694 and parameters: {'n_estimators': 435, 'max_depth': 8, 'learning_rate': 0.21697334359064047, 'subsample': 0.7422386698015556, 'colsample_bytree': 0.8573200297622942}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023921 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023058 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:47:26,101] Trial 27 finished with value: 0.5074740122446889 and parameters: {'n_estimators': 416, 'max_depth': 8, 'learning_rate': 0.2084268828199313, 'subsample': 0.6883665883677175, 'colsample_bytree': 0.8640376720457186}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055843 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058112 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-th

[I 2025-05-19 02:47:56,087] Trial 28 finished with value: 0.5085738436705138 and parameters: {'n_estimators': 358, 'max_depth': 7, 'learning_rate': 0.22784743627633405, 'subsample': 0.7431003020034357, 'colsample_bytree': 0.8208783937553302}. Best is trial 1 with value: 0.5161875088994918.


[LightGBM] [Info] Number of positive: 36448, number of negative: 36448
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023221 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72896, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 36449, number of negative: 36449
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023062 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11630
[LightGBM] [Info] Number of data points in the train set: 72898, number of used features: 93
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0

[I 2025-05-19 02:48:21,938] Trial 29 finished with value: 0.5023087749495527 and parameters: {'n_estimators': 314, 'max_depth': 6, 'learning_rate': 0.27019172487657894, 'subsample': 0.740365709142714, 'colsample_bytree': 0.931066717090171}. Best is trial 1 with value: 0.5161875088994918.



✅ Mejores parámetros encontrados:
{'n_estimators': 429, 'max_depth': 9, 'learning_rate': 0.19241269896238575, 'subsample': 0.6518239170936834, 'colsample_bytree': 0.8789471428121596}
🔝 Mejor F1-score: 0.5162
