In [1]:
!pip install optuna==3.6.1

Collecting optuna==3.6.1
  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
Installing collected packages: optuna
  Attempting uninstall: optuna
    Found existing installation: optuna 4.6.0
    Uninstalling optuna-4.6.0:
      Successfully uninstalled optuna-4.6.0
Successfully installed optuna-3.6.1


In [2]:
# @title
from token import SEMI
import optuna
import lightgbm as lgb
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
import numpy as np
import logging
import os
import datetime
from pathlib import Path
import polars as pl



In [3]:
# @title Setup Logging
def setup_logging(log_path: Path):
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(name)s %(lineno)d - %(message)s',
        handlers=[
            logging.FileHandler(log_path, mode="w", encoding="utf-8-sig"),
            logging.StreamHandler()
        ]
    )
    return logging.getLogger(__name__)

In [4]:
# @title Build Foto Mes
def build_foto_mes(start_foto_mes: int, end_foto_mes: int, list_of_foto_mes_to_avoid: list[int] = None) -> list[int]:
    if list_of_foto_mes_to_avoid is None:
        list_of_foto_mes_to_avoid = []

    result = []
    current_year = start_foto_mes // 100
    current_month = start_foto_mes % 100

    end_year = end_foto_mes // 100
    end_month = end_foto_mes % 100

    while True:
        current_foto_mes = current_year * 100 + current_month

        if current_foto_mes > end_foto_mes:
            break

        if current_foto_mes not in list_of_foto_mes_to_avoid:
            result.append(current_foto_mes)

        current_month += 1
        if current_month > 12:
            current_month = 1
            current_year += 1

    return result

In [5]:
# @title Drop Columns
def drop_columns(df : pl.DataFrame):

    col_drops = ['mprestamos_personales','cprestamos_personales']

    if "Master_Finiciomora" in df.columns:
      col_drops.append("Master_Finiciomora")

    df = df.drop(col_drops)
    return df

In [32]:
# @title Config
CONFIG = {
    "STUDY_NAME": "competencia-03-exp2-rus",
    "BUCKETS": "/home/tomas_freilij/buckets",
    "BUCKET_ORIGIN": "b1",
    "BUCKET_TARGET": "b2",
    "SEMILLA": [100, 420029, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400, 2500],
    "MES_TRAIN_FLOOR": 202101,
    "MES_TRAIN_CEILING": 202103,
    "MES_TO_DROP": [202006],
    "MES_VALIDACION": 202104,
    "MES_TEST": 202105,
    "FINAL_TRAIN_FLOOR": 201901,
    "FINAL_TRAIN_CEILING": 202107,
    "FINAL_PREDICT": 202109,
    "GANANCIA_ACIERTO": 780000,
    "COSTO_ESTIMULO": 20000,
    "RUN_BAYESIAN_OPTIMIZATION": False,
    "UNDERSAMPLING_FRACTION": 0.1,
    "IS_EXPERIMENTO": False,
    "N_TRIALS": 30,
    "TEST" : False,
    "SUFIX": "us-0-1-rus",
    "IS_RANDOM_UNDERSAMPLING": False

}

In [7]:
# @title Modelos y Log Directory
bucket_target = os.path.join(CONFIG["BUCKETS"], CONFIG["BUCKET_TARGET"])
modelos_directory = os.path.join(bucket_target, "modelos")
log_directory = os.path.join(bucket_target, "log")

os.makedirs(log_directory, exist_ok=True)
os.makedirs(modelos_directory, exist_ok=True)

timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
log_dir = Path(CONFIG["BUCKETS"]) / CONFIG["BUCKET_TARGET"] / "log"
log_dir.mkdir(parents=True, exist_ok=True)
log_path = log_dir / f"log_fe_{timestamp}.log"

logger = setup_logging(log_path)


In [8]:
# @title Undersample DF
def undersample_df(df: pl.DataFrame, fraction, is_random_undersampling: bool = False) -> pl.DataFrame:

  if is_random_undersampling:
    logger.info(f"Performing random undersampling with fraction (proportion to KEEP): {fraction}, DF shape: {df.shape}")
    # 'CONTINUA' is the majority, 'BAJA+1' and 'BAJA+2' are minorities
    df_minority = df.filter(pl.col("clase_ternaria").is_in(["BAJA+1", "BAJA+2"])) # Or ['BAJA+1', 'BAJA+2']
    df_majority = df.filter(pl.col("clase_ternaria") == "CONTINUA")

    # `fraction` represents the proportion of the majority class to KEEP.
    if fraction >= 1.0 or df_majority.shape[0] == 0: # Keep all if fraction is 100% or no majority samples
        df_majority_sampled = df_majority
    else:
        # Sample the majority class to keep `fraction` of its rows
        df_majority_sampled = df_majority.sample(fraction=fraction, seed=1000, shuffle=True)

    df_filtered = pl.concat([df_minority, df_majority_sampled])
    logger.info(f"DF shape after random undersampling: {df_filtered.shape}")

  else:
    logger.info(f"Performing client-based undersampling with fraction (proportion to KEEP): {fraction}, DF shape: {df.shape}")
    clientes_solo_continuas = (
        df.group_by("numero_de_cliente")
        .agg(
            n_bajas=pl.col("clase_ternaria")
            .is_in(["BAJA+1", "BAJA+2"])
            .sum()
        )
        .filter(pl.col("n_bajas") == 0)
    )

    # Sample (1 - fraction) of 'solo_continuas' clients to REMOVE them, effectively keeping 'fraction' of them.
    clientes_solo_continuas_undersampled = clientes_solo_continuas.sample(
        fraction=1 - fraction, seed=1000
    )

    df_filtered = df.filter(
        ~pl.col('numero_de_cliente').is_in(
            clientes_solo_continuas_undersampled["numero_de_cliente"]
        )
    )
    logger.info(f"DF shape after client-based undersampling: {df_filtered.shape}")

  return df_filtered

In [9]:
# @title Clientes Sin bajas
def clientes_sin_bajas(df: pl.DataFrame) -> pl.DataFrame:
  logger.info(f"Clientes sin bajas")

  clientes_sin_bajas = (
      df.group_by("numero_de_cliente")
      .agg(
          n_bajas=pl.col("clase_ternaria")
          .is_in(["BAJA+1", "BAJA+2"])
          .sum()
      )
      .filter(pl.col("n_bajas") == 0)
  )

  logger.info(f"Cantidad de clientes sin bajas: {clientes_sin_bajas.shape}")
  return clientes_sin_bajas

In [10]:
# @title Clase Binaria
def generate_clase_binaria(df : pl.DataFrame, is_prediction = False):
    ternarias_baja = []
    if is_prediction:
      ternarias_baja = ["BAJA+2"]
    else:
      ternarias_baja = ["BAJA+2", "BAJA+1"]

    df = df.with_columns(pl.lit(0).alias('clase_binaria'))


    df = df.with_columns(
        pl.when(pl.col('clase_ternaria').is_in(ternarias_baja)).then(pl.lit(1)).otherwise(pl.lit(0)).alias('clase_binaria')
    )

    return df

In [11]:
# @title Lectura de Datasets

file_comp = Path(CONFIG["BUCKETS"]) / CONFIG["BUCKET_ORIGIN"] / "competencia_03_fe.csv"

df = pl.read_csv(file_comp, infer_schema_length=None)



In [12]:
if CONFIG["TEST"]:
  df_crudo = df_crudo.filter(pl.col("foto_mes").is_in(build_foto_mes(CONFIG["MES_TRAIN_FLOOR"],CONFIG["FINAL_PREDICT"]+2)))
df_ternaria = df

logger.info(f"DF Shape : {df_ternaria.shape}")
logger.info(f"DF Foto_mes : {df_ternaria.group_by("clase_ternaria", "foto_mes").count()}")

2025-12-03 22:47:55,273 - INFO - __main__ 5 - DF Shape : (4729949, 774)
  logger.info(f"DF Foto_mes : {df_ternaria.group_by("clase_ternaria", "foto_mes").count()}")
2025-12-03 22:48:01,755 - INFO - __main__ 6 - DF Foto_mes : shape: (92, 3)
┌────────────────┬──────────┬────────┐
│ clase_ternaria ┆ foto_mes ┆ count  │
│ ---            ┆ ---      ┆ ---    │
│ str            ┆ i64      ┆ u32    │
╞════════════════╪══════════╪════════╡
│ BAJA+2         ┆ 201909   ┆ 570    │
│ BAJA+1         ┆ 202012   ┆ 618    │
│ BAJA+1         ┆ 202008   ┆ 475    │
│ BAJA+1         ┆ 201910   ┆ 581    │
│ BAJA+1         ┆ 202003   ┆ 166    │
│ …              ┆ …        ┆ …      │
│ CONTINUA       ┆ 202004   ┆ 148432 │
│ CONTINUA       ┆ 202008   ┆ 155497 │
│ CONTINUA       ┆ 201906   ┆ 127453 │
│ CONTINUA       ┆ 201905   ┆ 126016 │
│ BAJA+1         ┆ 202009   ┆ 496    │
└────────────────┴──────────┴────────┘


In [13]:
df = drop_columns(df_ternaria)

In [14]:
df_optimizacion = df.filter(pl.col('foto_mes').is_in(build_foto_mes(CONFIG["MES_TRAIN_FLOOR"],CONFIG["MES_TEST"],CONFIG["MES_TO_DROP"])))
df_optimizacion = generate_clase_binaria(df_optimizacion, is_prediction=False)

In [15]:
df_optimizacion = undersample_df(df_optimizacion, CONFIG["UNDERSAMPLING_FRACTION"], CONFIG["IS_RANDOM_UNDERSAMPLING"])

df_optimizacion_val = df_optimizacion.filter(pl.col('foto_mes') == CONFIG["MES_VALIDACION"])
df_optimizacion_train = df_optimizacion.filter(pl.col('foto_mes').is_in(build_foto_mes(CONFIG["MES_TRAIN_FLOOR"],CONFIG["MES_VALIDACION"]-1,CONFIG["MES_TO_DROP"])))

2025-12-03 22:48:08,269 - INFO - __main__ 21 - Performing client-based undersampling with fraction (proportion to KEEP): 0.1, DF shape: (814126, 772)
Please use `implode` to return to previous behavior.

See https://github.com/pola-rs/polars/issues/22149 for more information.
  df_filtered = df.filter(
2025-12-03 22:48:10,937 - INFO - __main__ 42 - DF shape after client-based undersampling: (98506, 772)


In [16]:
target_class = ["clase_ternaria", "clase_binaria", "numero_de_cliente"]

cols_train = df.columns

for c in ['numero_de_cliente', 'clase_binaria','foto_mes',"clase_ternaria"]:
  if c in cols_train:
    cols_train.remove(c)

#df_optimizacion_val_with_target = df_optimizacion_val.select(['numero_de_cliente', 'clase_binaria','foto_mes',"clase_ternaria"])
#df_optimizacion_train_with_target = df_optimizacion_train.select(['numero_de_cliente', 'clase_binaria','foto_mes',"clase_ternaria"])

#df_optimizacion_val = df_optimizacion_val.drop(['numero_de_cliente', 'clase_binaria','foto_mes',"clase_ternaria"])
#df_optimizacion_train = df_optimizacion_train.drop(['numero_de_cliente', 'clase_binaria','foto_mes',"clase_ternaria"])

In [17]:
# @title Build Predictions
def build_predictions(modelos, dataset : pl.DataFrame, cols_train : list) -> pl.DataFrame:
  predicciones = {}


  df_to_predict = dataset.select(cols_train)
  dataset_np = df_to_predict.to_numpy()

  for seed,model in modelos.items():
    predictions = model.predict(dataset_np)
    predicciones[seed] = predictions

  mean_predictions = np.mean(list(predicciones.values()), axis=0)
  return pl.DataFrame({'numero_de_cliente': dataset["numero_de_cliente"], 'Predicted': mean_predictions})

In [18]:
# @title Calcular Cantidad Envíos para la máxima ganancia
def calcular_cantidad_envios(y_pred : pl.DataFrame, y_true : pl.DataFrame) -> float:
    df = y_pred.join(y_true, on="numero_de_cliente")

    df_ordenado = df.sort("Predicted", descending=True)

    # Ganancia individual por fila, cast to Float64 to prevent potential overflow
    df_ordenado = df_ordenado.with_columns([
        pl.when(pl.col("clase_binaria") == 1)
          .then(pl.lit(CONFIG["GANANCIA_ACIERTO"]).cast(pl.Float64))
          .otherwise(pl.lit(-CONFIG["COSTO_ESTIMULO"]).cast(pl.Float64))
          .alias("ganancia_individual")
    ])

    # Ganancia acumulada
    df_ordenado = df_ordenado.with_columns([
        pl.col("ganancia_individual").cum_sum().alias("ganancia_acumulada")
    ])

    # Obtener ganancia maxima
    ganancia_maxima = df_ordenado.select(pl.col("ganancia_acumulada").max()).item()

    # Find the index of the first occurrence of the maximum cumulative gain
    idx_max_ganancia = df_ordenado["ganancia_acumulada"].arg_max()

    # The number of sends is the index + 1 (since index is 0-based)
    cantidad_envios_real = idx_max_ganancia + 1

    return "calcular_cantidad_envios", float(ganancia_maxima), cantidad_envios_real

In [19]:
# @title Función de Evaluación de Ganancia sin n_envios (A REVISION)
def lgb_gan_eval(y_pred, data: lgb.Dataset):
  label = data.get_label()

  ganancia = np.where(label == 1, CONFIG["GANANCIA_ACIERTO"], 0) - np.where(
      label < 1.00002, CONFIG["COSTO_ESTIMULO"], 0
  )

  ganancia = ganancia[np.argsort(y_pred)[::-1]]
  ganancia = np.cumsum(ganancia)

  return "lgb_gan_eval", float(np.max(ganancia)), True


In [27]:
def objective(trial) -> float:

  num_leaves = trial.suggest_int('num_leaves', 30, 200)
  learning_rate = trial.suggest_float('learning_rate', 0.01, 0.1)
  min_data_in_leaf = trial.suggest_int('min_data_in_leaf', 100, 500)
  feature_fraction = trial.suggest_float('feature_fraction', 0.1, 1.0)
  max_bin = trial.suggest_int('max_bin', 30, 200, step=10)
  num_iterations = trial.suggest_int('num_iterations', 500, 2000)

  params = {
      'objective': 'binary',
      'metric': 'custom',
      'boosting_type': 'gbdt',
      'first_metric_only': True,
      'boost_from_average': True,
      'feature_pre_filter': False,
      'max_bin': max_bin,
      'num_leaves': num_leaves,
      'learning_rate': learning_rate,
      'min_data_in_leaf': min_data_in_leaf,
      'feature_fraction': feature_fraction,
      'seed': CONFIG["SEMILLA"][1],
      'verbose': -1,
      'num_iterations': num_iterations
      }

  train_data = lgb.Dataset(df_optimizacion_train.select(cols_train).to_numpy(),
                            label=df_optimizacion_train["clase_binaria"].to_numpy())

  val_data = lgb.Dataset(df_optimizacion_val.select(cols_train).to_numpy(),
                           label=df_optimizacion_val["clase_binaria"].to_numpy())
  modelos = {}

  for seed in CONFIG["SEMILLA"]:
    params["seed"] = seed

    model = lgb.train(
        params,
        train_data,
        valid_sets=[val_data],
        feval = lgb_gan_eval,
        callbacks=[
                lgb.early_stopping(stopping_rounds=int(50 + 5 / params['learning_rate']), verbose=False)
            ]

      )
    modelos[seed] = model

    optimization_predictions = build_predictions(modelos, df_optimizacion_val, cols_train)
    print(f"Val target: {df_optimizacion_val["clase_binaria"].value_counts()}")
    df_test = generate_clase_binaria(df.filter(pl.col("foto_mes") == CONFIG["MES_TEST"]),is_prediction = True)
    _,ganancia_total, n_envios_test = calcular_cantidad_envios(optimization_predictions, df_test)
    print(f"Resultado: {ganancia_total}, {n_envios_test}")
    logger.info(f"Finished Trial {trial.number}: Ganancia = {ganancia_total}")

    trial.set_user_attr("best_iteration", model.best_iteration)

    return ganancia_total


# SE INTENTA RECUPERAR UN ESTUDIO O SE INICIA UNO NUEVO
#storage_name = f"sqlite:////{os.path.join(BUCKETS, BUCKET_TARGET,STUDY_NAME)}.db"
db_file_path = os.path.join(modelos_directory, CONFIG["STUDY_NAME"] + ".db")
storage_name = f"sqlite:///{db_file_path}"
study_name = CONFIG["STUDY_NAME"]

study = optuna.create_study(
    direction="maximize",
    study_name=study_name,
    storage=storage_name,
    load_if_exists=True,
)

# HAY UN FLAG EN EL CONFIG PARA EVITAR CORRER LA OPTIMIZACION SIEMPRE
if CONFIG["RUN_BAYESIAN_OPTIMIZATION"]:
  logger.info(f"Run Optimization with {CONFIG["N_TRIALS"]}")
  study.optimize(lambda trial: objective(trial), n_trials=CONFIG["N_TRIALS"])

[I 2025-12-03 23:07:34,466] Using an existing study with name 'competencia-03-exp2-rus' instead of creating a new one.


In [28]:
# @title Build and Save Model
## SE ARMA EL MODELO Y DE SER POSIBLE SE PERSISTE PARA PODER USARLO PARA OTRA PREDICCION.
def build_and_save_or_load_models(study, semillas : list, train_dataset : pl.DataFrame, undersampling_fraction) -> dict:

  if undersampling_fraction == None:
    raise RuntimeError(f"Undersampling Fraction {undersampling_fraction} no puede ser None")

  modelos = {}

  all_models_exist = True
  for seed in CONFIG["SEMILLA"]:
    model_name = f"lgb_predict_{seed}_{CONFIG["SUFIX"]}.txt"
    model_file_path = os.path.join(modelos_directory, model_name)
    if not os.path.exists(model_file_path):
      all_models_exist = False
      break

  if all_models_exist:
    logger.info("All predict models exist. Loading them.")
    for seed in CONFIG["SEMILLA"]:
        model_name = f"lgb_predict_{seed}_{CONFIG["SUFIX"]}.txt"
        model_file_path = os.path.join(modelos_directory, model_name)
        modelos[seed] = lgb.Booster(model_file=model_file_path)
  else:
    logger.info("Train Predict Models (some models were missing or not all present)")
    train_dataset_np = train_dataset.select(cols_train).to_numpy()
    y_target_np = train_dataset["clase_binaria"].to_numpy()

    train_data = lgb.Dataset(train_dataset_np,
                                label=y_target_np,
                                feature_name=cols_train)

    if len(study.trials) == 0:
      raise RuntimeError("No trials found in study. Run optimization first.")

    best_params = study.best_trial.params.copy()
    best_iter = study.best_trial.user_attrs.get("best_iter", 110)
    best_params["min_data_in_leaf"] = int(best_params["min_data_in_leaf"] * 100 / undersampling_fraction)
    logger.info(f"Best Params: {best_params}")
    for seed in semillas:

      params = {
              'objective': 'binary',
                'metric': 'custom',
                'boosting_type': 'gbdt',
                'first_metric_only': True,
                'boost_from_average': True,
                'feature_pre_filter': False,
                'seed': seed,
                'verbose': -1,
                **best_params
          }

      model = lgb.train(params, train_data, num_boost_round=best_iter)

      modelos[seed] = model
      model.save_model(os.path.join(modelos_directory,f"lgb_predict_{seed}_{CONFIG["SUFIX"]}.txt"))

  return modelos

In [29]:
# @title Build Final Predictions


def build_final_predictions(predict_models, df_predict, n_envios, cols_train):
  mean_predictions = build_predictions(predict_models, df_predict, cols_train)
  sorted_mean_predictions = mean_predictions.sort('Predicted', descending=True)
  final_predictions = sorted_mean_predictions.with_columns(
        (pl.arange(0, sorted_mean_predictions.height) < n_envios)
        .cast(pl.Int8)
        .alias("Predicted")
    )

  return final_predictions.select(["numero_de_cliente", "Predicted"])

In [30]:
# @title Forzar N Envios
def forzar_n_envios(y_pred : pl.DataFrame, y_true : pl.DataFrame, n_envios: int) -> tuple[float, int]:
    df_eval = y_pred.join(y_true, on="numero_de_cliente")

    df_ordenado = df_eval.sort("Predicted", descending=True)

    # Ganancia individual por fila, cast to Float64 to prevent potential overflow
    df_ordenado = df_ordenado.with_columns([
        pl.when(pl.col("clase_binaria") == 1)
          .then(pl.lit(CONFIG["GANANCIA_ACIERTO"]).cast(pl.Float64))
          .otherwise(pl.lit(-CONFIG["COSTO_ESTIMULO"]).cast(pl.Float64))
          .alias("ganancia_individual")
    ])

    # Calculate cumulative gain for all possible sends
    df_ordenado = df_ordenado.with_columns([
        pl.col("ganancia_individual").cum_sum().alias("ganancia_acumulada")
    ])

    # Get the gain corresponding to the specified n_envios.
    # If n_envios is larger than the dataframe, cap it at dataframe height.
    # If n_envios is 0 or negative, return 0 gain and 0 sends.
    if n_envios <= 0:
        return 0.0, 0

    # Ensure n_envios does not exceed the number of rows in the DataFrame
    actual_n_envios = min(n_envios, df_ordenado.height)

    if actual_n_envios == 0: # Case where df_ordenado is empty or n_envios <= 0
        return 0.0, 0

    # Get the cumulative gain at the actual_n_envios position (0-indexed: actual_n_envios - 1)
    ganancia_at_n_envios = df_ordenado["ganancia_acumulada"].item(actual_n_envios - 1)

    return float(ganancia_at_n_envios), actual_n_envios

In [33]:
# Retrieve best n_envios from the study's best trial
df_predict = df.filter(pl.col('foto_mes') == CONFIG["FINAL_PREDICT"])
df_train_predict = df.filter(pl.col('foto_mes').is_in(build_foto_mes(CONFIG["FINAL_TRAIN_FLOOR"],CONFIG["FINAL_TRAIN_CEILING"],CONFIG["MES_TO_DROP"])))
df_train_predict = undersample_df(df_train_predict, 0.5, CONFIG["IS_RANDOM_UNDERSAMPLING"])
df_train_predict = generate_clase_binaria(df_train_predict, is_prediction=True)

predict_models = build_and_save_or_load_models(study, CONFIG["SEMILLA"],df_train_predict, undersampling_fraction=CONFIG["UNDERSAMPLING_FRACTION"])


2025-12-03 23:31:57,891 - INFO - __main__ 21 - Performing client-based undersampling with fraction (proportion to KEEP): 0.5, DF shape: (4400034, 771)
Please use `implode` to return to previous behavior.

See https://github.com/pola-rs/polars/issues/22149 for more information.
  df_filtered = df.filter(
2025-12-03 23:38:38,436 - INFO - __main__ 42 - DF shape after client-based undersampling: (2362446, 771)
2025-12-03 23:39:39,547 - INFO - __main__ 19 - All predict models exist. Loading them.


In [37]:
best_n_envios = 10500
if CONFIG["IS_EXPERIMENTO"]:

  print(f"Ganancia idealizada : { (df_predict.filter(pl.col('clase_ternaria') == 'BAJA+2').shape[0]) * CONFIG['GANANCIA_ACIERTO']}")
  comp_predictions = build_predictions(predict_models, df_predict, cols_train)
  print(comp_predictions["Predicted"].value_counts())

  df_predict = generate_clase_binaria(df_predict,is_prediction = True)

  _, ganancia, n_envios_final = calcular_cantidad_envios(comp_predictions, df_predict)
  logger.info(f"Ganancia en Prediccion de Experimento : {ganancia} con {n_envios_final} envios")
  print(f"Ganancia en Prediccion de Experimento : {ganancia} con {n_envios_final} envios")
else:

  prediction_path = Path(CONFIG["BUCKETS"]) / CONFIG["BUCKET_TARGET"] / "predictions.csv"
  logger.info(f"Build submission {prediction_path}")
  comp_predictions = build_final_predictions(predict_models, df_predict, best_n_envios, cols_train)
  comp_predictions.write_csv(prediction_path)

logger.info(f"Program Ends")

2025-12-04 00:10:53,015 - INFO - __main__ 16 - Build submission /home/tomas_freilij/buckets/b2/predictions.csv
2025-12-04 00:11:30,235 - INFO - __main__ 20 - Program Ends


In [None]:
import matplotlib.pyplot as plt
lgb.plot_importance(predict_models[50], figsize=(30, 40))
plt.show()

In [None]:
print(df.filter(pl.col("foto_mes") == 202108)["clase_ternaria"].value_counts())