AutoGluon - Predicción de ventas (tn) por producto para febrero 2020

In [2]:
# 📦 1. Importar librerías
import pandas as pd

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# 💬 Instalar AutoGluon si es necesario
!pip install autogluon.timeseries

from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

Collecting autogluon.timeseries
  Downloading autogluon.timeseries-1.3.1-py3-none-any.whl.metadata (12 kB)
Collecting lightning<2.7,>=2.2 (from autogluon.timeseries)
  Downloading lightning-2.5.2-py3-none-any.whl.metadata (38 kB)
Collecting pytorch-lightning (from autogluon.timeseries)
  Downloading pytorch_lightning-2.5.2-py3-none-any.whl.metadata (21 kB)
Collecting transformers<4.50,>=4.38.0 (from transformers[sentencepiece]<4.50,>=4.38.0->autogluon.timeseries)
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting gluonts<0.17,>=0.15.0 (from autogluon.timeseries)
  Downloading gluonts-0.16.2-py3-none-any.whl.metadata (9.8 kB)
Collecting statsforecast<2.0.2,>=1.7.0 (from autogluon.timeseries)
  Downloading statsforecast-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (29 kB)
Collecting mlforecast<0.14,>0.13 (from aut

In [5]:
# 📄 2. Cargar datasets
from pathlib import Path
# Rutas locales
#ruta_base = Path("D:/Repos/MCD_labo3")
ruta_base = Path("/content/drive/MyDrive/Colab Notebooks")

path_sell = ruta_base / "sell-in.txt"
path_prod = ruta_base / "tb_productos.txt"
path_stocks = ruta_base / "tb_stocks.txt"
path_encuestas = ruta_base / "encuesta_ventas.csv"
df_sellin = pd.read_csv(path_sell, sep="\t")
df_productos = pd.read_csv(path_prod, sep="\t")

In [6]:
# 📄 Leer lista de productos a predecir

path_id_apredecir = ruta_base / "product_id_apredecir201912.txt"
with open(path_id_apredecir, "r") as f:
    product_ids = [int(line.strip()) for line in f if line.strip().isdigit()]

In [7]:
# 🧹 3. Preprocesamiento
# Convertir periodo a datetime
df_sellin['timestamp'] = pd.to_datetime(df_sellin['periodo'], format='%Y%m')

In [8]:
# Filtrar hasta dic 2019 y productos requeridos
df_filtered = df_sellin[
    (df_sellin['timestamp'] <= '2019-12-01') &
    (df_sellin['product_id'].isin(product_ids))
]

In [9]:
# Agregar tn por periodo, cliente y producto
df_grouped = df_filtered.groupby(['timestamp', 'customer_id', 'product_id'], as_index=False)['tn'].sum()

In [10]:
df_raw = pd.read_csv(path_encuestas, sep=";")
# convertir millones_pesos a numérico
df_raw['millones_pesos'] = pd.to_numeric(df_raw['millones_pesos'], errors='coerce')
df_raw.head()

Unnamed: 0,anio,mes,millones_pesos
0,2017,Enero,27.477
1,2017,Febrero,25.223
2,2017,Marzo,26.334
3,2017,Abril,27.046
4,2017,Mayo,25.632


In [11]:
# mapear mes a número
mes_map = {
    "Enero": 1, "Febrero": 2, "Marzo": 3, "Abril": 4, "Mayo": 5, "Junio": 6,
    "Julio": 7, "Agosto": 8, "Septiembre": 9, "Octubre": 10, "Noviembre": 11, "Diciembre": 12
}
df_raw["mes_n"] = df_raw["mes"].map(mes_map)

# crear columna 'periodo' (int: YYYYMM)
df_macro = (
    df_raw
      .assign(periodo=lambda d: d["anio"]*100 + d["mes_n"])
      .loc[:, ["periodo", "millones_pesos"]]
      .rename(columns={"millones_pesos": "ventas_macro"})
)

# Convertir periodo a datetime format YYYY-MM-01
df_macro['periodo'] = pd.to_datetime(df_macro['periodo'], format='%Y%m')

#renombrar columnas
df_macro.rename(columns={'periodo': 'timestamp'}, inplace=True)

df_macro.head()
df_macro.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96 entries, 0 to 95
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   timestamp     96 non-null     datetime64[ns]
 1   ventas_macro  96 non-null     float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 1.6 KB


In [12]:
# Agregar tn total por periodo y producto
df_monthly_product = df_grouped.groupby(['timestamp', 'product_id'], as_index=False)['tn'].sum()
df_monthly_product['item_id'] = df_monthly_product['product_id']

# Unir info de producto (covariables estáticas)
df_monthly_product = df_monthly_product.merge(
    df_productos[['product_id', 'cat1', 'cat2', 'brand', 'sku_size']],
    on='product_id', how='left'
)


df_monthly_product['year'] = df_monthly_product['timestamp'].dt.year
df_monthly_product['month'] = df_monthly_product['timestamp'].dt.month
df_monthly_product['quarter'] = df_monthly_product['timestamp'].dt.quarter
df_monthly_product['is_year_start'] = df_monthly_product['timestamp'].dt.is_year_start.astype(int)
df_monthly_product['is_year_end'] = df_monthly_product['timestamp'].dt.is_year_end.astype(int)

# --- NUEVO: Rolling mean y lags ---
df_monthly_product = df_monthly_product.sort_values(['product_id', 'timestamp'])

# Número de meses desde el inicio de la serie para cada producto
df_monthly_product['months_since_start'] = (
    df_monthly_product.groupby('product_id').cumcount()
)

# Ventas acumuladas hasta el mes anterior
df_monthly_product['tn_cumsum'] = (
    df_monthly_product.groupby('product_id')['tn'].cumsum().shift(1).fillna(0)
)

# Rolling mean de los últimos 3 meses (sin incluir el mes actual)
df_monthly_product['tn_roll3'] = (
    df_monthly_product.groupby('product_id')['tn']
    .transform(lambda x: x.shift(1).rolling(window=3, min_periods=1).mean())
)

# Desviación estándar de los últimos 3 meses (sin incluir el mes actual)
df_monthly_product['tn_std3'] = (
    df_monthly_product.groupby('product_id')['tn']
    .transform(lambda x: x.shift(1).rolling(window=3, min_periods=1).std())
)


# Baseline: promedio de los últimos 3 meses (sin incluir el mes actual)
df_monthly_product['baseline_3m'] = (
    df_monthly_product.groupby('product_id')['tn']
    .transform(lambda x: x.shift(1).rolling(window=3, min_periods=1).mean())
)



# Lag de 1 mes
df_monthly_product['tn_lag1'] = (
    df_monthly_product.groupby('product_id')['tn']
    .shift(1)
)

# Lag de 2 meses
df_monthly_product['tn_lag2'] = (
    df_monthly_product.groupby('product_id')['tn']
    .shift(2)
)



In [13]:
df_monthly_product.head()

Unnamed: 0,timestamp,product_id,tn,item_id,cat1,cat2,brand,sku_size,year,month,quarter,is_year_start,is_year_end,months_since_start,tn_cumsum,tn_roll3,tn_std3,baseline_3m,tn_lag1,tn_lag2
0,2017-01-01,20001,934.77222,20001,HC,ROPA LAVADO,ARIEL,3000,2017,1,1,1,0,0,0.0,,,,,
496,2017-02-01,20001,798.0162,20001,HC,ROPA LAVADO,ARIEL,3000,2017,2,1,0,0,1,934.77222,934.77222,,934.77222,934.77222,
994,2017-03-01,20001,1303.35771,20001,HC,ROPA LAVADO,ARIEL,3000,2017,3,1,0,0,2,1732.78842,866.39421,96.701109,866.39421,798.0162,934.77222
1495,2017-04-01,20001,1069.9613,20001,HC,ROPA LAVADO,ARIEL,3000,2017,4,2,0,0,3,3036.14613,1012.04871,261.383344,1012.04871,1303.35771,798.0162
1995,2017-05-01,20001,1502.20132,20001,HC,ROPA LAVADO,ARIEL,3000,2017,5,2,0,0,4,4106.10743,1057.111737,252.915685,1057.111737,1069.9613,1303.35771


In [14]:
df = df_monthly_product.merge(df_macro, on="timestamp", how="left")

# 👉 Si faltan meses, rellena con forward-fill o con la media móvil:
df["ventas_macro"] = df["ventas_macro"].fillna(method="ffill")

# 🎁 Features extra (opcional)
for lag in [1, 3, 6, 12]:
    df[f"ventas_macro_lag{lag}"] = df["ventas_macro"].shift(lag)

# también ratios de crecimiento
#df["macro_pct_change"] = df["ventas_macro"].pct_change()

df.head()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22349 entries, 0 to 22348
Data columns (total 25 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   timestamp           22349 non-null  datetime64[ns]
 1   product_id          22349 non-null  int64         
 2   tn                  22349 non-null  float64       
 3   item_id             22349 non-null  int64         
 4   cat1                22349 non-null  object        
 5   cat2                22349 non-null  object        
 6   brand               22349 non-null  object        
 7   sku_size            22349 non-null  int64         
 8   year                22349 non-null  int32         
 9   month               22349 non-null  int32         
 10  quarter             22349 non-null  int32         
 11  is_year_start       22349 non-null  int64         
 12  is_year_end         22349 non-null  int64         
 13  months_since_start  22349 non-null  int64     

  df["ventas_macro"] = df["ventas_macro"].fillna(method="ffill")


In [15]:
df_monthly_product.head()

Unnamed: 0,timestamp,product_id,tn,item_id,cat1,cat2,brand,sku_size,year,month,quarter,is_year_start,is_year_end,months_since_start,tn_cumsum,tn_roll3,tn_std3,baseline_3m,tn_lag1,tn_lag2
0,2017-01-01,20001,934.77222,20001,HC,ROPA LAVADO,ARIEL,3000,2017,1,1,1,0,0,0.0,,,,,
496,2017-02-01,20001,798.0162,20001,HC,ROPA LAVADO,ARIEL,3000,2017,2,1,0,0,1,934.77222,934.77222,,934.77222,934.77222,
994,2017-03-01,20001,1303.35771,20001,HC,ROPA LAVADO,ARIEL,3000,2017,3,1,0,0,2,1732.78842,866.39421,96.701109,866.39421,798.0162,934.77222
1495,2017-04-01,20001,1069.9613,20001,HC,ROPA LAVADO,ARIEL,3000,2017,4,2,0,0,3,3036.14613,1012.04871,261.383344,1012.04871,1303.35771,798.0162
1995,2017-05-01,20001,1502.20132,20001,HC,ROPA LAVADO,ARIEL,3000,2017,5,2,0,0,4,4106.10743,1057.111737,252.915685,1057.111737,1069.9613,1303.35771


In [16]:
# Agregar columna 'item_id' para AutoGluon
# (Ya agregada en el bloque anterior)

In [18]:
import numpy as np
# Reemplaza inf y -inf por NaN
df.replace([np.inf, -np.inf], np.nan, inplace=True)

# Muestra cuántos NaN quedan por columna
print("Valores NaN por columna antes de limpiar:")
print(df.isna().sum())

# Opciones para limpiar:
# 1. Rellenar NaN numéricos con 0 (o puedes usar .fillna(method="ffill") si prefieres)
df = df.fillna(0)

# 2. Si prefieres eliminar filas con NaN (menos recomendado para series de tiempo):
# df = df.dropna()

# Verifica que ya no haya NaN ni inf
print("Valores NaN después de limpiar:", df.isna().sum().sum())
# Solo aplicar isinf a columnas numéricas
numeric_df = df.select_dtypes(include=[np.number])
print("Valores inf después de limpiar:", np.isinf(numeric_df.to_numpy()).sum())

Valores NaN por columna antes de limpiar:
timestamp                0
product_id               0
tn                       0
item_id                  0
cat1                     0
cat2                     0
brand                    0
sku_size                 0
year                     0
month                    0
quarter                  0
is_year_start            0
is_year_end              0
months_since_start       0
tn_cumsum                0
tn_roll3               780
tn_std3               1560
baseline_3m            780
tn_lag1                780
tn_lag2               1560
ventas_macro             0
ventas_macro_lag1        1
ventas_macro_lag3        3
ventas_macro_lag6        6
ventas_macro_lag12      12
dtype: int64
Valores NaN después de limpiar: 0
Valores inf después de limpiar: 0


In [19]:
import numpy as np

# 1. Reemplaza inf y -inf por NaN
df.replace([np.inf, -np.inf], np.nan, inplace=True)

# 2. Muestra cuántos NaN quedan por columna
print("Valores NaN por columna antes de limpiar:")
print(df.isna().sum())

# 3. Elimina filas con NaN en columnas críticas (item_id, timestamp, tn)
df = df.dropna(subset=['item_id', 'timestamp', 'tn'])

# 4. Rellena el resto de NaN con 0
df = df.fillna(0)

# 5. Verifica que no haya NaN ni inf
print("Valores NaN después de limpiar:", df.isna().sum().sum())
numeric_df = df.select_dtypes(include=[np.number])
print("Valores inf después de limpiar:", np.isinf(numeric_df.to_numpy()).sum())

# 6. Verifica tipos
print(df.dtypes)

Valores NaN por columna antes de limpiar:
timestamp             0
product_id            0
tn                    0
item_id               0
cat1                  0
cat2                  0
brand                 0
sku_size              0
year                  0
month                 0
quarter               0
is_year_start         0
is_year_end           0
months_since_start    0
tn_cumsum             0
tn_roll3              0
tn_std3               0
baseline_3m           0
tn_lag1               0
tn_lag2               0
ventas_macro          0
ventas_macro_lag1     0
ventas_macro_lag3     0
ventas_macro_lag6     0
ventas_macro_lag12    0
dtype: int64
Valores NaN después de limpiar: 0
Valores inf después de limpiar: 0
timestamp             datetime64[ns]
product_id                     int64
tn                           float64
item_id                        int64
cat1                          object
cat2                          object
brand                         object
sku_size          

In [20]:
# ⏰ 4. Crear TimeSeriesDataFrame
ts_data = TimeSeriesDataFrame.from_data_frame(
    df,
    id_column='item_id',
    timestamp_column='timestamp'
)

In [21]:
# Completar valores faltantes
ts_data = ts_data.fill_missing_values()

In [22]:
#drop columnas cat1, cat2, brand
ts_data = ts_data.drop(columns=['cat1', 'cat2', 'brand'])

In [23]:
ts_data.info()

<class 'autogluon.timeseries.dataset.ts_dataframe.TimeSeriesDataFrame'>
MultiIndex: 22349 entries, (np.int64(20001), Timestamp('2017-01-01 00:00:00')) to (np.int64(21276), Timestamp('2019-12-01 00:00:00'))
Data columns (total 20 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   product_id          22349 non-null  int64  
 1   tn                  22349 non-null  float64
 2   sku_size            22349 non-null  int64  
 3   year                22349 non-null  int32  
 4   month               22349 non-null  int32  
 5   quarter             22349 non-null  int32  
 6   is_year_start       22349 non-null  int64  
 7   is_year_end         22349 non-null  int64  
 8   months_since_start  22349 non-null  int64  
 9   tn_cumsum           22349 non-null  float64
 10  tn_roll3            22349 non-null  float64
 11  tn_std3             22349 non-null  float64
 12  baseline_3m         22349 non-null  float64
 13  tn_lag1             2

In [24]:
#mostrar(ts_data.head())
ts_data.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,product_id,tn,sku_size,year,month,quarter,is_year_start,is_year_end,months_since_start,tn_cumsum,tn_roll3,tn_std3,baseline_3m,tn_lag1,tn_lag2,ventas_macro,ventas_macro_lag1,ventas_macro_lag3,ventas_macro_lag6,ventas_macro_lag12
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20001,2017-01-01,20001,934.77222,3000,2017,1,1,1,0,0,0.0,0.0,0.0,0.0,0.0,0.0,27.477,0.0,0.0,0.0,0.0
20001,2017-02-01,20001,798.0162,3000,2017,2,1,0,0,1,934.77222,934.77222,0.0,934.77222,934.77222,0.0,25.223,27.477,0.0,0.0,0.0
20001,2017-03-01,20001,1303.35771,3000,2017,3,1,0,0,2,1732.78842,866.39421,96.701109,866.39421,798.0162,934.77222,26.334,25.223,0.0,0.0,0.0
20001,2017-04-01,20001,1069.9613,3000,2017,4,2,0,0,3,3036.14613,1012.04871,261.383344,1012.04871,1303.35771,798.0162,27.046,26.334,27.477,0.0,0.0
20001,2017-05-01,20001,1502.20132,3000,2017,5,2,0,0,4,4106.10743,1057.111737,252.915685,1057.111737,1069.9613,1303.35771,25.632,27.046,25.223,0.0,0.0


In [25]:
# ⚙️ 5. Definir y entrenar predictor
predictor = TimeSeriesPredictor(
    prediction_length=2,
    target='tn',
    freq='MS',  # Frecuencia mensual (Month Start)
    eval_metric ="MAPE" ,  # Métrica de evaluación


)



predictor.fit(
    ts_data,
    num_val_windows=2,
    presets="best_quality",
    refit_full=True,

)

Beginning AutoGluon training...
AutoGluon will save models to '/content/AutogluonModels/ag-20250718_171747'
AutoGluon Version:  1.3.1
Python Version:     3.11.13
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Sun Mar 30 16:01:29 UTC 2025
CPU Count:          12
GPU Count:          1
Memory Avail:       80.72 GB / 83.48 GB (96.7%)
Disk Space Avail:   69.76 GB / 112.64 GB (61.9%)
Setting presets to: best_quality

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': MAPE,
 'freq': 'MS',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 2,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': True,
 'skip_model_selection': False,
 'target': 'tn',
 'verbosity': 2}

train_data with frequency 'IRREG' has been resampled to frequency 'MS'.
Provided train_data has 22375 rows (NaN fraction=0.1%), 780 tim

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/821M [00:00<?, ?B/s]

	-0.5734       = Validation score (-MAPE)
	18.50   s     = Training runtime
	1.16    s     = Validation (prediction) runtime
Training timeseries model ChronosFineTuned[bolt_small]. 
	Skipping covariate_regressor since the dataset contains no covariates or static features.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/191M [00:00<?, ?B/s]

	Saving fine-tuned model to /content/AutogluonModels/ag-20250718_171747/models/ChronosFineTuned[bolt_small]/W0/fine-tuned-ckpt
	Skipping covariate_regressor since the dataset contains no covariates or static features.
	Saving fine-tuned model to /content/AutogluonModels/ag-20250718_171747/models/ChronosFineTuned[bolt_small]/W1/fine-tuned-ckpt
	-0.4785       = Validation score (-MAPE)
	149.57  s     = Training runtime
	0.08    s     = Validation (prediction) runtime
Training timeseries model TemporalFusionTransformer. 
	-0.5160       = Validation score (-MAPE)
	151.40  s     = Training runtime
	0.43    s     = Validation (prediction) runtime
Training timeseries model DeepAR. 
	-0.4792       = Validation score (-MAPE)
	130.19  s     = Training runtime
	0.44    s     = Validation (prediction) runtime
Training timeseries model PatchTST. 
	-0.5900       = Validation score (-MAPE)
	69.57   s     = Training runtime
	0.87    s     = Validation (prediction) runtime
Training timeseries model TiD

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x7b2b8cb1c0d0>

In [26]:
predictor.leaderboard(silent=True)

Unnamed: 0,model,score_val,pred_time_val,fit_time_marginal,fit_order
0,WeightedEnsemble,-0.461171,1.394786,1.317159,13
1,ChronosFineTuned[bolt_small],-0.478533,0.079072,149.56721,8
2,DeepAR,-0.479166,0.435588,130.189276,10
3,TemporalFusionTransformer,-0.515975,0.433763,151.397979,9
4,AutoETS,-0.556599,2.977781,3.179147,6
5,SeasonalNaive,-0.558642,0.446362,2.430673,1
6,ChronosZeroShot[bolt_base],-0.573437,1.161583,18.496595,7
7,DynamicOptimizedTheta,-0.586229,0.657383,4.214189,5
8,PatchTST,-0.590002,0.870204,69.569257,11
9,DirectTabular,-0.656259,0.080111,2.014716,3


In [None]:
# 🔮 6. Generar predicción
forecast = predictor.predict(ts_data)

In [27]:
from pprint import pprint
info = predictor.get_model_info('WeightedEnsemble')
pprint(info['info']['weights'])  # pesos por modelo base

AttributeError: 'TimeSeriesPredictor' object has no attribute 'get_model_info'

In [None]:
# Extraer predicción media y filtrar febrero 2020
forecast_mean = forecast['mean'].reset_index()
print(forecast_mean.columns)

In [None]:
# Tomar solo item_id y la predicción 'mean'
resultado = forecast['mean'].reset_index()[['item_id', 'mean']]
resultado.columns = ['product_id', 'tn']

# Filtrar solo febrero 2020
resultado = forecast['mean'].reset_index()
resultado = resultado[resultado['timestamp'] == '2020-02-01']

# Renombrar columnas
resultado = resultado[['item_id', 'mean']]
resultado.columns = ['product_id', 'tn']


# Extraer baseline para febrero 2020
baseline_feb = df_monthly_product[df_monthly_product['timestamp'] == '2020-02-01'][['product_id', 'baseline_3m']]
baseline_feb['baseline_3m'] = baseline_feb['baseline_3m'].fillna(0)  # <-- primero rellenar NaN





In [None]:
# Asegura que ambos sean int o str, pero iguales
resultado['product_id'] = resultado['product_id'].astype(int)
baseline_feb['product_id'] = baseline_feb['product_id'].astype(int)

In [None]:
print("IDs en resultado:", set(resultado['product_id']))
print("IDs en baseline_feb:", set(baseline_feb['product_id']))
print("IDs en común:", set(resultado['product_id']).intersection(set(baseline_feb['product_id'])))

In [None]:
# Ahora sí, merge
resultado2 = resultado.merge(baseline_feb, on='product_id', how='left')

# Blending
resultado2['tn_blend'] = 1 * resultado2['tn'] + 0 * resultado2['baseline_3m']
resultado2['tn'] = resultado2['tn_blend']

In [None]:
resultado2 = resultado.merge(baseline_feb, on='product_id', how='left')
resultado2['baseline_3m'] = resultado2['baseline_3m'].fillna(0)
resultado2['tn_blend'] = 1 * resultado2['tn'] + 0 * resultado2['baseline_3m']
resultado2['tn'] = resultado2['tn_blend']

In [None]:
print(resultado2.isna().sum())
print(resultado2.head())

In [None]:
# dropear las columnas baseline y tn_blend
resultado2 = resultado2.drop(columns=['baseline_3m', 'tn_blend'])

In [None]:
# 💾 7. Guardar archivo
# Crear carpeta de salida si no existe
output_dir = ruta_base / "salidas"
output_dir.mkdir(exist_ok=True)
path_out = output_dir / "predicciones_autogluon_16.csv"
resultado2.to_csv(path_out, index=False)
resultado2.head()
