In [31]:
# AutoGluon - Predicción de ventas (tn) por producto para febrero 2020

In [32]:
## 1. Importar librerías necesarias
import pandas as pd
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

In [33]:
## 2. Cargar datasets
df_sellin = pd.read_csv("sell-in.txt", sep="\t")
df_productos = pd.read_csv("tb_productos.txt", sep="\t")

In [34]:
# Leer lista de productos a predecir
with open("product_id_apredecir201912.TXT", "r") as f:
    product_ids = [int(line.strip()) for line in f if line.strip().isdigit()]

In [35]:
## 3. Preprocesamiento
# Convertir periodo a datetime
df_sellin['timestamp'] = pd.to_datetime(df_sellin['periodo'], format='%Y%m')

In [36]:
# Filtrar hasta dic 2019 y productos requeridos
df_filtered = df_sellin[
    (df_sellin['timestamp'] <= '2019-12-01') &
    (df_sellin['product_id'].isin(product_ids))
]

In [37]:
# Agregar tn por periodo, cliente y producto
df_grouped = df_filtered.groupby(['timestamp', 'customer_id', 'product_id'], as_index=False)['tn'].sum()

In [38]:
# Agregar tn total por periodo y producto
df_monthly_product = df_grouped.groupby(['timestamp', 'product_id'], as_index=False)['tn'].sum()

In [39]:
# Agregar columna 'item_id' para AutoGluon
df_monthly_product['item_id'] = df_monthly_product['product_id']

In [40]:
## 4. Crear TimeSeriesDataFrame
ts_data = TimeSeriesDataFrame.from_data_frame(
    df_monthly_product,
    id_column='item_id',
    timestamp_column='timestamp'
)

In [57]:
print(ts_data.loc[20001])

            product_id          tn
timestamp                         
2017-01-01       20001   934.77222
2017-02-01       20001   798.01620
2017-03-01       20001  1303.35771
2017-04-01       20001  1069.96130
2017-05-01       20001  1502.20132
2017-06-01       20001  1520.06539
2017-07-01       20001  1030.67391
2017-08-01       20001  1267.39462
2017-09-01       20001  1316.94604
2017-10-01       20001  1439.75563
2017-11-01       20001  1580.47401
2017-12-01       20001  1049.38860
2018-01-01       20001  1169.07532
2018-02-01       20001  1043.76470
2018-03-01       20001  1856.83534
2018-04-01       20001  1251.28462
2018-05-01       20001  1293.89788
2018-06-01       20001  1150.79169
2018-07-01       20001  1470.41009
2018-08-01       20001  1800.96168
2018-09-01       20001  1438.67455
2018-10-01       20001  2295.19832
2018-11-01       20001  1813.01511
2018-12-01       20001  1486.68669
2019-01-01       20001  1275.77351
2019-02-01       20001  1259.09363
2019-03-01       200

In [41]:
# Completar valores faltantes
ts_data = ts_data.fill_missing_values()

In [44]:
predictor = TimeSeriesPredictor(
    prediction_length=1,
    target='tn',
    freq='MS'  # Frecuencia mensual (Month Start)
)

In [45]:
predictor.fit(ts_data)



Beginning AutoGluon training...
AutoGluon will save models to '/Users/patricialorenasarmientotagle/austral-labo-iii/notebooks/AutogluonModels/ag-20250609_223417'
AutoGluon Version:  1.3.1
Python Version:     3.9.6
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 24.5.0: Tue Apr 22 19:53:27 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T6041
CPU Count:          12
GPU Count:          0
Memory Avail:       6.66 GB / 24.00 GB (27.7%)
Disk Space Avail:   275.87 GB / 460.43 GB (59.9%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'freq': 'MS',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 1,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'tn',
 'verbosity': 2}

train_data with frequency 'IRREG' has been resampled to frequency

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x395a84220>

In [47]:
print(ts_data.num_items)
print(ts_data.freq)


780
MS


In [48]:
forecast = predictor.predict(ts_data)

data with frequency 'IRREG' has been resampled to frequency 'MS'.
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


In [49]:
print(forecast.columns)


Index(['mean', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9'], dtype='object')


In [50]:
forecast_mean = forecast['mean'].reset_index()
print(forecast_mean.columns)


Index(['item_id', 'timestamp', 'mean'], dtype='object')


In [51]:
# Tomar solo item_id y la predicción 'mean'
resultado = forecast['mean'].reset_index()[['item_id', 'mean']]
resultado.columns = ['product_id', 'tn']

# Guardar a CSV
resultado.to_csv("predicciones_febrero2020.csv", index=False)
resultado.head()


Unnamed: 0,product_id,tn
0,20001,1237.338298
1,20002,957.624901
2,20003,700.614432
3,20004,522.681982
4,20005,497.931661


In [58]:
# --- Cargar predicciones de AutoGluon ---
df_autogluon = pd.read_csv("predicciones_febrero2020.csv")  # columnas: product_id, tn

# --- Cargar el promedio simple (ya lo calculaste) ---
df_promedios = pd.read_csv("promedio_12m_febrero2020_2.csv")  # columnas: product_id, tn

# --- Renombrar columna del promedio para claridad ---
df_promedios = df_promedios.rename(columns={"tn": "tn_promedio"})
df_autogluon = df_autogluon.rename(columns={"tn": "tn_autogluon"})

# --- Unir ambos resultados por product_id ---
df_ensemble = df_autogluon.merge(df_promedios, on="product_id", how="inner")

# --- Calcular promedio simple como ensemble ---
df_ensemble["tn_ensemble"] = (
    df_ensemble["tn_autogluon"] + df_ensemble["tn_promedio"]
) / 2

# --- Guardar resultado final ---
df_ensemble[["product_id", "tn_ensemble"]].to_csv("ensemble_febrero2020.csv", index=False)
