In [None]:


# 1) Imports y configuración
import os
import pandas as pd
import numpy as np
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor




  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 2) Load data
# All input files live in a single directory. It defaults to the original
# location and can be redirected with the LAB3_DATA_DIR environment variable,
# so the notebook can run on another machine without editing this cell.
data_dir = os.environ.get("LAB3_DATA_DIR", r"C:\Users\monte\Desktop\Lab3")

sellin_path    = os.path.join(data_dir, "sell-in.txt")
productos_path = os.path.join(data_dir, "tb_productos.txt")
stock_path     = os.path.join(data_dir, "tb_stocks.txt")
ids_path       = os.path.join(data_dir, "product_id_apredecir201912.TXT")

# The three tables are tab-separated text files.
df_sellin    = pd.read_csv(sellin_path, sep="\t")
df_productos = pd.read_csv(productos_path, sep="\t")
df_stock     = pd.read_csv(stock_path, sep="\t")

# Product ids to predict: one id per line. Lines that are not purely digits
# (e.g. a header or blank lines) are skipped.
with open(ids_path, "r") as f:
    product_ids = [int(line.strip()) for line in f if line.strip().isdigit()]


In [3]:

# 3) Preprocessing: build the monthly series per product (df_monthly)
# `periodo` is parsed with the YYYYMM format into a datetime column.
period_start = pd.to_datetime(df_sellin["periodo"].astype(str), format="%Y%m")
df_sellin = df_sellin.assign(timestamp=period_start)

# Keep rows up to 2019-12-01 and only the products listed in product_ids.
in_history = df_sellin["timestamp"] <= "2019-12-01"
is_requested = df_sellin["product_id"].isin(product_ids)
df_sellin = df_sellin[in_history & is_requested]

# One row per (product, month) with the summed `tn`, renamed to the
# item_id / target column names used by the rest of the notebook.
monthly_tn = df_sellin.groupby(["product_id", "timestamp"], as_index=False)["tn"].sum()
df_monthly = monthly_tn.rename(columns={"product_id": "item_id", "tn": "target"})



In [4]:
# 4) Feature engineering: rolling statistics of the lagged target
windows = [3, 6, 12]


def _lagged_window(series, window):
    """Return a rolling window over `series` shifted by one row.

    The shift excludes the current row from its own statistic, and
    min_periods=1 yields a value as soon as one lagged observation exists.
    """
    return series.shift(1).rolling(window=window, min_periods=1)


for w in windows:
    target_by_item = df_monthly.groupby("item_id")["target"]
    df_monthly[f"roll_mean_{w}"] = target_by_item.transform(
        lambda s: _lagged_window(s, w).mean()
    )
    # The std is NaN where it cannot be computed; those entries are set to 0.
    df_monthly[f"roll_std_{w}"] = target_by_item.transform(
        lambda s: _lagged_window(s, w).std().fillna(0)
    )
    df_monthly[f"roll_med_{w}"] = target_by_item.transform(
        lambda s: _lagged_window(s, w).median()
    )


In [5]:

# 5) Exponentially weighted mean and first-order changes of the target
target_by_item = df_monthly.groupby("item_id")["target"]

# EWM (span=6) of the target shifted by one row, so the current row is not
# part of its own feature.
df_monthly["ewm_mean_6"] = target_by_item.transform(
    lambda s: s.shift(1).ewm(span=6, adjust=False).mean()
)

# NOTE(review): unlike the shifted features, diff_1 and pct_chg_1 are computed
# from the current row's target. Confirm this is intended before ever
# declaring them as known covariates.
df_monthly["diff_1"] = target_by_item.diff(1)

# pct_change yields +/-inf when the previous target is 0, and fillna(0) alone
# does not remove inf. Map inf to NaN first so the feature stays finite.
df_monthly["pct_chg_1"] = (
    target_by_item.pct_change(1)
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)



In [6]:
# 6) (Optional) rolling min / max of the lagged target
for w in windows:
    target_by_item = df_monthly.groupby("item_id")["target"]
    # Same recipe for both statistics: shift by one row, then take the rolling
    # extreme over `w` rows (a single observation is enough, min_periods=1).
    for stat in ("min", "max"):
        df_monthly[f"roll_{stat}_{w}"] = target_by_item.transform(
            lambda s: getattr(s.shift(1).rolling(window=w, min_periods=1), stat)()
        )



In [7]:
# 7) Build the TimeSeriesDataFrame and force a regular month-end frequency
ts_data = TimeSeriesDataFrame.from_data_frame(
    df_monthly,
    id_column="item_id",
    timestamp_column="timestamp",
)
# "ME" (month end) replaces the deprecated pandas alias "M", which triggered
# a warning in to_offset(); the resulting frequency is the same.
ts_data = ts_data.convert_frequency(freq="ME")



  offset = pd.tseries.frequencies.to_offset(freq)


In [8]:
# 8) Train the predictor with a 2-step horizon (to forecast through Feb-2020)
predictor = TimeSeriesPredictor(
    target="target",
    prediction_length=2,
    eval_metric="RMSE",
    # Relative path: it resolves against the kernel's working directory.
    path="autogluon_models_2steps",
    # "ME" replaces the deprecated pandas alias "M"; AutoGluon stored "M" as
    # "ME" anyway, so the trained configuration is unchanged.
    freq="ME",
)
predictor.fit(train_data=ts_data, presets="medium_quality")



  offset = pd.tseries.frequencies.to_offset(self.freq)
Frequency 'M' stored as 'ME'
Beginning AutoGluon training...
AutoGluon will save models to 'c:\Users\monte\AppData\Local\Programs\Microsoft VS Code\autogluon_models_2steps'
AutoGluon Version:  1.3.1
Python Version:     3.9.23
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          8
GPU Count:          0
Memory Avail:       0.66 GB / 7.77 GB (8.5%)
Disk Space Avail:   27.80 GB / 237.12 GB (11.7%)
Setting presets to: medium_quality

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': RMSE,
 'freq': 'ME',
 'hyperparameters': 'light',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'verbosity': 2}

Provided train_data has 22375 rows (NaN fraction=0.1%),

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x1f6b834e250>

In [9]:
# 9) Generate the forecast
# No model is specified, so the predictor picks the one with the best
# validation score.
forecast = predictor.predict(data=ts_data)



Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


In [10]:
# 10) Flatten the forecast into a plain DataFrame and inspect it
df_pred = forecast.reset_index()

pred_columns = df_pred.columns.tolist()
pred_timestamps = sorted(df_pred["timestamp"].unique())

print("Columnas:", pred_columns)
print("Timestamps:", pred_timestamps)



Columnas: ['item_id', 'timestamp', 'mean', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']
Timestamps: [Timestamp('2020-01-31 00:00:00'), Timestamp('2020-02-29 00:00:00')]


In [None]:
# 11) Export February 2020 only, with column 'mean' renamed to 'tn'
# Forecast timestamps are month-end dates, so February 2020 is 2020-02-29.
target_month_end = pd.to_datetime("2020-02-29")

# Build the export frame in one chain from df_pred. `.loc` with a boolean mask
# returns a new frame, so the later column assignment never writes into a
# filtered view (avoids SettingWithCopyWarning and inplace mutation).
df_export = (
    df_pred.loc[
        df_pred["timestamp"] == target_month_end,
        ["item_id", "timestamp", "mean"],
    ]
    .rename(columns={"item_id": "product_id", "mean": "tn"})
    # Predictions are rounded to whole units before export.
    .assign(tn=lambda d: d["tn"].round().astype(int))
)

# Fail loudly instead of silently writing an empty CSV if the forecast does
# not contain the expected month (e.g. after a frequency change).
if df_export.empty:
    raise ValueError(f"No forecast rows found for {target_month_end.date()}")

output_path = os.path.join(os.path.expanduser("~"), "Desktop",
                           "predicciones_Febrero2020_with_RN.csv")
submission = df_export[["product_id", "tn"]].sort_values("product_id")
submission.to_csv(output_path, index=False)

print("CSV listo:", output_path)
print(df_export.head())

✅ CSV listo: C:\Users\monte\Desktop\predicciones_Febrero2020_with_RN.csv
   product_id  timestamp    tn
1       20001 2020-02-29  1311
3       20002 2020-02-29  1080
5       20003 2020-02-29   752
7       20004 2020-02-29   552
9       20005 2020-02-29   541
