# AutoGluon - Predicción de ventas (tn) por producto para febrero 2020

In [1]:
# One-time setup (uncomment to run); 1.3.1 is the version shown in the training log of this notebook:
# %pip install -q autogluon.timeseries==1.3.1

In [2]:
## 1. Importar librerías necesarias
import pandas as pd
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
## 2. Load datasets
# Both inputs are tab-separated UTF-8 text files.
read_opts = {'sep': '\t', 'encoding': 'utf-8'}
df = pd.read_csv('../data/sell-in.txt', **read_opts)
df_productos_predecir = pd.read_csv('../data/product_id_apredecir201912.txt', **read_opts)

# Keep only the sales rows whose product is in the list of products to predict.
is_target_product = df['product_id'].isin(df_productos_predecir['product_id'])
df = df[is_target_product]

In [4]:
# Wide matrix: one row per (product_id, customer_id), one column per period,
# each cell holding the summed tn. Combinations with no sales rows stay NaN
# (fill_value=None), as the preview below shows.
df_pivot = (
    df.pivot_table(
        values='tn',
        index=['product_id', 'customer_id'],
        columns='periodo',
        aggfunc='sum',
        fill_value=None,
    )
    .reset_index()
    .rename_axis(None, axis='columns')  # drop the leftover 'periodo' columns-axis name
)
df_pivot.head()

Unnamed: 0,product_id,customer_id,201701,201702,201703,201704,201705,201706,201707,201708,...,201903,201904,201905,201906,201907,201908,201909,201910,201911,201912
0,20001,10001,99.43861,198.84365,92.46537,13.29728,101.00563,128.04792,101.20711,43.3393,...,130.54927,364.37071,439.90647,65.92436,144.78714,33.63991,109.05244,176.0298,236.65556,180.21938
1,20001,10002,35.72806,6.79415,29.94128,22.81133,31.22847,47.57025,21.84874,17.08052,...,31.97079,55.41679,30.87299,144.07021,37.14616,,72.08551,17.40806,45.61495,113.33165
2,20001,10003,143.49426,20.48319,137.87537,68.89292,135.1219,171.01785,64.66196,83.6341,...,170.89924,230.00152,1.84835,,138.23391,162.07198,233.20532,76.00625,86.14415,102.27517
3,20001,10004,184.72927,104.03894,295.43924,247.65632,188.37819,195.02683,379.4427,237.16848,...,102.64484,91.67799,389.02653,66.71971,228.62366,96.11402,288.34205,324.96172,195.67828,34.6481
4,20001,10005,19.08407,5.17117,5.17117,0.86186,37.95546,19.08407,43.35049,67.53856,...,6.90049,22.18016,15.89578,,8.25595,,12.804,17.13921,12.22149,19.60368


In [5]:
print("\n--- 1. Transformando datos a formato largo ---")
# Back to long format: one row per (product_id, customer_id, periodo).
# The former period column headers become values of 'periodo', and the cell
# values go into 'tn', the target column used by the predictor further down.
df = pd.melt(
    df_pivot,
    id_vars=['product_id', 'customer_id'],
    var_name='periodo',
    value_name='tn',
)


--- 1. Transformando datos a formato largo ---


In [6]:
# Parse the YYYYMM period codes into timestamps (first day of each month).
period_start = pd.to_datetime(df['periodo'], format='%Y%m')
df['timestamp'] = period_start

In [7]:
# Preview the long-format frame, now including the parsed 'timestamp' column.
df.head()

Unnamed: 0,product_id,customer_id,periodo,tn,timestamp
0,20001,10001,201701,99.43861,2017-01-01
1,20001,10002,201701,35.72806,2017-01-01
2,20001,10003,201701,143.49426,2017-01-01
3,20001,10004,201701,184.72927,2017-01-01
4,20001,10005,201701,19.08407,2017-01-01


In [8]:
# 'periodo' is redundant now that 'timestamp' carries the same information.
df = df.drop(columns='periodo')

In [None]:
# Disabled alternative: aggregate tn by period, customer and product.
# NOTE(review): `df_filtered` is not defined anywhere in the visible notebook — confirm before re-enabling.
# df_grouped = df_filtered.groupby(['timestamp', 'customer_id', 'product_id'], as_index=False)['tn'].sum()

In [9]:
# Total tn per (month, product), summed across all customers.
# Note: sum() skips NaN, so a month in which a product has no recorded sales
# for any customer aggregates to 0.0 rather than NaN.
tn_by_month_product = df.groupby(['timestamp', 'product_id'])['tn'].sum()
df_monthly_product = tn_by_month_product.reset_index()

In [10]:
# Expose the product identifier as 'item_id', the id column name passed to
# TimeSeriesDataFrame.from_data_frame further down.
df_monthly_product = df_monthly_product.rename({'product_id': 'item_id'}, axis='columns')

In [13]:
# Preview the per-month, per-product totals.
df_monthly_product.head()

Unnamed: 0,timestamp,item_id,tn
0,2017-01-01,20001,934.77222
1,2017-01-01,20002,550.15707
2,2017-01-01,20003,1063.45835
3,2017-01-01,20004,555.91614
4,2017-01-01,20005,494.27011


In [11]:
## 4. Build the TimeSeriesDataFrame
# One series per 'item_id', indexed by 'timestamp'; 'tn' remains as the value column.
ts_columns = {'id_column': 'item_id', 'timestamp_column': 'timestamp'}
ts_data = TimeSeriesDataFrame.from_data_frame(df_monthly_product, **ts_columns)

In [12]:
# Spot-check the full monthly history of a single series (item_id 20001).
print(ts_data.loc[20001])

                    tn
timestamp             
2017-01-01   934.77222
2017-02-01   798.01620
2017-03-01  1303.35771
2017-04-01  1069.96130
2017-05-01  1502.20132
2017-06-01  1520.06539
2017-07-01  1030.67391
2017-08-01  1267.39462
2017-09-01  1316.94604
2017-10-01  1439.75563
2017-11-01  1580.47401
2017-12-01  1049.38860
2018-01-01  1169.07532
2018-02-01  1043.76470
2018-03-01  1856.83534
2018-04-01  1251.28462
2018-05-01  1293.89788
2018-06-01  1150.79169
2018-07-01  1470.41009
2018-08-01  1800.96168
2018-09-01  1438.67455
2018-10-01  2295.19832
2018-11-01  1813.01511
2018-12-01  1486.68669
2019-01-01  1275.77351
2019-02-01  1259.09363
2019-03-01  1470.65653
2019-04-01  1647.63848
2019-05-01  1629.78233
2019-06-01  1109.93769
2019-07-01  1678.99318
2019-08-01  1261.34529
2019-09-01  1660.00561
2019-10-01  1561.50552
2019-11-01  1397.37231
2019-12-01  1504.68856


In [14]:
# Fill missing values using AutoGluon's default strategy.
# NOTE(review): probably a no-op here — the upstream groupby(...).sum() turns
# all-NaN groups into 0.0, so no NaN should reach this point; confirm.
ts_data = ts_data.fill_missing_values()

In [None]:
# New training run with a freshly created predictor.
# Horizon of 2 monthly steps: the history ends in 2019-12, so the forecast
# covers 2020-01 and 2020-02 (the month this notebook is after).
new_predictor = TimeSeriesPredictor(target='tn', prediction_length=2, freq='MS')

# Models to train; an empty dict leaves that model's hyperparameters unspecified.
model_hyperparameters = {
    'TemporalFusionTransformer': {},
    'PatchTST': {},
    'Chronos': {},
    'DeepAR': {},
}

# Optional knobs left disabled by the author: time_limit=600 (seconds, i.e. 10
# minutes) and hyperparameters='default' in place of the explicit dict above.
new_predictor.fit(
    train_data=ts_data,
    hyperparameters=model_hyperparameters,
    num_val_windows=3,
    val_step_size=1,
)

Beginning AutoGluon training...
AutoGluon will save models to '/home/nespina/Documentos/austral/labo_3/src/AutogluonModels/ag-20250709_003925'
AutoGluon Version:  1.3.1
Python Version:     3.12.3
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #65-Ubuntu SMP PREEMPT_DYNAMIC Mon May 19 17:15:03 UTC 2025
CPU Count:          8
GPU Count:          0
Memory Avail:       5.85 GB / 11.37 GB (51.4%)
Disk Space Avail:   105.51 GB / 284.85 GB (37.0%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'freq': 'MS',
 'hyperparameters': {'Chronos': {},
                     'DeepAR': {},
                     'PatchTST': {},
                     'TemporalFusionTransformer': {}},
 'known_covariates_names': [],
 'num_val_windows': 3,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'tn',
 'val_step_si

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x7c4fe69db9b0>

In [16]:
# Sanity check: number of series and the frequency of the training data.
print(ts_data.num_items)
print(ts_data.freq)

780
MS


In [17]:
# Forecast every series. No model is specified, so AutoGluon falls back to the
# one with the best validation score (see the log message below).
forecast = new_predictor.predict(ts_data)

Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


In [18]:
# List the forecast columns: the point forecast ('mean') and the quantile levels.
print(forecast.columns)

Index(['mean', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9'], dtype='object')


In [19]:
# Flatten the (item_id, timestamp) index into ordinary columns next to 'mean'.
forecast_mean = forecast['mean'].reset_index()
print(forecast_mean.columns)

Index(['item_id', 'timestamp', 'mean'], dtype='object')


In [20]:
# Preview the forecast: two rows (2020-01 and 2020-02) per item.
forecast.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
20001,2020-01-01,1263.661871,883.770537,1030.138558,1120.827998,1191.113017,1263.612409,1336.153531,1414.40106,1509.129867,1661.327678
20001,2020-02-01,1285.253602,940.822762,1056.403906,1153.933507,1218.75955,1287.739967,1360.657689,1434.07329,1512.443557,1662.827495
20002,2020-01-01,1041.90509,722.186751,854.512278,928.356915,988.877897,1046.247917,1109.229703,1172.51899,1245.154729,1388.073355
20002,2020-02-01,1024.661398,734.324914,841.44301,913.895143,967.168466,1029.143626,1091.206335,1148.673586,1224.933742,1363.634235
20003,2020-01-01,719.142308,487.326772,573.880997,623.89833,673.580472,717.571119,759.766682,810.438503,880.341433,996.609628


In [21]:
# Export the February 2020 point forecast, one row per product.
# The original cell first built `resultado` from all forecast rows and then
# immediately overwrote it; that dead assignment has been removed.
target_month = pd.Timestamp('2020-02-01')

# Flatten the (item_id, timestamp) index so the month can be filtered as a column.
forecast_long = forecast['mean'].reset_index()

# Keep only the target month and the two columns needed for the submission,
# renamed to the expected output schema.
resultado = (
    forecast_long
    .loc[forecast_long['timestamp'] == target_month, ['item_id', 'mean']]
    .rename(columns={'item_id': 'product_id', 'mean': 'tn'})
)

# Guard against silently writing an empty or duplicated file, e.g. if the
# prediction horizon no longer reaches the target month.
assert not resultado.empty, "no forecast rows found for the target month"
assert resultado['product_id'].is_unique, "expected exactly one row per product"

# Save to CSV
resultado.to_csv("predicciones_febrero2020_01-07-25_2.csv", index=False)
resultado.head()

Unnamed: 0,product_id,tn
1,20001,1285.253602
3,20002,1024.661398
5,20003,712.187592
7,20004,513.682919
9,20005,535.98295
