In [51]:
# AutoGluon - Predicción de ventas (tn) por producto para febrero 2020

In [52]:
## 1. Importar librerías necesarias
import pandas as pd
from autogluon.common import space
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

In [53]:
%pip install autogluon.timeseries


Note: you may need to restart the kernel to use updated packages.


In [54]:
## 2. Cargar datasets
df_sellin = pd.read_csv("sell-in.txt", sep="\t")
df_productos = pd.read_csv("tb_productos.txt", sep="\t")

In [55]:
# Leer lista de productos a predecir
with open("product_id_apredecir201912.TXT", "r") as f:
    product_ids = [int(line.strip()) for line in f if line.strip().isdigit()]

In [56]:
## 3. Preprocesamiento
# Convertir periodo a datetime
df_sellin['timestamp'] = pd.to_datetime(df_sellin['periodo'], format='%Y%m')

In [57]:
# Filtrar hasta dic 2019 y productos requeridos
df_filtered = df_sellin[
    (df_sellin['timestamp'] <= '2019-12-01') &
    (df_sellin['product_id'].isin(product_ids))
]

In [58]:
# Agregar tn por periodo, cliente y producto
df_grouped = df_filtered.groupby(['timestamp', 'customer_id', 'product_id'], as_index=False)['tn'].sum()

In [59]:
# Agregar tn total por periodo y producto
df_monthly_product = df_grouped.groupby(['timestamp', 'product_id'], as_index=False)['tn'].sum()

In [60]:
# Agregar columna 'item_id' para AutoGluon
df_monthly_product['item_id'] = df_monthly_product['product_id']

In [61]:
## 4. Crear TimeSeriesDataFrame
ts_data = TimeSeriesDataFrame.from_data_frame(
    df_monthly_product,
    id_column='item_id',
    timestamp_column='timestamp'
)

In [62]:
print(ts_data.loc[20001])

            product_id          tn
timestamp                         
2017-01-01       20001   934.77222
2017-02-01       20001   798.01620
2017-03-01       20001  1303.35771
2017-04-01       20001  1069.96130
2017-05-01       20001  1502.20132
2017-06-01       20001  1520.06539
2017-07-01       20001  1030.67391
2017-08-01       20001  1267.39462
2017-09-01       20001  1316.94604
2017-10-01       20001  1439.75563
2017-11-01       20001  1580.47401
2017-12-01       20001  1049.38860
2018-01-01       20001  1169.07532
2018-02-01       20001  1043.76470
2018-03-01       20001  1856.83534
2018-04-01       20001  1251.28462
2018-05-01       20001  1293.89788
2018-06-01       20001  1150.79169
2018-07-01       20001  1470.41009
2018-08-01       20001  1800.96168
2018-09-01       20001  1438.67455
2018-10-01       20001  2295.19832
2018-11-01       20001  1813.01511
2018-12-01       20001  1486.68669
2019-01-01       20001  1275.77351
2019-02-01       20001  1259.09363
2019-03-01       200

In [63]:
# Completar valores faltantes
ts_data = ts_data.fill_missing_values()

In [None]:
# Prueba de nuevo entrenamiento, creaando  un nuevo predictor:
new_predictor = TimeSeriesPredictor(
    prediction_length=2,
    eval_metric='RMSE',
    target='tn',
    freq='MS'
)



In [None]:
# Crear un nuevo predictor antes de ajustar

new_predictor.fit(
    ts_data,
    num_val_windows=3,
    val_step_size=1,
    hyperparameters=hyperparameters,
    hyperparameter_tune_kwargs='auto',  # usa Hyperband por defecto
    
)

	Trained 0 models while tuning PatchTST.
Fitting simple weighted ensemble.
	Ensemble weights: {'ETS': 0.52, 'SeasonalNaive': 0.48}
	-31.5385      = Validation score (-RMSE)
	0.16    s     = Training runtime
	1.17    s     = Validation (prediction) runtime
Training complete. Models trained: ['SeasonalNaive', 'Naive', 'ETS', 'WeightedEnsemble']
Total runtime: 51.98 s
Best model: WeightedEnsemble
Best model score: -31.5385


<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x34a5c4cd0>

In [71]:
new_predictor.leaderboard(ts_data, silent=True)

data with frequency 'IRREG' has been resampled to frequency 'MS'.
Additional data provided, testing on additional data. Resulting leaderboard will be sorted according to test score (`score_test`).


Unnamed: 0,model,score_test,score_val,pred_time_test,pred_time_val,fit_time_marginal,fit_order
0,WeightedEnsemble,-26.875034,-31.538525,1.790843,1.173683,0.163739,4
1,SeasonalNaive,-31.013073,-37.954457,0.11189,0.125704,2.166694,1
2,ETS,-32.867835,-37.215312,1.677526,1.047979,2.80698,3
3,Naive,-46.113437,-46.941168,1.874035,0.159942,0.257484,2


In [66]:
print(ts_data.num_items)
print(ts_data.freq)

780
MS


In [67]:
forecast = new_predictor.predict(ts_data)

data with frequency 'IRREG' has been resampled to frequency 'MS'.
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


In [68]:
print(forecast.columns)

Index(['mean', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9'], dtype='object')


In [69]:
forecast_mean = forecast['mean'].reset_index()
print(forecast_mean.columns)

Index(['item_id', 'timestamp', 'mean'], dtype='object')


In [70]:
# Tomar solo item_id y la predicción 'mean'
resultado = forecast['mean'].reset_index()[['item_id', 'mean']]
resultado.columns = ['product_id', 'tn']

# Filtrar solo febrero 2020
resultado = forecast['mean'].reset_index()
resultado = resultado[resultado['timestamp'] == '2020-02-01']

# Renombrar columnas
resultado = resultado[['item_id', 'mean']]
resultado.columns = ['product_id', 'tn']

# Guardar a CSV
resultado.to_csv("predicciones_febrero2020_01-07-25_4.csv", index=False)
resultado.head()

Unnamed: 0,product_id,tn
1,20001,1376.990477
3,20002,1238.944873
5,20003,699.824743
7,20004,542.778781
9,20005,505.563725
