# Propuesta "Baseline" y "Medias"

* 1ro: Considerar el periodo 201912 para estimar "base predict" (202002).
* 2do: Hacer promedios de los últimos n meses.
* 3ro: Hacer promedios de los últimos n meses salteando de a t meses. 

## Librerías

In [1]:
import numpy as np
import pandas as pd
from src.utils.utils import get_base_dir

## Importar conjunto de datos

In [2]:
# Resolve the project root, then load the raw sell-in table
# (tab-separated text, UTF-8 encoded).
base_dir = get_base_dir()
sell_in = pd.read_csv(
    base_dir / "data/raw/sell-in.txt",
    encoding="utf-8",
    sep="\t",
)
sell_in.head()  # notebook preview of the first rows

Unnamed: 0,periodo,customer_id,product_id,plan_precios_cuidados,cust_request_qty,cust_request_tn,tn
0,201701,10234,20524,0,2,0.053,0.053
1,201701,10032,20524,0,1,0.13628,0.13628
2,201701,10217,20524,0,1,0.03028,0.03028
3,201701,10125,20524,0,1,0.02271,0.02271
4,201701,10012,20524,0,11,1.54452,1.54452


## Agrupar por periodo y producto (sumar toneladas totales)

In [3]:
# Collapse the customer dimension: total tons (`tn`) per (periodo, product_id).
# The group keys are turned back into regular columns by reset_index().
data_baseline = (
    sell_in.groupby(["periodo", "product_id"])[["tn"]]
    .sum()
    .reset_index()
)
data_baseline

Unnamed: 0,periodo,product_id,tn
0,201701,20001,934.77222
1,201701,20002,550.15707
2,201701,20003,1063.45835
3,201701,20004,555.91614
4,201701,20005,494.27011
...,...,...,...
31238,201912,21265,0.05007
31239,201912,21266,0.05121
31240,201912,21267,0.01569
31241,201912,21271,0.00298


## Filtrar periodo 201912

In [4]:
# Keep only the rows of period 201912 and renumber them from 0;
# these per-product totals are the baseline prediction.
data_baseline_201912 = (
    data_baseline.loc[data_baseline["periodo"].eq(201912)]
    .reset_index(drop=True)
)
data_baseline_201912

Unnamed: 0,periodo,product_id,tn
0,201912,20001,1504.68856
1,201912,20002,1087.30855
2,201912,20003,892.50129
3,201912,20004,637.90002
4,201912,20005,593.24443
...,...,...,...
922,201912,21265,0.05007
923,201912,21266,0.05121
924,201912,21267,0.01569
925,201912,21271,0.00298


## Importar archivo a predecir

In [5]:
# Load the list of product ids for which a prediction must be submitted.
df_pred = pd.read_csv(
    base_dir / "data/predict/raw/product_id_apredecir201912.txt",
)
df_pred

Unnamed: 0,product_id
0,20001
1,20002
2,20003
3,20004
4,20005
...,...
775,21263
776,21265
777,21266
778,21267


## Merge archivo a predecir con base 201912

In [6]:
# Attach the 201912 tons to every product to predict. The left join keeps all
# rows of df_pred; a product with no 201912 row ends up with NaN in `tn`.
df_pred_processed = df_pred.merge(
    data_baseline_201912[["product_id", "tn"]],
    how="left",
    on="product_id",
    suffixes=("", "_baseline"),
)

In [9]:
# Write the baseline prediction (product_id, tn) without the row index.
df_pred_processed.to_csv(
    base_dir / "data/predict/final/product_id_clase_3_20250613.csv",
    index=False,
)

## Ahora CONSIDERANDO PROMEDIOS

In [10]:
def obtener_ultimos_meses(fin, n):
    """Return the last `n` consecutive monthly periods ending at `fin`.

    Periods are integers in YYYYMM form (e.g. 201912). The result includes
    `fin` itself and is ordered from newest to oldest.

    Args:
        fin: Last period of the window, as a YYYYMM integer.
        n: Number of periods to return; values <= 0 yield an empty list.

    Returns:
        A list with `n` YYYYMM integers, newest first.

    Raises:
        ValueError: If the month part of `fin` is not in the range 01-12.
    """
    anio, mes = divmod(fin, 100)
    if not 1 <= mes <= 12:
        # Without this guard an invalid month (e.g. 201900) would be emitted
        # as-is and the countdown would keep producing invalid period codes.
        raise ValueError(
            f"fin must be a YYYYMM period with month 01-12, got {fin!r}"
        )

    # Work on a zero-based running month count so that crossing a year
    # boundary is plain integer arithmetic instead of manual wrap-around.
    ultimo = anio * 12 + (mes - 1)
    return [
        (indice // 12) * 100 + indice % 12 + 1
        for indice in range(ultimo, ultimo - n, -1)
    ]

### Promedio de los últimos 3 meses

In [11]:
# Mean tons per product over the last 3 periods up to 201912.
# Note: the mean is taken over the periods in which the product has a row in
# data_baseline, so a product missing from a period is averaged over fewer
# than n values.
n = 3
data_baseline_promedio = (
    data_baseline.loc[
        data_baseline["periodo"].isin(obtener_ultimos_meses(201912, n))
    ]
    .groupby("product_id")[["tn"]]
    .mean()
    .reset_index()
)

In [12]:
# Attach the averaged tons to every product to predict. The left join keeps
# all rows of df_pred; unmatched products end up with NaN in `tn`.
df_pred_processed_2 = df_pred.merge(
    data_baseline_promedio[["product_id", "tn"]],
    how="left",
    on="product_id",
    suffixes=("", "_baseline"),
)

In [13]:
# Write the 3-month-average prediction without the row index.
df_pred_processed_2.to_csv(
    base_dir / "data/predict/final/product_id_clase_3_ultimos_3_meses_20250613.csv",
    index=False,
)

### Promedio de los últimos 12 meses

In [14]:
# Mean tons per product over the last 12 periods up to 201912.
# The mean is taken over the periods in which the product has a row.
n = 12
data_baseline_promedio = (
    data_baseline.loc[
        data_baseline["periodo"].isin(obtener_ultimos_meses(201912, n))
    ]
    .groupby("product_id")[["tn"]]
    .mean()
    .reset_index()
)

# Left join keeps every product to predict; unmatched ones get NaN in `tn`.
df_pred_processed_3 = df_pred.merge(
    data_baseline_promedio[["product_id", "tn"]],
    how="left",
    on="product_id",
    suffixes=("", "_baseline"),
)

df_pred_processed_3.to_csv(
    base_dir / "data/predict/final/product_id_clase_3_ultimos_12_meses_20250613.csv",
    index=False,
)

### Promedio de los últimos 24 meses

In [15]:
# Mean tons per product over the last 24 periods up to 201912.
# The mean is taken over the periods in which the product has a row.
n = 24
data_baseline_promedio = (
    data_baseline.loc[
        data_baseline["periodo"].isin(obtener_ultimos_meses(201912, n))
    ]
    .groupby("product_id")[["tn"]]
    .mean()
    .reset_index()
)

# Left join keeps every product to predict; unmatched ones get NaN in `tn`.
df_pred_processed_4 = df_pred.merge(
    data_baseline_promedio[["product_id", "tn"]],
    how="left",
    on="product_id",
    suffixes=("", "_baseline"),
)

df_pred_processed_4.to_csv(
    base_dir / "data/predict/final/product_id_clase_3_ultimos_24_meses_20250613.csv",
    index=False,
)

### Promedio de los últimos 32 meses

In [16]:
# Mean tons per product over the last 32 periods up to 201912.
# The mean is taken over the periods in which the product has a row.
n = 32
data_baseline_promedio = (
    data_baseline.loc[
        data_baseline["periodo"].isin(obtener_ultimos_meses(201912, n))
    ]
    .groupby("product_id")[["tn"]]
    .mean()
    .reset_index()
)

# Left join keeps every product to predict; unmatched ones get NaN in `tn`.
# This rebinds df_pred_processed_4, the same name the 24-month cell uses.
df_pred_processed_4 = df_pred.merge(
    data_baseline_promedio[["product_id", "tn"]],
    how="left",
    on="product_id",
    suffixes=("", "_baseline"),
)

df_pred_processed_4.to_csv(
    base_dir / "data/predict/final/product_id_clase_3_ultimos_32_meses_20250613.csv",
    index=False,
)

## Prueba saltando meses

In [17]:
def obtener_meses_salteando(fin, n, paso=1):
    """Return `n` monthly periods going back from `fin` in steps of `paso`.

    Periods are integers in YYYYMM form (e.g. 201912). The result starts at
    `fin` and is ordered from newest to oldest; with ``paso=1`` the periods
    are consecutive months, with ``paso=2`` every second month, and so on.

    Args:
        fin: Most recent period, as a YYYYMM integer.
        n: Number of periods to return; values <= 0 yield an empty list.
        paso: Distance in months between two returned periods (>= 1).

    Returns:
        A list with `n` YYYYMM integers, newest first.

    Raises:
        ValueError: If `paso` is smaller than 1 or the month part of `fin`
            is not in the range 01-12.
    """
    if paso < 1:
        # paso == 0 would repeat `fin` n times and a negative step would walk
        # forward into month numbers above 12.
        raise ValueError(f"paso must be >= 1, got {paso!r}")

    anio, mes = divmod(fin, 100)
    if not 1 <= mes <= 12:
        raise ValueError(
            f"fin must be a YYYYMM period with month 01-12, got {fin!r}"
        )

    # Work on a zero-based running month count so that steps spanning one or
    # several year boundaries need no explicit wrap-around loop.
    ultimo = anio * 12 + (mes - 1)
    return [
        (indice // 12) * 100 + indice % 12 + 1
        for indice in range(ultimo, ultimo - n * paso, -paso)
    ]

### 18 meses

In [18]:
# Mean tons per product over 18 periods taken every 2nd month back from
# 201912 (201912, 201910, ..., 201702).
# `paso=2` is passed explicitly: with the helper's default paso=1 no month is
# skipped and the result is just the plain average of the last 18 months,
# which contradicts the "salteando_2" output file name.
n = 18
data_baseline_promedio = (
    data_baseline[
        data_baseline["periodo"].isin(obtener_meses_salteando(201912, n, paso=2))
    ]
    .groupby("product_id")
    .agg({"tn": "mean"})
    .reset_index(drop=False)
)

# Left join keeps every product to predict; unmatched ones get NaN in `tn`.
df_pred_processed_4 = df_pred.merge(data_baseline_promedio[["product_id","tn"]], on='product_id', how='left', suffixes=('', '_baseline'))

df_pred_processed_4.to_csv((base_dir / "data/predict/final/product_id_clase_3_salteando_2_18_meses_20250613.csv"), index=False)

### 12 meses

In [19]:
# Mean tons per product over 12 periods taken every 2nd month back from
# 201912 (201912, 201910, ..., 201802).
# `paso=2` is passed explicitly: with the helper's default paso=1 this cell
# reproduces the plain 12-month average instead of a skipping window, which
# contradicts the "salteando_2" output file name.
n = 12
data_baseline_promedio = (
    data_baseline[
        data_baseline["periodo"].isin(obtener_meses_salteando(201912, n, paso=2))
    ]
    .groupby("product_id")
    .agg({"tn": "mean"})
    .reset_index(drop=False)
)

# Left join keeps every product to predict; unmatched ones get NaN in `tn`.
df_pred_processed_4 = df_pred.merge(data_baseline_promedio[["product_id","tn"]], on='product_id', how='left', suffixes=('', '_baseline'))

df_pred_processed_4.to_csv((base_dir / "data/predict/final/product_id_clase_3_salteando_2_12_meses_20250613.csv"), index=False)