# Predecir 202002 a partir de los coeficientes del modelo de regresión lineal

Se consideran los coeficientes del modelo ajustado en la notebook 'notebooks\4_modelado\regresion_lineal\modelo_regresion_2018_para_predecir_con_2019_v3.ipynb'.

In [1]:
import pandas as pd
from src.utils.utils import get_base_dir

In [2]:
# Resolve the project base directory; the data paths used below are built from it.
base_dir = get_base_dir()
base_dir

WindowsPath('C:/Users/lauta/Desktop/Lautaro/maestria_ds/labo3/repo-entrega')

## Importar datos

In [3]:
# File listing the products to predict for 202002 (tab-separated, UTF-8)
df_pred = pd.read_csv(
    base_dir / "data/predict/raw/product_id_apredecir201912.txt",
    sep="\t",
    encoding="utf-8",
)

In [4]:
# Original sell-in dataset (tab-separated)
DATA_PATH_SELL_IN = base_dir / "data/raw/sell-in.txt"
sell_in = pd.read_csv(
    DATA_PATH_SELL_IN,
    sep="\t",
)

## Agrupar por periodo-producto

In [5]:
# Total tn per (periodo, product_id), ordered by period and then by product.
sell_in_agrup = (
    sell_in
    .groupby(["periodo", "product_id"])
    .agg({"tn": "sum"})
    .reset_index()
    .sort_values(by=["periodo", "product_id"])
)

In [6]:
# Keep only the products that appear in the prediction file and renumber the rows.
sell_in_agrup = sell_in_agrup.loc[
    sell_in_agrup["product_id"].isin(df_pred["product_id"])
].reset_index(drop=True)

## Filtrar 2019 (enero a diciembre)

In [7]:
# Rows whose periodo falls in 2019, January through December (both ends inclusive).
sell_in_agrup_2019 = sell_in_agrup[
    (sell_in_agrup["periodo"] >= 201901) & (sell_in_agrup["periodo"] <= 201912)
]
sell_in_agrup_2019

Unnamed: 0,periodo,product_id,tn
13561,201901,20001,1275.77351
13562,201901,20002,1266.78751
13563,201901,20003,964.76919
13564,201901,20004,511.33713
13565,201901,20005,363.58438
...,...,...,...
22344,201912,21263,0.01270
22345,201912,21265,0.05007
22346,201912,21266,0.05121
22347,201912,21267,0.01569


## Obtener listado de productos "mágicos" y coeficientes del modelo de regresión lineal ajustado en la notebook

In [8]:
# Load the "magic" products: the only ones the regression coefficients are applied to.
# NOTE(review): this path is relative to the working directory, unlike the other
# files in this notebook, which are resolved from base_dir — confirm it is intended.
product_id_coef = pd.read_csv("product_id_coef_magicos.csv")
# Plain Python list of the magic product ids.
product_id_coef_list = product_id_coef["product_id"].tolist()

In [9]:
# Coefficients of the fitted linear regression, one per month in chronological
# order starting at `periodo_inicial`.
coef = [ 0.07340737,  0.12000926,  0.10378153,  0.14164472,  0.04479068,
         0.15376063, -0.00874092, -0.16108622, -0.05917115,  0.17759373,
         0.23588766, -0.00156842]


def periodo_mas_meses(periodo, meses):
    """Return the YYYYMM period that lies `meses` months after `periodo`.

    Plain integer addition on a YYYYMM value breaks at year boundaries
    (201912 + 1 == 201913); this does the calendar arithmetic instead.

    Args:
        periodo: Period encoded as the integer YYYYMM.
        meses: Number of months to advance (0 returns `periodo` itself).

    Returns:
        The resulting period as a YYYYMM integer.
    """
    anio, mes = divmod(periodo, 100)
    # Work with a zero-based month so divmod yields the year carry directly.
    anios_extra, mes_base_cero = divmod(mes - 1 + meses, 12)
    return (anio + anios_extra) * 100 + mes_base_cero + 1


# First period the coefficients refer to; it is left untouched by the build below.
periodo_inicial = 201901

# Map each period (YYYYMM) to its coefficient.
coef_dict = {
    periodo_mas_meses(periodo_inicial, desplazamiento): valor
    for desplazamiento, valor in enumerate(coef)
}

# Rebind `product_id_coef` from the DataFrame read earlier to the plain list of ids;
# the membership checks and the print in later cells use this list.
product_id_coef = product_id_coef_list

In [17]:
# products with "magic" coefficients
print(product_id_coef)

# coefficients by year-month
coef_dict


[20001, 20002, 20003, 20006, 20008, 20010, 20011, 20017, 20018, 20019, 20021, 20026, 20028, 20035, 20039, 20042, 20044, 20045, 20046, 20049, 20051, 20052, 20053, 20055, 20086, 20180, 20193, 20320, 20532, 20612, 20637, 20807, 20838]


{201901: 0.07340737,
 201902: 0.12000926,
 201903: 0.10378153,
 201904: 0.14164472,
 201905: 0.04479068,
 201906: 0.15376063,
 201907: -0.00874092,
 201908: -0.16108622,
 201909: -0.05917115,
 201910: 0.17759373,
 201911: 0.23588766,
 201912: -0.00156842}

## Aplicar modelo en 2019 para predecir 202002

SOLO se aplica el modelo a los productos "mágicos"; para el resto se considera el promedio de los últimos 12 meses (enero a diciembre de 2019).


In [18]:
# Prediction rule for a single product, chosen by whether it is a "magic" product
def calcular_valor(group, product_id, coeficientes=None, productos_magicos=None):
    """Return the predicted `tn` for one product from its per-period rows.

    For a product in `productos_magicos` the result is the sum of
    ``tn * coefficient`` over the rows of `group`, where the coefficient is
    looked up by each row's ``periodo``. For any other product the result is
    the plain mean of ``tn``.

    Args:
        group: DataFrame with the product's rows; must have ``periodo`` and
            ``tn`` columns. It is not modified.
        product_id: Id of the product that `group` belongs to.
        coeficientes: Mapping from periodo to coefficient. Defaults to the
            notebook-level ``coef_dict``.
        productos_magicos: Container of the product ids the coefficients
            apply to. Defaults to the notebook-level ``product_id_coef``.

    Returns:
        The predicted value as a float.

    Raises:
        KeyError: If a magic product has a ``periodo`` with no coefficient.
    """
    if coeficientes is None:
        coeficientes = coef_dict
    if productos_magicos is None:
        productos_magicos = product_id_coef

    if product_id not in productos_magicos:
        # Products outside the list: simple average of the available rows.
        return group['tn'].mean()

    # Direct dict lookup so that a period without coefficient fails loudly
    # instead of being silently dropped from the sum.
    pesos = [coeficientes[periodo] for periodo in group['periodo']]
    # Element-wise product computed on a new Series, leaving `group` untouched.
    return (group['tn'] * pesos).sum()

# Run calcular_valor once per product; the group key is passed as the product id.
resultado = (
    sell_in_agrup_2019
    .groupby('product_id')
    .apply(lambda grupo: calcular_valor(grupo, grupo.name), include_groups=False)
    .reset_index()
)
resultado.columns = ['product_id', 'tn']

# Show the result
resultado

Unnamed: 0,product_id,tn
0,20001,1162.914886
1,20002,1182.845672
2,20003,684.835899
3,20004,627.215328
4,20005,668.270104
...,...,...
775,21263,0.029993
776,21265,0.089541
777,21266,0.094659
778,21267,0.092835


In [19]:
# Attach the computed tn to every product of the prediction file. The left join
# keeps products that have no computed value (their tn is left as NaN).
df_pred_processed = df_pred.merge(
    resultado[["product_id", "tn"]],
    on='product_id',
    how='left',
    suffixes=('', '_baseline'),
)

In [20]:
# Display the merged predictions for a visual check.
df_pred_processed

Unnamed: 0,product_id,tn
0,20001,1162.914886
1,20002,1182.845672
2,20003,684.835899
3,20004,627.215328
4,20005,668.270104
...,...,...
775,21263,0.029993
776,21265,0.089541
777,21266,0.094659
778,21267,0.092835


## Guardar predicciones

In [21]:
# Write the predictions to the final output folder, without the index column.
df_pred_processed.to_csv(
    base_dir / "data/predict/final/product_id_clase_6_modelo_reg_simple_v1_magicos.csv",
    index=False,
)

## Aplicar otra alternativa: promedio ponderado por los coeficientes

In [22]:
# Alternative prediction rule: coefficient-weighted average for "magic" products
def calcular_valor(group, product_id, coeficientes=None, productos_magicos=None):
    """Return the predicted `tn` for one product as a weighted average.

    For a product in `productos_magicos` the result is
    ``sum(tn * coefficient) / sum(coefficient)`` over the rows of `group`,
    where the coefficient is looked up by each row's ``periodo``. For any
    other product the result is the plain mean of ``tn``.

    Args:
        group: DataFrame with the product's rows; must have ``periodo`` and
            ``tn`` columns. It is not modified.
        product_id: Id of the product that `group` belongs to.
        coeficientes: Mapping from periodo to coefficient. Defaults to the
            notebook-level ``coef_dict``.
        productos_magicos: Container of the product ids the coefficients
            apply to. Defaults to the notebook-level ``product_id_coef``.

    Returns:
        The predicted value as a float.

    Raises:
        KeyError: If a magic product has a ``periodo`` with no coefficient.
    """
    if coeficientes is None:
        coeficientes = coef_dict
    if productos_magicos is None:
        productos_magicos = product_id_coef

    if product_id not in productos_magicos:
        # Products outside the list: simple average of the available rows.
        return group['tn'].mean()

    # Direct dict lookup so that a period without coefficient fails loudly.
    pesos = [coeficientes[periodo] for periodo in group['periodo']]
    suma_coeficientes = sum(pesos)
    if suma_coeficientes == 0:
        # Coefficients have mixed signs, so the weights of the periods present
        # can cancel out; dividing would yield inf/NaN, so use the plain mean.
        return group['tn'].mean()

    # Element-wise product computed on a new Series, leaving `group` untouched.
    suma_ponderada = (group['tn'] * pesos).sum()
    return suma_ponderada / suma_coeficientes  # weighted average

# Run calcular_valor once per product; the group key is passed as the product id.
resultado = (
    sell_in_agrup_2019
    .groupby('product_id')
    .apply(lambda grupo: calcular_valor(grupo, grupo.name), include_groups=False)
    .reset_index()
)
resultado.columns = ['product_id', 'tn']

# Show the result
resultado

Unnamed: 0,product_id,tn
0,20001,1417.654896
1,20002,1441.951581
2,20003,834.851266
3,20004,627.215328
4,20005,668.270104
...,...,...
775,21263,0.029993
776,21265,0.089541
777,21266,0.094659
778,21267,0.092835


In [23]:
# Attach the computed tn to every product of the prediction file. The left join
# keeps products that have no computed value (their tn is left as NaN).
df_pred_processed = df_pred.merge(
    resultado[["product_id", "tn"]],
    on='product_id',
    how='left',
    suffixes=('', '_baseline'),
)

In [24]:
# Display the merged predictions for a visual check.
df_pred_processed

Unnamed: 0,product_id,tn
0,20001,1417.654896
1,20002,1441.951581
2,20003,834.851266
3,20004,627.215328
4,20005,668.270104
...,...,...
775,21263,0.029993
776,21265,0.089541
777,21266,0.094659
778,21267,0.092835


In [25]:
# Write the alternative predictions to the final output folder, without the index column.
df_pred_processed.to_csv(
    base_dir / "data/predict/final/product_id_clase_6_modelo_reg_simple_v1_alternativa_2_magicos.csv",
    index=False,
)
