### ANALIZAR MÉTRICAS OBTENIDAS DE CÁLCULO DE COSTOS - BACKTEST

In [1]:
import pandas as pd
import numpy as np

from utils.utils import read_processed_data, set_root_path
from mlforecast import MLForecast
import lightgbm as lgb

from mlforecast.lag_transforms import (
    RollingMean,
    SeasonalRollingMean,
)
from sklearn.metrics import mean_absolute_error

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# set root repo
set_root_path()

root path: /Users/joseortega/Documents/GitHub/vn2_challenge


### 1. leer tablas a analizar

In [3]:
# leer output backtest estrategia optimizar costos
folder_output = "data/submission/backtest"
next_data_state_backtest = pd.read_csv(
    f"{folder_output}/next_data_state_backtest.csv"
)

In [4]:
next_data_state_backtest.head()

Unnamed: 0.1,Unnamed: 0,unique_id,Store,Product,Start Inventory,Sales,Missed Sales,End Inventory,In Transit W+1,In Transit W+2,Holding Cost,Shortage Cost,Cumulative Holding Cost,Cumulative Shortage Cost,fcst_w1,fcst_w2,fcst_w3,week0_update
0,0,0-126,0,126,0,0,6.0,0,0,3,0.0,6.0,0.0,6.0,4.0,4.0,3.0,2023-06-05
1,1,0-182,0,182,0,0,1.0,0,0,1,0.0,1.0,0.0,1.0,1.0,1.0,1.0,2023-06-05
2,2,1-124,1,124,0,0,6.0,0,0,4,0.0,6.0,0.0,6.0,4.0,4.0,4.0,2023-06-05
3,3,2-124,2,124,0,0,18.0,0,0,8,0.0,18.0,0.0,18.0,9.0,10.0,8.0,2023-06-05
4,4,2-126,2,126,0,0,7.0,0,0,3,0.0,7.0,0.0,7.0,3.0,3.0,3.0,2023-06-05


### 2. Resumen costos - en la última ejecución del backtest - resumen global

In [30]:
# obtener última fecha de actualización
last_week_backtest = next_data_state_backtest["week0_update"].max()
last_week_backtest

'2024-03-18'

In [31]:
# filtrar para tener la última actualización
last_update_backtest_costs = next_data_state_backtest[next_data_state_backtest["week0_update"] == last_week_backtest]

# crear columna auxiliar total de ambos costos
last_update_backtest_costs.loc[:, "Cumulative Holding+Shortage Cost"] = last_update_backtest_costs["Cumulative Holding Cost"] + last_update_backtest_costs["Cumulative Shortage Cost"]

last_update_backtest_costs.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_update_backtest_costs.loc[:, "Cumulative Holding+Shortage Cost"] = last_update_backtest_costs["Cumulative Holding Cost"] + last_update_backtest_costs["Cumulative Shortage Cost"]


Unnamed: 0.1,Unnamed: 0,unique_id,Store,Product,Start Inventory,Sales,Missed Sales,End Inventory,In Transit W+1,In Transit W+2,Holding Cost,Shortage Cost,Cumulative Holding Cost,Cumulative Shortage Cost,fcst_w1,fcst_w2,fcst_w3,week0_update,Cumulative Holding+Shortage Cost
24559,0,0-126,0,126,4,0,0.0,4,0,0,0.8,0.0,14.6,62.0,1.0,2.0,1.0,2024-03-18,76.6
24560,1,0-182,0,182,0,0,3.0,0,1,1,0.0,3.0,5.6,18.0,1.0,1.0,1.0,2024-03-18,23.6
24561,2,1-124,1,124,10,7,0.0,3,0,0,0.6,0.0,4.8,163.0,9.0,8.0,9.0,2024-03-18,167.8


In [34]:
# SUMAR LOS COSTOS A NIVEL "UNIQUE_ID" para tener los costos totales
cum_holding_costs = last_update_backtest_costs["Cumulative Holding Cost"].sum()
cum_shortage_costs = last_update_backtest_costs["Cumulative Shortage Cost"].sum()
cum_total_costs = last_update_backtest_costs["Cumulative Holding+Shortage Cost"].sum()

print("cum_holding_costs: ", cum_holding_costs)
print("cum_shortage_costs: ", cum_shortage_costs)
print("cum_total_costs: ", cum_total_costs)

cum_holding_costs:  4402.6
cum_shortage_costs:  45714.0
cum_total_costs:  50116.6


### 2. Resumen costos - en la última ejecución del backtest - series individualmente

In [8]:
# obtener última fecha de actualización
last_week_backtest = next_data_state_backtest["week0_update"].max()
last_week_backtest

'2024-03-18'

In [16]:
# filtrar para tener la última actualización
last_update_backtest_costs = next_data_state_backtest[next_data_state_backtest["week0_update"] == last_week_backtest]

# crear columna auxiliar total de ambos costos
last_update_backtest_costs.loc[:, "Cumulative Holding+Shortage Cost"] = last_update_backtest_costs["Cumulative Holding Cost"] + last_update_backtest_costs["Cumulative Shortage Cost"]

# ordenar por costos acumulado de mayor a menor costo
last_update_backtest_costs = last_update_backtest_costs.sort_values("Cumulative Holding+Shortage Cost", ascending=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_update_backtest_costs.loc[:, "Cumulative Holding+Shortage Cost"] = last_update_backtest_costs["Cumulative Holding Cost"] + last_update_backtest_costs["Cumulative Shortage Cost"]


In [23]:
last_update_backtest_costs

Unnamed: 0.1,Unnamed: 0,unique_id,Store,Product,Start Inventory,Sales,Missed Sales,End Inventory,In Transit W+1,In Transit W+2,Holding Cost,Shortage Cost,Cumulative Holding Cost,Cumulative Shortage Cost,fcst_w1,fcst_w2,fcst_w3,week0_update,Cumulative Holding+Shortage Cost
24760,201,61-23,61,23,81,68,0.0,13,0,28,2.6,0.0,92.4,2459.0,62.0,91.0,80.0,2024-03-18,2551.4
24836,277,61-124,61,124,2,2,57.0,0,52,78,0.0,57.0,26.2,2332.0,66.0,81.0,78.0,2024-03-18,2358.2
25091,532,63-23,63,23,5,5,68.0,0,6,26,0.0,68.0,71.6,1662.0,36.0,38.0,26.0,2024-03-18,1733.6
25113,554,63-124,63,124,46,46,23.0,0,82,0,0.0,23.0,84.0,1611.0,72.0,81.0,77.0,2024-03-18,1695.0
24692,133,60-125,60,125,27,27,31.0,0,0,62,0.0,31.0,72.0,1544.0,72.0,71.0,62.0,2024-03-18,1616.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24923,364,61-249,61,249,2,2,0.0,0,0,0,0.0,0.0,5.0,7.0,0.0,1.0,1.0,2024-03-18,12.0
24701,142,60-166,60,166,0,0,0.0,0,0,1,0.0,0.0,6.8,5.0,0.0,1.0,1.0,2024-03-18,11.8
25088,529,62-296,62,296,1,1,0.0,0,1,0,0.0,0.0,5.8,6.0,0.0,1.0,1.0,2024-03-18,11.8
24823,264,61-101,61,101,1,1,0.0,0,0,1,0.0,0.0,3.0,7.0,0.0,1.0,1.0,2024-03-18,10.0


In [24]:
last_update_backtest_costs.head(20)

Unnamed: 0.1,Unnamed: 0,unique_id,Store,Product,Start Inventory,Sales,Missed Sales,End Inventory,In Transit W+1,In Transit W+2,Holding Cost,Shortage Cost,Cumulative Holding Cost,Cumulative Shortage Cost,fcst_w1,fcst_w2,fcst_w3,week0_update,Cumulative Holding+Shortage Cost
24760,201,61-23,61,23,81,68,0.0,13,0,28,2.6,0.0,92.4,2459.0,62.0,91.0,80.0,2024-03-18,2551.4
24836,277,61-124,61,124,2,2,57.0,0,52,78,0.0,57.0,26.2,2332.0,66.0,81.0,78.0,2024-03-18,2358.2
25091,532,63-23,63,23,5,5,68.0,0,6,26,0.0,68.0,71.6,1662.0,36.0,38.0,26.0,2024-03-18,1733.6
25113,554,63-124,63,124,46,46,23.0,0,82,0,0.0,23.0,84.0,1611.0,72.0,81.0,77.0,2024-03-18,1695.0
24692,133,60-125,60,125,27,27,31.0,0,0,62,0.0,31.0,72.0,1544.0,72.0,71.0,62.0,2024-03-18,1616.0
24653,94,60-23,60,23,49,30,0.0,19,32,0,3.8,0.0,101.8,1498.0,46.0,35.0,50.0,2024-03-18,1599.8
24977,418,62-23,62,23,44,12,0.0,32,42,0,6.4,0.0,103.0,1469.0,40.0,43.0,52.0,2024-03-18,1572.0
25145,586,64-17,64,17,43,38,0.0,5,35,0,1.0,0.0,41.4,1043.0,56.0,47.0,58.0,2024-03-18,1084.4
25148,589,64-23,64,23,22,21,0.0,1,17,11,0.2,0.0,125.8,800.0,26.0,33.0,39.0,2024-03-18,925.8
24781,222,61-48,61,48,12,12,11.0,0,22,0,0.0,11.0,24.6,665.0,22.0,19.0,20.0,2024-03-18,689.6


### 3. Revisar todas las ejecuciones para una serie en particular

In [26]:
# param - serie a filtrar
unique_id_filter = "61-23"

In [29]:
# filtrar por la serie de interés
next_data_state_backtest[next_data_state_backtest["unique_id"] == unique_id_filter]

Unnamed: 0.1,Unnamed: 0,unique_id,Store,Product,Start Inventory,Sales,Missed Sales,End Inventory,In Transit W+1,In Transit W+2,Holding Cost,Shortage Cost,Cumulative Holding Cost,Cumulative Shortage Cost,fcst_w1,fcst_w2,fcst_w3,week0_update
201,201,61-23,61,23,0,0,104.0,0,0,24,0.0,104.0,0.0,104.0,10.0,28.0,24.0,2023-06-05
800,201,61-23,61,23,0,0,25.0,0,24,66,0.0,25.0,0.0,129.0,82.0,59.0,66.0,2023-06-12
1399,201,61-23,61,23,24,24,0.0,0,66,0,0.0,0.0,0.0,129.0,36.0,33.0,50.0,2023-06-19
1998,201,61-23,61,23,66,12,0.0,54,0,0,10.8,0.0,10.8,129.0,42.0,36.0,39.0,2023-06-26
2597,201,61-23,61,23,54,29,0.0,25,0,18,5.0,0.0,15.8,129.0,44.0,52.0,64.0,2023-07-03
3196,201,61-23,61,23,25,17,0.0,8,18,59,1.6,0.0,17.4,129.0,45.0,54.0,73.0,2023-07-10
3795,201,61-23,61,23,26,26,34.0,0,59,37,0.0,34.0,17.4,163.0,48.0,76.0,72.0,2023-07-17
4394,201,61-23,61,23,59,59,42.0,0,37,4,0.0,42.0,17.4,205.0,70.0,71.0,88.0,2023-07-24
4993,201,61-23,61,23,37,37,41.0,0,4,78,0.0,41.0,17.4,246.0,86.0,85.0,78.0,2023-07-31
5592,201,61-23,61,23,4,4,93.0,0,78,58,0.0,93.0,17.4,339.0,72.0,85.0,59.0,2023-08-07
