#### WPE & WAPE Calculation

In [1]:
!pip install uritools

Collecting uritools
  Downloading uritools-3.0.2-py3-none-any.whl (12 kB)
Installing collected packages: uritools
Successfully installed uritools-3.0.2


In [1]:
import pandas as pd
import numpy as np
import boto3
import pyarrow.parquet as pq
import s3fs
import math
import yaml

import sys
sys.path.append('../../')
from src.utils import read_multipart_parquet_s3, write_df_to_parquet_on_s3

##### Fetch data
- Fetch realized sales

In [2]:
# Realized sales, summed so that there is a single quantity per (model_id, week_id, date).
realized_sales = (
    read_multipart_parquet_s3(
        'fcst-workspace/forecast-cn/fcst-refined-demand-forecast-dev',
        'global/model_week_sales',
        prefix_filename='part-',
    )
    .groupby(by=['model_id', 'week_id', 'date'], as_index=False)
    .agg({'sales_quantity': 'sum'})
)

- Fetch AI forecast

In [3]:
def keys(bucket_name, prefix='/', delimiter='/'):
    """Lazily yield the key of every object stored under `prefix` in an S3 bucket.

    Parameters
    ----------
    bucket_name : str
        Name of the S3 bucket to list.
    prefix : str
        Key prefix to filter on. A single leading `delimiter` is removed before
        the lookup, so the default '/' lists the whole bucket.
    delimiter : str
        Separator that may lead `prefix`; the whole delimiter is stripped, not
        just its first character.

    Returns
    -------
    generator of str
        Object keys, produced as the bucket listing is iterated.
    """
    if delimiter and prefix.startswith(delimiter):
        prefix = prefix[len(delimiter):]
    bucket = boto3.resource('s3').Bucket(bucket_name)
    return (s3_object.key for s3_object in bucket.objects.filter(Prefix=prefix))

def get_result_path(run_name,bucket_name,cut_off):
    """List the forecast output files of a run, one S3 prefix per cut-off.

    Parameters
    ----------
    run_name : str
        Run identifier; it appears twice in the S3 prefix that is listed.
    bucket_name : str
        Bucket handed to `keys`.
    cut_off : iterable
        Cut-off identifiers; each one is interpolated into the listed prefix.

    Returns
    -------
    pandas.DataFrame
        Columns 'cut_off' and 'path', restricted to keys ending in 'parquet.out'.
        Both columns are present even when nothing was found.
    """
    cutoffs = []
    paths = []
    for cutoff in cut_off:
        prefix = f'forecast-cn/fcst-refined-demand-forecast-dev/specific/{run_name}/{run_name}-deepar-hw-{cutoff}/output/'
        for key in keys(bucket_name, prefix=prefix, delimiter='/'):
            paths.append(key)
            cutoffs.append(cutoff)
    file_df = pd.DataFrame({'cut_off': cutoffs, 'path': paths})
    # Use an explicit boolean Series: indexing with a plain (possibly empty) list
    # would be read as a column selection and drop every column when no key exists.
    is_result = pd.Series([path.endswith('parquet.out') for path in paths],
                          index=file_df.index, dtype=bool)
    return file_df.loc[is_result]

def get_week_id(str_,step):
    """Return the week id reached `step` weeks into a forecast that starts at `str_`.

    Step 1 is the starting week itself, so the result is `str_` shifted by
    `step - 1` weeks.

    Parameters
    ----------
    str_ : int or str
        Week id written as YYYYWW (four-digit year followed by the week number).
    step : int
        1-based forecast step.

    Returns
    -------
    int
        Week id in the same YYYYWW form.

    Notes
    -----
    Every year is treated as having exactly 52 weeks, as in the original
    implementation. NOTE(review): calendars with 53-week years are not
    handled — confirm against the week calendar used by the data.
    """
    week_id = str(str_)
    year = int(week_id[:4])
    week = int(week_id[4:])
    # Work on a zero-based week offset so that exact multiples of 52 land on
    # week 52 of the previous year instead of a non-existent week 00.
    offset = (week - 1) + (step - 1)
    year += offset // 52
    week = offset % 52 + 1
    return int(year * 100 + week)

In [55]:
# Collect the S3 path of every model result file for the run,
# for the weekly cut-offs 202025 through 202033.
bucket_name = 'fcst-workspace'
run_name = 'piloted'
ai_cut_off = list(range(202025, 202034))
file = get_result_path(run_name, bucket_name, ai_cut_off)

In [97]:
# Read every result file, tag it with its cut-off and stack the pieces once at the end
# (DataFrame.append inside a loop re-copies the data each time and no longer exists in pandas 2).
forecast_frames = []
for w,f in file[['cut_off','path']].values:
    try:
        forecast_df = read_multipart_parquet_s3('fcst-workspace',f, prefix_filename='part-')
        forecast_df['cut_off'] = int(w)
    except Exception as exc:
        # Loading stays best-effort, but a skipped file is reported instead of vanishing silently.
        print(f'Skipping {f} (cut-off {w}): {exc!r}')
        continue
    forecast_frames.append(forecast_df)

if not forecast_frames:
    raise ValueError('No forecast file could be read; check `file` and the S3 paths.')
ai_forecast = pd.concat(forecast_frames)

# Keep forecast steps up to 104 and sum the forecast per model, step and cut-off.
ai_forecast = ai_forecast[(ai_forecast.forecast_step <= 104)].groupby(by=['model_id','forecast_step','cut_off'],
                                                  as_index=False).agg({'forecast':'sum'})
# Week id that each (cut_off, forecast_step) pair is forecasting.
ai_forecast['target'] = [get_week_id(x,y) for x,y in zip(ai_forecast['cut_off'], ai_forecast['forecast_step'])]

In [98]:
# Column dtypes and memory footprint of the aggregated forecast.
ai_forecast.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6103448 entries, 0 to 6103447
Data columns (total 5 columns):
 #   Column         Dtype
---  ------         -----
 0   model_id       int64
 1   forecast_step  int64
 2   cut_off        int64
 3   forecast       int64
 4   target         int64
dtypes: int64(5)
memory usage: 279.4 MB


In [71]:
#ai_forecast = ai_forecast[ai_forecast.target < 202147]

In [139]:
# Load the APO global demand export from the working directory. Later cells read its
# 'Weeks of snapshot' and 'model_id' columns and every column whose name starts with '2'.
apo_gd = pd.read_csv('apo_gd.csv')

In [140]:
# transform the schema of apo global demand data
# Snapshot weeks act as cut-offs; every column whose name starts with '2' is a target period.
apo_cutoff = apo_gd['Weeks of snapshot'].unique()
apo_period = [int(i) for i in apo_gd.columns if i.startswith('2')]

# Every (cut_off, target) combination, with the cut-off varying slowest. Built with public
# numpy calls instead of the private pandas helper pd.core.reshape.util.cartesian_product.
cut_w = np.repeat(apo_cutoff, len(apo_period))
tar_w = np.tile(apo_period, len(apo_cutoff))
w = pd.DataFrame({'cut_off': cut_w, 'target': tar_w})
# Keep only targets that are not earlier than their cut-off.
w = w[(w.target - w.cut_off >= 0)]

def get_value(data,x,y,col):
    """Extract one (snapshot week, target period) slice of a wide table in long form.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide table with a 'Weeks of snapshot' column, a 'model_id' column and one
        column per target period named `str(y)`.
    x : scalar
        Snapshot week to select; written to the 'cut_off' column.
    y : scalar
        Target period; `str(y)` names the value column to read and `y` is written
        to the 'target' column.
    col : str
        Name given to the extracted value column.

    Returns
    -------
    pandas.DataFrame
        Columns ['cut_off', 'target', 'model_id', col], keeping the selected rows'
        original index.
    """
    # Select rows and columns in a single .loc call so the result is a fresh frame,
    # not a chained-indexing slice (which triggered SettingWithCopyWarning).
    snapshot_rows = data.loc[data['Weeks of snapshot'] == x, [str(y), 'model_id']]
    long_form = (
        snapshot_rows
        .rename(columns={str(y): col})
        .assign(cut_off=x, target=y)
    )
    return long_form[['cut_off','target','model_id',col]]

# Build the long-form APO forecast: one slice per (cut_off, target) pair, concatenated once
# (DataFrame.append inside a loop re-copies the data each time and no longer exists in pandas 2).
fore_parts = []
for x, y in w.values:
    tmp_fore = get_value(apo_gd,x,y,'apo_gd')
    fore_parts.append(tmp_fore)
# pd.concat rejects an empty list, so fall back to the empty frame the loop used to start from.
fore = pd.concat(fore_parts) if fore_parts else pd.DataFrame()

In [141]:
# Keep APO forecast rows whose target week is before 202147.
fore = fore.loc[fore['target'] < 202147]

- Fetch price

In [31]:
# Fetch price to calculate turnover: keep weeks from the first AI cut-off onwards,
# then average the price per (model_id, week_id, date).
price = (
    read_multipart_parquet_s3(
        'fcst-workspace/forecast-cn/fcst-refined-demand-forecast-dev',
        'global/model_week_price',
        prefix_filename='part-',
    )
    .loc[lambda frame: frame.week_id >= min(ai_cut_off)]
    .groupby(by=['model_id', 'week_id', 'date'], as_index=False)
    .agg({'average_price': 'mean'})
)

In [32]:
# One price per model: the mean of its weekly average prices, stored as 'initial_price'.
price_per_model_spec = {'initial_price': ('average_price', 'mean')}
df_price = price.groupby(by='model_id', as_index=False).agg(**price_per_model_spec)

In [33]:
# Column dtypes, non-null counts and memory footprint of the per-model price table.
df_price.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21882 entries, 0 to 21881
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   model_id       21882 non-null  int64  
 1   initial_price  21882 non-null  float64
dtypes: float64(1), int64(1)
memory usage: 512.9 KB


- Selling length

In [34]:
# First and last week with realized sales for every model.
selling_window_spec = {
    'start_week': ('week_id', 'min'),
    'last_week': ('week_id', 'max'),
}
start_selling_week = realized_sales.groupby(by='model_id', as_index=False).agg(**selling_window_spec)

#### Generate WPE and WAPE

In [35]:
# Rename week_id to target so realized sales can be joined to the forecasts on ['model_id', 'target'].
realized_sales = realized_sales.rename(columns={'week_id':'target'})

In [142]:
# One row per AI forecast point, enriched with realized sales, the model price and the
# model's selling window. Left joins keep forecast rows that have no match on the right.
# The chain is parenthesised so it no longer ends in a dangling backslash continuation.
base = (
    ai_forecast[['model_id','cut_off','target','forecast','forecast_step']]
    .merge(realized_sales[['model_id','target','sales_quantity']], on=['model_id','target'], how='left')
    .merge(df_price, on='model_id', how='left')
    .merge(start_selling_week, on='model_id', how='left')
)

In [143]:
# Drop models without a price (no row for them in df_price after the left join) and
# models whose last week of realized sales is earlier than 202147.
has_price = base.initial_price.notna()
sold_recently = base.last_week >= 202147
base = base[has_price & sold_recently]

In [144]:
# Keep forecast steps 31 to 82 (both inclusive), then attach the APO GD forecast
# made for the same model, target week and cut-off.
base = base[base.forecast_step.between(31, 82)]
base = base.merge(fore, how='left', on=['model_id', 'target', 'cut_off'])

In [145]:
# Store the APO GD values as floats.
base['apo_gd'] = base['apo_gd'].astype(float)

- WPE

In [108]:
def wpe_func(forecast_base,horizon,forecast):
    """Compute the percentage error of a forecast summed over a horizon.

    Forecast and realized sales are first summed over the horizon for every
    (cut_off, model_id) pair; the absolute gap is then taken on those sums.

    Parameters
    ----------
    forecast_base : pandas.DataFrame
        Needs the columns cut_off, model_id, target, forecast_step,
        sales_quantity, initial_price and the column named by `forecast`.
    horizon : int
        Number of forecast steps to include.
    forecast : str
        Name of the forecast column. 'apo_gd' takes the candidate cut-offs from
        the notebook-level `apo_cutoff`, anything else from `ai_cut_off`.

    Returns
    -------
    tuple of pandas.DataFrame
        `wpe`: one row per (cut_off, model_id) with gap_qty, gap_to, total_to and
        the ratios 'wpe_{horizon}W_qty' / 'wpe_{horizon}W_to' (0/0 is reported as 0).
        `wpe_by_cutoff`: the same quantities summed per cut_off, with both ratios
        recomputed on the sums.

    Notes
    -----
    A pair with zero sales but a non-zero gap still yields an infinite ratio.
    """
    qty_col = 'wpe_{}W_qty'.format(horizon)
    to_col = 'wpe_{}W_to'.format(horizon)

    candidate_cutoffs = apo_cutoff if forecast == 'apo_gd' else ai_cut_off
    # Latest target week for which realized sales exist.
    recent_sales = forecast_base[forecast_base.sales_quantity.notna()].target.max()
    # Only cut-offs whose whole horizon is already covered by realized sales.
    cutoff = [c for c in candidate_cutoffs if get_week_id(c, horizon) <= recent_sales]

    in_scope = forecast_base[(forecast_base.forecast_step <= horizon)
                             & (forecast_base.cut_off.isin(cutoff))
                             & (forecast_base.target <= recent_sales)]
    wpe = in_scope.groupby(by=['cut_off','model_id'],as_index=False).agg({'sales_quantity':'sum',
                                                                          forecast:'sum',
                                                                          'initial_price':'mean'})
    wpe['gap_qty'] = (wpe['sales_quantity'] - wpe[forecast]).abs()
    wpe['gap_to'] = (wpe['gap_qty'] * wpe['initial_price']).astype(int)
    wpe[qty_col] = wpe['gap_qty']/wpe['sales_quantity']
    wpe['total_to'] = (wpe['sales_quantity'] * wpe['initial_price']).astype(int)
    wpe = wpe.fillna(0)

    wpe_by_cutoff = wpe.groupby(by='cut_off',as_index=False).agg({'gap_qty':'sum','sales_quantity':'sum',
                                                                  'gap_to':'sum','total_to':'sum'})
    wpe_by_cutoff[qty_col] = wpe_by_cutoff['gap_qty']/wpe_by_cutoff['sales_quantity']
    # The turnover ratio used to be missing from the per-cut-off table.
    wpe_by_cutoff[to_col] = wpe_by_cutoff['gap_to']/wpe_by_cutoff['total_to']
    # Computed after the fillna above, so fill 0/0 here too, matching the quantity ratio.
    wpe[to_col] = (wpe['gap_to']/wpe['total_to']).fillna(0)

    return wpe, wpe_by_cutoff


def wpe_all(data,horizon):
    """Sum gaps and sales per model and recompute the WPE ratios on the sums.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns model_id, gap_qty, sales_quantity, gap_to and total_to.
    horizon : int
        Only used to label the ratio columns 'wpe_{horizon}W_qty' / 'wpe_{horizon}W_to'.

    Returns
    -------
    pandas.DataFrame
        One row per model_id with the four sums followed by the two ratios.
    """
    sum_spec = dict.fromkeys(['gap_qty', 'sales_quantity', 'gap_to', 'total_to'], 'sum')
    per_model = data.groupby(by=['model_id'], as_index=False).agg(sum_spec)

    qty_ratio = per_model['gap_qty'] / per_model['sales_quantity']
    to_ratio = per_model['gap_to'] / per_model['total_to']
    per_model[f'wpe_{horizon}W_qty'] = qty_ratio
    per_model[f'wpe_{horizon}W_to'] = to_ratio
    return per_model

In [115]:
# WPE over 52 steps for the AI forecast: per (cut_off, model_id) first, then rolled up
# per model with the two ratio columns suffixed '_ai'.
df_wpe_ai = wpe_func(base, 52, 'forecast')[0]
df_wpe_all_ai = wpe_all(df_wpe_ai, 52)
df_wpe_all_ai.columns = [
    'model_id',
    'gap_qty',
    'sales_quantity',
    'gap_to',
    'total_to',
    'wpe_52W_qty_ai',
    'wpe_52W_to_ai',
]

In [131]:
# Generate wpe 52W of the AI forecast, restricted to the rows that also have an APO GD value
# (the column scored here is 'forecast', not 'apo_gd').
# NOTE: this reassigns df_wpe_ai / df_wpe_all_ai, replacing the results of the cell above.
df_wpe_ai = wpe_func(base[(base.apo_gd.isna()==False)],52,'forecast')[0]
df_wpe_all_ai = wpe_all(df_wpe_ai,52)
df_wpe_all_ai.columns = ['model_id','gap_qty','sales_quantity','gap_to','total_to','wpe_52W_qty_ai','wpe_52W_to_ai']

In [146]:
# WPE over 52 steps for the APO GD forecast, on the rows where an APO GD value exists;
# rolled up per model with the two ratio columns suffixed '_gd'.
rows_with_apo_gd = base.apo_gd.notna()
df_wpe_gd = wpe_func(base[rows_with_apo_gd], 52, 'apo_gd')[0]
df_wpe_all_gd = wpe_all(df_wpe_gd, 52)
df_wpe_all_gd.columns = [
    'model_id',
    'gap_qty',
    'sales_quantity',
    'gap_to',
    'total_to',
    'wpe_52W_qty_gd',
    'wpe_52W_to_gd',
]

In [147]:
def mul_wpe_all(data,horizon):
    """Sum gaps and sales per cut-off and recompute the WPE ratios on the sums.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns cut_off, gap_qty, sales_quantity, gap_to and total_to.
    horizon : int
        Only used to label the ratio columns 'wpe_{horizon}W_qty' / 'wpe_{horizon}W_to'.

    Returns
    -------
    pandas.DataFrame
        One row per cut_off with the four sums followed by the two ratios.
    """
    sum_spec = {name: 'sum' for name in ('gap_qty', 'sales_quantity', 'gap_to', 'total_to')}
    per_cutoff = data.groupby(by=['cut_off'], as_index=False).agg(sum_spec)
    per_cutoff[f'wpe_{horizon}W_qty'] = per_cutoff['gap_qty'].div(per_cutoff['sales_quantity'])
    per_cutoff[f'wpe_{horizon}W_to'] = per_cutoff['gap_to'].div(per_cutoff['total_to'])
    return per_cutoff
# Roll the APO GD errors (df_wpe_gd) up to one row per cut-off.
wpe_by_cutoff = mul_wpe_all(df_wpe_gd,52)

In [148]:
# Save the per-cut-off WPE table to the working directory; index=False leaves the row index out.
wpe_by_cutoff.to_csv('wpe_52.csv',index=False)

In [154]:
# Inspect the rows of cut-off 202028 that have an APO GD value.
base[base.apo_gd.notna() & (base.cut_off == 202028)]

Unnamed: 0,model_id,cut_off,target,forecast,forecast_step,sales_quantity,initial_price,start_week,last_week,apo_gd
3,720719,202028,202106,78,31,47.0,21.013429,201601,202203,69.0
12,720719,202028,202107,82,32,48.0,21.013429,201601,202203,72.0
21,720719,202028,202108,77,33,49.0,21.013429,201601,202203,81.0
30,720719,202028,202109,97,34,39.0,21.013429,201601,202203,76.0
39,720719,202028,202110,77,35,68.0,21.013429,201601,202203,89.0
...,...,...,...,...,...,...,...,...,...,...
2715973,8640028,202028,202136,22,61,163.0,39.962872,202024,202203,133.0
2715982,8640028,202028,202137,24,62,93.0,39.962872,202024,202203,113.0
2715991,8640028,202028,202138,18,63,121.0,39.962872,202024,202203,97.0
2716000,8640028,202028,202139,15,64,101.0,39.962872,202024,202203,119.0


- WAPE

In [49]:
def wape_func(forecast_base,horizon,forecast):
    """Compute the absolute percentage error of a forecast, step by step.

    The absolute gap is taken for every (cut_off, model_id, forecast_step) row
    before anything is summed across steps.

    Parameters
    ----------
    forecast_base : pandas.DataFrame
        Needs the columns cut_off, model_id, target, forecast_step,
        sales_quantity, initial_price and the column named by `forecast`.
    horizon : int
        Number of forecast steps to include.
    forecast : str
        Name of the forecast column. 'apo_gd' takes the candidate cut-offs from
        the notebook-level `apo_cutoff`, anything else from `ai_cut_off`.

    Returns
    -------
    tuple of pandas.DataFrame
        First: one row per (cut_off, model_id, forecast_step), missing values
        replaced by 0. Second: the gaps and totals summed per (model_id, cut_off)
        with both ratios recomputed on the sums (not filled).
    """
    qty_label = 'wape_{}W_qty'.format(horizon)
    to_label = 'wape_{}W_to'.format(horizon)

    candidate_cutoffs = apo_cutoff if forecast == 'apo_gd' else ai_cut_off
    # Latest target week for which realized sales exist.
    sales_known = forecast_base.sales_quantity.isna() == False
    recent_sales = forecast_base[sales_known].target.max()
    # Only cut-offs whose whole horizon is already covered by realized sales.
    usable_cutoffs = [c for c in candidate_cutoffs if get_week_id(c, horizon) <= recent_sales]

    within_horizon = forecast_base.forecast_step <= horizon
    from_usable_cutoff = forecast_base.cut_off.isin(usable_cutoffs)
    step_spec = {'sales_quantity': 'sum', forecast: 'sum', 'initial_price': 'mean'}
    per_step = (
        forecast_base[within_horizon & from_usable_cutoff]
        .groupby(by=['cut_off', 'model_id', 'forecast_step'], as_index=False)
        .agg(step_spec)
    )

    per_step['gap_qty'] = (per_step['sales_quantity'] - per_step[forecast]).abs()
    per_step['gap_to'] = per_step['gap_qty'].mul(per_step['initial_price']).astype(int)
    per_step['total_to'] = per_step['sales_quantity'].mul(per_step['initial_price']).astype(int)
    per_step[qty_label] = per_step['gap_qty'].div(per_step['sales_quantity'])
    per_step[to_label] = per_step['gap_to'].div(per_step['total_to'])
    per_step = per_step.fillna(0)

    sum_spec = dict.fromkeys(['gap_qty', 'sales_quantity', 'gap_to', 'total_to'], 'sum')
    per_model_cutoff = per_step.groupby(by=['model_id', 'cut_off'], as_index=False).agg(sum_spec)
    per_model_cutoff[qty_label] = per_model_cutoff['gap_qty'].div(per_model_cutoff['sales_quantity'])
    per_model_cutoff[to_label] = per_model_cutoff['gap_to'].div(per_model_cutoff['total_to'])

    return per_step, per_model_cutoff

def wape_all(data,horizon):
    """Sum gaps and sales per model and recompute the WAPE ratios on the sums.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns model_id, gap_qty, sales_quantity, gap_to and total_to.
    horizon : int
        Only used to label the ratio columns 'wape_{horizon}W_qty' / 'wape_{horizon}W_to'.

    Returns
    -------
    pandas.DataFrame
        One row per model_id with the four sums followed by the two ratios.
    """
    summed_columns = ('gap_qty', 'sales_quantity', 'gap_to', 'total_to')
    per_model = data.groupby(by=['model_id'], as_index=False).agg(
        {column: 'sum' for column in summed_columns}
    )
    ratios = {
        f'wape_{horizon}W_qty': per_model['gap_qty'] / per_model['sales_quantity'],
        f'wape_{horizon}W_to': per_model['gap_to'] / per_model['total_to'],
    }
    for label, values in ratios.items():
        per_model[label] = values
    return per_model

In [84]:
# Generate wape of the AI forecast, per step first and then rolled up per model.
# NOTE(review): the horizon passed below is 52, but the output columns are labelled
# 'wape_10W_*' — confirm which one is intended before relying on the column names.
df_wape_ai = wape_func(base,52,'forecast')[0]
df_wape_all_ai = wape_all(df_wape_ai,52)
df_wape_all_ai.columns = ['model_id','gap_qty','sales_quantity','gap_to','total_to','wape_10W_qty_ai','wape_10W_to_ai']

In [90]:
def mul_wape_all(data,horizon):
    """Sum gaps and sales per cut-off and recompute the WAPE ratios on the sums.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns cut_off, gap_qty, sales_quantity, gap_to and total_to.
    horizon : int
        Only used to label the ratio columns 'wape_{horizon}W_qty' / 'wape_{horizon}W_to'.

    Returns
    -------
    pandas.DataFrame
        One row per cut_off with the four sums followed by the two ratios.
    """
    sum_spec = dict(gap_qty='sum', sales_quantity='sum', gap_to='sum', total_to='sum')
    per_cutoff = data.groupby(by=['cut_off'], as_index=False).agg(sum_spec)

    qty_label = f'wape_{horizon}W_qty'
    to_label = f'wape_{horizon}W_to'
    per_cutoff[qty_label] = per_cutoff['gap_qty'] / per_cutoff['sales_quantity']
    per_cutoff[to_label] = per_cutoff['gap_to'] / per_cutoff['total_to']
    return per_cutoff
# Per-cut-off WAPE of the AI forecast, built from the (model_id, cut_off) table that
# wape_func returns as its second element.
wape_by_cutoff = mul_wape_all(wape_func(base,52,'forecast')[1],52)

In [92]:
# Save the per-cut-off WAPE table to the working directory; index=False leaves the row index out.
wape_by_cutoff.to_csv('wape_52.csv',index=False)

In [97]:
# Spot check: realized sales rows of model 164123.
realized_sales.loc[realized_sales['model_id'] == 164123]

Unnamed: 0,model_id,target,date,sales_quantity
