#### WPE & WAPE Calculation

In [2]:
!pip install uritools

Collecting uritools
  Downloading uritools-3.0.2-py3-none-any.whl (12 kB)
Installing collected packages: uritools
Successfully installed uritools-3.0.2


In [3]:
import pandas as pd
import numpy as np
import boto3
import pyarrow.parquet as pq
import s3fs
import math
import yaml

import sys
sys.path.append('../../')
from src.utils import read_multipart_parquet_s3, write_df_to_parquet_on_s3,read_csv_s3

In [4]:
# Read the monitoring whitelist configuration from the working directory.
# NOTE(review): FullLoader can construct more than plain data types; if this
# file only holds lists/dicts, yaml.safe_load would be the safer choice — confirm.
with open('monitor_whitelist.yaml') as config_file:
    monitor_wl = yaml.load(config_file, Loader=yaml.FullLoader)

##### Fetch data

In [5]:
# Load the model/week tree table from S3 and keep only week 202214.
# Presumably the helper returns a pandas DataFrame — TODO confirm in src.utils.
df_tree = read_multipart_parquet_s3(
    'fcst-workspace/forecast-cn/fcst-refined-demand-forecast-dev',
    'global/model_week_tree',
    prefix_filename='part-',
)
df_tree = df_tree.loc[df_tree['week_id'] == 202214]

In [6]:
# Load the model/week MRP table from S3; keep week 202214 and only the two
# columns needed for the later join on model_id.
df_mrp = read_multipart_parquet_s3(
    'fcst-workspace/forecast-cn/fcst-refined-demand-forecast-dev',
    'global/model_week_mrp',
    prefix_filename='part-',
)
df_mrp = df_mrp.loc[df_mrp['week_id'] == 202214, ['model_id', 'is_mrp_active']]

In [7]:
# Static per-model features, read from the total_cutoff export folder on S3.
fcst_bi_static_feat = pd.read_csv(
    's3://fcst-data-exchange-dev/demand-forecast/cn/dashboard/tableau/total_cutoff/fcst_bi_static_feat_w.csv'
)

In [8]:
# Preview the static-feature table.
# NOTE(review): the rendered output includes the rs_email column (personal
# e-mail addresses) — consider clearing this output before sharing the notebook.
fcst_bi_static_feat

Unnamed: 0,model_id,wpe_reach_target,wape_reach_target,univers_label,department_label,family_label,rs_email,AW2021_CodeA,tier,model_label,ec_tag,selling_length,aw_seasonality,annual_seasonality,wpe_cutoff_ok_pct,wape_cutoff_ok_pct,update_time
0,720719,False,False,HIKING TREKKING,HIKING TREK GEA,3174_MT BP ACC SECU,jerry.xiao@decathlon.com,False,Tier_1,720719_2 STRAPS BACKPACK 25MM*1M,No,>1year,Normal,Normal,0.2063,0.3509,04/06/2022-14:01:18
1,863879,False,False,HIKING TREKKING,HIKING TREK GEA,1165_HIKE BINOCULARS,yingzhong.huo@decathlon.com,False,Tier_3,863879_MH M 120 X8 GREEN,No,>1year,Seasonal,Seasonal,0.2500,0.3500,04/06/2022-14:01:18
2,863881,False,False,HIKING TREKKING,HIKING TREK GEA,1165_HIKE BINOCULARS,yingzhong.huo@decathlon.com,False,Tier_1,863881_MH M 120 X8 ORANGE,No,>1year,Normal,Normal,0.3469,0.4318,04/06/2022-14:01:18
3,863883,False,False,HIKING TREKKING,HIKING TREK GEA,1165_HIKE BINOCULARS,yingzhong.huo@decathlon.com,False,Tier_3,863883_MH M 120 X8 BLUE,No,>1year,Normal,Seasonal,0.2167,0.2909,04/06/2022-14:01:18
4,863896,False,False,FITNESS WELLNES,FITNESS TRAININ,11956_WOMAN TS LEGG,sibyl.wang@decathlon.com,False,Tier_3,863896_SPORTEE 100 TEST TAILLANT CHINE PINK,No,>1year,Normal,Seasonal,0.3175,0.3684,04/06/2022-14:01:18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10907,8773706,False,False,FITNESS WELLNES,FITNESS TRAININ,214_WEIGHTS&ELAST.,darren.zhen@decathlon.com,False,Tier_4,8773706_CN MINI BAND 20LB BLUE,No,<20W,,Normal,0.0000,,04/06/2022-14:01:18
10908,8781243,False,False,FITNESS WELLNES,YOGA,10786_WOMAN YOGA,sibyl.wang@decathlon.com,False,Tier_4,8781243_BRA 520 BLACK,No,<20W,,Normal,0.1429,0.0000,04/06/2022-14:01:18
10909,8781245,False,False,FITNESS WELLNES,YOGA,10786_WOMAN YOGA,sibyl.wang@decathlon.com,False,Tier_4,8781245_BRA 520 RED,No,<20W,,Normal,0.0000,,04/06/2022-14:01:18
10910,8781247,False,False,FITNESS WELLNES,YOGA,10786_WOMAN YOGA,sibyl.wang@decathlon.com,False,Tier_4,8781247_BRA 520 BLUE,No,<20W,,Normal,0.0000,,04/06/2022-14:01:18


In [9]:
# Attach family_id from the tree table (inner join on model_id), then keep only
# the families listed under 'pre_selection_family' in the whitelist config.
# Note: this cell overwrites its own input, so it is not safe to run twice.
fcst_bi_static_feat = (
    fcst_bi_static_feat
    .merge(df_tree[['model_id', 'family_id']], on='model_id')
    .loc[lambda merged: merged['family_id'].isin(monitor_wl['pre_selection_family'])]
)

In [10]:
# Left-join the MRP flag so models without an MRP row are kept (flag is NaN).
fcst_bi_static_feat = pd.merge(
    fcst_bi_static_feat,
    df_mrp,
    on='model_id',
    how='left',
)

In [11]:
# Write the selected static-feature columns (including the joined MRP flag)
# to a local CSV, without the DataFrame index.
(
    fcst_bi_static_feat
    .loc[:, [
        'model_id',
        'wpe_reach_target',
        'wape_reach_target',
        'univers_label',
        'department_label',
        'family_label',
        'rs_email',
        'AW2021_CodeA',
        'tier',
        'model_label',
        'ec_tag',
        'selling_length',
        'aw_seasonality',
        'annual_seasonality',
        'wpe_cutoff_ok_pct',
        'wape_cutoff_ok_pct',
        'is_mrp_active',
        'update_time',
    ]]
    .to_csv('fcst_bi_static_feat_w.csv', index=False)
)

In [12]:
# Dynamic (per model and week) features, read from the total_cutoff export folder.
fcst_bi_dynamic_feat = pd.read_csv(
    's3://fcst-data-exchange-dev/demand-forecast/cn/dashboard/tableau/total_cutoff/fcst_bi_dynamic_feat.csv'
)

In [13]:
# Show the first two rows to check the column layout of the dynamic features.
fcst_bi_dynamic_feat.head(2)

Unnamed: 0,model_id,week_id,weekly_average_price,num_store_following,update_time
0,720719,202101,21.059167,12,2022-04-07T03:11:36.860Z
1,765283,202101,20.462963,54,2022-04-07T03:11:36.860Z


In [64]:
# Rebuild `fcst_bi_dynamic_feat` from the per-cutoff CSV exports
# (cutoffs 202101-202152 and 202201-202213), replacing any earlier value.
#
# Frames are collected in a list and concatenated once: DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, and it copied the whole
# accumulated frame on every iteration.
dynamic_feat_frames = []
for cutoff in list(range(202101,202153)) + list(range(202201,202214)):
    print(cutoff)
    df = pd.read_csv(f"s3://fcst-data-exchange-dev/demand-forecast/cn/dashboard/tableau/postgres/fcst_bi_dynamic_feat/fcst_bi_dynamic_feat_{cutoff}.csv")
    dynamic_feat_frames.append(df)

# As with the former append chain, each file's row index is kept as-is
# (no ignore_index), so index labels repeat across cutoffs.
# The table is only assigned after every file has been read successfully.
fcst_bi_dynamic_feat = pd.concat(dynamic_feat_frames)

202101
202102
202103
202104
202105
202106
202107
202108
202109
202110
202111
202112
202113
202114
202115
202116
202117
202118
202119
202120
202121
202122
202123
202124
202125
202126
202127
202128
202129
202130
202131
202132
202133
202134
202135
202136
202137
202138
202139
202140
202141
202142
202143
202144
202145
202146
202147
202148
202149
202150
202151
202152
202201
202202
202203
202204
202205
202206
202207
202208
202209
202210
202211
202212
202213


In [14]:
# Attach family_id from the tree table (inner join on model_id), then keep only
# the families listed under 'pre_selection_family' in the whitelist config.
# Note: this cell overwrites its own input, so it is not safe to run twice.
fcst_bi_dynamic_feat = (
    fcst_bi_dynamic_feat
    .merge(df_tree[['model_id', 'family_id']], on='model_id')
    .loc[lambda merged: merged['family_id'].isin(monitor_wl['pre_selection_family'])]
)

In [15]:
# Write the four dashboard columns of the dynamic features to a local CSV,
# without the DataFrame index.
(
    fcst_bi_dynamic_feat
    .loc[:, ['model_id', 'week_id', 'weekly_average_price', 'num_store_following']]
    .to_csv('fcst_bi_dynamic_feat.csv', index=False)
)

In [16]:
# Forecast results (all cutoffs), read from the total_cutoff export folder.
# NOTE(review): the recorded output shows the tail of a warning raised by this
# read; its message is truncated, so the cause cannot be confirmed from here.
fcst_bi_forecast_result = pd.read_csv(
    's3://fcst-data-exchange-dev/demand-forecast/cn/dashboard/tableau/total_cutoff/fcst_bi_forecast_result.csv'
)

  interactivity=interactivity, compiler=compiler, result=result)


In [59]:
# Rebuild `fcst_bi_forecast_result` from the per-cutoff CSV exports
# (cutoffs 202101-202152 and 202201-202214), replacing any earlier value.
#
# Frames are collected in a list and concatenated once: DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, and it copied the whole
# accumulated frame on every iteration.
forecast_result_frames = []
for cutoff in list(range(202101,202153)) + list(range(202201,202215)):
    print(cutoff)
    df = pd.read_csv(f"s3://fcst-data-exchange-dev/demand-forecast/cn/dashboard/tableau/postgres/fcst_bi_forecast_result/fcst_bi_forecast_result_{cutoff}.csv")
    forecast_result_frames.append(df)

# As with the former append chain, each file's row index is kept as-is
# (no ignore_index), so index labels repeat across cutoffs.
# The table is only assigned after every file has been read successfully.
fcst_bi_forecast_result = pd.concat(forecast_result_frames)

202101
202102
202103
202104
202105
202106
202107
202108
202109
202110
202111
202112
202113
202114
202115
202116
202117
202118
202119


  interactivity=interactivity, compiler=compiler, result=result)


202120
202121
202122
202123
202124
202125
202126
202127
202128
202129
202130
202131
202132
202133
202134
202135
202136
202137
202138
202139
202140
202141
202142
202143
202144
202145
202146
202147
202148
202149
202150
202151
202152
202201
202202
202203
202204
202205
202206
202207
202208
202209
202210
202211
202212
202213
202214


In [17]:
# Total quantity over rows tagged as realized weekly sales
# (algorithm == 'realized_sales' and qty_tag == 'sales_w').
fcst_bi_forecast_result.loc[
    (fcst_bi_forecast_result['algorithm'] == 'realized_sales')
    & (fcst_bi_forecast_result['qty_tag'] == 'sales_w'),
    'quantity',
].sum()

# Figures noted by the author (meaning not documented here): 51537265/152819899

165043649

In [18]:
# Attach family_id from the tree table (inner join on model_id), then keep only
# the families listed under 'pre_selection_family' in the whitelist config.
# Note: this cell overwrites its own input, so it is not safe to run twice.
fcst_bi_forecast_result = (
    fcst_bi_forecast_result
    .merge(df_tree[['model_id', 'family_id']], on='model_id')
    .loc[lambda merged: merged['family_id'].isin(monitor_wl['pre_selection_family'])]
)

In [19]:
# Write the five dashboard columns of the forecast results to a local CSV,
# without the DataFrame index.
(
    fcst_bi_forecast_result
    .loc[:, ['model_id', 'week_id', 'algorithm', 'qty_tag', 'quantity']]
    .to_csv('fcst_bi_forecast_result.csv', index=False)
)

In [20]:
# Latest week_id present for each algorithm (one row per algorithm).
(
    fcst_bi_forecast_result
    .groupby('algorithm', as_index=False)
    .agg({'week_id': 'max'})
)

Unnamed: 0,algorithm,week_id
0,AI,202224
1,APO_GD,202218
2,APO_STAT,202218
3,realized_sales,202214


In [26]:
# (rows, columns) of the forecast-result table in its current kernel state.
fcst_bi_forecast_result.shape

(2539572, 7)

In [23]:
# Forecast performance (all cutoffs), read from the total_cutoff export folder.
fcst_bi_forecast_performance = pd.read_csv(
    's3://fcst-data-exchange-dev/demand-forecast/cn/dashboard/tableau/total_cutoff/fcst_bi_forecast_performance_a.csv'
)

In [29]:
# Most recent cutoff present in the performance table.
fcst_bi_forecast_performance['cut_off'].max()

202211

In [22]:
# Rebuild `fcst_bi_forecast_performance` from the per-cutoff CSV exports
# (cutoffs 202101-202152 and 202201-202210), replacing any earlier value.
#
# Frames are collected in a list and concatenated once: DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, and it copied the whole
# accumulated frame on every iteration.
#
# NOTE(review): the recorded run of this cell stopped with a ParserError after
# printing 202135. The table is now assigned only after every file has been
# read, so a failed run no longer leaves a silently partial table behind.
# NOTE(review): the range stops at 202210 while the total_cutoff file reports a
# max cut_off of 202211 — confirm whether 202211 should be included.
forecast_performance_frames = []
for cutoff in list(range(202101,202153)) + list(range(202201,202211)):
    print(cutoff)
    df = pd.read_csv(f"s3://fcst-data-exchange-dev/demand-forecast/cn/dashboard/tableau/postgres/fcst_bi_forecast_performance_a/fcst_bi_forecast_performance_a_{cutoff}.csv")
    forecast_performance_frames.append(df)

# As with the former append chain, each file's row index is kept as-is
# (no ignore_index), so index labels repeat across cutoffs.
fcst_bi_forecast_performance = pd.concat(forecast_performance_frames)

202101
202102
202103
202104
202105
202106
202107
202108
202109
202110
202111
202112
202113
202114
202115
202116
202117
202118
202119
202120
202121
202122
202123
202124
202125
202126
202127
202128
202129
202130
202131
202132
202133
202134
202135


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [24]:
# Attach family_id from the tree table (inner join on model_id), then keep only
# the families listed under 'pre_selection_family' in the whitelist config.
# Note: this cell overwrites its own input, so it is not safe to run twice.
fcst_bi_forecast_performance = (
    fcst_bi_forecast_performance
    .merge(df_tree[['model_id', 'family_id']], on='model_id')
    .loc[lambda merged: merged['family_id'].isin(monitor_wl['pre_selection_family'])]
)

In [25]:
# Write the whole performance table (all columns) to a local CSV, without the index.
fcst_bi_forecast_performance.to_csv(
    'fcst_bi_forecast_performance_a.csv',
    index=False,
)

In [20]:
# Write the distinct model_id values of the static-feature table to a local CSV.
(
    fcst_bi_static_feat
    .loc[:, ['model_id']]
    .drop_duplicates()
    .to_csv('model_id.csv', index=False)
)

#### Check: weekly sales quantity for a single model

In [6]:
# Load the model/week sales table from S3 (all weeks, no filtering).
# Presumably the helper returns a pandas DataFrame — TODO confirm in src.utils.
df_sales = read_multipart_parquet_s3(
    'fcst-workspace/forecast-cn/fcst-refined-demand-forecast-dev',
    'global/model_week_sales',
    prefix_filename='part-',
)

In [9]:
# Weekly sales_quantity totals for model 8554046, indexed by week_id.
(
    df_sales
    .loc[df_sales['model_id'] == 8554046]
    .groupby('week_id')
    .agg({'sales_quantity': 'sum'})
)

Unnamed: 0_level_0,sales_quantity
week_id,Unnamed: 1_level_1
201952,144
202001,434
202002,477
202003,554
202004,552
...,...
202208,503
202209,396
202210,409
202211,437
