#### WPE & WAPE Calculation

In [1]:
!pip install uritools



In [2]:
import pandas as pd
import numpy as np
import boto3
import pyarrow.parquet as pq
import s3fs
import math
import yaml

import sys
sys.path.append('../../')
from src.utils import read_multipart_parquet_s3, write_df_to_parquet_on_s3,read_csv_s3

In [3]:
# Read the monitoring whitelist configuration; later cells use its
# 'pre_selection_family' entry to filter the dashboard extracts.
# NOTE(review): yaml.full_load resolves more tags than yaml.safe_load; if this file
# only holds plain lists/dicts, safe_load would be the safer loader -- confirm before switching.
with open('monitor_whitelist.yaml') as whitelist_handle:
    monitor_wl = yaml.full_load(whitelist_handle)

##### Fetch data

In [4]:
# Load the model/week tree extract, then keep only the rows for week 202213.
df_tree = read_multipart_parquet_s3(
    'fcst-workspace/forecast-cn/fcst-refined-demand-forecast-dev',
    'global/model_week_tree',
    prefix_filename='part-',
)
df_tree = df_tree.loc[df_tree['week_id'] == 202213]

In [5]:
# Load the model/week MRP extract and reduce it to the MRP-active flag per model
# for week 202213.
df_mrp = read_multipart_parquet_s3(
    'fcst-workspace/forecast-cn/fcst-refined-demand-forecast-dev',
    'global/model_week_mrp',
    prefix_filename='part-',
)
df_mrp = df_mrp.loc[df_mrp['week_id'] == 202213, ['model_id', 'is_mrp_active']]

In [5]:
# Read the static-feature dashboard extract from S3.
fcst_bi_static_feat = read_csv_s3(
    'fcst-workspace',
    'forecast-cn/fcst-data-exchange-dev/dashboard/tableau/total_cutoff/fcst_bi_static_feat_w.csv',
)

In [6]:
# Spot-check: show the static-feature row(s) for model 8554046.
fcst_bi_static_feat.loc[fcst_bi_static_feat['model_id'] == 8554046]

Unnamed: 0,model_id,wpe_reach_target,wape_reach_target,univers_label,department_label,family_label,rs_email,AW2021_CodeA,tier,model_label,ec_tag,selling_length,aw_seasonality,annual_seasonality,wpe_cutoff_ok_pct,wape_cutoff_ok_pct,update_time
4757,8554046,False,False,RUNNING WALKING,RUNNING TRAIL A,224_ADULT RUN SOCKS,april.wang@decathlon.com,False,Tier_4,8554046_5 FINGERS SOCK BLACK YELLOW,No,<20W,Normal,Seasonal,0.0,0.0,03/28/2022-05:20:49


In [41]:
# Attach family_id from df_tree and keep only the families listed under
# 'pre_selection_family' in the whitelist. The merge uses pandas' default inner
# join, so models without a row in df_tree are dropped as well.
#
# The frame is overwritten in place, so any family_id left over from a previous
# run of this cell is dropped first: merging twice would otherwise produce
# family_id_x / family_id_y and the filter below would fail. On a first run the
# drop is a no-op.
fcst_bi_static_feat = (
    fcst_bi_static_feat
    .drop(columns=['family_id'], errors='ignore')
    .merge(df_tree[['model_id', 'family_id']], on='model_id')
)
fcst_bi_static_feat = fcst_bi_static_feat[
    fcst_bi_static_feat['family_id'].isin(monitor_wl['pre_selection_family'])
]

In [42]:
# Left-join the MRP-active flag so every static-feature row is kept even when the
# model has no row in df_mrp.
#
# Any is_mrp_active column left by a previous run of this cell is dropped first;
# merging twice would otherwise create is_mrp_active_x / is_mrp_active_y and break
# the column selection in the export cell. On a first run the drop is a no-op.
fcst_bi_static_feat = (
    fcst_bi_static_feat
    .drop(columns=['is_mrp_active'], errors='ignore')
    .merge(df_mrp, on='model_id', how='left')
)

In [43]:
# Export the filtered static features to a local CSV, restricted to the listed
# columns and without the DataFrame index.
# NOTE(review): the export includes 'rs_email'; confirm that this is acceptable
# for wherever the file ends up.
(
    fcst_bi_static_feat
    .loc[:, [
        'model_id',
        'wpe_reach_target',
        'wape_reach_target',
        'univers_label',
        'department_label',
        'family_label',
        'rs_email',
        'AW2021_CodeA',
        'tier',
        'model_label',
        'ec_tag',
        'selling_length',
        'aw_seasonality',
        'annual_seasonality',
        'wpe_cutoff_ok_pct',
        'wape_cutoff_ok_pct',
        'is_mrp_active',
        'update_time',
    ]]
    .to_csv('fcst_bi_static_feat_w.csv', index=False)
)

In [44]:
# Read the dynamic-feature dashboard extract from S3.
fcst_bi_dynamic_feat = read_csv_s3(
    'fcst-workspace',
    'forecast-cn/fcst-data-exchange-dev/dashboard/tableau/total_cutoff/fcst_bi_dynamic_feat.csv',
)

In [45]:
# Attach family_id from df_tree (default inner join, so models missing from
# df_tree are dropped) and keep only the whitelisted 'pre_selection_family' families.
#
# A family_id column left by a previous run of this cell is dropped first so the
# cell can be re-run: a second merge would otherwise yield family_id_x /
# family_id_y and the filter below would fail. On a first run the drop is a no-op.
fcst_bi_dynamic_feat = (
    fcst_bi_dynamic_feat
    .drop(columns=['family_id'], errors='ignore')
    .merge(df_tree[['model_id', 'family_id']], on='model_id')
)
fcst_bi_dynamic_feat = fcst_bi_dynamic_feat[
    fcst_bi_dynamic_feat['family_id'].isin(monitor_wl['pre_selection_family'])
]

In [46]:
# Export the filtered dynamic features (four columns, no index) to a local CSV.
(
    fcst_bi_dynamic_feat
    .loc[:, ['model_id', 'week_id', 'weekly_average_price', 'num_store_following']]
    .to_csv('fcst_bi_dynamic_feat.csv', index=False)
)

In [47]:
# Read the forecast-result dashboard extract from S3.
fcst_bi_forecast_result = read_csv_s3(
    'fcst-workspace',
    'forecast-cn/fcst-data-exchange-dev/dashboard/tableau/total_cutoff/fcst_bi_forecast_result.csv',
)

In [48]:
# Sanity check: total quantity over rows tagged as realized weekly sales
# (algorithm == 'realized_sales' and qty_tag == 'sales_w'), before family filtering.
fcst_bi_forecast_result.loc[
    (fcst_bi_forecast_result['algorithm'] == 'realized_sales')
    & (fcst_bi_forecast_result['qty_tag'] == 'sales_w'),
    'quantity',
].sum()

# 51537265/152819899
# NOTE(review): reference figures kept from the original cell; their meaning is not
# evident from this notebook -- confirm or remove.

160994956

In [49]:
# Attach family_id from df_tree (default inner join, so models missing from
# df_tree are dropped) and keep only the whitelisted 'pre_selection_family' families.
#
# A family_id column left by a previous run of this cell is dropped first so the
# cell can be re-run: a second merge would otherwise yield family_id_x /
# family_id_y and the filter below would fail. On a first run the drop is a no-op.
fcst_bi_forecast_result = (
    fcst_bi_forecast_result
    .drop(columns=['family_id'], errors='ignore')
    .merge(df_tree[['model_id', 'family_id']], on='model_id')
)
fcst_bi_forecast_result = fcst_bi_forecast_result[
    fcst_bi_forecast_result['family_id'].isin(monitor_wl['pre_selection_family'])
]

In [50]:
# Export the filtered forecast results (five columns, no index) to a local CSV.
(
    fcst_bi_forecast_result
    .loc[:, ['model_id', 'week_id', 'algorithm', 'qty_tag', 'quantity']]
    .to_csv('fcst_bi_forecast_result.csv', index=False)
)

In [51]:
# Latest week_id present for each algorithm in the filtered forecast results.
(
    fcst_bi_forecast_result
    .groupby('algorithm', as_index=False)[['week_id']]
    .max()
)

Unnamed: 0,algorithm,week_id
0,AI,202222
1,APO_GD,202218
2,APO_STAT,202218
3,realized_sales,202212


In [52]:
# Read the forecast-performance dashboard extract from S3.
fcst_bi_forecast_performance = read_csv_s3(
    'fcst-workspace',
    'forecast-cn/fcst-data-exchange-dev/dashboard/tableau/total_cutoff/fcst_bi_forecast_performance_a.csv',
)

In [53]:
# Attach family_id from df_tree (default inner join, so models missing from
# df_tree are dropped) and keep only the whitelisted 'pre_selection_family' families.
#
# A family_id column left by a previous run of this cell is dropped first so the
# cell can be re-run: a second merge would otherwise yield family_id_x /
# family_id_y and the filter below would fail. On a first run the drop is a no-op.
fcst_bi_forecast_performance = (
    fcst_bi_forecast_performance
    .drop(columns=['family_id'], errors='ignore')
    .merge(df_tree[['model_id', 'family_id']], on='model_id')
)
fcst_bi_forecast_performance = fcst_bi_forecast_performance[
    fcst_bi_forecast_performance['family_id'].isin(monitor_wl['pre_selection_family'])
]

In [None]:
# Display the filtered forecast-performance frame for visual inspection.
# NOTE(review): this shows the whole frame; .head() would keep the output short.
fcst_bi_forecast_performance

In [54]:
# Export the filtered forecast-performance frame (all columns, no index) to a local CSV.
fcst_bi_forecast_performance.to_csv(
    path_or_buf='fcst_bi_forecast_performance_a.csv',
    index=False,
)

In [55]:
# Write the distinct model_ids found in the forecast-performance frame.
# NOTE(review): the following cell writes to the same 'model_id.csv' from the
# static-feature frame, so this output is overwritten -- confirm which is intended.
(
    fcst_bi_forecast_performance
    .loc[:, ['model_id']]
    .drop_duplicates()
    .to_csv('model_id.csv', index=False)
)

In [56]:
# Write the distinct model_ids found in the static-feature frame.
# NOTE(review): this replaces the 'model_id.csv' written by the preceding cell
# from the forecast-performance frame -- confirm which is intended.
(
    fcst_bi_static_feat
    .loc[:, ['model_id']]
    .drop_duplicates()
    .to_csv('model_id.csv', index=False)
)

#### Check: weekly sales for a sample model

In [6]:
# Load the model/week sales extract (all weeks; no filtering here).
df_sales = read_multipart_parquet_s3(
    'fcst-workspace/forecast-cn/fcst-refined-demand-forecast-dev',
    'global/model_week_sales',
    prefix_filename='part-',
)

In [9]:
# Weekly total sales quantity for model 8554046.
(
    df_sales
    .loc[df_sales['model_id'] == 8554046]
    .groupby('week_id')
    .agg({'sales_quantity': 'sum'})
)

Unnamed: 0_level_0,sales_quantity
week_id,Unnamed: 1_level_1
201952,144
202001,434
202002,477
202003,554
202004,552
...,...
202208,503
202209,396
202210,409
202211,437
