In [None]:
%load_ext autoreload
%autoreload 2

import os
import re
import sys

import numpy as np
import pandas as pd

sys.path.insert(0, '../')

import src.utils as ut
import src.config as cf

In [None]:
# Route outbound HTTP(S) traffic through the corporate proxy, while keeping
# localhost and 169.254.169.254 (presumably the instance metadata endpoint —
# TODO confirm) reachable without the proxy.
os.environ.update({
    'HTTP_PROXY': "http://proxy-internet-aws-eu.subsidia.org:3128",
    'HTTPS_PROXY': "http://proxy-internet-aws-eu.subsidia.org:3128",
    'no_proxy': "169.254.169.254,127.0.0.1",
})

In [None]:
# Execution environment: "prod" selects the production technical configuration,
# any other value falls back to the development one.
RUN_ENV = "dev"

if RUN_ENV == "prod":
    config_file = "../conf/prod.yml"
else:
    config_file = "../conf/dev.yml"

# Technical (environment-specific) settings combined with the functional settings.
# NOTE(review): later cells read `cf.bucket` / `cf.s3_path_models_results` from the
# module instead of this `config` object — confirm which one is intended.
config = cf.ProgramConfiguration(config_file, "../conf/functional.yml")

## Load active sales

In [None]:
# Read the active-sales parquet dataset from the training input bucket.
active_sales = pd.read_parquet(
    "".join(("s3://", config.get_train_bucket_input(), "/", "global/active_sales/"))
)

## Load results

In [None]:
algo_1 = 'Facebook_Prophet'

# Sorted integer cutoff week ids parsed from the result file names of algo 1.
# A name is kept when it starts with "<algo_1>_cutoff_" and ends with a digit;
# its id is the first run of digits found in the name.
# NOTE(review): `cutoff_files` is not defined in any cell visible above this one —
# presumably an iterable of file-name strings; define it explicitly so the
# notebook survives Restart Kernel -> Run All.
res_files_1 = np.sort([
    int(re.findall(r'\d+', f)[0])  # raw string: '\d' is an invalid escape in a plain literal
    for f in cutoff_files
    if f.startswith(algo_1 + '_cutoff_') and f[-1].isdigit()
])

In [None]:
# Display the current content of `res_files_1` as the cell output.
res_files_1

In [None]:
# Substrings identifying the result files of each algorithm to compare.
algo_1 = 'Facebook_Prophet_cutoff_'
algo_2 = 'APO_Global_Demand'


def _load_results(file_names):
    """Read every result file in `file_names` and stack them into one DataFrame.

    Each name is appended to `cf.s3_path_models_results` and read from
    `cf.bucket` through `ut.read_csv_S3`, parsing the 'cutoff_date' and 'date'
    columns as dates.

    Args:
        file_names: sequence of file names, relative to `cf.s3_path_models_results`.

    Returns:
        The row-wise concatenation of all the files that were read.

    Raises:
        ValueError: if `file_names` is empty, i.e. no result file matched.
    """
    if len(file_names) == 0:
        # pd.concat([]) would raise the same exception type, but without saying
        # where the files were expected.
        raise ValueError(f"No result files to load from {cf.s3_path_models_results}")
    return pd.concat([
        ut.read_csv_S3(cf.bucket, cf.s3_path_models_results + name,
                       parse_dates=['cutoff_date', 'date'])
        for name in file_names
    ])


# List the available result files once, then split them per algorithm.
res_files = ut.get_files_list(cf.bucket, cf.s3_path_models_results)
res_files_1 = np.array([f for f in res_files if algo_1 in f])
res_files_2 = np.array([f for f in res_files if algo_2 in f])

res_1 = _load_results(res_files_1)
res_2 = _load_results(res_files_2)

In [None]:
## Alternative: load the results from a local CSV file instead (disabled)
#res = pd.read_csv("Facebook_Prophet_cutoff_201922.csv", sep="|", parse_dates=['date'])
#res['cutoff_week_id'] = 201922
#res['cutoff_date'] = ut.week_id_to_date(res['cutoff_week_id'])
#res.head(1)

### Calculate WAPE

WAPE (weighted absolute percentage error) is computed below as `sum(|yhat - y|) / sum(y)`.

In [None]:
# Pair the algo 1 forecasts with the active sales (inner join on the columns the
# two frames have in common), then add:
#   - forecast_step: whole weeks between cutoff_date and date (truncated), plus 1
#   - ae: absolute error between forecast (yhat) and actual (y)
error_1 = res_1.merge(active_sales, how="inner").assign(
    forecast_step=lambda df: (
        (df["date"] - df["cutoff_date"]) / np.timedelta64(1, 'W')
    ).astype(int) + 1,
    ae=lambda df: (df["yhat"] - df["y"]).abs(),
)

In [None]:
# Pair the algo 2 forecasts with the active sales (inner join on the columns the
# two frames have in common), then add:
#   - forecast_step: whole weeks between cutoff_date and date (truncated), plus 1
#   - ae: absolute error between forecast (yhat) and actual (y)
error_2 = res_2.merge(active_sales, how="inner").assign(
    forecast_step=lambda df: (
        (df["date"] - df["cutoff_date"]) / np.timedelta64(1, 'W')
    ).astype(int) + 1,
    ae=lambda df: (df["yhat"] - df["y"]).abs(),
)

In [None]:
# QLIK FILTERS
# (presumably mirroring the filters of a Qlik report — TODO confirm)

# Complete cutoffs only: keep cutoff week ids up to and including 201947.
error_1 = error_1.loc[error_1["cutoff_week_id"] <= 201947]
error_2 = error_2.loc[error_2["cutoff_week_id"] <= 201947]

# Restrict algo 2 to the (model, cutoff_week_id) pairs also forecasted by algo 1.
error_2 = error_2.merge(
    error_1.loc[:, ['model', 'cutoff_week_id']].drop_duplicates(),
    how="inner",
)

In [None]:
# Global WAPE of algo 1, in percent, rounded to 3 decimals: sum(ae) / sum(y) * 100.
wape_1 = np.round(error_1["ae"].sum() / error_1["y"].sum() * 100, decimals=3)
print("Global WAPE ", wape_1)
# Number of distinct values in the `model` column.
print("Nb products ", len(error_1["model"].unique()))

In [None]:
# Global WAPE of algo 2, in percent, rounded to 3 decimals: sum(ae) / sum(y) * 100.
wape_2 = np.round(error_2["ae"].sum() / error_2["y"].sum() * 100, decimals=3)
print("Global WAPE ", wape_2)
# Number of distinct values in the `model` column.
print("Nb products ", len(error_2["model"].unique()))

In [None]:
# WAPE of algo 1 for each cutoff week, in ascending cutoff order.
# Note: printed as a raw ratio (not multiplied by 100, not rounded).
for cutoff in np.sort(error_1["cutoff_week_id"].unique()):
    cutoff_rows = error_1.loc[error_1["cutoff_week_id"] == cutoff]
    cutoff_wape = cutoff_rows["ae"].sum() / cutoff_rows["y"].sum()

    print("Cutoff", cutoff, ":")
    print("Nb products ", len(cutoff_rows["model"].unique()))
    print("WAPE:     ", cutoff_wape)
    print("\n-------------------------\n")

In [None]:
# WAPE of algo 2 for each cutoff week, in ascending cutoff order.
# Note: printed as a raw ratio (not multiplied by 100, not rounded).
for cutoff in np.sort(error_2["cutoff_week_id"].unique()):
    cutoff_rows = error_2.loc[error_2["cutoff_week_id"] == cutoff]
    cutoff_wape = cutoff_rows["ae"].sum() / cutoff_rows["y"].sum()

    print("Cutoff", cutoff, ":")
    print("Nb products ", len(cutoff_rows["model"].unique()))
    print("WAPE:     ", cutoff_wape)
    print("\n-------------------------\n")

In [None]:
#for s in np.sort(error["forecast_step"].unique()):
#    error_s = error[error["forecast_step"] == s]
#    wape = error_s["ae"].sum() / error_s["y"].sum()
#    
#    print("Forecast Step", str(s), ":")
#    print("WAPE:     ", str(wape))
#    print("\n-------------------------\n")