In [None]:
%load_ext autoreload
%autoreload 2

import json
import s3fs
import boto3
import sagemaker
import numpy as np
import pandas as pd
import os
import sys

sys.path.insert(0,'..')

import src.config as cf
import src.utils as ut

# Route outbound HTTP(S) traffic through the corporate proxy, and bypass the
# proxy for the addresses in `no_proxy` (loopback and 169.254.169.254 --
# presumably the instance metadata endpoint; TODO confirm).
os.environ.update({
    'HTTP_PROXY': "http://proxy-internet-aws-eu.subsidia.org:3128",
    'HTTPS_PROXY': "http://proxy-internet-aws-eu.subsidia.org:3128",
    'no_proxy': "169.254.169.254,127.0.0.1",
})

# Params

In [None]:
# --- Environment ------------------------------------------------------------
# "prod" selects the production configuration file; any other value selects
# the dev one.
RUN_ENV = "dev"

# --- Forecast settings ------------------------------------------------------
# Week the forecast is made from (looks like a YYYYWW identifier -- TODO confirm).
cutoff_week_id = 201922
# Number of future steps to forecast from the cutoff week.
prediction_length = 16
# Time-series frequency (presumably weekly -- TODO confirm against training).
freq = 'W'

In [None]:
# Pick the technical configuration for the run environment: only "prod"
# selects the production file, every other value uses the dev one.
if RUN_ENV == "prod":
    config_file = "../conf/prod.yml"
else:
    config_file = "../conf/dev.yml"
config = cf.ProgramConfiguration(config_file, "../conf/functional.yml")

# SageMaker session and role ARN taken from the project configuration.
sagemaker_session = sagemaker.Session()
role = config.get_global_role_arn()

# S3 layout: <bucket>/<prefix>/data for inputs, <bucket>/<prefix>/output for
# results. Neither path carries the "s3://" scheme; it is added where used.
bucket = config.get_train_bucket_input()
prefix = 'test_deepAR'  # Your S3 test dir
s3_data_path, s3_output_path = (
    "{}/{}/data".format(bucket, prefix),
    "{}/{}/output".format(bucket, prefix),
)

# Name of the SageMaker model to run, derived from the cutoff week.
model_name = "APO-{}-DEEPAR".format(cutoff_week_id)

# Forecast with Batch Transform 

In [None]:
# Job name prefix and S3 destination for this cutoff's forecasts.
transform_job_name = "FORECASTING-{}".format(model_name)
output_path = "s3://{}/result/cutoff_{}".format(s3_output_path, cutoff_week_id)

# Batch Transform settings gathered in one place so they are easy to tune.
transformer_settings = dict(
    model_name=model_name,
    instance_count=1,
    instance_type='ml.c5.4xlarge',
    strategy='SingleRecord',
    assemble_with='Line',
    base_transform_job_name=transform_job_name,
    output_path=output_path,
    sagemaker_session=sagemaker_session,
)

transformer = sagemaker.transformer.Transformer(**transformer_settings)

In [None]:
# S3 URI of the JSON-lines test set for this cutoff week.
data = "s3://{}/cutoff_{}/test.json".format(s3_data_path, cutoff_week_id)

# Start the batch transform (input split line by line), then wait on the job
# that was just launched.
transformer.transform(data=data, split_type='Line')
launched_job_name = transformer.latest_transform_job.name
sagemaker_session.wait_for_transform_job(launched_job_name)

# Format Results

In [None]:
# Get back info from input data
# The batch transform output carries no identifiers, so the model ids are read
# back from the input file, one JSON document per line, in file order.
fs = s3fs.S3FileSystem()

input_path = "s3://{}/cutoff_{}/test.json".format(s3_data_path, cutoff_week_id)
with fs.open(input_path, 'r', encoding='utf-8') as f:
    # json.loads() takes no `encoding` argument: it was ignored on older
    # Python 3 versions and raises TypeError since Python 3.9. The text is
    # already decoded as UTF-8 by fs.open().
    data = [json.loads(line) for line in f]

# Model id of every time series, in the same order as the input lines.
l_model = [int(x['model']) for x in data]

# Number of time series sent to the transform job.
nb_ts = len(l_model)

In [None]:
# Read output results
# The transform job writes one JSON document per input line to
# "<input file name>.out" under the configured output path.
fs = s3fs.S3FileSystem()

result_path = "s3://{}/result/cutoff_{}/test.json.out".format(s3_output_path, cutoff_week_id)
with fs.open(result_path, 'r', encoding='utf-8') as f:
    # json.loads() takes no `encoding` argument: it was ignored on older
    # Python 3 versions and raises TypeError since Python 3.9. The text is
    # already decoded as UTF-8 by fs.open().
    res = [json.loads(line) for line in f]

In [None]:
# Reshape the raw predictions into a long table with one row per
# (model, forecast step) and the median forecast in `yhat`.
#
# Only quantile '0.5' is kept for now. Rows are matched to `l_model` purely by
# position, so the output file must list the series in the same order as the
# input file.
res = pd.DataFrame.from_records(res)

# Build the wide table (one row per model, one column per forecast step)
# directly from the per-series lists. This avoids assigning a column on a
# sliced frame (SettingWithCopyWarning) and the slow `.apply(pd.Series)`.
median_forecasts = pd.DataFrame(
    [quantiles['0.5'] for quantiles in res['quantiles']],
    index=pd.Index(l_model, name='model'),
)

# stack() turns the step columns into rows; reset_index() yields the columns
# 'model', 'level_1' (step position) and 0 (value), the latter renamed to 'yhat'.
res = (
    median_forecasts
    .stack()
    .reset_index()
    .rename(columns={0: 'yhat'})
)

In [None]:
# Weeks covered by the forecast horizon, starting after the cutoff week.
week_id_range = ut.get_next_n_week(cutoff_week_id, prediction_length)

# Attach the cutoff information and the target week/date of every row.
# The horizon is repeated once per time series, which assumes
# `week_id_range` is a plain list and that every series has exactly
# `prediction_length` rows in `res` -- TODO confirm.
res = res.assign(
    cutoff_week_id=cutoff_week_id,
    cutoff_date=ut.week_id_to_date(cutoff_week_id),
    week_id=week_id_range * nb_ts,
    date=[ut.week_id_to_date(w) for w in week_id_range] * nb_ts,
)

# Keep plain calendar dates, and cast forecasts to integers
# (NOTE(review): astype(int) truncates rather than rounds -- confirm intended).
res = res.assign(
    cutoff_date=res['cutoff_date'].dt.date,
    date=res['date'].dt.date,
    yhat=res['yhat'].astype(int),
)

# Final column order; the helper 'level_1' column is dropped here.
output_columns = ['cutoff_week_id', 'cutoff_date', 'week_id', 'date', 'model', 'yhat']
res = res[output_columns].reset_index(drop=True)

res.head()

# Calculate WAPE

In [None]:
# Load the "global/active_sales" parquet dataset from the configured bucket
# through the project helper. It is merged with the forecasts to compute
# errors, so it is expected to provide the actual sales in a `y` column.
active_sales = ut.read_parquet_S3(bucket, "global/active_sales")

In [None]:
# Join forecasts with actual sales. No keys are given, so pandas merges on
# every column the two frames share (presumably week_id / date / model --
# TODO confirm); only rows present in both are kept.
error = res.merge(active_sales, how="inner")

# Forecast step = number of weeks between cutoff and target date, 1-based.
lead_time = error["date"] - error["cutoff_date"]
error["forecast_step"] = (lead_time / np.timedelta64(1, 'W')).astype(int) + 1

# Absolute error of each forecast against the actual value `y`.
error["ae"] = (error["yhat"] - error["y"]).abs()

In [None]:
# Global WAPE over all models and forecast steps, as a percentage:
# sum of absolute errors / sum of actuals * 100, rounded to 3 decimals.
total_abs_error = error["ae"].sum()
total_actuals = error["y"].sum()
wape = np.round(total_abs_error / total_actuals * 100, 3)

# Number of distinct models that have both a forecast and actual sales.
nb_products = len(error["model"].unique())

print("Global WAPE ", str(wape))
print("Nb products ", str(nb_products))

In [None]:
# WAPE per forecast step, reported like the global WAPE: as a percentage
# rounded to 3 decimals. groupby() yields the steps in ascending order and
# splits the frame in a single pass instead of re-filtering it for each step.
for s, error_s in error.groupby("forecast_step"):
    total_actuals_s = error_s["y"].sum()
    if total_actuals_s != 0:
        # Separate name so the global `wape` variable is not overwritten.
        wape_s = np.round(error_s["ae"].sum() / total_actuals_s * 100, 3)
    else:
        # WAPE is undefined when the step has no actual sales.
        wape_s = np.nan

    print("Forecast Step", str(s), ":")
    print("WAPE:     ", str(wape_s))
    print("\n-------------------------\n")