# Amazon Forecast

Black Friday(매년 11월 말) 시점의 sales를 예측해 보자.

 - dept_id FOODS_3 (8230개) 중 200개 Sampling (Best200)
 - Target Time Series
     - From/To : 2013-11-16/2015-11-15 (365*2일)
     - timestamp (timestamp)
     - id (string)
     - sales (float)
 - Related Time Series
     - From/To : 2013-11-16/2015-12-15 (365*2일 + 30일)
     - timestamp (timestamp)
     - id (string)
     - sell_price (float)
     - snap_CA, snap_TX, snap_WI (float)
     - Easter, LaborDay, Purim_End, StPatricksDay, SuperBowl (float)
     - Black Friday (float)
 - Item meta data : id, item_id, dept_id, cat_id, store_id, state_id
 - BackTestWindows : 4
 - BackTestWindowOffset : Default (Same as ForecastHorizon)

<img src="../img/forecast-steps.png" align="left">

# Data Preparation

In [None]:
# Import required library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import boto3
from datetime import datetime, timedelta
from itertools import cycle
import json
import time
from time import sleep
import warnings

# Iterator that endlessly cycles through matplotlib's default colour list
color_cycle = cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])
# Draw figures inline, directly below the cell that produces them
%matplotlib inline
# Plot defaults: 12x5 figures, line width 2, line colour 'r', grid on
plt.rcParams["figure.figsize"] = (12,5)
plt.rcParams['lines.linewidth'] = 2
plt.rcParams['lines.color'] = 'r'
plt.rcParams['axes.grid'] = True

# Maximum number of rows pandas displays
pd.set_option ('display.max_rows', 500)
# Maximum number of columns pandas displays
pd.set_option ('display.max_columns', 500)
# Display width
pd.set_option ('display.width', 1000)

# Silence all warnings from here on
warnings.filterwarnings(action='ignore')

In [None]:
%store -r

In [None]:
len(df_merged) # 15,743,990

In [None]:
len(df_sales_foods_3) #8,230

In [None]:
df_sales_foods_3.head()

## Best200 선택

In [None]:
# Collect the per-day sales columns, i.e. the columns whose name starts with "d_".
# startswith() is used instead of a substring test so that a column that merely
# contains "d_" somewhere in its name can never be summed by mistake.
d_cols = [c for c in df_sales_foods_3.columns if c.startswith('d_')]

# Row-wise sum of the daily sales columns, stored in the new "sales_total" column
df_sales_foods_3["sales_total"] = df_sales_foods_3.loc[:,d_cols].sum(axis=1)

In [None]:
# Select the Best200 list: the 200 items with the largest total sales
best200  = df_sales_foods_3.sort_values(by="sales_total", ascending=False).head(200)
sampled = best200
len(sampled)

In [None]:
sampled[["id", "sales_total"]].head()

In [None]:
sampled[["id", "sales_total"]].tail()

In [None]:
# Keep only the rows that belong to the Best200 items.
# .copy() makes df_merged_sampled an independent frame: a 'black_friday' column is
# added to it and modified further down, and those writes must land in this frame
# rather than in a temporary derived from df_merged (SettingWithCopy problem).
df_merged_sampled = df_merged[df_merged["id"].isin(sampled.id)].copy()

In [None]:
len(df_merged_sampled["id"].unique()) # 200

In [None]:
df_merged_sampled.head()

## Create Data Sets

### Target (df_target)

In [None]:
# Target time series: id and sales for 2013-11-16 ~ 2015-11-15
df_target = df_merged_sampled[["id", "sales"]]
# Label-based slice on the index; NOTE(review): presumably the index holds the dates - confirm upstream
df_target = df_target.loc["2013-11-16":"2015-11-15"] # 2year
df_target.head()

In [None]:
len(df_target) #146,000 = 200*365*2

In [None]:
df_target = df_target.sort_values(by=["id", "date"])

### Related (df_related)
- Black Friday 전일, 당일, 다음 날을 df_related 데이터에 추가한다.

In [None]:
df_related.head()

In [None]:
del df_related['black_friday']

In [None]:
df_merged_sampled['black_friday'] = 0

In [None]:
# Flag the day before, the day of and the day after each Black Friday.
# Rows and column are addressed in ONE .loc call: writing through
# df['black_friday'].loc[...] is chained indexing, which may update a temporary
# copy and leave the frame itself unchanged.

# Black Friday 2013 : 2013-11-29
df_merged_sampled.loc["2013-11-28":"2013-11-30", "black_friday"] = 1

# Black Friday 2014 : 2014-11-28
df_merged_sampled.loc["2014-11-27":"2014-11-29", "black_friday"] = 1

# Black Friday 2015 : 2015-11-27
df_merged_sampled.loc["2015-11-26":"2015-11-28", "black_friday"] = 1

In [None]:
# Related time series for 2013-11-16 ~ 2015-12-15

df_related = df_merged_sampled[["id", "event_name_1", "snap_CA", "snap_TX", "snap_WI", "sell_price", "black_friday"]]

# The related time series must contain data up to the end of the target time series + ForecastHorizon,
# and it must not contain any missing values.
df_related = df_related.loc["2013-11-16":"2015-12-15"]

print(len(df_related)) # 152,000 = 200*(365*2+30)
df_related.isnull().sum()

In [None]:
# Fill NaN in event_name_1 with the string "None"
df_related["event_name_1"] = df_related["event_name_1"].fillna("None")

# Suppose an item has only been sold since 2015-07-01: its sell_price data in df_sales then starts on 2015-07-01.
# Because df_calendar was merged in, the dates of every item in df_merged span 2011-01-29~2016-06-19,
# but the item's dates in df_sales only start on 2015-07-01,
# so after merging df_merged with df_sales the item's sell_price before 2015-07-01 is NaN.
# Those NaN sell_price values are therefore filled with 0.
df_related["sell_price"] = df_related["sell_price"].fillna(0)
df_related.isnull().sum()

In [None]:
print(len(df_related)) # 152,000 = 200*(365*2+30)
df_related.isnull().sum()

In [None]:
# One-hot encoding for event_name_1
# dtype=int pins the indicator columns to numeric 0/1. Without it the result dtype
# depends on the pandas version (bool in pandas >= 2.0), and bool columns would be
# written to the CSV as True/False although the Forecast schema declares them float.
df_related = pd.concat([df_related, pd.get_dummies(df_related['event_name_1'], dtype=int)],axis=1)

In [None]:
print(len(df_related)) # 152,000 = 200*(365*2+30)
df_related.isnull().sum()

In [None]:
# Distinct values that occur in event_name_1
all_events = df_related.event_name_1.unique()

# event_name_1 : SuperBowl, LaborDay, Purim End, Easter, StPatricksDay
# <- only the five events with the highest sales are kept in the related data
chosen_events = ['SuperBowl', 'LaborDay', 'Purim End', 'Easter', 'StPatricksDay']

# Drop the column named after every event value that was not chosen ...
unwanted_events = [name for name in all_events if name not in chosen_events]
df_related.drop(columns=unwanted_events, inplace=True)

# ... and finally the original categorical column itself
df_related.drop(columns=["event_name_1"], inplace=True)

In [None]:
print(len(df_related)) # 152,000 = 200*(365*2+30)
df_related.isnull().sum()

In [None]:
df_related = df_related.sort_values(by=["id", "date"])

### Item_metadata (df_item)

In [None]:
df_item = df_merged_sampled[["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"]].drop_duplicates()

In [None]:
len(df_item) #200

In [None]:
len(df_item["id"].unique()) # 200

In [None]:
df_item.head()

## Make CSV files

In [None]:
# Prepare csv files

local_path = "./train/"

# Create the output directory from Python instead of "!mkdir": exist_ok=True makes
# this cell safe to re-run when ./train already exists.
os.makedirs(local_path, exist_ok=True)

target_file_name     = "df_target.csv"
related_file_name    = "df_related.csv"
item_file_name       = "df_item.csv"

local_target     = local_path + target_file_name
local_related    = local_path + related_file_name
local_item       = local_path + item_file_name

# None of the files gets a header row. The target and related files also write the
# frame index as their first column (index=True); the item file does not.
df_target.to_csv(local_target, header=False, index=True)
df_related.to_csv(local_related, header=False, index=True)
df_item.to_csv(local_item, header=False, index=False) # exclude the index

# Forecast 시작

참고 : https://github.com/chrisking/ForecastPOC/blob/master/

In [None]:
DATASET_FREQUENCY = "D" # Day
TIMESTAMP_FORMAT = "yyyy-MM-dd"

project = 'walmart_m5'
datasetName= project+'_ds'
datasetGroupName= project +'_dsg'

In [None]:
# Read the AWS region this Jupyter notebook runs in from the resource metadata file
with open('/opt/ml/metadata/resource-metadata.json') as notebook_info:
    data = json.load(notebook_info)
    resource_arn = data['ResourceArn']
    # The region is the ':'-separated field at index 3 of the ARN
    region = resource_arn.split(':')[3]
print(region)

In [None]:
session = boto3.Session(region_name=region)
forecast = session.client(service_name='forecast')
forecast_query = session.client(service_name='forecastquery')

In [None]:
# Fetch the execution role so that the Amazon Forecast API can be used from this SageMaker Jupyter notebook.

from sagemaker import get_execution_role

role_arn = get_execution_role()
print(role_arn)

## 1. Dataset Group 생성

In [None]:
create_dataset_group_response = forecast.create_dataset_group(DatasetGroupName=datasetGroupName,
                                                              Domain="RETAIL",
                                                             )
datasetGroupArn = create_dataset_group_response['DatasetGroupArn']

In [None]:
forecast.describe_dataset_group(DatasetGroupArn=datasetGroupArn)

## 2a. Target Time Series Dataset 생성

In [None]:
# Specify the schema of your dataset here. Make sure the order of columns matches the raw data files.
schema ={
   "Attributes":[
      {
         "AttributeName":"timestamp",
         "AttributeType":"timestamp"
      },
      {
         "AttributeName":"item_id",
         "AttributeType":"string"
      },
      {
         "AttributeName":"demand",
         "AttributeType":"float"
      }
   ]
}

In [None]:
target_DSN = datasetName + "_target"

response=forecast.create_dataset(
                    Domain="RETAIL",
                    DatasetType='TARGET_TIME_SERIES',
                    DatasetName=target_DSN,
                    DataFrequency=DATASET_FREQUENCY, 
                    Schema = schema
)

In [None]:
target_datasetArn = response['DatasetArn']
forecast.describe_dataset(DatasetArn=target_datasetArn)

## 2b. Target Time Series Dataset Import

In [None]:
# Create S3 Bucket
# {Account Number}-forecastpoc

print(region)
# The client is bound to the notebook's region so that it matches the
# LocationConstraint sent below.
s3 = boto3.client('s3', region_name=region)
account_id = boto3.client('sts').get_caller_identity().get('Account')
bucket_name = account_id + "-forecastpoc"
print(bucket_name)

# create_bucket is called exactly once. us-east-1 is the only region that must NOT
# send a LocationConstraint; every other region requires one. (An extra
# unconditional create_bucket call without the constraint fails outside us-east-1.)
try:
    if region != "us-east-1":
        s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={'LocationConstraint': region})
    else:
        s3.create_bucket(Bucket=bucket_name)
except s3.exceptions.BucketAlreadyOwnedByYou:
    # Re-running the cell: the bucket from an earlier run is simply reused.
    print("Bucket {} already exists and is owned by this account".format(bucket_name))

In [None]:
# Upload Target File

s3_path = "walmart"

# S3 URIs of the three CSV files (handed to the dataset import jobs as data source)
s3_base_uri = f"s3://{bucket_name}/{s3_path}/"
s3_target     = s3_base_uri + target_file_name
s3_related    = s3_base_uri + related_file_name
s3_item       = s3_base_uri + item_file_name

# Only the target file is uploaded here
target_object = boto3.Session().resource('s3').Bucket(bucket_name).Object(f"{s3_path}/{target_file_name}")
target_object.upload_file(local_target)

In [None]:
# Start the import job that loads the target CSV from S3 into the target dataset.
# The job reads s3_target using role_arn; timestamps are parsed with TIMESTAMP_FORMAT.
role_arn = role_arn #ForecastRolePOC
datasetImportJobName = 'DSIMPORT_JOB_TARGET_POC'
ds_import_job_response=forecast.create_dataset_import_job(DatasetImportJobName=datasetImportJobName,
                                                          DatasetArn=target_datasetArn,
                                                          DataSource= {
                                                              "S3Config" : {
                                                                 "Path":s3_target,
                                                                 "RoleArn": role_arn
                                                              } 
                                                          },
                                                          TimestampFormat=TIMESTAMP_FORMAT
                                                         )

In [None]:
ds_import_job_arn=ds_import_job_response['DatasetImportJobArn']
print(ds_import_job_arn)

In [None]:
#while True:
#    dataImportStatus = forecast.describe_dataset_import_job(DatasetImportJobArn=ds_import_job_arn)['Status']
#    print(dataImportStatus)
#    if dataImportStatus != 'ACTIVE' and dataImportStatus != 'CREATE_FAILED':
#        sleep(30)
#    else:
#        break

In [None]:
# 방금 만든 dataset을 dataset group에 attach한다.
# attach하지 않으면 Forecast dataset group의 dataset가 조회되지 않는다.
#response = forecast.update_dataset_group(
#    DatasetGroupArn=datasetGroupArn,
#    DatasetArns=[
#        target_datasetArn
#    ]
#)

## 2c. Related Time Series dataset 생성

Related Time Series 고려사항 : https://docs.aws.amazon.com/ko_kr/forecast/latest/dg/related-time-series-datasets.html

<img src="../img/related-ts.png" align="left">


In [None]:
# Upload Related File
boto3.Session().resource('s3').Bucket(bucket_name).Object(s3_path + "/" + related_file_name).upload_file(local_related)

In [None]:
df_related.info()

In [None]:
# Specify the schema of your dataset here. Make sure the order of columns matches the raw data files.
related_schema ={
   "Attributes":[
      {
         "AttributeName":"timestamp",
         "AttributeType":"timestamp"
      },
      {
         "AttributeName":"item_id",
         "AttributeType":"string"
      },
       {
         "AttributeName":"snap_CA",
         "AttributeType":"float"
      },
       {
         "AttributeName":"snap_TX",
         "AttributeType":"float"
      },
       {
         "AttributeName":"snap_WI",
         "AttributeType":"float"
      },
       {
         "AttributeName":"sell_price",
         "AttributeType":"float"
      },
       {
         "AttributeName":"black_friday",
         "AttributeType":"float"
      },
       {
         "AttributeName":"Easter",
         "AttributeType":"float"
      },
       {
         "AttributeName":"LaborDay",
         "AttributeType":"float"
      },
       {
         "AttributeName":"Purim_End",
         "AttributeType":"float"
      },
       {
         "AttributeName":"StPatricksDay",
         "AttributeType":"float"
      },
       {
         "AttributeName":"SuperBowl",
         "AttributeType":"float"
      }
   ]
}

In [None]:
related_DSN = datasetName + "_related"
response=forecast.create_dataset(
                    Domain="RETAIL",
                    DatasetType='RELATED_TIME_SERIES',
                    DatasetName=related_DSN,
                    DataFrequency=DATASET_FREQUENCY, 
                    Schema = related_schema
)

In [None]:
related_datasetArn = response['DatasetArn']
print(related_datasetArn)
forecast.describe_dataset(DatasetArn=related_datasetArn)

## 2d. Related Time Series Dataset Import

In [None]:
# Start the import job that loads the related CSV from S3 into the related dataset.
# The job reads s3_related using role_arn; timestamps are parsed with TIMESTAMP_FORMAT.
datasetImportJobName = 'DSIMPORT_JOB_RELATEDPOC_2'
related_ds_import_job_response=forecast.create_dataset_import_job(DatasetImportJobName=datasetImportJobName,
                                                          DatasetArn=related_datasetArn,
                                                          DataSource= {
                                                              "S3Config" : {
                                                                 "Path":s3_related,
                                                                 "RoleArn": role_arn
                                                              } 
                                                          },
                                                          TimestampFormat=TIMESTAMP_FORMAT
                                                         )

In [None]:
rel_ds_import_job_arn=related_ds_import_job_response['DatasetImportJobArn']
print(rel_ds_import_job_arn)

In [None]:
#while True:
#    dataImportStatus = forecast.describe_dataset_import_job(DatasetImportJobArn=rel_ds_import_job_arn)['Status']
#    print(dataImportStatus)
#    if dataImportStatus != 'ACTIVE' and dataImportStatus != 'CREATE_FAILED':
#        sleep(30)
#    else:
#        break

## 2e. Item Metadata 생성

In [None]:
# Upload Item Metadata File
boto3.Session().resource('s3').Bucket(bucket_name).Object(s3_path + "/" + item_file_name).upload_file(local_item)

In [None]:
df_item.info()

In [None]:
# Schema of the item metadata file. The attribute order must match the column order
# of the CSV, which is written without a header from df_item's columns
# (id, item_id, dept_id, cat_id, store_id, state_id). Hence df_item's "id" column is
# declared as "item_id" here and df_item's "item_id" column as "item_id_not_combined".
item_schema ={
   "Attributes":[
      {
         "AttributeName":"item_id",
         "AttributeType":"string"
      },
       {
         "AttributeName":"item_id_not_combined",
         "AttributeType":"string"
      },
       {
         "AttributeName":"dept_id",
         "AttributeType":"string"
      },
       {
         "AttributeName":"cat_id",
         "AttributeType":"string"
      },
       {
         "AttributeName":"store_id",
         "AttributeType":"string"
      },
       {
         "AttributeName":"state_id",
         "AttributeType":"string"
      }
   ]
}

In [None]:
item_DSN = datasetName + "_item"
response=forecast.create_dataset(
                    Domain="RETAIL",
                    DatasetType='ITEM_METADATA',
                    DatasetName=item_DSN,
                    Schema = item_schema
)

In [None]:
item_datasetArn = response['DatasetArn']
print(item_datasetArn)
forecast.describe_dataset(DatasetArn=item_datasetArn)

## 2f. Item Metadata Dataset Import

In [None]:
datasetImportJobName = 'DSIMPORT_JOB_ITEMPOC'
item_ds_import_job_response=forecast.create_dataset_import_job(DatasetImportJobName=datasetImportJobName,
                                                          DatasetArn=item_datasetArn,
                                                          DataSource= {
                                                              "S3Config" : {
                                                                 "Path":s3_item,
                                                                 "RoleArn": role_arn
                                                              } 
                                                          }
                                                         )

In [None]:
item_ds_import_job_arn=item_ds_import_job_response['DatasetImportJobArn']
print(item_ds_import_job_arn)

## 2g. Check Dataset Import Status

In [None]:
# Poll the three dataset import jobs every 30 seconds until all of them are ACTIVE.
# A job that ends in CREATE_FAILED never becomes ACTIVE, so that state is detected
# explicitly and raised instead of polling forever.
start_time = time.time()

while True:
    TargetdataImportStatus  = forecast.describe_dataset_import_job(DatasetImportJobArn=ds_import_job_arn)['Status']
    RelateddataImportStatus = forecast.describe_dataset_import_job(DatasetImportJobArn=rel_ds_import_job_arn)['Status']
    ItemdataImportStatus    = forecast.describe_dataset_import_job(DatasetImportJobArn=item_ds_import_job_arn)['Status']

    print("Dataset {} status : {}".format(target_datasetArn, TargetdataImportStatus))
    print("Dataset {} status : {}".format(related_datasetArn, RelateddataImportStatus))
    print("Dataset {} status : {}".format(item_datasetArn, ItemdataImportStatus))
    print("--------------------------------------------------------------------")

    import_statuses = (TargetdataImportStatus, RelateddataImportStatus, ItemdataImportStatus)
    if 'CREATE_FAILED' in import_statuses:
        raise RuntimeError("A dataset import job ended in CREATE_FAILED; "
                           "inspect it with forecast.describe_dataset_import_job for the error message")
    if all(status == 'ACTIVE' for status in import_statuses):
        break
    sleep(30)
print('작업 수행된 시간 : %f 초' % (time.time() - start_time))

In [None]:
response = forecast.update_dataset_group(
    DatasetGroupArn=datasetGroupArn,
    DatasetArns=[
        target_datasetArn,
        related_datasetArn,
        item_datasetArn
    ]
)

아래 스크린 캡쳐와 같이 3가지 Dataset이 모두 Import되었는지 확인한 후 "Create Predictor" 단계로 넘어 간다.
Import 상태가 "Failed"인 경우 세부 오류 메시지를 확인한다.

<img src="../img/datasets.png" align="left">

# 3. Create Predictor (20~30분 소요)


In [None]:
# Settings shared by both predictors created below
forecastHorizon = 30 # 30 days
NumberOfBacktestWindows = 4
BackTestWindowOffset = 30  # same value as forecastHorizon
ForecastFrequency = "D"

In [None]:
prophet_algorithmArn = 'arn:aws:forecast:::algorithm/Prophet'
deepAR_Plus_algorithmArn = 'arn:aws:forecast:::algorithm/Deep_AR_Plus'

## 3a. Prophet

In [None]:
# Prophet Specifics
prophet_predictorName= project+'_prophet_algo_1'

In [None]:
# Build Prophet:

# Backtesting: number of windows and the offset passed to the service
prophet_evaluation_parameters = {
    "NumberOfBacktestWindows": NumberOfBacktestWindows,
    "BackTestWindowOffset": BackTestWindowOffset,
}

# Training data: the dataset group plus the "holiday" supplementary feature set to "US"
prophet_input_data_config = {
    "DatasetGroupArn": datasetGroupArn,
    "SupplementaryFeatures": [{"Name": "holiday", "Value": "US"}],
}

# Filling applied to the "demand" attribute
prophet_demand_filling = {
    "FeaturizationMethodName": "filling",
    "FeaturizationMethodParameters": {
        "frontfill": "none",
        "middlefill": "zero",
        "backfill": "zero",
    },
}
prophet_featurization_config = {
    "ForecastFrequency": ForecastFrequency,
    "Featurizations": [
        {"AttributeName": "demand", "FeaturizationPipeline": [prophet_demand_filling]},
    ],
}

prophet_create_predictor_response = forecast.create_predictor(
    PredictorName=prophet_predictorName,
    AlgorithmArn=prophet_algorithmArn,
    ForecastHorizon=forecastHorizon,
    PerformAutoML=False,
    PerformHPO=False,
    EvaluationParameters=prophet_evaluation_parameters,
    InputDataConfig=prophet_input_data_config,
    FeaturizationConfig=prophet_featurization_config,
)

## 3b. DeepAR Plus

In [None]:
# DeepAR Plus Specifics
deeparplus_predictorName= project+'_deeparplus_algo_1'

In [None]:
# Build DeepAR Plus:

# Backtesting: number of windows and the offset passed to the service
deeparplus_evaluation_parameters = {
    "NumberOfBacktestWindows": NumberOfBacktestWindows,
    "BackTestWindowOffset": BackTestWindowOffset,
}

# Training data: the dataset group plus the "holiday" supplementary feature set to "US"
deeparplus_input_data_config = {
    "DatasetGroupArn": datasetGroupArn,
    "SupplementaryFeatures": [{"Name": "holiday", "Value": "US"}],
}

# Filling applied to the "demand" attribute
deeparplus_demand_filling = {
    "FeaturizationMethodName": "filling",
    "FeaturizationMethodParameters": {
        "frontfill": "none",
        "middlefill": "zero",
        "backfill": "zero",
    },
}
deeparplus_featurization_config = {
    "ForecastFrequency": ForecastFrequency,
    "Featurizations": [
        {"AttributeName": "demand", "FeaturizationPipeline": [deeparplus_demand_filling]},
    ],
}

# Algorithm-specific training parameter: negative-binomial likelihood
deeparplus_training_parameters = {"likelihood": "negative-binomial"}

deeparplus_create_predictor_response = forecast.create_predictor(
    PredictorName=deeparplus_predictorName,
    AlgorithmArn=deepAR_Plus_algorithmArn,
    ForecastHorizon=forecastHorizon,
    PerformAutoML=False,
    PerformHPO=False,
    EvaluationParameters=deeparplus_evaluation_parameters,
    InputDataConfig=deeparplus_input_data_config,
    FeaturizationConfig=deeparplus_featurization_config,
    TrainingParameters=deeparplus_training_parameters,
)

- 일반적으로 Prophet predictor 학습은 DeepAR+ 보다 빨리 끝난다.
- Prophet predictor 학습이 완료되어 predictor status가 `ACTIVE`인 경우 Prophet predictor를 이용하여 Forecast를 생성한다.

## 3c. Check Predictor Creation Status

In [None]:
# Poll both predictors every 30 seconds until both are ACTIVE.
# A predictor that ends in CREATE_FAILED never becomes ACTIVE, so that state is
# detected explicitly and raised instead of polling forever.
start_time = time.time()

# The ARNs do not change between polls, so they are read once before the loop.
ProphetArn = prophet_create_predictor_response['PredictorArn']
DeepARPlusArn = deeparplus_create_predictor_response['PredictorArn']

while True:
    ProphetStatus = forecast.describe_predictor(PredictorArn=ProphetArn)['Status']
    DeepARPlusStatus = forecast.describe_predictor(PredictorArn=DeepARPlusArn)['Status']

    print("Predictor {} status : {}".format(ProphetArn, ProphetStatus))
    print("Predictor {} status : {}".format(DeepARPlusArn, DeepARPlusStatus))
    print("--------------------------------------------------------------------")

    if 'CREATE_FAILED' in (ProphetStatus, DeepARPlusStatus):
        raise RuntimeError("A predictor ended in CREATE_FAILED; "
                           "inspect it with forecast.describe_predictor for the error message")
    if ProphetStatus == 'ACTIVE' and DeepARPlusStatus == 'ACTIVE':
        break
    sleep(30)
print('작업 수행된 시간 : %f 초' % (time.time() - start_time))

## 3d. Examining the Predictors
- AWS Forecast에서 생성된 Predictor별 Metric을 확인한다.
- 참고 : https://docs.aws.amazon.com/ko_kr/forecast/latest/dg/metrics.html

# 4. Create Forecast
- Predictor별 Forecast를 만든다.
- 5분 ~ 10분 소요
- 참고 : https://docs.aws.amazon.com/ko_kr/forecast/latest/dg/gs-console.html 의 "3단계 - 예상 생성"
- ForecastTypes : The quantiles at which probabilistic forecasts are generated. You can currently specify up to 5 quantiles per forecast. Accepted values include 0.01 to 0.99 (increments of .01 only) and mean. The mean forecast is different from the median (0.50) when the distribution is not symmetric (for example, Beta and Negative Binomial). The default value is ["0.1", "0.5", "0.9"].

## 4a. Create Prophet, DeepAR+ Forecast

In [None]:
deeparplus_forecastName= project+'_deepAR_algo_forecast'
prophet_forecastname= project+'_prophet_algo_forecast'
ForecastTypes=["0.1", "0.5", "0.9", "mean"]

**Prophet**

In [None]:
create_forecast_response=forecast.create_forecast(ForecastName=prophet_forecastname,
                                                  ForecastTypes=ForecastTypes,
                                                  PredictorArn=ProphetArn
                                                 )
prophet_forecastArn = create_forecast_response['ForecastArn']

In [None]:
deeparplus_forecastName

**DeepAR+**

In [None]:
create_forecast_response=forecast.create_forecast(ForecastName=deeparplus_forecastName,
                                                  ForecastTypes=ForecastTypes,
                                                  PredictorArn = DeepARPlusArn
                                                 )
deeparplus_forecastArn = create_forecast_response['ForecastArn']

In [None]:
prophet_forecastArn

## 4b. Check Forecast Creation Status

In [None]:
# Poll both forecasts every 30 seconds until both are ACTIVE.
# A forecast that ends in CREATE_FAILED never becomes ACTIVE, so that state is
# detected explicitly and raised instead of polling forever.
start_time = time.time()
while True:
    deeparplus_forecast_status = forecast.describe_forecast(ForecastArn=deeparplus_forecastArn)['Status']
    prophet_forecast_status = forecast.describe_forecast(ForecastArn=prophet_forecastArn)['Status']

    # These ARNs identify forecasts, so the lines are labelled "Forecast"
    print("Forecast {} status : {}".format(deeparplus_forecastArn, deeparplus_forecast_status))
    print("Forecast {} status : {}".format(prophet_forecastArn, prophet_forecast_status))
    print("--------------------------------------------------------------------")

    if 'CREATE_FAILED' in (deeparplus_forecast_status, prophet_forecast_status):
        raise RuntimeError("A forecast ended in CREATE_FAILED; "
                           "inspect it with forecast.describe_forecast for the error message")
    if deeparplus_forecast_status == 'ACTIVE' and prophet_forecast_status == 'ACTIVE':
        break
    sleep(30)
print('작업 수행된 시간 : %f 초' % (time.time() - start_time))

## 4c. Get Forecast & Visualization
Predictor별 Forecast를 생성한 후 id별 p10, p50, p90, mean 값을 확인할 수 있다.

In [None]:
# 200개의 Sample item 중 top10
sampled[["id", "sales_total"]].head(10)

In [None]:
# 200개의 Sample item 중 worst10
sampled[["id", "sales_total"]].tail(10)

In [None]:
def _predictions_to_frame(predictions, name):
    """Turn one list of {'Timestamp', 'Value'} records into a frame indexed by Timestamp with one column *name*."""
    frame = pd.DataFrame(predictions)
    frame["Timestamp"] = pd.to_datetime(frame["Timestamp"], format="%Y-%m-%dT%H:%M:%S")
    return frame.set_index("Timestamp").rename(columns={"Value": name})


def get_forecast(id):
    """Plot the DeepAR+ and the Prophet forecast of one item against its observed sales.

    For each of the two forecast ARNs the forecast of item *id* is queried, and
    its p90, mean, p50 and p10 series are drawn on a new 20x5 figure together
    with the observed ``sales`` from ``df_merged``. The observations cover the
    forecast period extended by half a month (0.5 * 365/12 days) on both sides.
    A dashed red vertical line marks 2015-11-27 (Black Friday).

    Args:
        id: value of the "item_id" filter sent to the forecast query; also
            matched against ``df_merged["id"]``. The name shadows the builtin
            ``id`` but is kept so that existing callers keep working.

    Returns:
        None. The figures are created as a side effect.
    """
    half_month = timedelta(days=0.5 * 365/12)

    for forecastArn in [deeparplus_forecastArn, prophet_forecastArn]:
        forecastResponse = forecast_query.query_forecast(
                            ForecastArn=forecastArn,
                            Filters={"item_id":id}
                            )
        predictions = forecastResponse['Forecast']['Predictions']

        # One frame per series, kept in drawing order (p90, mean, p50, p10).
        series = {name: _predictions_to_frame(predictions[name], name)
                  for name in ('p90', 'mean', 'p50', 'p10')}

        # Observation window as date strings for label-based slicing of df_merged.
        mean = series['mean']
        plot_start_date = (mean.index.min() - half_month).strftime('%Y-%m-%d')
        plot_end_date   = (mean.index.max() + half_month).strftime('%Y-%m-%d')

        observations = df_merged[df_merged["id"] == id].loc[plot_start_date:plot_end_date].sales

        plt.figure(figsize=(20, 5))

        plt.title("Forecast for {}, Predictor : {}".format(id, forecastArn))
        plt.plot(observations, color='gray', linewidth=1, label="observation")
        for name, frame in series.items():
            plt.plot(frame, label=name)
        plt.axvline(x=datetime(2015, 11, 27), color='r', linestyle='--', linewidth=3) # Adding Vertical line for Black Friday
        plt.legend()

### Sample중 Top5

In [None]:
sampled[["id", "sales_total"]].head()

In [None]:
sampled.id.head(5)

In [None]:
for item in sampled.id.head(5):
    get_forecast(item)

### Sample중 Worst5

In [None]:
for item in sampled.id.tail(5):
    get_forecast(item)