In [None]:
%load_ext autoreload
%autoreload 2

import boto3
import sagemaker
import pandas as pd
import os
import sys

# Put the parent directory ('..') first on the module search path before the
# `src` imports below run, so that `src.config` / `src.utils` can be resolved
# from there (assumes the notebook's working directory is a sub-folder of the
# project root — TODO confirm).
sys.path.insert(0,'..')

import src.config as cf
import src.utils as ut

# Send both HTTP and HTTPS traffic through the same proxy; the addresses listed
# in `no_proxy` bypass it (169.254.169.254 is presumably the instance metadata
# endpoint — confirm).
proxy_url = "http://proxy-internet-aws-eu.subsidia.org:3128"
os.environ.update({
    'HTTP_PROXY': proxy_url,
    'HTTPS_PROXY': proxy_url,
    'no_proxy': "169.254.169.254,127.0.0.1",
})

# Parameters

In [None]:
# Environment selector: "prod" picks the production configuration file,
# any other value falls back to the dev one.
RUN_ENV = "dev"

# Forecast settings forwarded to DeepAR as `time_freq` and `prediction_length`
# ("W" is presumably weekly data — confirm).
freq = "W"
prediction_length = 16

# Cutoff identifier used in the S3 data folder name and in the tuning job name
# (looks like a YYYYWW week id — confirm).
cutoff_week_id = 201922

In [None]:
# Select the environment-specific YAML file; anything other than "prod" uses dev.
if RUN_ENV == "prod":
    config_file = "../conf/prod.yml"
else:
    config_file = "../conf/dev.yml"
config = cf.ProgramConfiguration(config_file, "../conf/functional.yml")

# SageMaker session plus the role value exposed by the project configuration.
sagemaker_session = sagemaker.Session()
role = config.get_global_role_arn()

# S3 locations, without the "s3://" scheme:
#   <bucket>/<prefix>/data   and   <bucket>/<prefix>/output
bucket = config.get_train_bucket_input()
prefix = 'test_deepAR'  # Your S3 test dir
s3_root = "{}/{}".format(bucket, prefix)
s3_data_path = s3_root + "/data"
s3_output_path = s3_root + "/output"

# Get DeepAR Image

In [None]:
from sagemaker.amazon.amazon_estimator import get_image_uri

# Look up the 'forecasting-deepar' image for the region of the default boto3 session.
region_name = boto3.Session().region_name
image_name = get_image_uri(region_name, 'forecasting-deepar')

# Hyperparameter Tuning Job

In [None]:
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

In [None]:
# Model artifact location handed to the estimator.
# It is built from `bucket` and `prefix` instead of from the previous value of
# `s3_output_path`, so this cell is idempotent: it always produces
# "s3://<bucket>/<prefix>/output/model". Deriving it from `s3_output_path`
# itself would stack another "s3://" prefix and "/model" suffix on every re-run.
s3_output_path = "s3://{}/{}/output/model".format(bucket, prefix)

# Base estimator used by the tuning job: a single ml.p3.2xlarge instance running
# the DeepAR image, on spot capacity. `train_max_run` and `train_max_wait` are
# both set to 3600 (seconds, per the SageMaker Estimator API).
estimator = sagemaker.estimator.Estimator(
    sagemaker_session=sagemaker_session,
    image_name=image_name,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.p3.2xlarge',
    output_path=s3_output_path,
    train_use_spot_instances=True,
    train_max_run=3600,
    train_max_wait=3600
)

In [None]:
# Static hyperparameters set on the estimator (not part of the tuned ranges).
# `prediction_length` is passed as a string; `test_quantiles` is a one-element list.
hyperparameters = dict(
    time_freq=freq,
    prediction_length=str(prediction_length),
    test_quantiles=[0.5],
)

estimator.set_hyperparameters(**hyperparameters)

In [None]:
# Search space explored by the tuner, one entry per tuned hyperparameter.
hyperparameter_ranges = dict(
    # Integer ranges
    context_length=IntegerParameter(16, 110),
    num_cells=IntegerParameter(30, 200),
    num_layers=IntegerParameter(2, 6),  # original note: "1 - 8" (presumably the full allowed range — confirm)
    epochs=IntegerParameter(10, 200),
    # Discrete batch sizes
    mini_batch_size=CategoricalParameter([32, 64, 128, 256, 512, 1024]),
    # Continuous ranges; the learning rate is sampled on a logarithmic scale
    learning_rate=ContinuousParameter(1e-5, 1e-1, scaling_type="Logarithmic"),
    dropout_rate=ContinuousParameter(0, 0.2),
    # Likelihood alternatives
    likelihood=CategoricalParameter(["negative-binomial", "student-T"]),
)

In [None]:
# Base name for the tuning job, e.g. "TUNING-<cutoff_week_id>-DEEPAR".
tuning_job_name = "-".join(["TUNING", str(cutoff_week_id), "DEEPAR"])

# Tuner that minimizes 'test:mean_wQuantileLoss' over `hyperparameter_ranges`:
# 25 training jobs in total, 2 at a time, with early stopping turned off.
tuner = HyperparameterTuner(
    base_tuning_job_name=tuning_job_name,
    estimator=estimator,
    hyperparameter_ranges=hyperparameter_ranges,
    objective_metric_name='test:mean_wQuantileLoss',
    objective_type='Minimize',
    max_jobs=25,
    max_parallel_jobs=2,
    early_stopping_type='Off',
)

In [None]:
# Both channels live in the same per-cutoff folder:
#   s3://<s3_data_path>/cutoff_<cutoff_week_id>/{train,val}.json
cutoff_dir = "s3://{}/cutoff_{}".format(s3_data_path, cutoff_week_id)

data_channels = {
    "train": cutoff_dir + "/train.json",
    "test": cutoff_dir + "/val.json",
}

In [None]:
# Start the tuning job on the train/test channels, then wait on it through the session.
tuner.fit(inputs=data_channels)

latest_job = tuner.latest_tuning_job
sagemaker_session.wait_for_tuning_job(latest_job.name)