In [None]:
%load_ext autoreload
%autoreload 2

import boto3
import sagemaker
import pandas as pd
import numpy as np
import os
import sys
import re
import time

sys.path.insert(0,'..')

import src.config as cf
import src.utils as ut

# Route outbound HTTP(S) traffic through the corporate proxy, while keeping the
# two addresses listed in no_proxy reachable directly.
os.environ.update({
    'HTTP_PROXY': "http://proxy-internet-aws-eu.subsidia.org:3128",
    'HTTPS_PROXY': "http://proxy-internet-aws-eu.subsidia.org:3128",
    'no_proxy': "169.254.169.254,127.0.0.1",
})

# Params

In [None]:
# Tunable parameters of this notebook.

# Only the exact value "prod" selects ../conf/prod.yml; anything else uses ../conf/dev.yml.
RUN_ENV = "dev"

# Sent to the training job as the "time_freq" hyperparameter.
freq = "W"

# Sent to the training job (as a string) as the "prediction_length" hyperparameter.
prediction_length = 16

In [None]:
# Pick the environment-specific configuration file: only RUN_ENV == "prod"
# selects the production file, every other value falls back to the dev file.
if RUN_ENV == "prod":
    config_file = "../conf/prod.yml"
else:
    config_file = "../conf/dev.yml"
config = cf.ProgramConfiguration(config_file, "../conf/functional.yml")

# AWS handles and IAM role reused by the cells below.
sagemaker_session = sagemaker.Session()
client = boto3.client('sagemaker')
role = config.get_global_role_arn()

bucket = config.get_train_bucket_input()
prefix = 'test_deepAR' # Your S3 test dir

# "<bucket>/<prefix>/..." locations, without the "s3://" scheme (callers add it).
s3_data_path, s3_output_path = (
    template.format(bucket, prefix)
    for template in ("{}/{}/data", "{}/{}/output")
)

# Get DeepAR Image

In [None]:
from sagemaker.amazon.amazon_estimator import get_image_uri

# Look up the 'forecasting-deepar' container image for the region of the
# default boto3 session.
aws_region = boto3.Session().region_name
image_name = get_image_uri(aws_region, 'forecasting-deepar')

# Run Training Job

In [None]:
# List every object stored under "<prefix>/data/" in the training bucket.
cutoff_files = sagemaker_session.list_s3_files(bucket, "{}/data/".format(prefix))

# The training input is read from ".../data/cutoff_<week_id>/..." (see the
# training cell), so the id is taken from that path component rather than from
# the first run of digits anywhere in the key. Keys that do not follow the
# layout are skipped instead of raising IndexError.
cutoff_pattern = re.compile(r'cutoff_(\d+)')
cutoff_matches = (cutoff_pattern.search(f) for f in cutoff_files)
week_ids = [int(m.group(1)) for m in cutoff_matches if m is not None]

# np.unique already returns its result sorted in ascending order.
l_cutoff_week_id = np.unique(np.array(week_ids, dtype=int))

# Keep only the earliest cutoff for this test run.
l_cutoff_week_id = l_cutoff_week_id[:1]
l_cutoff_week_id

In [None]:
# Launch one DeepAR training job per cutoff week. Jobs are started
# asynchronously (wait=False); the next cell expects them to be finished.

# Identical for every cutoff, so built once outside the loop.
output_path = "s3://{}/model".format(s3_output_path)

hyperparameters = {
    "time_freq": freq,
    "prediction_length": str(prediction_length),
    "context_length": "21",
    "num_cells": "199",
    "num_layers": "3",
    "likelihood": "negative-binomial",
    "epochs": "126",
    "mini_batch_size": "256",
    "learning_rate": "9.958020405900597e-05",
    "dropout_rate": "0.17555152252766837",
    "test_quantiles" : "[0.5]",
    "early_stopping_patience" : "10"
}

for cutoff_week_id in l_cutoff_week_id:

    print(cutoff_week_id)

    model_name = "APO-{}-DEEPAR".format(cutoff_week_id)
    job_name = "TRAINING-{}".format(model_name)

    estimator = sagemaker.estimator.Estimator(
        sagemaker_session=sagemaker_session,
        image_name=image_name,
        role=role,
        train_instance_count=1,
        train_instance_type='ml.p3.2xlarge',
        base_job_name=job_name,
        output_path=output_path,
        train_use_spot_instances=True,
        train_max_run=3600,
        train_max_wait=3600
    )

    estimator.set_hyperparameters(**hyperparameters)

    # NOTE(review): the "train" channel reads val.json — confirm this is the
    # intended training file.
    inputs = {
        "train" : "s3://{}/cutoff_{}/val.json".format(s3_data_path, cutoff_week_id)
    }

    # Keep retrying every two minutes until the job is accepted. Only
    # Exception is caught, so a kernel interrupt (KeyboardInterrupt) still
    # stops the loop, and the error is printed so that a permanent failure
    # (bad role, invalid hyperparameter, ...) is visible instead of silently
    # looping forever.
    while True:
        try:
            estimator.fit(inputs=inputs, wait=False)
            break
        except Exception as exc:
            print("... ({}: {})".format(type(exc).__name__, exc))
            time.sleep(120)

### /!\ Wait until all the training jobs launched above have completed before running the next cell.

In [None]:
# Fetch the most recent training jobs, newest first. The sort order is requested
# explicitly so that, when several jobs match the same name, the first match
# found below is the latest one.
l_training_jobs = client.list_training_jobs(
    MaxResults=100,
    SortBy="CreationTime",
    SortOrder="Descending",
)["TrainingJobSummaries"]
l_training_jobs_name = [job['TrainingJobName'] for job in l_training_jobs]

for cutoff_week_id in l_cutoff_week_id:

    model_name = "APO-{}-DEEPAR".format(cutoff_week_id)
    job_name = "TRAINING-{}".format(model_name)

    # Resolve and validate the training job BEFORE touching the existing model,
    # so that a missing or unfinished job does not leave us without any model.
    matching_jobs = [job for job in l_training_jobs if job_name in job['TrainingJobName']]
    if not matching_jobs:
        raise RuntimeError(
            "No training job whose name contains '{}' was found among the {} most recent jobs.".format(
                job_name, len(l_training_jobs)
            )
        )

    latest_job = matching_jobs[0]
    training_job_name = latest_job['TrainingJobName']

    if latest_job['TrainingJobStatus'] != "Completed":
        raise RuntimeError(
            "Training job '{}' has status '{}'; wait for it to complete before creating the model.".format(
                training_job_name, latest_job['TrainingJobStatus']
            )
        )

    # Delete old model version if exists. This stays best-effort (the model may
    # simply not exist yet), but the reason is reported and a kernel interrupt
    # is no longer swallowed.
    try:
        sagemaker_session.delete_model(model_name)
    except Exception as exc:
        print("No model deleted for {} ({}: {})".format(model_name, type(exc).__name__, exc))

    print(model_name)

    model = sagemaker_session.create_model_from_job(
        training_job_name,
        name=model_name,
        role=role,
    )