In [1]:
import os

# Set the proxy-related environment variables for this kernel process.
# HTTP and HTTPS share one proxy URL; no_proxy lists the addresses that are
# presumably meant to bypass it.
_PROXY_URL = "http://proxy-internet-aws-eu.subsidia.org:3128"
os.environ.update({
    'HTTP_PROXY': _PROXY_URL,
    'HTTPS_PROXY': _PROXY_URL,
    'no_proxy': "169.254.169.254,127.0.0.1",
})

## Parameters & Configuration

In [2]:
# Target environment for this run. The configuration cell further down only
# checks for the exact value "prod"; any other value selects ../conf/dev.yml.
ENVIRONMENT = 'preprod'

In [7]:
import subprocess
import sys
# Put the parent directory first on the import path so the `src` package can
# be imported. Rebuilding the list in place (instead of a bare insert) keeps
# '..' at the front while making the cell idempotent: re-running it no longer
# stacks one more '..' entry per execution.
sys.path[:] = ['..'] + [entry for entry in sys.path if entry != '..']

import src.config as cf

In [6]:
# Only the exact value "prod" selects the production settings file; every
# other ENVIRONMENT value (including "preprod") falls back to the dev file.
if ENVIRONMENT == "prod":
    config_file = "../conf/prod.yml"
else:
    config_file = "../conf/dev.yml"

# The functional settings file is passed alongside the environment-specific one.
config = cf.ProgramConfiguration(config_file, "../conf/functional.yml")

## Build Docker images for train & serve

In [23]:
# Run the image build script for the training image name taken from the config.
# check_call returns 0 on success (so the cell output is unchanged) but raises
# subprocess.CalledProcessError when the script exits with a non-zero status,
# so a failed build stops the notebook instead of being silently ignored.
subprocess.check_call(['sh', '../_sagemaker_/build_image.sh', config.get_train_image_name()])

0

## Training

### Training config

In [11]:
#from sagemaker import get_execution_role
#hyperparameters = {
#        'yearly_order': 26,
#        'quaterly_order': 5,
#    
#        'weekly_seasonality': False,
#        'daily_seasonality': False,
#        'yearly_seasonality': False,
#    
#        'n_changepoints': 36,
#        'changepoint_range': 0.69,
#        'changepoint_prior_scale': 1.91,
#        'seasonality_prior_scale': 2.04
#}

In [20]:
from sagemaker.estimator import Estimator

In [18]:
# Read every setting used by the training cells below from the loaded configuration.
# NOTE(review): the recorded output of the following print cell shows a
# hyperparameter key spelled "quaterly_order'" (with a trailing apostrophe).
# That dict comes from config.get_train_hyperparameters(), so the
# configuration source is worth checking.
role = config.get_global_role_arn()
image_name = config.get_train_docker_image()
# bucket and project_id are concatenated below into the s3:// training input URI.
bucket = config.get_train_bucket_input()
project_id = config.get_train_path_refined_data_input()
hyperparameters = config.get_train_hyperparameters()
train_instance_count = config.get_train_instance_count()
train_instance_type = config.get_train_instance_type()
# Both are passed to the Estimator constructor further down.
security_group_ids = config.get_global_security_group_ids()
subnets = config.get_global_subnets()

In [19]:
# Echo the resolved training settings so they can be checked before a job is
# launched. The security-group label previously read "security_group_id:s"
# (misplaced colon); it now matches the variable name.
print("- role:", role,
      "\n- image name:", image_name,
      "\n- bucket:", bucket,
      "\n- project_id:", project_id,
      "\n- hyperparameters:\n", hyperparameters,
      "\n- train_instance_count:", train_instance_count,
      "\n- train_instance_type:", train_instance_type,
      "\n- security_group_ids:", security_group_ids,
      "\n- subnets:", subnets
     )

- role: arn:aws:iam::150258775384:role/FORECAST-SAGEMAKER-DEV 
- image name: 150258775384.dkr.ecr.eu-west-1.amazonaws.com/demand-forecast-prophet-training:latest 
- bucket: fcst-refined-demand-forecast-dev 
- project_id: specific/domyos_nov_2019/train_data_cutoff/ 
- hyperparameters:
 {'yearly_order': '25', "quaterly_order'": '5', 'weekly_seasonality': 'False', 'daily_seasonality': 'False', 'yearly_seasonality': 'False', 'n_changepoints': '36', 'changepoint_range': '0.69', 'changepoint_prior_scale': '1.91', 'seasonality_prior_scale': '2.04'} 
- train_instance_count: 1 
- train_instance_type: ml.m4.xlarge 
- security_group_id:s ['sg-0186b5ab868f43e42'] 
- subnets: ['subnet-0f87a7ed73f4ead6d', 'subnet-02c60aed04f0d4ee5']


In [34]:
# Optional override: run the training container on this machine instead of
# starting a remote instance, which is quicker for checking changes during
# development. Skip this cell to keep the instance type from the configuration.
# NOTE(review): the tuner cell further down reuses the estimator built with
# this value -- confirm that tuning is meant to run with a 'local' instance type.
train_instance_type = "local"

In [None]:
# Collect the constructor arguments first, then create the estimator from them.
estimator_settings = {
    'role': role,
    'train_instance_count': train_instance_count,
    'train_instance_type': train_instance_type,
    'image_name': image_name,
    'hyperparameters': hyperparameters,
    'security_group_ids': security_group_ids,
    'subnets': subnets,
}
estimator = Estimator(**estimator_settings)

# Start training on the data located under s3://<bucket>/<project_id>.
training_input_uri = "s3://" + "/".join((bucket, project_id))
estimator.fit(training_input_uri)

Creating tmph4ubgbzj_algo-1-8b4vf_1 ... 
[1BAttaching to tmph4ubgbzj_algo-1-8b4vf_12mdone[0m
[36malgo-1-8b4vf_1  |[0m Run Env: preprod
[36malgo-1-8b4vf_1  |[0m Only Last: True
[36malgo-1-8b4vf_1  |[0m Content /opt/ml/input/data/training ['train_data_cutoff_201927', 'train_data_cutoff_201929', 'train_data_cutoff_201933', 'train_data_cutoff_201931', 'train_data_cutoff_201943', 'train_data_cutoff_201912', 'train_data_cutoff_201928', 'train_data_cutoff_201944', 'train_data_cutoff_201834', 'train_data_cutoff_201948', 'train_data_cutoff_202003', 'train_data_cutoff_201951', 'train_data_cutoff_201930', 'train_data_cutoff_201940', 'train_data_cutoff_201923', 'train_data_cutoff_201925', 'train_data_cutoff_201936', 'train_data_cutoff_201949', 'train_data_cutoff_201902', 'train_data_cutoff_201950', 'train_data_cutoff_201947', 'train_data_cutoff_201924', 'train_data_cutoff_201942', 'train_data_cutoff_202004', 'train_data_cutoff_201922', 'train_data_cutoff_201932', 'train_data_cutoff_202002'

## Bring your own hyperopt

In [11]:
import boto3

from time import gmtime, strftime
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

In [20]:
# Name chosen for the tuning job. The timestamp suffix is currently disabled:
#   + strftime("%d-%H-%M-%S", gmtime())
tuning_job_name = "demand-forecast-prophet-tuning"

# Integer search ranges, keyed by hyperparameter name. The trailing tuples are
# the author's earlier annotations (presumably wider candidate bounds).
hyperparameter_ranges = dict(
    yearly_order=IntegerParameter(26, 29),    # (1, 30)
    quaterly_order=IntegerParameter(4, 6),    # (1, 10)
)

# Further ranges left disabled by the author:
#   'n_changepoints': IntegerParameter(30, 32),  # (1, 50)
#   'changepoint_range': ContinuousParameter(0.65, 0.69),  # (0.6, 1.)
#   'changepoint_prior_scale': ContinuousParameter(1.8, 1.9, scaling_type="Logarithmic"),  # 1e-2, 1e2
#   'seasonality_prior_scale': ContinuousParameter(2.2, 2.4, scaling_type="Logarithmic")


In [21]:
objective_metric_name = 'global_wape'
objective_type = 'Minimize'
metric_definitions = [{'Name': 'global_wape',
                       'Regex': 'global_wape: ([0-9\\.]+)'}]

In [22]:
# Wrap the estimator in a tuner: up to 9 training jobs in total, run one at a
# time, optimising the objective metric in the configured direction.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type=objective_type,
    max_jobs=9,
    max_parallel_jobs=1,
)

In [23]:
# Launch the tuning job on the data located under s3://<bucket>/<project_id>.
# Alternative left disabled by the author (named channels on the same prefix):
#   tuner.fit({'training': 's3://'+bucket+'/'+project_id,
#              'test': 's3://'+bucket+'/'+project_id})
tuning_input_uri = "s3://" + "/".join((bucket, project_id))
tuner.fit(tuning_input_uri)

In [24]:
# Look up the tuner's latest tuning job by name and display its status field.
sagemaker_client = boto3.client('sagemaker')
tuning_job_description = sagemaker_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuner.latest_tuning_job.job_name)
tuning_job_description['HyperParameterTuningJobStatus']

'InProgress'