# Example 1: Hyperparameter optimization using random search

In [1]:
import os
import numpy as np
import time
import sagemaker
from sagemaker.session import s3_input

# IAM role used for the SageMaker API calls made from this notebook.
role = sagemaker.get_execution_role()

# Session object wrapping the SageMaker API for this notebook.
sagemaker_session = sagemaker.Session()

In [2]:
# Baseline hyperparameters handed to the TensorFlow estimator.
# The tuning cell further down defines search ranges for 'learning-rate',
# 'batch-size', 'momentum' and 'optimizer'; 'epochs' and 'weight-decay'
# have no search range there.
hyperparams = {
    'epochs': 30,
    'learning-rate': 0.01,
    'batch-size': 256,
    'weight-decay': 2e-4,
    'momentum': 0.9,
    'optimizer': 'sgd',
}

In [3]:
from sagemaker.tensorflow import TensorFlow

# S3 locations: the output prefix passed to the estimator, plus the three
# dataset prefixes (train / validation / eval) used as input channels.
output_path = 's3://oreilly-ai-conference-sanjose-2019/'
train_path = 's3://oreilly-ai-conference-sanjose-2019/cifar10-dataset-tfrecord/train'
val_path = 's3://oreilly-ai-conference-sanjose-2019/cifar10-dataset-tfrecord/validation'
eval_path = 's3://oreilly-ai-conference-sanjose-2019/cifar10-dataset-tfrecord/eval'

# Estimator settings: one ml.p3.2xlarge instance running the training script
# from the '1-code-example' directory with TensorFlow 1.13 / Python 3 in
# script mode, using the baseline hyperparameters defined earlier.
estimator_settings = {
    'entry_point': 'cifar10-training-script-sagemaker.py',
    'source_dir': '1-code-example',
    'role': role,
    'train_instance_count': 1,
    'train_instance_type': 'ml.p3.2xlarge',
    'framework_version': '1.13',
    'py_version': 'py3',
    'script_mode': True,
    'output_path': output_path,
    'hyperparameters': hyperparams,
}
tf_estimator = TensorFlow(**estimator_settings)

In [7]:
# Run a single training job with the baseline hyperparameters.
# Each key names an input channel; the captured log below shows them mounted
# under /opt/ml/input/data/<channel> inside the training container.
training_channels = {
    'training': train_path,
    'validation': val_path,
    'eval': eval_path,
}
tf_estimator.fit(training_channels)

2019-09-12 02:34:20 Starting - Starting the training job...
2019-09-12 02:34:21 Starting - Launching requested ML instances......
2019-09-12 02:35:21 Starting - Preparing the instances for training...
2019-09-12 02:36:14 Downloading - Downloading input data...
2019-09-12 02:36:32 Training - Downloading the training image...
2019-09-12 02:37:10 Training - Training image download completed. Training in progress..
[31m2019-09-12 02:37:13,884 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[31m2019-09-12 02:37:14,271 sagemaker-containers INFO     Invoking user script
[0m
[31mTraining Env:
[0m
[31m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "eval": "/opt/ml/input/data/eval",
        "training": "/opt/ml/input/data/training",
        "validation": "/opt/ml/input/data/validation"
    },
    "current_host": "algo-1",
    "framework_module": "sagemaker_tensorflow_container.training:main",
    "hosts": [
    

In [4]:
from sagemaker.tuner import (
    IntegerParameter,
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
)

# Tuning job name: fixed prefix plus a UTC timestamp (the trailing %j is the
# day of the year).
utc_stamp = time.strftime('%Y-%m-%d-%H-%M-%S-%j', time.gmtime())
job_name = 'oreilly-' + utc_stamp

# Search space: learning rate is sampled on a logarithmic scale, momentum on
# the default scale; batch size and optimizer are picked from fixed choices.
hyperparameter_ranges = {
    'learning-rate': ContinuousParameter(0.0001, 0.1, scaling_type='Logarithmic'),
    'batch-size': CategoricalParameter([32, 128, 512, 1024]),
    'momentum': ContinuousParameter(0.9, 0.99),
    'optimizer': CategoricalParameter(['sgd', 'adam']),
}

# Objective: maximize 'val_acc', extracted with the regex below.
objective_metric_name = 'val_acc'
objective_type = 'Maximize'
metric_definitions = [
    {
        'Name': 'val_acc',
        'Regex': 'val_acc: ([0-9\\.]+)',
    },
]

# Random-search tuner: 16 training jobs in total, at most 4 running at once.
tuner = HyperparameterTuner(
    estimator=tf_estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    strategy='Random',
    objective_type=objective_type,
    max_jobs=16,
    max_parallel_jobs=4,
)

In [5]:
# Start the hyperparameter tuning job under the timestamped job name, feeding
# it the train / validation / eval S3 prefixes as input channels.
tuning_channels = {
    'training': train_path,
    'validation': val_path,
    'eval': eval_path,
}
tuner.fit(tuning_channels, job_name=job_name)

In [6]:
# Optional step, intentionally left disabled: deploy a model from the tuning
# job to an endpoint backed by one ml.c5.large instance.
# NOTE(review): presumably this should only be run once the tuning job has
# finished — confirm before uncommenting.
# tuner_predictor = tuner.deploy(initial_instance_count=1, 
#                                instance_type='ml.c5.large')