# Guess a Number - SageMaker notebook with HyperParameter tuning

*By Michael Ludvig*

**Import the required modules**

In [None]:
import os
import sagemaker
from sagemaker.rl import RLEstimator, RLToolkit, RLFramework
from sagemaker_job.misc import get_execution_role, wait_for_s3_object

**Job configuration**

In [None]:
# Prefix used when naming the training jobs and the tuning job.
job_name_prefix = "tune-guessnumber"

# Instance type requested for the training jobs.
instance_type = "ml.c5.xlarge"

**Discover the AWS resources (S3 bucket and IAM role)**

In [None]:
# Resolve the S3 location: use the session's default bucket and build the
# s3:// URL that is later passed to the estimator as its output path.
sage_session = sagemaker.session.Session()
s3_bucket = sage_session.default_bucket()
s3_output_path = 's3://{}/'.format(s3_bucket)
print("S3 bucket path: {}".format(s3_output_path))

# Resolve the IAM execution role. Try the SageMaker SDK lookup first and
# fall back to the project helper from sagemaker_job.misc if that fails
# (presumably when running outside a SageMaker notebook instance).
try:
    role = sagemaker.get_execution_role()
except Exception:
    # Catch Exception instead of using a bare `except:` so that
    # KeyboardInterrupt and SystemExit still propagate and the cell can be
    # interrupted while the lookup is in progress.
    role = get_execution_role()

print("Using IAM role arn: {}".format(role))

In [None]:
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

# Hyperparameters held constant for every training job of the tuning run.
# NOTE: 'improve_steps' is a short alias; sagemaker_job/train-coach.py maps it
# to the real parameter name.
static_hyperparameters = dict(improve_steps=100000)

# Search space explored by the tuner: one entry per tunable hyperparameter,
# keyed by the name handed to the training script.
hyperparameter_ranges = {
    # Integer range 3..20
    "rl.agent_params.algorithm.optimization_epochs": IntegerParameter(3, 20),
    # Continuous range 0.9..0.99
    "rl.agent_params.algorithm.discount": ContinuousParameter(0.9, 0.99),
    # Continuous range 0.1..0.3
    "rl.agent_params.algorithm.clip_likelihood_ratio_using_epsilon": ContinuousParameter(0.1, 0.3),
    # Continuous range 1e-5..0.005; this short name is mapped to the real
    # parameter in sagemaker_job/train-coach.py
    "rl.learning_rate": ContinuousParameter(1e-5, 0.005),
}

**RL Estimator with HyperparameterTuner** - this is where the tuning happens

In [None]:
# Tuning budget: total number of training jobs, and the number allowed to
# run in parallel.
max_jobs, max_parallel_jobs = 25, 5

# Default metric definitions provided by RLEstimator for the Coach toolkit;
# shared by the estimator and the tuner below.
metric_definitions = RLEstimator.default_metric_definitions(RLToolkit.COACH)

# RL estimator (Coach toolkit on MXNet) that runs sagemaker_job/train-coach.py
# with the static hyperparameters; the tuner supplies the tuned ones per job.
estimator = RLEstimator(
    entry_point="train-coach.py",
    source_dir="sagemaker_job",
    dependencies=["gym_guess_number"],
    toolkit=RLToolkit.COACH,
    toolkit_version="0.11.0",
    framework=RLFramework.MXNET,
    role=role,
    train_instance_type=instance_type,
    train_instance_count=1,
    # Optional runtime cap, currently disabled:
    # train_max_run=1800,    # Max runtime in seconds (1800 sec = 30 min)
    output_path=s3_output_path,
    base_job_name=job_name_prefix,
    metric_definitions=metric_definitions,
    hyperparameters=static_hyperparameters,
)

# Tuner that searches `hyperparameter_ranges`, maximizing the
# 'reward-training' metric within the configured job budget.
tuner = HyperparameterTuner(
    estimator,
    objective_metric_name="reward-training",
    objective_type="Maximize",
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=max_jobs,
    max_parallel_jobs=max_parallel_jobs,
    base_tuning_job_name=job_name_prefix,
)

# Start the hyperparameter tuning job.
tuner.fit()

**Wait for completion**

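1. Go to [**AWS SageMaker --> Training --> Hyperparameter tuning jobs**](https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/hyper-tuning-jobs) and check the progress and results. (The link points to the `us-west-2` region; switch the console to your own region if the job runs elsewhere.)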
2. Once the tuning job has finished, open [GuessNumber-SageMaker.ipynb](GuessNumber-SageMaker.ipynb), update the hyperparameters there with the best values found, and re-run the training.