# Amazon SageMaker Notebook for ProcGen Starter Kit with Single Instance

In [17]:
import os
import yaml

import sagemaker
from sagemaker.rl import RLEstimator, RLToolkit, RLFramework
import boto3

In [18]:
# Load the notebook settings (bucket name, instance types, ...) from the
# YAML file kept under ./config.
config_path = os.path.join("config", "sagemaker_config.yaml")
with open(config_path) as config_file:
    sagemaker_config = yaml.safe_load(config_file)

## Initialize Amazon SageMaker

In [19]:
# SageMaker session handle for this notebook.
sm_session = sagemaker.session.Session()

# Bucket name comes from the YAML config; training output is written to its root.
s3_bucket = sagemaker_config["S3_BUCKET"]
s3_output_path = 's3://{}/'.format(s3_bucket)

print("S3 bucket path: {}".format(s3_output_path))

S3 bucket path: s3://aan-bucket/


In [29]:
# Prefix used when naming the training jobs launched from this notebook.
job_name_prefix = 'sm-ray-htm'

# Default training instance type.
# NOTE(review): the local-mode cell further down assigns instance_type again,
# so on a top-to-bottom run this value is replaced — confirm which one is intended.
instance_type = "ml.p3.2xlarge"

# Role that is handed to the estimator when the training job is created.
role = sagemaker.get_execution_role()
print(role)

arn:aws:iam::056313310996:role/service-role/sagemaker-sandbox-exection-role


In [21]:
# Change local_mode to True if you want to do local training within this Notebook instance
# Otherwise, we'll spin-up a SageMaker training instance to handle the training

local_mode = False

if local_mode:
    instance_type = 'local'
else:
    instance_type = sagemaker_config["CPU_TRAINING_INSTANCE"]
    
# If training locally, do some Docker housekeeping..
if local_mode:
    !/bin/bash source/common/setup.sh

# Configure the framework you want to use

Set `framework` to `"tf"` or `"torch"` for tensorflow or pytorch respectively.

You will also have to edit your entry point i.e., `train-sagemaker.py` with the configuration parameter `"use_pytorch"` to match the framework that you have selected.

In [22]:
# Deep-learning framework to train with: "tf" or "torch".
# The value is substituted into the container image tag built later in this notebook.
framework = "torch"

# Train your model here

### Train the RL model using the Python SDK Script mode

If you are using local mode, the training will run on the notebook instance. 

When using SageMaker for training, you can select a GPU or CPU instance. The RLEstimator is used for training RL jobs.

1. Specify the source directory where the environment, presets and training code are uploaded.
2. Specify the entry point as the training code
3. Specify the custom image to be used for the training environment.
4. Define the training parameters such as the instance count, job name, and S3 path for output.
5. Define the metrics definitions that you are interested in capturing in your logs. These can also be visualized in CloudWatch and SageMaker Notebooks.

*[Choose](https://github.com/aws/sagemaker-rl-container#rl-images-provided-by-sagemaker) which docker image to use based on the instance type.* 
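The AICrowd ProcGen starter kit was written against a container with Ray 0.8.5 and TensorFlow 2.1.0. Note that the cell below selects a Ray 1.6.0 image tag for the chosen framework, so make sure your training code is compatible with that Ray version.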

If you prefer to use PyTorch, it is recommended to switch your notebook kernel to `conda_pytorch_p36`. You will need to substitute the corresponding container listed in the Amazon SageMaker Reinforcement Learning documentation. In addition, you will need to ensure your starter kit is modified to train using PyTorch.

In [30]:
# Instance types beginning with "ml.p" or "ml.g" select the GPU image;
# everything else (including 'local') selects the CPU image.
gpu_prefixes = ('ml.p', 'ml.g')
if instance_type.startswith(gpu_prefixes):
    cpu_or_gpu = 'gpu'
else:
    cpu_or_gpu = 'cpu'

aws_region = boto3.Session().region_name

# The image URI is parameterized on region, framework and device type.
image_template = "462105765813.dkr.ecr.{}.amazonaws.com/sagemaker-rl-ray-container:ray-1.6.0-{}-{}-py36"
custom_image_name = image_template.format(aws_region, framework, cpu_or_gpu)
custom_image_name

'462105765813.dkr.ecr.us-west-2.amazonaws.com/sagemaker-rl-ray-container:ray-1.6.0-torch-gpu-py36'

Metrics are still a work in progress: the regular expressions below currently do not match the training logs in the SageMaker environment.

In [None]:
# Metrics to scrape from the training log. Every metric uses the same pattern:
# the text "Result for PPO_", anything up to "<metric name>: ", then a number
# (optionally signed, with optional fraction and exponent) captured in group 1.
#
# The original list contained 'training_iteration' twice; each name now
# appears exactly once.
_metric_names = [
    # Progress counters
    'training_iteration',
    'episodes_total',
    'num_steps_trained',
    'timesteps_total',
    # Episode reward statistics
    'episode_reward_max',
    'episode_reward_mean',
    'episode_reward_min',
]

# Shared numeric capture used by every metric regex.
_number_pattern = '([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'

metric_definitions = [
    {'Name': _name, 'Regex': 'Result for PPO_.*' + _name + ': ' + _number_pattern}
    for _name in _metric_names
]

### Run the RL estimator

There are 16 environments to choose from. You can run the RL estimator on multiple environments by providing a list of environments as well. The RL estimator will start the training job. This will take longer compared to the above cells, so be patient. You can also monitor the status of your training job from the console: go to Amazon SageMaker > Training jobs. The most recent job will be at the top.

In [77]:

# Hyperparameters forwarded to the training entry point. Keys under
# "rl.training.config." are the trainer configuration values.
rl_hyperparameters = {
    "rl.training.run": "PPO",
    "rl.training.stop.episodes_total": 10000,
    # Use the framework chosen earlier in the notebook so the trainer setting
    # always agrees with the container image tag (was hard-coded to "torch").
    "rl.training.config.framework": framework,
    # "rl.training.config.log_level": "WARN",
    "rl.training.config.env": "drug_runner",
    "rl.training.config.gamma": 0.99,
    "rl.training.config.use_gae": True,
    "rl.training.config.lambda": 0.9,
    "rl.training.config.kl_coeff": 1.0,
    "rl.training.config.num_sgd_iter": 20,
    "rl.training.config.compress_observations": True,
    "rl.training.config.lr": 2e-05,
    "rl.training.config.grad_clip": 0.001,
    "rl.training.config.clip_param": 0.4,
    "rl.training.config.vf_loss_coeff": 0.25,
    "rl.training.config.sgd_minibatch_size": 8,
    "rl.training.config.train_batch_size": 64,
    "rl.training.config.num_workers": 2,
    "rl.training.config.num_cpus_per_worker": 3,
    "rl.training.config.num_gpus_per_worker": 0,
    "rl.training.config.num_envs_per_worker": 1,
    # Only request a GPU when a GPU image/instance was selected; on a CPU or
    # 'local' instance a fixed value of 1 asks for a device that is not there.
    "rl.training.config.num_gpus": 1 if cpu_or_gpu == 'gpu' else 0,
    "rl.training.config.rollout_fragment_length": 16,
    "rl.training.config.batch_mode": "truncate_episodes",
}

estimator = RLEstimator(
    entry_point="train-sagemaker.py",
    source_dir='src',
    dependencies=["src/utils", "src/common/"],
    image_uri=custom_image_name,
    role=role,
    max_run=172800,  # seconds (48 hours)
    instance_type=instance_type,
    instance_count=1,
    output_path=s3_output_path,
    base_job_name=job_name_prefix + "-" + 'tiger',
    metric_definitions=metric_definitions,
    debugger_hook_config=False,
    hyperparameters=rl_hyperparameters,
)

# wait=False returns as soon as the job is submitted instead of blocking
# until training finishes.
estimator.fit(wait=False)

print(estimator.latest_training_job.job_name)

sm-ray-spearai-tiger-2021-12-10-04-13-13-739


## Evaluate the model