In [1]:
import boto3
import sagemaker
import sys
import os
import re
import numpy as np
import subprocess
sys.path.append("common")
from misc import get_execution_role, wait_for_s3_object
from docker_utils import build_and_push_docker_image
from sagemaker.rl import RLEstimator, RLToolkit, RLFramework
from time import gmtime, strftime
import time
from IPython.display import Markdown
from markdown_helper import *

In [2]:
# Select the instance type.
# Be aware of the cost of these training instances: they keep running for the whole training duration.
# The AWS DeepRacer console uses ml.c4.2xlarge or ml.c5.2xlarge.
instance_type = "ml.c4.2xlarge"
# instance_type = "ml.p2.xlarge"

# Start the SageMaker session
sage_session = sagemaker.session.Session()

# Prefix shared by the training job name and the S3 prefix
job_name_prefix = 'deepracer-notebook'

# Duration of the job in seconds (1 hour)
job_duration_in_seconds = 3600 * 1

# AWS Region of the SageMaker session; the notebook only supports the regions listed below
aws_region = sage_session.boto_region_name
if aws_region not in ["us-west-2", "us-east-1", "eu-west-1"]:
    # The trailing space after the comma matters: adjacent string literals are concatenated verbatim.
    raise Exception("This notebook uses RoboMaker which is available only in US East (N. Virginia), "
                    "US West (Oregon) and EU (Ireland). Please switch to one of these regions.")

In [3]:
# Default bucket of the SageMaker session
s3_bucket = sage_session.default_bucket()

# Root output location; the SDK appends the job name and output folder
s3_output_path = f"s3://{s3_bucket}/"

# The S3 prefix must contain the keyword 'sagemaker'; a UTC timestamp keeps it distinct per run
s3_prefix = "{}-sagemaker-{}".format(job_name_prefix, strftime("%y%m%d-%H%M%S", gmtime()))

# AWS account id of the caller, looked up through STS
sts = boto3.client("sts")
account_id = sts.get_caller_identity()["Account"]

# print("Using s3 bucket {}".format(s3_bucket))
# print("Model checkpoints and other metadata will be stored at: \ns3://{}/{}".format(s3_bucket, s3_prefix))

In [4]:
# Resolve the IAM role ARN that is passed to the SageMaker estimator and the RoboMaker jobs.
try:
    sagemaker_role = sagemaker.get_execution_role()
except Exception:
    # Fall back to the helper imported from `misc`. Catching Exception (not a bare
    # `except:`) lets KeyboardInterrupt / SystemExit propagate instead of being swallowed.
    sagemaker_role = get_execution_role('sagemaker')

print("Using Sagemaker IAM role arn: \n{}".format(sagemaker_role))

Using Sagemaker IAM role arn: 
arn:aws:iam::191072206499:role/service-role/AmazonSageMaker-ExecutionRole-20200320T163119


In [5]:
ec2 = boto3.client('ec2')

#
# Check whether the account has the DeepRacer VPC and use it if it is present.
# That VPC is created once you have used the DeepRacer console and created at least one model.
# If it is not present, fall back to the default VPC.
#
deepracer_security_groups = [group["GroupId"] for group in ec2.describe_security_groups()['SecurityGroups']
                             if group['GroupName'].startswith("aws-deepracer")]
if deepracer_security_groups:
    print("Using the DeepRacer VPC stacks")
    # VPCs carrying any tag whose value is 'deepracer-vpc'
    deepracer_vpcs = [vpc['VpcId'] for vpc in ec2.describe_vpcs()['Vpcs']
                      for tag in vpc.get('Tags', [])
                      if tag['Value'] == 'deepracer-vpc']
    if not deepracer_vpcs:
        # Previously surfaced as a bare IndexError from `[...][0]`
        raise RuntimeError("Found 'aws-deepracer*' security groups but no VPC tagged 'deepracer-vpc'.")
    deepracer_vpc = deepracer_vpcs[0]
    deepracer_subnets = [subnet["SubnetId"] for subnet in ec2.describe_subnets()["Subnets"]
                         if subnet["VpcId"] == deepracer_vpc]
else:
    print("Using the default VPC stacks")
    default_vpcs = [vpc['VpcId'] for vpc in ec2.describe_vpcs()['Vpcs'] if vpc["IsDefault"]]
    if not default_vpcs:
        # Previously surfaced as a bare IndexError from `[...][0]`
        raise RuntimeError("Neither a DeepRacer VPC nor a default VPC was found in this region.")
    deepracer_vpc = default_vpcs[0]

    deepracer_security_groups = [group["GroupId"] for group in ec2.describe_security_groups()['SecurityGroups']
                                 if 'VpcId' in group and group["GroupName"] == "deepracer-experiment"
                                 and group["VpcId"] == deepracer_vpc]
    if not deepracer_security_groups:
        # Later cells hand this list to the estimator and to the RoboMaker VPC config,
        # so make an empty result visible right here.
        print("WARNING: no 'deepracer-experiment' security group found in VPC {}".format(deepracer_vpc))

    deepracer_subnets = [subnet["SubnetId"] for subnet in ec2.describe_subnets()["Subnets"]
                         if subnet["VpcId"] == deepracer_vpc and subnet['DefaultForAz']]

print("Using VPC:", deepracer_vpc)
print("Using security group:", deepracer_security_groups)
print("Using subnets:", deepracer_subnets)

Using the DeepRacer VPC stacks
Using VPC: vpc-0a015892e03bcd4f1
Using security group: ['sg-04b73203eee4e4dae']
Using subnets: ['subnet-0d3431d7c5ce22379', 'subnet-079730a74022dccba', 'subnet-004ef9ce8055c5ef4', 'subnet-0be63b464b786ba0a', 'subnet-0114f5ddffc299b42', 'subnet-03b2d579b1e808fb6']


In [6]:
# Each entry pairs a metric name with a regex whose first capture group extracts the
# metric value from a training log line. Example log lines:
#   Training> Name=main_level/agent, Worker=0, Episode=19, Total reward=-102.88, Steps=19019, Training iteration=1
#   Policy training> Surrogate loss=-0.32664725184440613, KL divergence=7.255815035023261e-06, Entropy=2.83156156539917, training epoch=0, learning_rate=0.00025
#   Testing> Name=main_level/agent, Worker=0, Episode=19, Total reward=1359.12, Steps=20015, Training iteration=2
metric_definitions = [
    {"Name": metric_name, "Regex": log_pattern}
    for metric_name, log_pattern in (
        ("reward-training", r"^Training>.*Total reward=(.*?),"),
        ("ppo-surrogate-loss", r"^Policy training>.*Surrogate loss=(.*?),"),
        ("ppo-entropy", r"^Policy training>.*Entropy=(.*?),"),
        ("reward-testing", r"^Testing>.*Total reward=(.*?),"),
    )
]

In [7]:
from sagemaker.debugger import Rule, DebuggerHookConfig, TensorBoardOutputConfig, CollectionConfig, rule_configs

# Debugger rules evaluated for the training job. Only LossNotDecreasing is enabled, and it
# asks for the "losses" collection to be saved with train.save_interval set to 50.
# Other built-in rules can be enabled the same way, i.e. Rule.sagemaker(rule_configs.<name>()):
#   dead_relu, exploding_tensor, poor_weight_initialization, saturated_activation,
#   vanishing_gradient, weight_update_ratio, all_zero, class_imbalance, confusion,
#   overfit, overtraining, similar_across_runs, tensor_variance, unchanged_tensor
rules = [
    Rule.sagemaker(
        base_config=rule_configs.loss_not_decreasing(),
        collections_to_save=[
            CollectionConfig(name="losses", parameters={"train.save_interval": "50"}),
        ],
    ),
]

# Optional hook configuration: save every tensor, with the global save interval set to 1,
# plus a "losses" collection that matches every tensor name.
# Note: the estimator cell currently has its `debugger_hook_config=` argument commented out.
debugger_hook_config = DebuggerHookConfig(
    hook_parameters={
        "save_all": "True",
        # Key was misspelled "gloabal.save_interval"
        "global.save_interval": '1'
    },
    collection_configs=[
        CollectionConfig(
            name="losses",
            parameters={
                "include_regex": ".*"
            }
        )
    ]
)

In [8]:
# Custom training image in ECR (tag: cpu-smdebug).
# NOTE(review): account id and region are hard-coded in this URI — confirm it is reachable from your account.
custom_image_name = "191072206499.dkr.ecr.us-east-1.amazonaws.com/deepracer-sagemaker:cpu-smdebug"
# custom_image_name = "220744436658.dkr.ecr.us-east-1.amazonaws.com/deepracersm:latest"

# Hyperparameters handed to the training entry point
hyperparameters = dict(
    s3_bucket=s3_bucket,
    s3_prefix=s3_prefix,
    aws_region=aws_region,
    model_metadata_s3_key=f"s3://{s3_bucket}/custom_files/model_metadata.json",
    RLCOACH_PRESET="deepracer",
    batch_size=64,
    beta_entropy=0.01,
    discount_factor=0.99,
    e_greedy_value=0.05,
    epsilon_steps=10000,
    exploration_type="categorical",
    loss_type="huber",
    lr=0.0003,
    num_episodes_between_training=20,
    num_epochs=10,
    stack_size=1,
    term_cond_avg_score=100000.0,
    term_cond_max_episodes=10000,
)

estimator = RLEstimator(
    # Code to run
    entry_point="training_worker.py",
    source_dir='src',
    dependencies=["common/sagemaker_rl"],
    # Container: a custom image is used instead of the toolkit/framework selection
    image_name=custom_image_name,
    # toolkit=RLToolkit.COACH,
    # toolkit_version='0.11.1',
    # framework=RLFramework.TENSORFLOW,
    # Infrastructure
    role=sagemaker_role,
    train_instance_type=instance_type,
    train_instance_count=1,
    train_max_run=job_duration_in_seconds,
    subnets=deepracer_subnets,
    security_group_ids=deepracer_security_groups,
    # Job naming, outputs and metrics
    base_job_name=job_name_prefix,
    output_path=s3_output_path,
    metric_definitions=metric_definitions,
    hyperparameters=hyperparameters,
    # Debugger
    rules=rules,
    # debugger_hook_config=debugger_hook_config
)

# Start training without blocking the notebook, then report the generated job name
estimator.fit(wait=False)
job_name = estimator.latest_training_job.job_name
print("Training job: {}".format(job_name))

Training job: deepracer-notebook-2020-07-10-12-39-21-565


In [9]:
# RoboMaker client; used further down to create the simulation jobs
robomaker = boto3.client("robomaker")

In [10]:
# Known track names; the trailing number is the list index used by training_tracks_indices
available_tracks = [
    "reinvent_base",              # 0
    "AWS_track",                  # 1
    "Tokyo_Training_track",       # 2
    "Virtual_May19_Train_track",  # 3 (london)
    "reInvent2018_36inch",        # 4
    "reInvent2018_mirror",        # 5
    "reInvent2019_track",         # 6
]

# Indices into available_tracks selected for training
training_tracks_indices = [6]

# One simulation worker per selected track
num_simulation_workers = len(training_tracks_indices)


In [11]:
%%bash -s "$s3_bucket" "$s3_prefix" "$account_id"
# Substitute the <bucket>, <prefix> and <account_id> placeholders in training_params.yaml with
# the three arguments ($1, $2, $3) and write the result to new_training_params.yaml.
# Each substitution has no `g` flag, so only the first match on a line is replaced.
sed "s/<bucket>/${1}/; s/<prefix>/${2}/; s/<account_id>/${3}/" ./training_params.yaml > ./new_training_params.yaml

In [12]:
# Upload the rendered parameter file under the run's S3 prefix
s3_client = boto3.client("s3")

s3_client.upload_file(
    Filename="./new_training_params.yaml",
    Bucket=s3_bucket,
    Key=f"{s3_prefix}/training_params.yaml",
)

In [13]:
# NOTE(review): this ARN and APP_REGION below are pinned to us-east-1, although the notebook
# accepts three regions — confirm before running elsewhere.
simulation_app_arn = "arn:aws:robomaker:us-east-1:191072206499:simulation-application/deepracer-simapp-77373afe-274f-4ebc-b5e2-9d67ccc51241/1576204587949"

# Environment variables handed to the simulation application's launch file
envriron_vars = {
    "WORLD_NAME" : "reinvent_base",
    "ALTERNATE_DRIVING_DIRECTION" : "false",
    "SAGEMAKER_SHARED_S3_BUCKET" : s3_bucket,
    "SAGEMAKER_SHARED_S3_PREFIX" : s3_prefix,
    # NOTE(review): the value is the training job *name*, not an ARN — verify what the simulation app expects
    "TRAINING_JOB_ARN": job_name,
    "APP_REGION" : "us-east-1",
    "METRIC_NAME": "TrainingRewardScore",
    "METRIC_NAMESPACE": "AWSDeepRacer",
    "REWARD_FILE_S3_KEY" : "custom_files/reward.py",
    "MODEL_METADATA_FILE_S3_KEY" : "custom_files/model_metadata.json",
    "METRICS_S3_BUCKET": s3_bucket,
    "METRICS_S3_OBJECT_KEY": s3_prefix + "/training_metrics.json",
    "TARGET_REWARD_SCORE": "None",
    "ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID": account_id,
    "S3_YAML_NAME" : "training_params.yaml",
    "MIN_EVAL_TRIALS" : "5"
}

simulation_application = {"application":simulation_app_arn,
                          "launchConfig": {"packageName": "deepracer_simulation_environment",
                                           "launchFile": "distributed_training.launch",
                                           "environmentVariables": envriron_vars}
                         }

vpcConfig = {"subnets": deepracer_subnets,
             "securityGroups": deepracer_security_groups,
             "assignPublicIp": True}

# Create one simulation job per simulation worker
responses = []
for job_no in range(num_simulation_workers):
#     simulation_application["launchConfig"]["environmentVariables"]["WORLD_NAME"] = available_tracks[training_tracks_indices[job_no]]
    # The timestamp only has one-second resolution, so the worker index is appended to keep
    # the idempotency token unique when several jobs are created within the same second.
    client_request_token = "{}-{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()), job_no)
    response =  robomaker.create_simulation_job(iamRole=sagemaker_role,
                                            clientRequestToken=client_request_token,
                                            maxJobDurationInSeconds=job_duration_in_seconds,
                                            failureBehavior="Fail",
                                            simulationApplications=[simulation_application],
                                            vpcConfig=vpcConfig
                                            )
    print(response)
    responses.append(response)


print("Created the following jobs:")
job_arns = [response["arn"] for response in responses]
for response in responses:
    print("Job ARN", response["arn"])
    
    

{'ResponseMetadata': {'RequestId': 'bd26bbf2-379a-4338-9db4-d2567f862df4', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Fri, 10 Jul 2020 12:39:24 GMT', 'content-type': 'application/json', 'content-length': '3808', 'connection': 'keep-alive', 'x-amzn-requestid': 'bd26bbf2-379a-4338-9db4-d2567f862df4', 'x-amz-apigw-id': 'PdQrUGlfIAMFedQ=', 'x-amzn-trace-id': 'Root=1-5f08617b-ae1575067a8dacddd1d9a2cf'}, 'RetryAttempts': 0}, 'arn': 'arn:aws:robomaker:us-east-1:191072206499:simulation-job/sim-8w0d9b954b9p', 'status': 'Pending', 'lastUpdatedAt': datetime.datetime(2020, 7, 10, 12, 39, 24, tzinfo=tzlocal()), 'failureBehavior': 'Fail', 'clientRequestToken': '2020-07-10-12-39-23', 'loggingConfig': {'recordAllRosTopics': False}, 'maxJobDurationInSeconds': 3600, 'simulationTimeMillis': 0, 'iamRole': 'arn:aws:iam::191072206499:role/service-role/AmazonSageMaker-ExecutionRole-20200320T163119', 'simulationApplications': [{'application': 'arn:aws:robomaker:us-east-1:191072206499:simulation-applicati

In [14]:
from smdebug.trials import create_trial

# Keep the debugger artifacts location in its own variable. Re-assigning `s3_output_path`
# here would clobber the estimator's output location for any later re-run of the estimator cell.
debugger_artifacts_path = estimator.latest_job_debugger_artifacts_path()
trial = create_trial(debugger_artifacts_path)

[2020-07-10 12:43:49.653 f2b876e473a2:146 INFO s3_trial.py:42] Loading trial debug-output at path s3://sagemaker-us-east-1-191072206499/deepracer-notebook-2020-07-10-12-39-21-565/debug-output


In [16]:
# List the tensor names available in the loaded trial
trial.tensor_names()

['main_level/agent/main/online/network_0/v_values_head_0/Mean:0',
 'main_level/agent/main/online/network_1/ppo_head_0/Neg:0',
 'main_level/agent/main/online/network_1/ppo_head_0/Neg_1:0',
 'main_level/agent/main/target/network_1/ppo_head_0/Neg_1:0']

In [17]:
# Inspect the recorded values of one tensor; the name is taken from the tensor_names() listing
trial.tensor('main_level/agent/main/target/network_1/ppo_head_0/Neg_1:0').values()

{100: array([-0.01945355], dtype=float32),
 200: array([-0.01945326], dtype=float32)}

In [16]:
!pip install matplotlib seaborn

Collecting matplotlib
  Using cached matplotlib-3.2.2-cp36-cp36m-manylinux1_x86_64.whl (12.4 MB)
Collecting seaborn
  Using cached seaborn-0.10.1-py3-none-any.whl (215 kB)
Collecting kiwisolver>=1.0.1
  Using cached kiwisolver-1.2.0-cp36-cp36m-manylinux1_x86_64.whl (88 kB)
Collecting cycler>=0.10
  Using cached cycler-0.10.0-py2.py3-none-any.whl (6.5 kB)
Installing collected packages: kiwisolver, cycler, matplotlib, seaborn
Successfully installed cycler-0.10.0 kiwisolver-1.2.0 matplotlib-3.2.2 seaborn-0.10.1
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [18]:
import matplotlib.pyplot as plt
import seaborn as sns
import re

def get_data(trial, tname):
    """
    Collect every recorded value of the tensor called `tname`.

    Looks the tensor up on `trial`, asks it for the steps it has data for,
    and reads the value at each of those steps.
    Returns a `(steps, values)` pair with one value per step.
    """
    recorded = trial.tensor(tname)
    step_numbers = recorded.steps()
    values = []
    for step in step_numbers:
        values.append(recorded.value(step))
    return step_numbers, values

def plot_collection(trial, collection_name, regex='.*', figsize=(8, 6), name_filter="ppo_head"):
    """
    Plot the tensors of one collection of `trial` on a single axes.

    Parameters:
        trial: trial object providing `collection(name).tensor_names` and `tensor(name)`.
        collection_name: name of the collection whose tensors are plotted.
        regex: only tensor names matching this pattern (via `re.match`) are plotted.
        figsize: figure size passed to `plt.subplots`.
        name_filter: substring a tensor name must contain to be plotted; defaults to
            "ppo_head" (the previously hard-coded filter). Pass None or "" to disable it.

    Tensors whose data cannot be fetched or plotted are skipped, with a message
    naming the tensor and the error instead of being dropped silently.
    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.despine()

    pattern = re.compile(regex)
    tensors = trial.collection(collection_name).tensor_names

    for tensor_name in sorted(tensors):
        if not pattern.match(tensor_name):
            continue
        if name_filter and name_filter not in tensor_name:
            continue
        try:
            steps, data = get_data(trial, tensor_name)
            ax.plot(steps, data, label=tensor_name)
        except Exception as err:
            # Best effort: keep plotting the remaining tensors, but report what was skipped.
            print("Skipping tensor {}: {}".format(tensor_name, err))

    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_xlabel('Iteration')

In [1]:
 # Needs the cells that define `plot_collection` and `trial` to have been run in this kernel first
 plot_collection(trial, "losses")

NameError: name 'plot_collection' is not defined

In [14]:
# Render console links for the simulation jobs in `job_arns`.
# generate_robomaker_links is presumably provided by the `markdown_helper` star import — confirm.
display(Markdown(generate_robomaker_links(job_arns, aws_region)))

> Click on the following links for visualization of simulation jobs on RoboMaker Console
- [Simulation 1](https://us-east-1.console.aws.amazon.com/robomaker/home?region=us-east-1#simulationJobs/sim-rvqjp5r2hh8s)  

You can click on Gazebo after you open the above link to start the simulator.

In [15]:
# Evaluation status of the Debugger rules attached to the latest training job
estimator.latest_training_job.rule_job_summary()

[{'RuleConfigurationName': 'LossNotDecreasing',
  'RuleEvaluationStatus': 'InProgress',
  'LastModifiedTime': datetime.datetime(2020, 7, 6, 7, 34, 24, 314000, tzinfo=tzlocal())}]

In [16]:
# These utilities build CloudWatch console links to the log streams of the Debugger rule evaluation jobs
def _get_rule_job_name(training_job_name, rule_configuration_name, rule_job_arn):
        """Helper function to get the rule job name"""
        return "{}-{}-{}".format(
            training_job_name[:26], rule_configuration_name[:26], rule_job_arn[-8:]
        )
    
def _get_cw_url_for_rule_job(rule_job_name, region):
    return "https://{}.console.aws.amazon.com/cloudwatch/home?region={}#logStream:group=/aws/sagemaker/ProcessingJobs;prefix={};streamFilter=typeLogStreamPrefix".format(region, region, rule_job_name)


def get_rule_jobs_cw_urls(estimator):
    """
    Map each Debugger rule of the estimator's latest training job to its CloudWatch logs URL.

    Parameters:
        estimator: estimator whose `latest_training_job` is inspected.

    Returns:
        dict of rule configuration name -> CloudWatch console URL. Rules that have no
        evaluation job ARN yet are left out, so the dict may be empty.
    """
    region = boto3.Session().region_name
    # describe() is called once and the result reused for both lookups
    job_description = estimator.latest_training_job.describe()
    training_job_name = job_description["TrainingJobName"]
    # The key can be missing (e.g. no rules configured); treat that as "no rules"
    rule_eval_statuses = job_description.get("DebugRuleEvaluationStatuses", [])

    result = {}
    for status in rule_eval_statuses:
        rule_job_arn = status.get("RuleEvaluationJobArn")
        if rule_job_arn is not None:
            rule_job_name = _get_rule_job_name(training_job_name, status["RuleConfigurationName"], rule_job_arn)
            result[status["RuleConfigurationName"]] = _get_cw_url_for_rule_job(rule_job_name, region)
    return result

# Show the CloudWatch log links; the dict is empty until a rule has an evaluation job ARN
get_rule_jobs_cw_urls(estimator)

{}

### Clean up RoboMaker and SageMaker training job

Execute the cells below if you want to stop the RoboMaker simulation jobs and the SageMaker training job.

In [None]:
# # Cancelling robomaker job
# for job_arn in job_arns:
#     robomaker.cancel_simulation_job(job=job_arn)

# # Stopping sagemaker training job
# sage_session.sagemaker_client.stop_training_job(TrainingJobName=estimator._current_job_name)

### Clean Up Simulation Application Resource

In [None]:
# robomaker.delete_simulation_application(application=simulation_app_arn)

### Clean your S3 bucket (Uncomment the awscli commands if you want to do it)

In [None]:
## Uncomment if you only want to clean the s3 bucket
# sagemaker_s3_folder = "s3://{}/{}".format(s3_bucket, s3_prefix)
# !aws s3 rm --recursive {sagemaker_s3_folder}

# robomaker_s3_folder = "s3://{}/{}".format(s3_bucket, job_name)
# !aws s3 rm --recursive {robomaker_s3_folder}

# robomaker_sim_app = "s3://{}/{}".format(s3_bucket, 'robomaker')
# !aws s3 rm --recursive {robomaker_sim_app}

# model_output = "s3://{}/{}".format(s3_bucket, s3_bucket)
# !aws s3 rm --recursive {model_output}

### Clean the docker images
Run this only when you want to remove all Docker images, for example to free up disk space on the SageMaker instance.

In [None]:
# !docker rmi -f $(docker images -q)