In [None]:
# Recreate an empty `mountain/` directory; the next cell writes train.py into it.
!rm -rf mountain && mkdir -p mountain

In [None]:
%%writefile mountain/train.py
import sys
import subprocess
# Install pinned runtime dependencies before the remaining imports run:
#  - sagemaker-training provides the `sagemaker_training` helpers imported below;
#  - pyglet is presumably needed by gym for rendering/video recording -- TODO confirm.
subprocess.check_call([sys.executable, "-m", "pip", "install", "sagemaker-training==3.9.2", "pyglet==1.5.16"])

import os
import copy
import traceback
import argparse
import glob
import re
import sys
import json
import gym
import ray
import ray.tune
from ray.rllib.agents.registry import get_agent_class
from shutil import copyfile
from sagemaker_training import environment, intermediate_output, logging_config, params, files

def start_file_sync(env):
    """Start the service that copies files from the intermediate dir to S3.

    The handle returned by ``intermediate_output.start_sync`` is stored in the
    module-level ``intermediate_sync`` so the main program can join it on exit.

    Args:
        env: training environment object exposing ``sagemaker_s3_output()``.
    """
    global logger, intermediate_sync
    # AWS_REGION wins; otherwise use the region variable named by the toolkit.
    fallback_region = os.environ.get(params.REGION_NAME_ENV)
    region = os.environ.get("AWS_REGION", fallback_region)
    s3_endpoint_url = os.environ.get(params.S3_ENDPOINT_URL)

    logger.info(
        "Starting intermediate sync. %s: %s - %s"
        % (region, env.sagemaker_s3_output(), s3_endpoint_url)
    )
    intermediate_sync = intermediate_output.start_sync(
        env.sagemaker_s3_output(),
        region,
        endpoint_url=s3_endpoint_url,
    )
    
def get_latest_checkpoint(env, algo):
    """Locate the newest checkpoint of the newest ``algo`` experiment.

    Looks in ``<env.output_intermediate_dir>/training`` for entries whose name
    starts with ``algo``, takes the last one in sorted order, then takes the
    highest-numbered ``checkpoint*`` entry inside it.

    Args:
        env: object exposing ``output_intermediate_dir``.
        algo: algorithm name used as the experiment directory prefix.

    Returns:
        Tuple ``(ckpt_path, ckpt_meta_filename)``: the checkpoint directory and
        the name ``<prefix>-<number>`` derived from its ``<prefix>_<number>``
        directory name.

    Raises:
        Exception: if no experiment directory or no checkpoint is found.
    """
    global logger

    def _typed(token):
        # Tag every token so an int is never compared directly with a str
        # (which raises TypeError); equal-typed tokens order exactly as before.
        return (0, int(token)) if token.isdigit() else (1, token)

    def _experiment_key(path):
        # Order by the last two '_'-separated pieces of the path with dashes
        # removed, character by character.
        tail = ''.join(path.replace('-', '').split('_')[-2:])
        return [_typed(ch) for ch in tail]

    def _checkpoint_key(path):
        # Natural sort: digit runs compare numerically so checkpoint_10 > checkpoint_2.
        return [_typed(tok) for tok in re.split(r'(\d+)', path)]

    logger.info("Latest checkpoint")
    # get the latest experiment
    experiments = glob.glob(os.path.join(env.output_intermediate_dir, 'training', f'{algo}*'))
    if not experiments:
        # Fail loudly instead of returning None, which callers would try to unpack.
        raise Exception("No experiment found!")
    experiments.sort(key=_experiment_key)
    exp_name = experiments[-1]

    chkpts = glob.glob(f'{exp_name}/checkpoint*')
    if not chkpts:
        raise Exception("No checkpoint found!")
    chkpts.sort(key=_checkpoint_key)

    ckpt_path = chkpts[-1]
    # Directory "checkpoint_<n>" maps to the file name "checkpoint-<n>" (no zero padding).
    name_parts = ckpt_path.split('/')[-1].split('_')
    ckpt_meta_filename = f'{name_parts[0]}-{int(name_parts[1])}'
    logger.info(f'{ckpt_path}/{ckpt_meta_filename}')
    return ckpt_path, ckpt_meta_filename

def save_model(env_vars, experiment_params):
    """Restore the latest checkpoint into a fresh agent and export its policy model.

    Args:
        env_vars: environment object; ``model_dir`` is read here and the object
            is passed on to ``get_latest_checkpoint``.
        experiment_params: experiment dict with a ``training`` entry holding
            ``run``, ``env`` and ``config``.
    """
    global logger
    training_spec = experiment_params['training']

    # Work on a deep copy so the caller's config is left untouched, then
    # override the settings for a single-worker, CPU-only agent without monitoring.
    config = copy.deepcopy(experiment_params)['training']['config']
    config.update({"monitor": False, "num_workers": 1, "num_gpus": 0})
    logger.info(experiment_params)

    algo = training_spec['run']
    env_name = training_spec['env']
    logger.info(f'{algo} - {env_name}')
    agent = get_agent_class(algo)(env=env_name, config=config)

    ckpt_path, ckpt_meta_filename = get_latest_checkpoint(env_vars, algo)

    logger.info('Restoring agent...')
    restore_target = os.path.join(ckpt_path, ckpt_meta_filename)
    agent.restore(restore_target)

    logger.info('Exporting model...')
    # The model is exported into the "1" subdirectory of the model dir.
    export_dir = os.path.join(env_vars.model_dir, "1")
    agent.export_policy_model(export_dir)
    
def str2bool(value):
    """Parse a command-line boolean flag value.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value -- including "False" -- becomes True. This parser maps the
    usual textual spellings explicitly.

    Args:
        value: raw argument (string) or an actual bool.

    Returns:
        The parsed boolean; an empty string yields False, as ``bool("")`` did.

    Raises:
        argparse.ArgumentTypeError: if the value is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected, got %r" % (value,))


if __name__ == "__main__":
    env_vars = environment.Environment()
    parser = argparse.ArgumentParser()
    logging_config.configure_logger(env_vars.log_level)

    # Boolean flags use str2bool so that "--record-videos False" really disables it.
    parser.add_argument("--eager", type=str2bool, default=False)
    parser.add_argument("--log-level", type=int, default=0)
    parser.add_argument("--gamma", type=float, default=0.995)
    parser.add_argument("--learning-rate", type=float, default=0.0001)
    parser.add_argument("--kl-coeff", type=float, default=1.0)
    parser.add_argument("--num-sgd-iter", type=int, default=20)
    parser.add_argument("--sgd-minibatch-size", type=int, default=1000)
    parser.add_argument("--train-batch-size", type=int, default=25000)
    parser.add_argument("--record-videos", type=str2bool, default=False)
    # Default worker count is one less than the CPU count, but never below 3.
    parser.add_argument("--num-workers", type=int, default=max(env_vars.num_cpus-1, 3))
    parser.add_argument("--num-gpus", type=int, default=env_vars.num_gpus)
    parser.add_argument("--batch-mode", type=str, default="complete_episodes")
    parser.add_argument("--episode-reward-mean", type=int, default=18)
    # Unrecognised arguments are collected in `unknown` and otherwise ignored.
    args,unknown = parser.parse_known_args()

    # Module-level globals used by start_file_sync / get_latest_checkpoint / save_model.
    logger = logging_config.get_logger()
    intermediate_sync = None

    env_name = "MountainCarContinuous-v0"

    experiment_params = {
        "training": {
            "env": env_name,
            "run": "PPO",
            "stop": {
                "episode_reward_mean": args.episode_reward_mean,
            },
            "local_dir": env_vars.output_intermediate_dir,
            "checkpoint_at_end": True,
            "checkpoint_freq": 10,
            "config": {
                "log_level": args.log_level,
                "gamma": args.gamma,
                "kl_coeff": args.kl_coeff,
                "num_sgd_iter": args.num_sgd_iter,
                "lr": args.learning_rate,
                "sgd_minibatch_size": args.sgd_minibatch_size,
                "train_batch_size": args.train_batch_size,
                "monitor": args.record_videos,
                "model": {
                    # https://docs.ray.io/en/master/rllib-models.html#default-model-config-settings
                    "free_log_std": True
                },
                "num_workers": args.num_workers,
                "num_gpus": args.num_gpus,
                "batch_mode": args.batch_mode
            }
        }
    }

    try:
        start_file_sync(env_vars)
        # main program
        ray.init()
        ray.tune.register_env(env_name, lambda x: gym.make(env_name))
        # run_experiments receives a copy, so experiment_params stays intact for save_model.
        ray.tune.run_experiments(copy.deepcopy(experiment_params))
        save_model(env_vars, experiment_params)
        ray.shutdown()

        files.write_success_file()
        logger.info("Reporting training SUCCESS")
    except Exception as e:
        failure_msg = "framework error: \n%s\n%s" % (traceback.format_exc(), str(e))
        logger.error("Reporting training FAILURE: %s" % failure_msg)

        files.write_failure_file(failure_msg)
    finally:
        # Wait for the intermediate sync (if it was started) before exiting.
        if intermediate_sync:
            intermediate_sync.join()

In [None]:
import sagemaker
import boto3
# Default S3 bucket of the SageMaker session and the s3:// URL built from it
sagemaker_session = sagemaker.session.Session()
s3_bucket = sagemaker_session.default_bucket()
s3_output_path = f's3://{s3_bucket}/'

# Region of the current boto3 session and the execution role for SageMaker
aws_region = boto3.Session().region_name
role = sagemaker.get_execution_role()
print(f"S3 bucket path: {s3_output_path}")

In [None]:
from sagemaker.rl import RLEstimator, RLToolkit, RLFramework

# Container image used for training (Ray 1.1.0 / TensorFlow GPU / py36 per its tag).
image_name="462105765813.dkr.ecr.us-east-1.amazonaws.com/sagemaker-rl-ray-container:ray-1.1.0-tf-gpu-py36"
estimator = RLEstimator(
    image_uri=image_name,
    entry_point="train.py",
    source_dir='mountain',
    role=role,
    instance_type='ml.p3.2xlarge',
    #instance_type='local_gpu',
    instance_count=1,
    output_path=s3_output_path,
    metric_definitions=RLEstimator.default_metric_definitions(RLToolkit.RAY),
    # Keys must match the argparse flags in mountain/train.py: the script uses
    # parse_known_args, so a misspelled key is silently ignored and the default applies.
    hyperparameters={
        "log-level": 20,
        "gamma": 0.995,
        "learning-rate": 0.0001,
        "kl-coeff": 1.0,
        "num-sgd-iter": 20,
        "sgd-minibatch-size": 1000,
        "train-batch-size": 25000,
        "record-videos": True,
        "batch-mode": "complete_episodes",
        "episode-reward-mean": 80
    }
)

In [None]:
!sudo rm -rf /tmp/tmp*
estimator.fit(wait=True)
job_name = estimator.latest_training_job.job_name
print("Training job: %s" % job_name)

In [None]:
import os

print("Job name: {}".format(job_name))

# S3 locations derived from the bucket and the training job name.
s3_url = "s3://{}/{}".format(s3_bucket,job_name)

output_tar_key = "{}/output/output.tar.gz".format(job_name)

intermediate_folder_key = "{}/output/intermediate/".format(job_name)
output_url = "s3://{}/{}".format(s3_bucket, output_tar_key)
intermediate_url = "s3://{}/{}".format(s3_bucket, intermediate_folder_key)

print("S3 job path: {}".format(s3_url))
print("Output.tar.gz location: {}".format(output_url))
print("Intermediate folder path: {}".format(intermediate_url))

# Local scratch folder for this job; makedirs is safe to re-run and raises on
# real failures instead of ignoring a shell exit status.
tmp_dir = "/tmp/{}".format(job_name)
os.makedirs(tmp_dir, exist_ok=True)
print("Create local folder {}".format(tmp_dir))

In [None]:
import os
import glob
from IPython.display import Video, display

path = "%s%s" % (intermediate_url, "training/")
!rm -rf $tmp_dir
!aws s3 sync --quiet $path $tmp_dir

sub_dir = []
for i in os.listdir(tmp_dir): 
    full_path=os.path.join(tmp_dir, i)
    if os.path.isdir(full_path): sub_dir.append(full_path)
if len(sub_dir) > 0:
    video_path = sorted(glob.glob(sub_dir[0] + '/*.mp4'))[-1]
    print(video_path)
    !cp $video_path video.mp4
    display(Video("video.mp4"))
else:
    print('There is no available video yet...')