# Running RAPIDS hyperparameter experiments at scale on Amazon SageMaker

### Import packages and create Amazon SageMaker and Boto3 sessions

In [None]:
import sagemaker
import time
import boto3

In [None]:
# SageMaker session (used later to upload data) and the execution role that
# is handed to the estimators.
session = sagemaker.Session()
execution_role = sagemaker.get_execution_role()

# AWS account id and region; both are needed to build the ECR image URI.
caller_identity = boto3.client('sts').get_caller_identity()
account = caller_identity.get('Account')
region = boto3.Session().region_name

In [None]:
# Display the resolved AWS account id and region as a sanity check.
account, region

### Upload the HIGGS dataset to an S3 bucket

In [None]:
!mkdir dataset
!wget -P dataset https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz
!gunzip dataset/HIGGS.csv.gz

In [None]:
# Upload everything under ./dataset to S3 and keep the returned S3 URI,
# which is passed to the training and tuning jobs later on.
s3_data_dir = session.upload_data(
    path='dataset',
    key_prefix='dataset/higgs-dataset',
)

In [None]:
# Display the S3 URI returned by the upload.
s3_data_dir

### Download latest RAPIDS container with cloud-ml examples

Extend the container by copying the training script and installing the [SageMaker Training toolkit](https://github.com/aws/sagemaker-training-toolkit) to make RAPIDS compatible with SageMaker.

In [None]:
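# Alternative image configuration, kept for reference (disabled).
# The active configuration is the `estimator_info` assigned in the next cell.
# estimator_info = {
#     'rapids_container': 'rapidsai/rapidsai-cloud-ml:latest',
#     'ecr_image': 'sagemaker-rapids-cloud-ml:latest',
#     'ecr_repository': 'sagemaker-rapids-cloud-ml'
# }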

In [None]:
# Image names used throughout the notebook:
#   rapids_container - upstream RAPIDS image that is pulled with `docker pull`
#   ecr_image        - image name used when building the ECR image URI
#   ecr_repository   - name of the ECR repository that gets created
estimator_info = dict(
    rapids_container='rapidsai/rapidsai-nightly:22.12-cuda11.5-runtime-ubuntu18.04-py3.9',
    ecr_image='sagemaker-rapids-nightly',
    ecr_repository='sagemaker-rapids-nightly',
)

In [None]:
%%time
# Pull the upstream RAPIDS image named in estimator_info.
!docker pull {estimator_info['rapids_container']}

In [None]:
# Print the Dockerfile from the ./docker build directory for inspection.
!cat docker/Dockerfile

In [None]:
# !docker build -t sagemaker-rapids:latest docker
!docker build -t sagemaker-rapids-nightly docker

In [None]:
# List local Docker images to check the result of the build.
!docker images

### Publish to Elastic Container Registry

Note: SageMaker does not support using training images from a private Docker registry (e.g., DockerHub), so we need to push the SageMaker-compatible RAPIDS container to the Amazon Elastic Container Registry (ECR).

In [None]:
# Fully qualified ECR image URI: <account>.dkr.ecr.<region>.amazonaws.com/<image>
ecr_registry = f"{account}.dkr.ecr.{region}.amazonaws.com"
ECR_container_fullname = f"{ecr_registry}/{estimator_info['ecr_image']}"

In [None]:
# Display the ECR image URI that the image will be tagged and pushed as.
ECR_container_fullname 

In [None]:
!docker tag {estimator_info['rapids_container']} {ECR_container_fullname}

In [None]:
print( f"source      : {estimator_info['rapids_container']}\n"
       f"destination : {ECR_container_fullname}")

In [None]:
# List local Docker images again; the ECR-tagged image should now appear.
!docker images

In [None]:
!aws ecr create-repository --repository-name {estimator_info['ecr_repository']}
!$(aws ecr get-login --no-include-email --region {region})

In [None]:
# Push the tagged image to ECR.
!docker push {ECR_container_fullname}

##### Define hyperparameters: start with best guess values
Find the full list of Random Forest hyperparameters here in the RAPIDS doc page:
<br>
https://docs.rapids.ai/api/cuml/stable/api.html#random-forest

In [None]:
# Best-guess starting hyperparameters for the first training job.
hyperparams = {
    'n_estimators': 15,
    'max_depth': 5,
    'n_bins': 8,
    # GINI: 0, ENTROPY: 1
    'split_criterion': 0,
    # true: sample with replacement, false: sample without replacement
    'bootstrap': 0,
    # -1: unlimited leaves
    'max_leaves': -1,
    'max_features': 0.2,
}

In [None]:
from sagemaker.estimator import Estimator

# Metric definition: name plus the regex used to pull the test accuracy
# value out of the training job's log output.
accuracy_metric = {'Name': 'test_acc', 'Regex': 'test_acc: ([0-9\\.]+)'}

# Single-instance training job that runs the custom RAPIDS image from ECR
# with the starting hyperparameters.
rapids_estimator = Estimator(
    image_uri=ECR_container_fullname,
    role=execution_role,
    instance_count=1,
    instance_type='ml.g4dn.4xlarge',
    hyperparameters=hyperparams,
    metric_definitions=[accuracy_metric],
)

In [None]:
%%time
rapids_estimator.fit(inputs = s3_data_dir)

In [None]:
from sagemaker.tuner import (
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
    IntegerParameter,
)

# Search space handed to the tuner: one range per tunable hyperparameter.
hyperparameter_ranges = dict(
    n_estimators=IntegerParameter(10, 200),
    max_depth=IntegerParameter(1, 22),
    n_bins=IntegerParameter(5, 24),
    split_criterion=CategoricalParameter([0, 1]),
    bootstrap=CategoricalParameter([True, False]),
    max_features=ContinuousParameter(0.01, 0.5),
)

In [None]:
from sagemaker.estimator import Estimator

# Estimator used as the template for the tuning jobs.
# image_uri must be the ECR image URI built earlier in the notebook; the
# previous value (`image`) was never defined and raised a NameError.
rapids_estimator = Estimator(image_uri=ECR_container_fullname,
                          role=execution_role,
                          instance_count=1,
                          instance_type='ml.p3.2xlarge',
                          hyperparameters=hyperparams,
                          metric_definitions=[{'Name': 'test_acc', 'Regex': 'test_acc: ([0-9\\.]+)'}])

In [None]:
# Metric the tuner optimises, extracted from the training logs by regex.
tuning_metric = {'Name': 'test_acc', 'Regex': 'test_acc: ([0-9\\.]+)'}

# Bayesian search over hyperparameter_ranges that maximises test_acc.
# max_jobs / max_parallel_jobs are both 1, so only a single job is launched.
tuner = HyperparameterTuner(
    rapids_estimator,
    objective_metric_name='test_acc',
    hyperparameter_ranges=hyperparameter_ranges,
    strategy='Bayesian',
    max_jobs=1,
    max_parallel_jobs=1,
    objective_type='Maximize',
    metric_definitions=[tuning_metric],
)

In [None]:
# Suffix the job name with a UTC timestamp so repeated runs get distinct names.
timestamp = time.strftime('%Y-%m-%d-%H-%M-%S-%j', time.gmtime())
job_name = f'rapidsHPO{timestamp}'

# Start the tuning job, passing the uploaded data as the 'dataset' channel.
tuner.fit({'dataset': s3_data_dir}, job_name=job_name)

## Clean up

- Delete S3 buckets and files you don't need
- Kill training jobs that you don't want running
- Delete container images and the repository you just created

In [None]:
aws ecr delete-repository --force --repository-name