# 05 - Running SageMaker Hyperparameter optimisation with your ML Script

To find good hyperparameters, you need to train and evaluate your model many times with different settings. SageMaker can automate this by launching multiple training jobs, several of them in parallel.

In [1]:
import os
import boto3
from sagemaker import Session
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.tuner import HyperparameterTuner
from sagemaker.tuner import CategoricalParameter, ContinuousParameter
from sagemaker.analytics import HyperparameterTuningJobAnalytics

## AWS Session

In [2]:
# Fail fast if the account id is missing: with a plain `.get()` an unset
# variable would silently produce the role ARN "arn:aws:iam::None:role/...",
# which only surfaces later as an opaque error when a job is created.
account = os.environ.get("DEMO_AWS_ACCOUNT")  # sandbox-admin account
if not account:
    raise RuntimeError(
        "Environment variable DEMO_AWS_ACCOUNT is not set; "
        "it is required to build the SageMaker execution role ARN."
    )

# Region and profile may legitimately be unset: boto3 accepts None for both
# and falls back to its own default configuration.
boto3_session = boto3.Session(
    region_name=os.environ.get("DEMO_AWS_REGION"),
    profile_name=os.environ.get("DEMO_AWS_PROFILE_NAME"),
)

# SageMaker session bound to the boto3 session above.
sagemaker_session = Session(boto_session=boto3_session)

# IAM role passed to the estimator when it is constructed.
role = f"arn:aws:iam::{account}:role/service-role/AmazonSageMaker-ExecutionRole-20171129T145583"

## Upload data to S3

In [3]:
# Local directory holding the training data (relative to this notebook).
local_data_location = "../data"

# Copy the directory to S3 under the given key prefix. The returned value is
# what gets passed to `tuner.fit` as the "train" channel.
data_location = sagemaker_session.upload_data(
    key_prefix="sagemaker_demo_data",
    path=local_data_location,
)

In [4]:
# Show the value returned by `upload_data` (where the data now lives in S3).
data_location

's3://sagemaker-eu-west-1-604842001064/sagemaker_demo_data'

## Run Script

In [5]:
# Metrics to extract from the training output: each entry pairs a metric name
# with a regex whose capture group holds the metric's value.
metric_definitions = [
    dict(Name="test_roc_auc", Regex="test_roc_auc: (.+?);$"),
]

# Search space for the tuner: one categorical choice and one continuous
# range explored on a logarithmic scale.
params_to_optimise = dict(
    penalty=CategoricalParameter(["l1", "l2"]),
    C=ContinuousParameter(
        scaling_type="Logarithmic",
        max_value=10.0,
        min_value=0.001,
    ),
)

In [6]:
# Estimator that runs the training script from the previous demo.
# NOTE(review): `train_instance_type` is the SageMaker Python SDK v1 argument
# name (v2 renamed it to `instance_type`) — confirm the installed SDK version.
sklearn = SKLearn(
    entry_point="../04_sagemaker_ml/dummy_ml_script_with_args_for_sagemaker.py",
    role=role,
    sagemaker_session=sagemaker_session,
    train_instance_type="ml.m5.large",
)

# Tuner that repeatedly launches the estimator over the search space,
# maximising the `test_roc_auc` metric parsed via `metric_definitions`.
tuner = HyperparameterTuner(
    estimator=sklearn,
    base_tuning_job_name="sklearn-demo-hyperopt",
    # what to optimise
    objective_metric_name="test_roc_auc",
    objective_type="Maximize",
    metric_definitions=metric_definitions,
    # how to search
    hyperparameter_ranges=params_to_optimise,
    strategy="Bayesian",
    # job budget: 10 training jobs in total, at most 3 at the same time
    max_jobs=10,
    max_parallel_jobs=3,
)

In [7]:
# Launch the hyperparameter tuning job, feeding the uploaded S3 data to the
# training script as the "train" input channel.
tuner.fit({"train": data_location})

## Look at results

In [8]:
# Look up the tuning job that `tuner.fit` just started rather than a
# hard-coded job name: a fixed name belongs to one past run, so after
# re-running the notebook it would show stale (or missing) results.
# To inspect an older job instead, pass its name here as a string.
job_analytics = HyperparameterTuningJobAnalytics(
    sagemaker_session=sagemaker_session,
    hyperparameter_tuning_job_name=tuner.latest_tuning_job.name,
)

# One row per training job launched by the tuner so far.
job_analytics_df = job_analytics.dataframe()

In [9]:
# Display the per-training-job results table; the tuner caps this at
# `max_jobs` rows, so showing the whole frame is fine here.
job_analytics_df

Unnamed: 0,C,penalty,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime
0,0.002017,"""l2""",sklearn-demo-hyperop-200220-1015-003-09a4e710,InProgress,,,
1,0.152386,"""l2""",sklearn-demo-hyperop-200220-1015-002-c7cb6be4,InProgress,,,
2,1.572507,"""l2""",sklearn-demo-hyperop-200220-1015-001-0e07e012,InProgress,,,
