### UFO Hyperparameter Tuning

As an extension to our baseline Linear Learner model, we will create a hyperparameter tuning job.

In [None]:
from datetime import datetime

import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.parameter import ContinuousParameter, IntegerParameter, ParameterRange

In [None]:
# S3 bucket that holds the lab's input data and receives the tuning output.
bucket = "tc-ml-cert-training"

# boto3 session; its region is used later to look up the algorithm image.
boto3_sess = boto3.Session()

# IAM role the SageMaker jobs will run under.
role = get_execution_role()

Let's build the S3 locations of the recordIO-protobuf files that hold the training and validation data.

In [None]:
# Object names of the recordIO-protobuf datasets.
# NOTE(review): "validatioin" is misspelled but is part of the S3 key --
# presumably it matches the object that was uploaded; verify before renaming.
train_file = "ufo_sightings_train_recordIO_protobuf.data"
validation_file = "ufo_sightings_validatioin_recordIO_protobuf.data"

# Full S3 URIs for the training and validation channels.
training_recordIO_protobuf_location = (
    f"s3://{bucket}/algorithms_lab/linearlearner_train/{train_file}"
)
validate_recordIO_protobuf_location = (
    f"s3://{bucket}/algorithms_lab/linearlearner_validation/{validation_file}"
)

# Echo both locations so they can be checked by eye.
print(
    f"The Pipe mode recordIO protobuf training data: {training_recordIO_protobuf_location}"
)
print(
    f"The Pipe mode recordIO protobuf validation data: {validate_recordIO_protobuf_location}"
)

In [None]:
# S3 prefix passed to the estimator as its output path (model artifacts land here).
output_location = f"s3://{bucket}/optimization_evaluation_lab/linearlearner_hyperparameter_tuning_output"

# Timestamped name handed to the tuner as its base tuning job name.
# NOTE(review): this string is 40 characters long; tuning job names are
# presumably capped at 32 characters and the SDK appends its own timestamp to
# a base name, so this custom timestamp may be truncated away -- TODO confirm
# and consider a shorter base name.
dt_now = f"{datetime.now():%Y%m%d%H%M%S}"
tuning_job_name = f"linear-learner-tuning-job-{dt_now}"

In [None]:
# SageMaker session handed to the estimator.
sess = sagemaker.Session()

# Image URI of the built-in Linear Learner algorithm for the session's region.
container = sagemaker.image_uris.retrieve(
    framework="linear-learner",
    region=boto3_sess.region_name,
    version="1",
)

In [None]:
# Input channels for the tuning job: channel name -> S3 location of the
# recordIO-protobuf data. Nothing is launched in this cell.
data_channels = dict(
    train=training_recordIO_protobuf_location,
    validation=validate_recordIO_protobuf_location,
)

# Set up the Linear Learner estimator from the algorithm container image.
# Data is streamed to the algorithm in Pipe mode.
linear = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type="ml.c4.xlarge",
    input_mode="Pipe",
    output_path=output_location,
    sagemaker_session=sess,
)

# Static hyperparameters (held fixed; they are not part of the search space):
#   feature_dim    - number of attributes (minus the researchOutcome attribute)
#   predictor_type - type of classification problem
#   num_classes    - number of classes in our researchOutcome
#                    (explained, unexplained, probable)
linear.set_hyperparameters(
    feature_dim=22,
    predictor_type="multiclass_classifier",
    num_classes=3,
)

In [None]:
# Search space for the tuning job.
#
# Each entry must be a concrete range type (IntegerParameter,
# ContinuousParameter or CategoricalParameter): the tuner groups ranges by
# their type when it builds the tuning request, and the ParameterRange base
# class carries no such type, so it cannot be used directly.
#
#   mini_batch_size - integer-valued, searched on a linear scale
#   wd              - searched on a log scale
#   l1              - searched on a log scale
#   learning_rate   - searched on a log scale
hyperparameter_ranges = {
    "mini_batch_size": IntegerParameter(
        min_value=500, max_value=5000, scaling_type="Linear"
    ),
    "wd": ContinuousParameter(
        min_value=0.0001, max_value=1.0, scaling_type="Logarithmic"
    ),
    "l1": ContinuousParameter(
        min_value=0.0001, max_value=1.0, scaling_type="Logarithmic"
    ),
    "learning_rate": ContinuousParameter(
        min_value=0.0001, max_value=1.0, scaling_type="Logarithmic"
    ),
}

In [None]:
# Configure the tuning job: Bayesian search over `hyperparameter_ranges`,
# minimizing the validation objective loss, with at most 20 training jobs in
# total and no more than 4 running at the same time.
hyperparameter_tuner = sagemaker.tuner.HyperparameterTuner(
    estimator=linear,
    objective_metric_name="validation:objective_loss",
    objective_type="Minimize",
    hyperparameter_ranges=hyperparameter_ranges,
    strategy="Bayesian",
    max_jobs=20,
    max_parallel_jobs=4,
    base_tuning_job_name=tuning_job_name,
)

# Start the tuning job on the train/validation channels.
hyperparameter_tuner.fit(inputs=data_channels)