# Training Multiple Models with Different Hyperparameters

## Training models

Prerequisite: the data has already been prepared and split into training and test sets in the previous activity.

In [1]:
import numpy as np
import pandas as pd
import sagemaker
from sagemaker import get_execution_role
from sagemaker.serializers import CSVSerializer

In [2]:
# S3 bucket and key prefix shared by all inputs and outputs of this notebook
bucket_name = "sagemaker-loan-classification"
prefix = "xgboost"

# S3 URI handed to the estimator as its output location
output_path = f"s3://{bucket_name}/{prefix}/saved_model"

# CSV input channels, read from the train/ and test/ folders under the prefix
input_train = sagemaker.TrainingInput(
    s3_data=f"s3://{bucket_name}/{prefix}/train",
    content_type="csv",
)
input_test = sagemaker.TrainingInput(
    s3_data=f"s3://{bucket_name}/{prefix}/test",
    content_type="csv",
)

In [3]:
# Build the estimator
def xgboost_fit_param(model_name, hyperparameters, use_spot_instances=False, max_run=3200, max_wait=None):
    """Run one SageMaker training job for the built-in XGBoost container.

    Builds an ``Estimator`` for XGBoost image version "1.2-2" on a single
    ``ml.m5.xlarge`` instance and fits it with ``input_train`` as the "train"
    channel and ``input_test`` as the "validation" channel. The notebook-level
    ``bucket_name``, ``prefix``, ``output_path``, ``input_train`` and
    ``input_test`` variables must be defined before calling.

    Args:
        model_name: Suffix appended to "xgboost-loan-" to form the base job name.
        hyperparameters: Dict of hyperparameters passed to the estimator.
        use_spot_instances: When truthy, request managed spot training and set
            a checkpoint S3 URI under ``{prefix}/checkpoints/{name}``.
        max_run: Value passed to the estimator as ``max_run`` (seconds).
        max_wait: Value passed to the estimator as ``max_wait`` (seconds).
            When spot training is requested and this is None, it defaults to
            7600, raised to ``max_run`` if that is larger. An explicit value
            is passed through unchanged.

    Returns:
        None. Progress is reported through ``print`` and the SDK's own logs.
    """
    name = f"xgboost-loan-{model_name}"
    print(f"____Running {name}")

    # Set checkpoint
    checkpoint = None
    if use_spot_instances:
        checkpoint = f"s3://{bucket_name}/{prefix}/checkpoints/{name}"
        print(f"___The checkpoint is saved to: {checkpoint}")
        # Only fill in a default: a caller-supplied max_wait must be honoured.
        if max_wait is None:
            # Managed spot training requires max_wait >= max_run.
            max_wait = 7600 if max_run is None else max(7600, max_run)

    # Reuse one session for both the region lookup and the estimator
    session = sagemaker.Session()

    # Set the container based on the version
    container = sagemaker.image_uris.retrieve("xgboost",
                                              session.boto_region_name,
                                              version="1.2-2")

    xgb = sagemaker.estimator.Estimator(
        image_uri=container,
        role=sagemaker.get_execution_role(),
        instance_count=1,
        instance_type="ml.m5.xlarge",
        output_path=output_path,
        sagemaker_session=session,
        checkpoint_s3_uri=checkpoint,
        use_spot_instances=use_spot_instances,
        max_run=max_run,
        max_wait=max_wait,
        base_job_name=name,
        hyperparameters=hyperparameters,
    )

    # Fit the model
    xgb.fit(
        {
            "train": input_train,
            "validation": input_test
        }
    )

    print(f"____Finish running {xgb.latest_training_job.name}")

In [4]:
# Hyperparameter sets, one per training job.
# Both use the multi:softmax objective with two classes.
hyperparameter_1 = dict(
    colsample_bytree=0.478,
    gamma=2.387,
    eta=0.175,
    max_depth=15,
    min_child_weight=7,
    num_round=86,
    subsample=0.80,
    num_class=2,
    objective="multi:softmax",
)

hyperparameter_2 = dict(
    colsample_bytree=0.1,
    gamma=0,
    eta=0.01,
    max_depth=3,
    min_child_weight=3,
    num_round=89,
    subsample=1,
    num_class=2,
    objective="multi:softmax",
)

In [5]:
# Launch the training job for the first hyperparameter set
xgboost_fit_param(
    model_name="model-1",
    hyperparameters=hyperparameter_1,
)

____Running xgboost-loan-model-1
2022-01-29 12:01:20 Starting - Starting the training job...
2022-01-29 12:01:22 Starting - Launching requested ML instancesProfilerReport-1643457680: InProgress
...
2022-01-29 12:02:15 Starting - Preparing the instances for training.........
2022-01-29 12:03:50 Downloading - Downloading input data
2022-01-29 12:03:50 Training - Downloading the training image.....[34m[2022-01-29 12:04:26.974 ip-10-0-145-53.ap-southeast-1.compute.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2022-01-29:12:04:27:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2022-01-29:12:04:27:INFO] Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34m[2022-01-29:12:04:27:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2022-01-29:12:04:27:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2022-01-29:12:04:27:INFO] Determined delimiter of CSV input is

In [6]:
# Launch the training job for the second hyperparameter set
xgboost_fit_param(
    model_name="model-2",
    hyperparameters=hyperparameter_2,
)

____Running xgboost-loan-model-2
2022-01-29 12:05:31 Starting - Starting the training job...
2022-01-29 12:05:54 Starting - Launching requested ML instancesProfilerReport-1643457931: InProgress
...
2022-01-29 12:06:26 Starting - Preparing the instances for training.........
2022-01-29 12:07:54 Downloading - Downloading input data...
2022-01-29 12:08:14 Training - Downloading the training image..[34m[2022-01-29 12:08:43.990 ip-10-0-217-112.ap-southeast-1.compute.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2022-01-29:12:08:44:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2022-01-29:12:08:44:INFO] Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34m[2022-01-29:12:08:44:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2022-01-29:12:08:44:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2022-01-29:12:08:44:INFO] Determined delimiter of CSV input i

## Deploy the models in the AWS web console