# Multiple Models by Versions

## Training models

Prerequisite (previous activity): the data has already been prepared and split into train and test sets.

In [1]:
import numpy as np
import pandas as pd
import sagemaker
from sagemaker import get_execution_role
from sagemaker.serializers import CSVSerializer

In [2]:
# S3 bucket and key prefix under which this experiment keeps its data and models
bucket_name = "sagemaker-loan-classification"
prefix = "xgboost"

# Input channels (CSV files under <prefix>/train and <prefix>/test) and the
# S3 destination for the trained model artifacts
input_train = sagemaker.TrainingInput(
    s3_data="s3://{}/{}/train".format(bucket_name, prefix),
    content_type="csv",
)
input_test = sagemaker.TrainingInput(
    s3_data="s3://{}/{}/test".format(bucket_name, prefix),
    content_type="csv",
)
output_path = "s3://{}/{}/saved_model".format(bucket_name, prefix)

In [3]:
# Build the estimator
def xgboost_fit_ver(version, use_spot_instances=False, max_run=3200, max_wait=None):
    """Train the built-in XGBoost algorithm using the container for ``version``.

    The job reads the module-level ``input_train`` / ``input_test`` channels and
    writes its model artifacts to the module-level ``output_path``.

    Args:
        version: XGBoost container version string, e.g. ``"0.90-1"`` or
            ``"1.3-1"``. Dots are replaced with dashes to build the job name.
        use_spot_instances: When truthy, request managed spot training and
            checkpoint to ``s3://{bucket_name}/{prefix}/checkpoints/{name}``.
        max_run: Forwarded to the estimator as ``max_run``.
        max_wait: Forwarded to the estimator as ``max_wait``. For spot training
            an omitted value defaults to 7600; an explicit value is respected.

    Returns:
        The fitted ``sagemaker.estimator.Estimator``, so the caller can deploy
        it or inspect ``latest_training_job``.

    Raises:
        ValueError: If spot training is requested and ``max_wait`` is smaller
            than ``max_run``.
    """
    default_spot_max_wait = 7600

    # Dots in the version are replaced with dashes for the job name.
    name = "xgboost-loan-" + version.replace(".", "-")
    print(f"____Running {name}")

    # Checkpointing (and a wait budget) only applies to spot training.
    checkpoint = None
    if use_spot_instances:
        checkpoint = f"s3://{bucket_name}/{prefix}/checkpoints/{name}"
        print(f"___The checkpoint is saved to: {checkpoint}")
        # Only fall back to the default when the caller did not choose a value.
        if max_wait is None:
            max_wait = default_spot_max_wait
        # Fail early instead of letting the training-job request be rejected.
        if max_run is not None and max_wait < max_run:
            raise ValueError(
                f"max_wait ({max_wait}) must be greater than or equal to "
                f"max_run ({max_run}) when use_spot_instances is enabled"
            )

    # One session serves both the region lookup and the estimator.
    session = sagemaker.Session()

    # Set the container based on the version
    container = sagemaker.image_uris.retrieve(
        "xgboost",
        session.boto_region_name,
        version=version,
    )

    xgb = sagemaker.estimator.Estimator(
        image_uri=container,
        role=sagemaker.get_execution_role(),
        instance_count=1,
        instance_type="ml.m5.xlarge",
        output_path=output_path,
        sagemaker_session=session,
        checkpoint_s3_uri=checkpoint,
        use_spot_instances=use_spot_instances,
        max_run=max_run,
        max_wait=max_wait,
        base_job_name=name,
    )

    # Set the hyperparameters
    xgb.set_hyperparameters(
        colsample_bytree=0.478,
        gamma=2.387,
        eta=0.175,
        max_depth=15,
        min_child_weight=7,
        num_round=86,
        subsample=0.80,
        num_class=2,
        objective="multi:softmax",
    )

    # Fit the model; the test split is supplied as the validation channel.
    xgb.fit(
        {
            "train": input_train,
            "validation": input_test,
        }
    )

    print(f"____Finish running {xgb.latest_training_job.name}")
    return xgb

In [4]:
# Launch an on-demand training job with the XGBoost 0.90-1 container.
# The trailing ';' keeps any return value out of the cell output.
xgboost_fit_ver("0.90-1", use_spot_instances=False);

____Running xgboost-loan-0-90-1
2022-01-29 10:28:57 Starting - Starting the training job...
2022-01-29 10:29:20 Starting - Launching requested ML instancesProfilerReport-1643452137: InProgress
......
2022-01-29 10:30:20 Starting - Preparing the instances for training......
2022-01-29 10:31:26 Downloading - Downloading input data...
2022-01-29 10:31:42 Training - Downloading the training image..[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34m[10:32:05

In [5]:
# Launch an on-demand training job with the XGBoost 1.3-1 container.
# The trailing ';' keeps any return value out of the cell output.
xgboost_fit_ver("1.3-1", use_spot_instances=False);

____Running xgboost-loan-1-3-1
2022-01-29 10:33:18 Starting - Starting the training job...
2022-01-29 10:33:48 Starting - Launching requested ML instancesProfilerReport-1643452398: InProgress
......
2022-01-29 10:34:48 Starting - Preparing the instances for training......
2022-01-29 10:35:48 Downloading - Downloading input data
2022-01-29 10:35:48 Training - Downloading the training image....[34m[2022-01-29 10:36:22.330 ip-10-0-178-238.ap-southeast-1.compute.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2022-01-29:10:36:22:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2022-01-29:10:36:22:INFO] Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34m[2022-01-29:10:36:22:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2022-01-29:10:36:22:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2022-01-29:10:36:22:INFO] Determined delimiter of CSV input is '

## Deploy the models in the web console

In [None]:
# Delete the endpoints when they are no longer needed.
# NOTE(review): `xgb_model` is not defined in the cells visible here; presumably it is
# the object returned when a model is deployed. Confirm before uncommenting.
# xgb_model.delete_endpoint()