### XGBoost cloud training template for Iris Classification example

In [2]:
import numpy as np
import pandas as pd

# libs for s3 access
import boto3
import re
import sagemaker
from sagemaker import get_execution_role

In [3]:
### Import s3 bucket name as environment variable

import os
env_vars = !cat ./.env
for var in env_vars:
    key, value = var.split('=')
    os.environ[key] = value

In [4]:
bucket_name = os.environ['BUCKET_NAME']

In [None]:
bucket_name

In [6]:
## bucket name and folders for Iris model

training_file_key = 'iris/iris_train.csv'
validation_file_key = 'iris/iris_validation.csv'

s3_model_outputl_location = r's3://{0}/iris/model'.format(bucket_name)
s3_training_file_location = r's3://{0}/{1}'.format(bucket_name, training_file_key)
s3_validation_file_location = r's3://{0}/{1}'.format(bucket_name, training_file_key)

In [7]:
print(s3_model_outputl_location)
print(s3_training_file_location)
print(s3_validation_file_location)

s3://skk-ml-sagemaker/iris/model
s3://skk-ml-sagemaker/iris/iris_train.csv
s3://skk-ml-sagemaker/iris/iris_train.csv


In [10]:
# Write and Reading from S3 is just as easy
# files are referred as objects in S3.  
# file name is referred as key name in S3
# Files stored in S3 are automatically replicated across 3 different availability zones 
# in the region where the bucket was created.

def write_to_s3(filename, bucket, key):
    with open(filename,'rb') as f:
        return boto3.Session().resource('s3').Bucket(bucket).Object(key).upload_fileobj(f)

In [11]:
write_to_s3('iris_train.csv',bucket_name,training_file_key)
write_to_s3('iris_validation.csv',bucket_name, validation_file_key)

### Training XGBoost Algorithm in Docker Image

#### AWS maintain separage images for every region and algorithm

In [14]:
# Use Spot Instance - Save up to 90% of training cost by using spot instances when compared to on-demand instances
# Reference: https://github.com/aws-samples/amazon-sagemaker-managed-spot-training/blob/main/xgboost_built_in_managed_spot_training_checkpointing/xgboost_built_in_managed_spot_training_checkpointing.ipynb

# use spot instance

use_spot_instances=True
max_run = 3600
max_wait = 7200

job_name = 'xgboost-iris-v1'

checkpoint_s3_uri = None

if use_spot_instances:
    checkpoint_s3_uri = f's3://{bucket_name}/iris/checkpoints/{job_name}'


In [None]:
print(f'Checkpoint uri: {checkpoint_s3_uri}')

In [16]:
sess = sagemaker.Session()

In [17]:
role = get_execution_role()

In [None]:
print(role)

In [19]:
container = sagemaker.image_uris.retrieve("xgboost", sess.boto_region_name,version='1.2-2')

print(f'Using XGBoost Container: {container}')

Using XGBoost Container: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.2-2


### Build Model

In [30]:
# Configure the training job
# Specify type and number of instances to use
# S3 location where final artifacts needs to be stored

#   Reference: http://sagemaker.readthedocs.io/en/latest/estimators.html

# for managed spot training, specify the use_spot_instances flag, max_run, max_wait and checkpoint_s3_uri

# SDK 2.x version does not require train prefix for instance count and type

estimator = sagemaker.estimator.Estimator(
    container,
    role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=s3_model_outputl_location,
    sagemaker_session=sess,
    base_job_name=job_name,
    use_spot_instances=use_spot_instances,
    max_run=max_run,
    max_wait=max_wait,
    checkpoint_s3_uri=checkpoint_s3_uri
)

In [31]:
# Specify hyper parameters that appropriate for the training algorithm
# XGBoost Training Parameter Reference: 
#   https://github.com/dmlc/xgboost/blob/master/doc/parameter.md

estimator.set_hyperparameters(
    max_depth=5,
    objective="multi:softmax",
    eval_metric="mlogloss",
    num_class=3,
    num_round=100,
    early_stopping_rounds=10)

In [32]:
estimator.hyperparameters()

{'max_depth': 5,
 'objective': 'multi:softmax',
 'eval_metric': 'mlogloss',
 'num_class': 3,
 'num_round': 100,
 'early_stopping_rounds': 10}

### Prepare Training data location details

In [33]:
training_input_config = sagemaker.session.TrainingInput(
    s3_data=s3_training_file_location,
    content_type='csv',
    s3_data_type='S3Prefix'
)

validation_input_config = sagemaker.session.TrainingInput(
    s3_data=s3_validation_file_location,
    content_type='csv',
    s3_data_type='S3Prefix'
)

data_channels = {'train': training_input_config, 'validation': validation_input_config}

In [34]:
print(training_input_config)
print(validation_input_config)

<sagemaker.inputs.TrainingInput object at 0x7f44d63724d0>
<sagemaker.inputs.TrainingInput object at 0x7f44defd45e0>


### Train the model

In [35]:
# XGBoost supports "train", "validation" channels

estimator.fit(data_channels)

INFO:sagemaker:Creating training-job with name: xgboost-iris-v1-2024-04-29-00-03-57-358


2024-04-29 00:03:57 Starting - Starting the training job...
2024-04-29 00:04:13 Starting - Preparing the instances for training...
2024-04-29 00:04:43 Downloading - Downloading input data...
2024-04-29 00:05:04 Downloading - Downloading the training image...
2024-04-29 00:05:49 Training - Training image download completed. Training in progress...[34m[2024-04-29 00:05:59.689 ip-10-0-68-202.ec2.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2024-04-29:00:05:59:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2024-04-29:00:05:59:INFO] Failed to parse hyperparameter eval_metric value mlogloss to Json.[0m
[34mReturning the value itself[0m
[34m[2024-04-29:00:05:59:INFO] Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34m[2024-04-29:00:05:59:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-04-29:00:05:59:INFO] Running XGBoost Sagemaker in algorithm mode[0


2024-04-29 00:06:17 Uploading - Uploading generated training model
2024-04-29 00:06:17 Completed - Training job completed
Training seconds: 94
Billable seconds: 39
Managed Spot Training savings: 58.5%


### Deploy Model

In [37]:
predictor = estimator.deploy(
                        initial_instance_count=1,
                        instance_type='ml.m5.xlarge',
                        endpoint_name=job_name)

INFO:sagemaker:Creating model with name: xgboost-iris-v1-2024-04-29-00-07-10-295
INFO:sagemaker:Creating endpoint-config with name xgboost-iris-v1
INFO:sagemaker:Creating endpoint with name xgboost-iris-v1


-----!

### Run Predictions

In [40]:
# SDK 2.0 serializers

from sagemaker.serializers import CSVSerializer
predictor.serializer = CSVSerializer()

In [43]:
predictor.predict([[4.8,3.4,1.6,0.2],[4.8,3.4,1.6,0.2],[5.8,2.7,4.1,1.0]])

b'0.0\n0.0\n1.0\n'