In [11]:
# https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html

In [12]:
import sagemaker
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput

In [13]:
# XGBoost hyperparameters passed to the training job.
# See the XGBoost parameter reference for the exact meaning of each knob.
hyperparameters = {
    'objective': 'binary:logistic',  # binary classification, logistic loss
    'max_depth': 5,                  # tree depth limit
    'eta': 0.2,                      # learning rate
    'gamma': 4,                      # split-pruning threshold
    'min_child_weight': 6,           # child-weight floor for a split
    'subsample': 0.7,                # row subsampling ratio
    'num_round': 50,                 # number of boosting rounds
}

# S3 location where the trained model will be saved
output_path = 's3://ktzouvan-trading-point-sagemaker-poc/models'

In [14]:
# Get the XGBoost container image URI for a specific version and region
xgboost_container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region='eu-west-1',
    version="1.7-1",
)

# Create the estimator: it bundles the container image, the hyperparameters,
# the execution role and the compute settings used when fit() is called.
estimator = sagemaker.estimator.Estimator(
    image_uri=xgboost_container,
    hyperparameters=hyperparameters,
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m5.large',
    volume_size=5,  # 5 GB
    output_path=output_path,
)


INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [15]:
# Describe the training and validation datasets: both are CSV files in S3
content_type = "csv"
train_input = TrainingInput(
    s3_data="s3://ktzouvan-trading-point-sagemaker-poc/datasets/marketing/train.csv",
    content_type=content_type,
)
validation_input = TrainingInput(
    s3_data="s3://ktzouvan-trading-point-sagemaker-poc/datasets/marketing/validate.csv",
    content_type=content_type,
)

# Run the XGBoost training job, feeding the two datasets in as the
# 'train' and 'validation' channels
estimator.fit(inputs={'train': train_input, 'validation': validation_input})

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-08-31-19-09-16-140


2024-08-31 19:09:16 Starting - Starting the training job...
2024-08-31 19:09:32 Starting - Preparing the instances for training...
2024-08-31 19:09:54 Downloading - Downloading input data...
2024-08-31 19:10:34 Downloading - Downloading the training image......
2024-08-31 19:11:45 Training - Training image download completed. Training in progress..[34m[2024-08-31 19:11:49.951 ip-10-0-116-113.eu-west-1.compute.internal:8 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2024-08-31 19:11:49.973 ip-10-0-116-113.eu-west-1.compute.internal:8 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2024-08-31:19:11:50:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2024-08-31:19:11:50:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34m[2024-08-31:19:11:50:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-08-31:19:11:50:INFO] Running XGBoost Sagemak

In [None]:
# TODO: also explain how to run training through a script
# https://sagemaker.readthedocs.io/en/stable/frameworks/xgboost/using_xgboost.html