In [1]:
import boto3
import sagemaker
import xgboost as xgb
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.xgboost import XGBoostModel



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# IAM role the notebook runs under; passed to the training job later on.
role = get_execution_role()

# SageMaker session plus its default S3 bucket.
# NOTE(review): `bucket` is not referenced by the visible cells, which read
# and write through `s3_bucket` instead — confirm whether it is still needed.
sess = sagemaker.Session()
bucket = sess.default_bucket()

In [3]:
# Bucket and key prefix under which the libsvm splits are stored.
s3_bucket = "ads-508-final"
s3_prefix = "transformeddata"

# Both split URIs share one layout: s3://<bucket>/<prefix>/sm_fm_<split>.libsvm
file_path_train, file_path_test = (
    f"s3://{s3_bucket}/{s3_prefix}/sm_fm_{split}.libsvm"
    for split in ("train", "test")
)

In [4]:
# Train and test data
# Wrap each S3 URI as a training-job input channel, declared as libsvm content.
train_data = TrainingInput(
    s3_data=file_path_train,
    content_type="libsvm",
)
test_data = TrainingInput(
    s3_data=file_path_test,
    content_type="libsvm",
)

In [9]:
# Parameters
# XGBoost hyperparameters forwarded to the training job.
hyperparameters = dict(
    objective="binary:logistic",  # logistic loss for binary classification
    max_depth=6,  # maximum depth of each tree
    eta=0.1,  # learning rate (step-size shrinkage)
    gamma=0,  # minimum loss reduction required to make a further split
    min_child_weight=1,  # minimum sum of instance weight needed in a child
    eval_metric="logloss",  # metric reported on the evaluation data
    num_round=100,  # number of boosting rounds
)

In [11]:
# Estimator
# Resolve the built-in XGBoost container image for the region of the
# notebook's session.
xgboost_image_uri = sagemaker.image_uris.retrieve(
    "xgboost", region=sess.boto_region_name, version="1.0-1"
)

# Single-instance training job; model artifacts are written under
# s3://<s3_bucket>/<s3_prefix>/output.
xgboost_estimator = Estimator(
    image_uri=xgboost_image_uri,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    hyperparameters=hyperparameters,
    output_path=f"s3://{s3_bucket}/{s3_prefix}/output",
    base_job_name="xgboost-recommender",
    # Reuse the session the image URI was resolved from, so the job and the
    # image are guaranteed to be in the same region/account context.
    sagemaker_session=sess,
)

In [12]:
# Fit
# One input channel per split; the test split is supplied as the
# "validation" channel of the training job.
data_channels = {"train": train_data, "validation": test_data}
xgboost_estimator.fit(inputs=data_channels)

2025-04-01 10:59:05 Starting - Starting the training job...
..25-04-01 10:59:21 Starting - Preparing the instances for training.
..25-04-01 10:59:42 Downloading - Downloading input data.
....[34m[2025-04-01 11:01:18.044 ip-10-0-222-72.ec2.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter eval_metric value logloss to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Single node training.[0m
[34m[11:01:19] 7140627x3 matrix with 14281254 entries loaded from /opt/ml/input/data/train[0m
[34m[

In [None]:
# Predictions
# Deploy the fitted estimator to a one-instance real-time endpoint and keep
# the object returned by deploy() for sending prediction requests.
# NOTE(review): no serializer/deserializer is configured here — confirm the
# request content type (e.g. libsvm vs. CSV) before calling predict.
endpoint_settings = {
    "initial_instance_count": 1,
    "instance_type": "ml.m5.large",
}
xgboost_model = xgboost_estimator.deploy(**endpoint_settings)