# Lab_04.2.md

Notebook cells that train an XGBoost model with a SageMaker training job, deploy it to a real-time endpoint, and call that endpoint for predictions.


In [1]:
import boto3
import time, random, pprint
from sagemaker import Model
from sagemaker.predictor import Predictor
from sagemaker import image_uris
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer


###
### XGBoost only supports numerical features by default:
###   make sure every categorical feature in your dataset is encoded
###   numerically (one-hot encoding or label encoding) before training.
###

# Single source of truth for the AWS region, shared by the SageMaker client
# and the container image lookup below so the two cannot drift apart.
aws_region = "us-east-1"

# Initialize Boto3 SageMaker client
sagemaker_client = boto3.client("sagemaker", region_name=aws_region)

# Define Training Job Name (epoch-seconds suffix gives each run a fresh name)
training_job_name = f"xgboost-training-{int(time.time())}"

# S3 paths/keys (Replace with your own bucket and training-data key)
s3_bucket = "adgu-datasets"
training_data_s3_uri = f"s3://{s3_bucket}/tuning-job-dataset/train.csv"
output_s3_uri = f"s3://{s3_bucket}/output/"

# SageMaker Execution Role ARN (Replace with your SageMaker role)
# NOTE(review): this ARN embeds a specific AWS account id; confirm it is meant
# to be shared before publishing the notebook.
sagemaker_role = "arn:aws:iam::146868985163:role/SageMaker-ExecutionRole"

# XGBoost Training Image URI (Region Specific)
# https://docs.aws.amazon.com/sagemaker/latest/dg-ecr-paths/sagemaker-algo-docker-registry-paths.html
xgboost_image_uri = image_uris.retrieve(framework="xgboost", region=aws_region, version="1.7-1")
print("XGBoost image uri: {}".format(xgboost_image_uri))



sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/nick/Library/Application Support/sagemaker/config.yaml


XGBoost image uri: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1


In [2]:
# Define Training Job Configuration
# The request is assembled from named sections so each part can be read
# (and tweaked) on its own before being combined into `training_params`.

# Container image and how the training data is delivered to it
algorithm_spec = {
    "TrainingImage": xgboost_image_uri,
    "TrainingInputMode": "File",
}

# Hyperparameter values are given as strings
xgb_hyperparameters = {
    "num_round": "100",
    "eta": "0.2",
    "objective": "reg:squarederror",
    "max_depth": "6",
    "subsample": "0.8",
    "eval_metric": "rmse",
}

# Single "train" channel reading CSV data from S3
train_channel = {
    "ChannelName": "train",
    "DataSource": {
        "S3DataSource": {
            "S3DataType": "S3Prefix",
            "S3Uri": training_data_s3_uri,
            "S3DataDistributionType": "FullyReplicated",
        }
    },
    "ContentType": "text/csv",  # Target column is the first column
}

# One ml.m5.large instance with a 10 GB volume
resource_config = {
    "InstanceType": "ml.m5.large",
    "InstanceCount": 1,
    "VolumeSizeInGB": 10,
}

training_params = {
    "TrainingJobName": training_job_name,
    "AlgorithmSpecification": algorithm_spec,
    "RoleArn": sagemaker_role,
    "HyperParameters": xgb_hyperparameters,
    "InputDataConfig": [train_channel],
    "OutputDataConfig": {"S3OutputPath": output_s3_uri},
    "ResourceConfig": resource_config,
    "StoppingCondition": {"MaxRuntimeInSeconds": 3600},  # one-hour cap
}

In [3]:
# Start SageMaker Training Job
print("Starting training job...")
sagemaker_client.create_training_job(**training_params)

# Wait for training to complete: poll every 10 seconds until the job reports
# one of the statuses we treat as terminal.
print("Waiting for training to complete...")
terminal_training_statuses = {"Completed", "Failed", "Stopped"}
while True:
    response = sagemaker_client.describe_training_job(TrainingJobName=training_job_name)
    status = response["TrainingJobStatus"]
    if status in terminal_training_statuses:
        print(f"Training Job Status: {status}")
        break
    time.sleep(10)

# Check if training was successful; surface the service-reported reason so a
# failed run can be diagnosed without another describe call.
if status != "Completed":
    failure_reason = response.get("FailureReason", "no failure reason reported")
    raise RuntimeError(f"Training failed with status: {status} ({failure_reason})")

print(response)

# Extract model artifacts location (S3 URI of the trained model archive)
model_artifact_s3 = response["ModelArtifacts"]["S3ModelArtifacts"]
print(model_artifact_s3)


Starting training job...
Waiting for training to complete...
Training Job Status: Completed
{'TrainingJobName': 'xgboost-training-1742237086', 'TrainingJobArn': 'arn:aws:sagemaker:us-east-1:146868985163:training-job/xgboost-training-1742237086', 'ModelArtifacts': {'S3ModelArtifacts': 's3://adgu-datasets/output/xgboost-training-1742237086/output/model.tar.gz'}, 'TrainingJobStatus': 'Completed', 'SecondaryStatus': 'Completed', 'HyperParameters': {'eta': '0.2', 'eval_metric': 'rmse', 'max_depth': '6', 'num_round': '100', 'objective': 'reg:squarederror', 'subsample': '0.8'}, 'AlgorithmSpecification': {'TrainingImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1', 'TrainingInputMode': 'File', 'MetricDefinitions': [{'Name': 'train:mae', 'Regex': '.*\\[[0-9]+\\].*#011train-mae:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'}, {'Name': 'train:gamma-nloglik', 'Regex': '.*\\[[0-9]+\\].*#011train-gamma-nloglik:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'}, {'Name': 'val

In [4]:
# **Create a SageMaker Model**
# Registers a model that pairs the XGBoost container image with the
# artifacts produced by the training job.
model_name = "xgboost-model-{}".format(int(time.time()))
print("Creating model in SageMaker...")

primary_container = {
    "Image": xgboost_image_uri,
    "ModelDataUrl": model_artifact_s3,
}
create_model_response = sagemaker_client.create_model(
    ModelName=model_name,
    PrimaryContainer=primary_container,
    ExecutionRoleArn=sagemaker_role,
)
print(create_model_response)

Creating model in SageMaker...
{'ModelArn': 'arn:aws:sagemaker:us-east-1:146868985163:model/xgboost-model-1742237263', 'ResponseMetadata': {'RequestId': '42022ca7-149e-47aa-aa68-b5f6fe016de8', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '42022ca7-149e-47aa-aa68-b5f6fe016de8', 'content-type': 'application/x-amz-json-1.1', 'content-length': '86', 'date': 'Mon, 17 Mar 2025 18:47:44 GMT'}, 'RetryAttempts': 0}}


In [None]:
# **Deploy the Model as an Endpoint** (step 1: endpoint configuration)
# Names for the endpoint configuration and the endpoint, each with an
# epoch-seconds suffix.
endpoint_config_name = "xgboost-endpoint-config-{}".format(int(time.time()))
endpoint_name = "xgboost-endpoint-{}".format(int(time.time()))

# Single production variant: one ml.m5.large instance serving `model_name`
xgboost_variant = {
    "VariantName": "XGBoostVariant1",
    "ModelName": model_name,
    "InstanceType": "ml.m5.large",
    "InitialInstanceCount": 1,
}

# Create Endpoint Configuration
print("Creating endpoint configuration...")
sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[xgboost_variant],
)

In [None]:
# **Deploy the Model as an Endpoint** (step 2: create the endpoint and wait)
# Create Endpoint
print("Deploying model as an endpoint...")
sagemaker_client.create_endpoint(
    EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name
)

# Wait for endpoint to be ready: poll every 30 seconds until the endpoint is
# either serving traffic or has failed.
print("Waiting for endpoint to be ready...")
terminal_endpoint_statuses = {"InService", "Failed"}
while True:
    response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
    status = response["EndpointStatus"]
    if status in terminal_endpoint_statuses:
        print(f"Endpoint Status: {status}")
        break
    time.sleep(30)

# Check if deployment was successful; include the service-reported reason so
# a failed deployment can be diagnosed from the error alone.
if status != "InService":
    failure_reason = response.get("FailureReason", "no failure reason reported")
    raise RuntimeError(f"Deployment failed with status: {status} ({failure_reason})")

print(f"Model deployed successfully at endpoint: {endpoint_name}")

print(response)

In [None]:
# Wrap the deployed endpoint in a Predictor object
csv_serializer = CSVSerializer()  # Ensures input is formatted as CSV
json_deserializer = JSONDeserializer()  # Parses JSON output
predictor = Predictor(
    endpoint_name=endpoint_name,
    serializer=csv_serializer,
    deserializer=json_deserializer,
)

# One sample row (excluding the target column); must be a 2D list
sample_data = [
    [30, 1, 12345],
]

# Invoke the endpoint and show the parsed response
prediction = predictor.predict(sample_data)
print("Prediction response:", prediction)

In [None]:
# Build ten random three-column rows and send them to the endpoint in one call.
# The three randint calls per row run in the same order as the column layout.
sample_data = [
    [random.randint(20, 80), random.randint(0, 4), random.randint(10000, 150000)]
    for _ in range(10)
]

print(sample_data)
prediction = predictor.predict(sample_data)
# pprint.pformat returns the formatted string; pprint.pprint would print it and
# return None, making this line read "Predictions: None".
print("Predictions: {}".format(pprint.pformat(prediction)))