In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import sagemaker
import boto3
from sagemaker import image_uris
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput

# Read the raw housing table from the CSV that sits next to this notebook
dataset = pd.read_csv(filepath_or_buffer='HousingData.csv')


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
# Put the 'medv' column first and keep every other column in its existing order
dataset = pd.concat(
    [dataset[['medv']], dataset.drop(columns='medv')],
    axis='columns',
)

In [3]:
# Hold out 10% of the rows for validation. The fixed random_state makes the
# shuffle deterministic, so the same rows land in each set on every
# Restart & Run All.
training_dataset, validation_dataset = train_test_split(
    dataset, test_size=0.1, random_state=42
)
print(training_dataset.shape)
print(validation_dataset.shape)


(455, 13)
(51, 13)


In [4]:
# Write both splits to local CSV files without a header row or index column
for frame, filename in (
    (training_dataset, 'training_dataset.csv'),
    (validation_dataset, 'validation_dataset.csv'),
):
    frame.to_csv(filename, header=False, index=False)



In [5]:
# SageMaker session plus the default bucket it manages for this account/region
sess = sagemaker.Session()
bucket = sess.default_bucket()

# Every object created by this notebook lives under this S3 prefix
prefix = 'boston-housing'

# Push the two local CSVs to S3; upload_data returns the resulting s3:// URI
training_data_path = sess.upload_data(
    path='training_dataset.csv',
    key_prefix=f'{prefix}/input/training',
)
validation_data_path = sess.upload_data(
    path='validation_dataset.csv',
    key_prefix=f'{prefix}/input/validation',
)
print(training_data_path, validation_data_path, sep='\n')


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
s3://sagemaker-us-east-1-141154829940/boston-housing/input/training/training_dataset.csv
s3://sagemaker-us-east-1-141154829940/boston-housing/input/validation/validation_dataset.csv


In [9]:
# Resolve the XGBoost container image for the region the SageMaker session
# (and therefore the bucket used for input/output) belongs to, instead of
# asking a separate boto3 session whose region could differ or be unset.
region = sess.boto_region_name
container = sagemaker.image_uris.retrieve(framework="xgboost", region=region, version="1.7-1")
print(container)

# Set up the XGBoost estimator on the same session that uploaded the data,
# so no additional implicit sessions are created.
xgb_estimator = Estimator(
    container,
    role=sagemaker.get_execution_role(sagemaker_session=sess),
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=f's3://{bucket}/{prefix}/output',
    sagemaker_session=sess,
)

# Set hyperparameters for XGBoost
xgb_estimator.set_hyperparameters(
    objective='reg:squarederror',  # for regression tasks
    num_round=100,                  # number of boosting rounds
    max_depth=5                     # maximum tree depth
)


683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [10]:

# Wrap each uploaded CSV location in a TrainingInput channel
training_data_channel, validation_data_channel = (
    TrainingInput(s3_data=s3_uri, content_type='csv')
    for s3_uri in (training_data_path, validation_data_path)
)



In [11]:
# Launch the training job with the 'train' and 'validation' input channels
xgb_estimator.fit(
    inputs={
        'train': training_data_channel,
        'validation': validation_data_channel,
    }
)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-12-17-16-46-54-041


2023-12-17 16:46:54 Starting - Starting the training job...
2023-12-17 16:47:10 Starting - Preparing the instances for training.........
2023-12-17 16:48:29 Downloading - Downloading input data...
2023-12-17 16:49:16 Downloading - Downloading the training image......
2023-12-17 16:50:17 Training - Training image download completed. Training in progress..[34m[2023-12-17 16:50:23.091 ip-10-0-234-58.ec2.internal:8 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2023-12-17 16:50:23.116 ip-10-0-234-58.ec2.internal:8 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2023-12-17:16:50:23:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2023-12-17:16:50:23:INFO] Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34m[2023-12-17:16:50:23:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2023-12-17:16:50:23:INFO] Running XGBoost Sagemaker in algorithm mode[0

In [12]:
%%bash -s "$xgb_estimator.output_path"
# Recursively list everything stored under the estimator's S3 output path.
# $1 is the path handed in by the cell magic above; it is quoted so the shell
# passes it through as a single argument without word splitting or globbing.
aws s3 ls --recursive "$1"

2023-12-17 16:26:54          0 boston-housing/output/linear-learner-2023-12-17-16-23-13-971/debug-output/training_job_end.ts
2023-12-17 16:26:53       1026 boston-housing/output/linear-learner-2023-12-17-16-23-13-971/output/model.tar.gz
2023-12-17 16:26:54          0 boston-housing/output/linear-learner-2023-12-17-16-23-13-971/profiler-output/framework/training_job_end.ts
2023-12-17 16:26:01      75656 boston-housing/output/linear-learner-2023-12-17-16-23-13-971/profiler-output/system/incremental/2023121716/1702830240.algo-1.json
2023-12-17 16:26:00     184148 boston-housing/output/linear-learner-2023-12-17-16-23-13-971/profiler-output/system/incremental/2023121716/1702830300.algo-1.json
2023-12-17 16:26:50     151804 boston-housing/output/linear-learner-2023-12-17-16-23-13-971/profiler-output/system/incremental/2023121716/1702830360.algo-1.json
2023-12-17 16:26:54          0 boston-housing/output/linear-learner-2023-12-17-16-23-13-971/profiler-output/system/training_job_end.ts
2023-12

In [13]:
from time import gmtime, strftime

# Suffix the endpoint name with the current UTC day-hour-minute-second
timestamp = strftime('%d-%H-%M-%S', gmtime())
endpoint_name = f'xgb-demo-{timestamp}'
print(endpoint_name)

xgb-demo-17-16-51-24


In [14]:
# Deploy the trained model to an endpoint backed by a single ml.m5.large instance
xgb_predictor = xgb_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large',
    endpoint_name=endpoint_name,
)

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2023-12-17-16-51-26-370
INFO:sagemaker:Creating endpoint-config with name xgb-demo-17-16-51-24
INFO:sagemaker:Creating endpoint with name xgb-demo-17-16-51-24


-----!

In [15]:
# One row of 12 feature values, joined into a single comma-separated string
test_sample = ','.join([
    '0.00632', '18.00', '2.310', '0', '0.5380', '6.5750',
    '65.20', '4.0900', '1', '296.0', '15.30', '4.98',
])

In [17]:
# Exchange CSV in both directions so the comma-separated sample string can be
# sent to the endpoint directly
xgb_predictor.deserializer = sagemaker.deserializers.CSVDeserializer()
xgb_predictor.serializer = sagemaker.serializers.CSVSerializer()

# Score the single sample row and show the parsed response
response = xgb_predictor.predict(test_sample)
print(response)

[['23.928556442260742']]


In [18]:
# Two feature rows sent together in one request
test_samples = [
    '0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,4.98',
    '0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,9.14',
]

response = xgb_predictor.predict(test_samples)
print(response, xgb_predictor.endpoint_name, sep='\n')

[['23.928556442260742'], ['21.594999313354492']]
xgb-demo-17-16-51-24


In [19]:
# Call the same endpoint through the low-level boto3 runtime client
runtime = boto3.Session().client(service_name='runtime.sagemaker')

response = runtime.invoke_endpoint(
    Body=test_sample,
    ContentType='text/csv',
    EndpointName=endpoint_name,
)

# 'Body' is a stream object; read() yields the raw response bytes
print(response['Body'].read())

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


b'23.928556442260742\n'


In [None]:
# Tear down the hosting resources created by deploy() so nothing is left behind.
# The model is removed first, while the endpoint it is attached to still
# exists and can be used to look it up; then the endpoint itself is deleted.
xgb_predictor.delete_model()
xgb_predictor.delete_endpoint()