In [None]:
import boto3
import sagemaker

In [None]:
# Key prefix under which the data and model output are stored in the bucket;
# replace with your own prefix if needed.
prefix = "xgboost/abalone"

# SageMaker session used below for the default bucket and for data uploads.
sess = sagemaker.Session()

# The role that SageMaker uses to leverage AWS resources (S3, CloudWatch) on your behalf.
role = sagemaker.get_execution_role()
print(role)

# Replace with your own bucket name if needed.
bucket = sess.default_bucket()
print(bucket)

In [None]:
# Copy the abalone test, train and validation files from S3 into the local ./data/ directory.
! aws s3 cp s3://aws-mls-c01/sagemaker/xgboost/abalone.test ./data/
! aws s3 cp s3://aws-mls-c01/sagemaker/xgboost/abalone.train ./data/
! aws s3 cp s3://aws-mls-c01/sagemaker/xgboost/abalone.validation ./data/

In [None]:
# Region of the default boto3 session; used to look up the region-specific container image.
region_name = boto3.Session().region_name

In [None]:
# Look up the URI of the built-in XGBoost container image for this region.
# NOTE(review): version="latest" is not pinned to an explicit release; presumably it
# resolves to an older XGBoost image - confirm, and pin a version if reproducibility matters.
container = sagemaker.image_uris.retrieve("xgboost", region_name, version="latest")

In [None]:
# Training split: key prefix inside the bucket and the matching S3 URI.
train_channel = f"{prefix}/train"
s3_train_data = f"s3://{bucket}/{train_channel}"

# Validation split: key prefix inside the bucket and the matching S3 URI.
validation_channel = f"{prefix}/validation"
s3_validation_data = f"s3://{bucket}/{validation_channel}"

# Upload the local files under their respective channel prefixes.
sess.upload_data(
    path="./data/abalone.train",
    bucket=bucket,
    key_prefix=train_channel,
)
sess.upload_data(
    path="./data/abalone.validation",
    bucket=bucket,
    key_prefix=validation_channel,
)

In [None]:
# S3 location passed to the estimator as its output path.
s3_output_location = f"s3://{bucket}/{prefix}/output"

In [None]:
# Estimator describing the XGBoost training job: which container image to run,
# the role it runs under, the instance it runs on, and where output is written.
xgb_model = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    # Compute resources for the training job.
    instance_count=1,
    instance_type="ml.m5.2xlarge",
    volume_size=5,
    # Upper bound on the job's run time.
    max_run=3600,
    input_mode="File",
    output_path=s3_output_location,
    # Hyperparameters are passed to the container as strings.
    hyperparameters=dict(
        max_depth="5",
        eta="0.2",
        gamma="4",
        min_child_weight="6",
        subsample="0.7",
        objective="reg:linear",
        num_round="50",
        verbosity="2",
    ),
)

In [None]:
# Build one TrainingInput per channel; both channels share the same settings
# and differ only in the S3 prefix they read from.
data_channels = {
    channel_name: sagemaker.inputs.TrainingInput(
        s3_data=s3_uri,
        distribution="FullyReplicated",
        content_type="libsvm",
        s3_data_type="S3Prefix",
    )
    for channel_name, s3_uri in (
        ("train", s3_train_data),
        ("validation", s3_validation_data),
    )
}

# Keep the individual inputs available under their own names as well.
train_data = data_channels["train"]
validation_data = data_channels["validation"]

In [None]:
%%time
# Run the training job on the train/validation channels with log output enabled.
xgb_model.fit(inputs=data_channels, logs=True)

In [None]:
%%time

from sagemaker.serializers import LibSVMSerializer

# Deploy the trained model behind an endpoint and keep the returned predictor.
xgboost_classifier = xgb_model.deploy(
    # A single ml.m5.xlarge instance backs the endpoint.
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    # Serializer applied to the payloads passed to predict().
    serializer=LibSVMSerializer(),
)

In [None]:
import math

# Read the test file into memory, one record per list entry (line endings kept).
# The context manager closes the file handle as soon as the lines have been read,
# instead of leaving it open for the lifetime of the kernel.
with open("./data/abalone.test", "r") as test_file:
    test_list = test_file.readlines()

In [None]:
def print_pred(payload):
    """Send one test record to the endpoint and print its label next to the prediction.

    Args:
        payload: A single line of text. Its first whitespace-separated token is
            shown as the label; the line itself is passed to the endpoint unchanged.

    Returns:
        None. The result is printed as
        ``Label: <label>\\tPrediction: <rounded-up value>\\t(<raw response>)``.

    Blank or whitespace-only payloads (for example a trailing empty line in the
    input file) are skipped without calling the endpoint, because they contain
    no label to report.
    """
    fields = payload.split()
    if not fields:
        return
    label = fields[0]

    response = xgboost_classifier.predict(payload)

    # The response is decoded as UTF-8 text and parsed as a float.
    raw_result = response.decode("utf-8")
    # The displayed prediction is the raw value rounded up to the next integer.
    result = math.ceil(float(raw_result))

    print(f"Label: {label}\tPrediction: {result}\t({raw_result})")
    

# Score every line read from the test file and print its label/prediction pair.
for line in test_list:
    print_pred(line)

In [None]:
# Delete the endpoint behind the predictor returned by deploy() so it is not left running.
xgboost_classifier.delete_endpoint()