### Abalone - SageMaker using boto3

In [1]:
%%time

import os
import boto3
import re
import sagemaker

# IAM role the notebook runs as, the session's AWS region, and an S3 client
# used for the copy loop below.
role = sagemaker.get_execution_role()
region = boto3.Session().region_name
s3_client = boto3.client("s3")


# Source location: S3 bucket and prefix where the training data is located.
data_bucket = "sagemaker-sample-files"
data_prefix = "datasets/tabular/uci_abalone"
data_bucket_path = f"s3://{data_bucket}"

# Destination location: S3 bucket for saving code and model artifacts.
# Feel free to specify a different bucket and prefix.
output_bucket = sagemaker.Session().default_bucket()
output_prefix = "sagemaker/DEMO-xgboost-abalone-default"
output_bucket_path = f"s3://{output_bucket}"

# Copy each split from the source bucket into the output bucket, going through
# a local file named after the split (abalone.train, abalone.test, ...).
for data_category in ("train", "test", "validation"):
    data_filename = f"abalone.{data_category}"
    data_key = f"{data_prefix}/{data_category}/{data_filename}"
    output_key = f"{output_prefix}/{data_category}/{data_filename}"
    s3_client.download_file(data_bucket, data_key, data_filename)
    s3_client.upload_file(data_filename, output_bucket, output_key)

CPU times: user 1.06 s, sys: 220 ms, total: 1.28 s
Wall time: 5.36 s


In [33]:
# Display the "s3://<bucket>" URI built from the session's default bucket.
output_bucket_path

's3://sagemaker-ap-northeast-2-687314952804'

In [2]:
# Look up the training image URI for the built-in XGBoost algorithm
# (version 1.3-1) in the current region.
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.3-1",
)

In [35]:
# Display the AWS region name taken from the boto3 session.
region

'ap-northeast-2'

In [3]:
%%time
import boto3
from time import gmtime, strftime

# Suffix the job name with the current UTC time (second resolution) so that
# separate runs of this cell submit differently named training jobs.
job_name = f"DEMO-xgboost-regression-{strftime('%Y-%m-%d-%H-%M-%S', gmtime())}"
print("Training job", job_name)

# Ensure that the training and validation data folders generated above are reflected in the "InputDataConfig" parameter below.

# Request body for the SageMaker CreateTrainingJob API call.
create_training_params = {
    "AlgorithmSpecification": {"TrainingImage": container, "TrainingInputMode": "File"},
    "RoleArn": role,
    "OutputDataConfig": {"S3OutputPath": f"{output_bucket_path}/{output_prefix}/single-xgboost"},
    "ResourceConfig": {"InstanceCount": 1, "InstanceType": "ml.m5.2xlarge", "VolumeSizeInGB": 5},
    "TrainingJobName": job_name,
    # Hyperparameter values are passed to the API as strings.
    "HyperParameters": {
        "max_depth": "5",
        "eta": "0.2",
        "gamma": "4",
        "min_child_weight": "6",
        "subsample": "0.7",
        # "reg:linear" is a deprecated alias of the squared-error regression
        # objective; use its current name instead.
        "objective": "reg:squarederror",
        "num_round": "50",
        "verbosity": "2",
    },
    "StoppingCondition": {"MaxRuntimeInSeconds": 3600},
    # One libsvm channel per data folder; the channel name doubles as the
    # folder name under the output prefix.
    "InputDataConfig": [
        {
            "ChannelName": channel_name,
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": f"{output_bucket_path}/{output_prefix}/{channel_name}",
                    "S3DataDistributionType": "FullyReplicated",
                }
            },
            "ContentType": "libsvm",
            "CompressionType": "None",
        }
        for channel_name in ("train", "validation")
    ],
}


# Submit the job; the response (shown as the cell output) carries the new
# job's ARN.
client = boto3.client("sagemaker", region_name=region)
client.create_training_job(**create_training_params)

Training job DEMO-xgboost-regression-2022-01-18-13-15-01
CPU times: user 15.5 ms, sys: 7.06 ms, total: 22.6 ms
Wall time: 226 ms


{'TrainingJobArn': 'arn:aws:sagemaker:ap-northeast-2:687314952804:training-job/demo-xgboost-regression-2022-01-18-13-15-01',
 'ResponseMetadata': {'RequestId': 'ff8e60cc-cd7b-47b6-8c54-c144511741b3',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ff8e60cc-cd7b-47b6-8c54-c144511741b3',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '123',
   'date': 'Tue, 18 Jan 2022 13:15:00 GMT'},
  'RetryAttempts': 0}}

### BOTO3

https://boto3.amazonaws.com/v1/documentation/api/1.9.42/reference/services/sagemaker.html

In [7]:
# TODO: Describe the training job (e.g. check its status with `client.describe_training_job`).

In [None]:
# TODO: Add an IAM role/policy for controlling SageMaker.

![IAM](img/iam.png)