In [1]:
# Default values for the notebook's two parameters; both default to empty strings.
# NOTE(review): presumably these are overridden by a parameter-injection tool
# (e.g. papermill) before execution — confirm. With the empty defaults the S3 URIs
# built further down have no bucket segment.
# role_param is copied into `role` and sent as the training job's "RoleArn".
role_param = ""
# output_bucket_path is interpolated as the bucket segment of the S3 input/output URIs.
output_bucket_path = ""

In [3]:
%%time
import boto3
from time import gmtime, strftime
import time
from sagemaker import image_uris

# Job name: fixed prefix plus a UTC timestamp taken when this cell runs.
_launch_stamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
training_job_name = f"notebook-training-{_launch_stamp}"

# One region is shared by the SageMaker client and the container image lookup.
region = "us-east-1"
client = boto3.client("sagemaker", region_name=region)
container = image_uris.retrieve(framework="xgboost", region=region, version="1.5-1")

# Execution role is taken from the notebook parameter.
role = role_param

# Settings not consumed in this cell.
deploy_amt_model = True
output_prefix = "local-pipeline"

# NOTE(review): leftover reference, presumably the raw dataset location — confirm or remove:
# s3://sagemaker-mlops-p-vicbs68pvwtg/abalonedata/data.csv

CPU times: user 456 ms, sys: 303 ms, total: 758 ms
Wall time: 1.15 s


In [5]:
# Request payload for the SageMaker CreateTrainingJob API; it is unpacked as
# keyword arguments into client.create_training_job(). All values in
# "HyperParameters" are strings, as that API field is a string-to-string map.

# Names of the input channels. Each channel reads libsvm files from
# s3://<output_bucket_path>/abalone_data/<channel name>.
_data_channels = ("train", "validation")

create_training_params = {
    "AlgorithmSpecification": {"TrainingImage": container, "TrainingInputMode": "File"},
    "RoleArn": role,
    "OutputDataConfig": {"S3OutputPath": f"s3://{output_bucket_path}/single-xgboost"},
    "ResourceConfig": {"InstanceCount": 1, "InstanceType": "ml.m5.2xlarge", "VolumeSizeInGB": 5},
    "TrainingJobName": training_job_name,
    "HyperParameters": {
        "max_depth": "5",
        "eta": "0.2",
        "gamma": "4",
        "min_child_weight": "6",
        "subsample": "0.7",
        # "reg:linear" is a deprecated alias in XGBoost; "reg:squarederror" is the
        # current name for the same squared-error regression objective.
        "objective": "reg:squarederror",
        "num_round": "50",
        "verbosity": "2",
    },
    # Hard cap on training time: one hour.
    "StoppingCondition": {"MaxRuntimeInSeconds": 3600},
    # The channels differ only in name and S3 prefix, so build them from one template.
    "InputDataConfig": [
        {
            "ChannelName": channel,
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": f"s3://{output_bucket_path}/abalone_data/{channel}",
                    "S3DataDistributionType": "FullyReplicated",
                }
            },
            "ContentType": "libsvm",
            "CompressionType": "None",
        }
        for channel in _data_channels
    ],
}

In [20]:
# Launch the training job, then poll once a minute until it reaches a terminal state.
print(f"Creating a training job with name: {training_job_name}. It will take between 5 and 6 minutes to complete.")
client.create_training_job(**create_training_params)

# "Stopped" is terminal as well (manual stop or MaxRuntimeInSeconds reached);
# without it the loop would poll forever on a stopped job.
terminal_statuses = {"Completed", "Failed", "Stopped"}

job_description = client.describe_training_job(TrainingJobName=training_job_name)
status = job_description["TrainingJobStatus"]
while status not in terminal_statuses:
    time.sleep(60)
    job_description = client.describe_training_job(TrainingJobName=training_job_name)
    status = job_description["TrainingJobStatus"]
    print(status)

# Fail here rather than letting later cells run against a job that produced no model.
if status != "Completed":
    failure_reason = job_description.get("FailureReason", "no failure reason reported")
    raise RuntimeError(
        f"Training job {training_job_name} ended with status {status}: {failure_reason}"
    )

Creating a training job with name: DEMO-xgboost-regression-2023-01-23-15-13-28. It will take between 5 and 6 minutes to complete.
InProgress
InProgress
InProgress
Completed
