In [1]:
# Install the necessary libraries

!pip install -U sagemaker scikit-learn pandas boto3


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os
import sys
from sagemaker import Session, get_execution_role

# Resolve the AWS context reused by the rest of the notebook:
# the session's region, the execution role and the session's default bucket.

sagemaker_session = Session()
region = sagemaker_session.boto_region_name
role = get_execution_role()
bucket = sagemaker_session.default_bucket()

# Put the directory one level above sys.path[0] on the import path
# (later cells import from the `src` package).
parent_dir = os.path.join(sys.path[0], '../')
sys.path.insert(1, parent_dir)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/benieric/.config/sagemaker/config.yaml


In [3]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Get IRIS Data
# Build one DataFrame with a column per feature (named after
# iris.feature_names) followed by a trailing 'target' column.

iris = load_iris()
iris_features = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df = iris_features.assign(target=iris.target)

In [4]:
import os

# Prepare Data
# Write the train/test splits as CSV files without header or index,
# with 'target' moved to the first column.
# NOTE(review): presumably the label-first layout is what the training
# container expects for CSV input -- confirm against the algorithm docs.

os.makedirs('./data', exist_ok=True)

feature_columns = [name for name in iris_df.columns if name != 'target']
iris_df = iris_df[['target', *feature_columns]]

# Fixed random_state keeps the 80/20 split reproducible across runs.
train_data, test_data = train_test_split(iris_df, random_state=42, test_size=0.2)

for split_frame, csv_path in (
    (train_data, './data/train.csv'),
    (test_data, './data/test.csv'),
):
    split_frame.to_csv(csv_path, header=False, index=False)

In [5]:
import boto3

# Upload Data
# Push the local data directory to the session bucket and derive the S3
# locations used as training input and output by the cells below.

s3_client = boto3.client("s3")

prefix = "DEMO-scikit-iris"
TRAIN_DATA = "train.csv"
TEST_DATA = "test.csv"
WORK_DIRECTORY = "data"

# Single source of truth for where the data lives under the bucket: the same
# key prefix is used for the upload and for the training input URI, so
# changing WORK_DIRECTORY can no longer make the two drift apart.
data_key_prefix = "{}/{}".format(prefix, WORK_DIRECTORY)

train_input = sagemaker_session.upload_data(
    WORK_DIRECTORY, bucket=bucket, key_prefix=data_key_prefix
)

s3_input_path = "s3://{}/{}/{}".format(bucket, data_key_prefix, TRAIN_DATA)
s3_output_path = "s3://{}/{}/output".format(bucket, prefix)

print(s3_input_path)
print(s3_output_path)

s3://sagemaker-us-west-2-341897309766/DEMO-scikit-iris/data/train.csv
s3://sagemaker-us-west-2-341897309766/DEMO-scikit-iris/output


In [6]:
from sagemaker import image_uris

# Fetch XGBOOST image
# Look up the 'xgboost' framework image URI for the session's region;
# the result is passed as the training image by the job-creation cells.

image = image_uris.retrieve(
    framework='xgboost',
    region=region,
    version="latest",
)
print(image)

433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest


In [None]:
# Create TrainingJob with Boto3
# Submits the training job through the boto3 SageMaker client, using the
# image, role and S3 paths computed in the earlier cells.

import time
import boto3

client = boto3.client('sagemaker')

# A UTC timestamp suffix gives each submission a distinct job name.
boto_job_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
job_name_boto = 'xgboost-iris-' + boto_job_timestamp

# Hyperparameter values are passed as strings.
boto_hyperparameters = {
    'objective': 'multi:softmax',
    'num_class': '3',
    'num_round': '10',
    'eval_metric': 'merror',
}

# Single 'train' channel pointing at the uploaded training CSV.
boto_train_channel = {
    'ChannelName': 'train',
    'ContentType': 'csv',
    'DataSource': {
        'S3DataSource': {
            'S3DataType': 'S3Prefix',
            'S3Uri': s3_input_path,
            'S3DataDistributionType': 'FullyReplicated',
        },
    },
    'CompressionType': 'None',
    'RecordWrapperType': 'None',
}

response = client.create_training_job(
    TrainingJobName=job_name_boto,
    HyperParameters=boto_hyperparameters,
    AlgorithmSpecification={'TrainingImage': image, 'TrainingInputMode': 'File'},
    RoleArn=role,
    InputDataConfig=[boto_train_channel],
    OutputDataConfig={'S3OutputPath': s3_output_path},
    ResourceConfig={
        'InstanceType': 'ml.m4.xlarge',
        'InstanceCount': 1,
        'VolumeSizeInGB': 30,
    },
    StoppingCondition={'MaxRuntimeInSeconds': 600},
)
print(response)

In [None]:
# Wait for TrainingJob with Boto3
# Poll the job every 5 seconds, printing a dash per poll, until it reports
# 'Failed', 'Completed' or 'Stopped'; then print the final status and,
# for a failed job, the failure reason.
import time

terminal_statuses = ('Failed', 'Completed', 'Stopped')

response = client.describe_training_job(TrainingJobName=job_name_boto)
while response['TrainingJobStatus'] not in terminal_statuses:
    print("-", end=" ")
    time.sleep(5)
    response = client.describe_training_job(TrainingJobName=job_name_boto)

status = response['TrainingJobStatus']
print(status)
if status == 'Failed':
    print(response['FailureReason'])

In [7]:
# Create TrainingJob V3
# Same training job as the boto3 cell, expressed with the resource and
# shape classes imported from src.generated.resources.

import logging
import time

from src.generated.resources import (
    AlgorithmSpecification,
    Channel,
    DataSource,
    OutputDataConfig,
    ResourceConfig,
    S3DataSource,
    StoppingCondition,
    TrainingJob,
)

# DEBUG level on the root logger makes library log records visible in the cell output.
logging.basicConfig(level=logging.DEBUG)

# A UTC timestamp suffix gives each submission a distinct job name.
job_name_v3 = 'xgboost-iris-' + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

# Single 'train' channel pointing at the uploaded training CSV.
train_channel_v3 = Channel(
    channel_name='train',
    content_type='csv',
    compression_type='None',
    record_wrapper_type='None',
    data_source=DataSource(
        s3_data_source=S3DataSource(
            s3_data_type='S3Prefix',
            s3_uri=s3_input_path,
            s3_data_distribution_type='FullyReplicated',
        ),
    ),
)

training_job = TrainingJob.create(
    training_job_name=job_name_v3,
    hyper_parameters={
        'objective': 'multi:softmax',
        'num_class': '3',
        'num_round': '10',
        'eval_metric': 'merror',
    },
    algorithm_specification=AlgorithmSpecification(
        training_image=image,
        training_input_mode='File',
    ),
    role_arn=role,
    input_data_config=[train_channel_v3],
    output_data_config=OutputDataConfig(s3_output_path=s3_output_path),
    resource_config=ResourceConfig(
        instance_type='ml.m4.xlarge',
        instance_count=1,
        volume_size_in_g_b=30,
    ),
    stopping_condition=StoppingCondition(max_runtime_in_seconds=600),
)

































































INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


Not applying SDK defaults from location: %s /etc/xdg/sagemaker/config.yaml
Not applying SDK defaults from location: %s /home/benieric/.config/sagemaker/config.yaml
{'AlgorithmSpecification': {'EnableSageMakerMetricsTimeSeries': False,
                            'MetricDefinitions': [{'Name': 'train:mae',
                                                   'Regex': '.*\\[[0-9]+\\].*#011train-mae:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'},
                                                  {'Name': 'train:merror',
                                                   'Regex': '.*\\[[0-9]+\\].*#011train-merror:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'},
                                                  {'Name': 'validation:mae',
                                                   'Regex': '.*\\[[0-9]+\\].*#011validation-mae:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'},
                                                  {'Name': 'validation:logloss',
                                

In [8]:
# Wait for TrainingJob V3
# Look the job up again by name, call wait() on it (presumably blocks until
# the job finishes -- confirm), then print the failure reason if the job
# ended in the "Failed" status.

training_job = TrainingJob.get(training_job_name=job_name_v3)
training_job.wait()

job_failed = training_job.training_job_status == "Failed"
if job_failed:
    print(training_job.failure_reason)

{'AlgorithmSpecification': {'EnableSageMakerMetricsTimeSeries': False,
                            'MetricDefinitions': [{'Name': 'train:mae',
                                                   'Regex': '.*\\[[0-9]+\\].*#011train-mae:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'},
                                                  {'Name': 'train:merror',
                                                   'Regex': '.*\\[[0-9]+\\].*#011train-merror:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'},
                                                  {'Name': 'validation:mae',
                                                   'Regex': '.*\\[[0-9]+\\].*#011validation-mae:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'},
                                                  {'Name': 'validation:logloss',
                                                   'Regex': '.*\\[[0-9]+\\].*#011validation-logloss:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'},
                                               

In [9]:
# List TrainingJobs V3
# Print the name and status of every training job created in the last 24 hours.
import datetime

# Use a timezone-aware UTC timestamp. A naive datetime.now() is local wall-clock
# time, but botocore serialises naive datetimes as if they were UTC, which
# would shift the 24-hour window by the local UTC offset.
# NOTE(review): assumes TrainingJob.get_all forwards this value to the
# SageMaker API unchanged -- confirm.
creation_time_after = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1)

for job in TrainingJob.get_all(creation_time_after=creation_time_after):
    # Refresh each job before reading its status.
    job.refresh()
    print(job.training_job_name, job.training_job_status)


xgboost-iris-2024-05-24-18-23-24 Failed
xgboost-iris-2024-05-24-18-17-55 Failed
xgboost-iris-2024-05-24-18-17-43 Completed
xgboost-iris-2024-05-24-18-11-30 Failed
xgboost-iris-2024-05-24-18-09-10 Failed
xgboost-iris-2024-05-24-18-07-20 Failed
xgboost-iris-2024-05-24-17-57-52 Completed


In [None]:
# Creating TrainingJob using some inputs from Config File - Intelligent Defaults

import os
import time

from src.generated.resources import TrainingJob, AlgorithmSpecification, Channel, DataSource, S3DataSource, OutputDataConfig, ResourceConfig, StoppingCondition, Cluster
from src.generated.shapes import ClusterInstanceGroupSpecification, ClusterLifeCycleConfig

# Setting path of Config file in environment variable.
# The path is resolved relative to the working directory instead of pointing
# into one developer's home directory, and a value that is already exported
# is left untouched so users can supply their own config file.
# NOTE(review): assumes the notebook runs one directory below the repository
# root (consistent with the '../' sys.path entry added in the setup cell) and
# that sample/sagemaker/2017-07-24/default-configs.json lives at that root --
# confirm, or export SAGEMAKER_ADMIN_CONFIG_OVERRIDE before running this cell.
default_config_path = os.path.abspath(
    os.path.join(os.pardir, 'sample', 'sagemaker', '2017-07-24', 'default-configs.json')
)
os.environ.setdefault('SAGEMAKER_ADMIN_CONFIG_OVERRIDE', default_config_path)

# Generating names for resources (one shared UTC timestamp for both names)
resource_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
job_name_v3 = 'xgboost-iris-' + resource_timestamp
cluster_name_v3 = 'xgboost-cluster-' + resource_timestamp


# This will create a Cluster - one that does not have default configs in the default-configs.json and will use values from Global Defaults
cluster = Cluster.create(
    cluster_name=cluster_name_v3,
    instance_groups=[
        ClusterInstanceGroupSpecification(
            instance_count=1,
            instance_group_name="instance-group-11",
            instance_type="ml.m5.4xlarge",
            life_cycle_config=ClusterLifeCycleConfig(
                source_s3_uri=s3_input_path,
                on_create="dothis",
            ),
            execution_role=role,
        )
    ]
)

# This will create a Training Job using specific VPC Config present in the default configs JSON
training_job = TrainingJob.create(
    training_job_name=job_name_v3,
    hyper_parameters={
        'objective': 'multi:softmax',
        'num_class': '3',
        'num_round': '10',
        'eval_metric': 'merror'
    },
    algorithm_specification=AlgorithmSpecification(
        training_image=image,
        training_input_mode='File'
    ),
    role_arn=role,
    input_data_config=[
        Channel(
            channel_name='train',
            content_type='csv',
            compression_type='None',
            record_wrapper_type='None',
            data_source=DataSource(
                s3_data_source=S3DataSource(
                    s3_data_type='S3Prefix',
                    s3_uri=s3_input_path,
                    s3_data_distribution_type='FullyReplicated'
                )
            )
        )
    ],
    output_data_config=OutputDataConfig(
        s3_output_path=s3_output_path
    ),
    resource_config=ResourceConfig(
        instance_type='ml.m4.xlarge',
        instance_count=1,
        volume_size_in_g_b=30
    ),
    stopping_condition=StoppingCondition(
        max_runtime_in_seconds=600
    )
)

