In [1]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

In [42]:
# S3 locations of the input data (stored under a 'recordio' prefix).
bucket = 'sagemaker-builtin-imageclass-cancer-detection'
input_prefix = 'breast-cancer-detection/input/recordio'

# Training data is read from the 'train' folder; note that the validation
# channel is fed from the folder named 'test'.
s3_train = 's3://%s/%s/train' % (bucket, input_prefix)
s3_validation = 's3://%s/%s/test' % (bucket, input_prefix)

In [43]:
# S3 destination handed to the estimator as `output_path`.
# Lives in the same bucket as the input data; the trailing slash is intentional
# and preserved from the original URI.
output_prefix = 'breast-cancer-detection/output'
output_path = 's3://%s/%s/' % (bucket, output_prefix)

In [44]:
# Configuring the hyperparameters
#
# These values are forwarded to `estimator.set_hyperparameters(...)` in a later
# cell. The training log shows the algorithm's defaults (e.g. epochs=30,
# num_layers=152, mini_batch_size=32, learning_rate=0.1,
# image_shape='3,224,224'), which the settings below override.

# --- Dataset description ---
num_classes = 2
# NOTE: the name is misspelled ('trainnig'); it is kept as-is because the
# set_hyperparameters cell references this exact identifier when filling in
# the `num_training_samples` hyperparameter.
num_trainnig_samples = 194266
# Comma-separated string; per the algorithm documentation the order is
# channels,height,width -- TODO confirm against the actual image data.
image_shape = '3,50,50'
augmentation_type = 'crop_color_transform'

# --- Network and optimisation ---
num_layers = 18
mini_batch_size = 128
epochs = 5
learning_rate = 0.01
# 0 leaves the pretrained-model option switched off (same as the default
# reported in the training log).
use_pretrained_model = 0



In [45]:
# Training infrastructure settings; each is passed to the Estimator below
# (as train_instance_count, train_instance_type and train_volume_size).
instance_count = 1                  # number of training instances
instance_type = 'ml.p2.16xlarge'    # instance type requested for the job
volume_size_gb = 50                 # storage volume size, in GB as the name indicates

In [46]:
# Execution role the training job will run under.
role = get_execution_role()

# Look up the container image for the built-in 'image-classification'
# algorithm in the region of the default boto3 session.
# NOTE(review): `get_image_uri` emits a deprecation warning on this SDK version;
# it is kept because the replacement API only exists in SageMaker SDK v2.
training_image = get_image_uri(
    boto3.Session().region_name,
    'image-classification',
)

'get_image_uri' method will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


In [47]:
# Upper bound on training run time, passed to the Estimator as `train_max_run`.
# 360000 -- presumably seconds (i.e. 100 hours); confirm against the SDK docs.
train_timeout = 100 * 60 * 60


In [48]:
# SageMaker session object; handed to the Estimator as `sagemaker_session`.
session = sagemaker.Session()


In [49]:
# Build the generic Estimator around the built-in algorithm image.
# The first two arguments (image, role) are deliberately left positional,
# exactly as in the original call.
estimator = sagemaker.estimator.Estimator(
    training_image,
    role,
    # Compute resources for the training job.
    train_instance_count=instance_count,
    train_instance_type=instance_type,
    train_volume_size=volume_size_gb,
    # Maximum run time allowed for the job.
    train_max_run=train_timeout,
    # Where results are written, and how the job is named.
    output_path=output_path,
    base_job_name='sm-builtin-img-classification',
    sagemaker_session=session,
)

Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.


In [53]:
# Attach the hyperparameters defined in the configuration cell to the estimator.
estimator.set_hyperparameters(
    # Dataset description.
    num_classes=num_classes,
    # The source variable carries a typo ('trainnig'); the hyperparameter key
    # itself is spelled correctly.
    num_training_samples=num_trainnig_samples,
    # Network and input settings.
    num_layers=num_layers,
    mini_batch_size=mini_batch_size,
    image_shape=image_shape,
    augmentation_type=augmentation_type,
    # Optimisation settings.
    epochs=epochs,
    learning_rate=learning_rate,
    use_pretrained_model=use_pretrained_model,
)

In [54]:
# Describe the two input channels. Both use the same settings (RecordIO
# content, S3 prefix addressing, fully replicated distribution) and differ
# only in the S3 location they point at.
train_data = sagemaker.session.s3_input(
    s3_train,
    distribution='FullyReplicated',
    content_type='application/x-recordio',
    s3_data_type='S3Prefix',
)
validation_data = sagemaker.session.s3_input(
    s3_validation,
    distribution='FullyReplicated',
    content_type='application/x-recordio',
    s3_data_type='S3Prefix',
)

# Channel names mapped to their input definitions.
data_channels = {
    'train': train_data,
    'validation': validation_data,
}

# Launch the training job; the cell output below streams the job's status and log.
estimator.fit(inputs=data_channels)

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


2020-10-06 11:15:41 Starting - Starting the training job...
2020-10-06 11:15:43 Starting - Launching requested ML instances......
2020-10-06 11:17:10 Starting - Preparing the instances for training...............
2020-10-06 11:19:21 Downloading - Downloading input data...
2020-10-06 11:19:45 Training - Downloading the training image..[34mDocker entrypoint called with argument(s): train[0m
[34m[10/06/2020 11:20:18 INFO 140401831143232] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/image_classification/default-input.json: {u'beta_1': 0.9, u'gamma': 0.9, u'beta_2': 0.999, u'optimizer': u'sgd', u'use_pretrained_model': 0, u'eps': 1e-08, u'epochs': 30, u'lr_scheduler_factor': 0.1, u'num_layers': 152, u'image_shape': u'3,224,224', u'precision_dtype': u'float32', u'mini_batch_size': 32, u'weight_decay': 0.0001, u'learning_rate': 0.1, u'momentum': 0}[0m
[34m[10/06/2020 11:20:18 INFO 140401831143232] Merging with provided configuration from /opt/ml/input/config