In [5]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import image_uris
from sagemaker.image_uris import retrieve, config_for_framework

# Build the session first and resolve the execution role through it, so a
# single Session object is used throughout instead of letting
# get_execution_role() construct a second, implicit one.
sess = sagemaker.Session()
role = get_execution_role(sagemaker_session=sess)

In [13]:
# S3 layout for this experiment: everything lives under s3://<bucket>/<prefix>/.
bucket = 'sagemaker-multi-label-data'
prefix = 'ic-multi-label'

# The two input locations end with a trailing slash; the output location does not.
training = f's3://{bucket}/{prefix}/training/'
validation = f's3://{bucket}/{prefix}/validation/'
output = f's3://{bucket}/{prefix}/output'

In [14]:
# Both channels are configured identically apart from their S3 location,
# so the shared settings are declared once and unpacked into each input.
channel_settings = {
    'distribution': 'FullyReplicated',
    'content_type': 'application/x-recordio',
    's3_data_type': 'S3Prefix',
}

train_data = sagemaker.inputs.TrainingInput(training, **channel_settings)
validation_data = sagemaker.inputs.TrainingInput(validation, **channel_settings)

# Channel name -> input definition; this mapping is what gets passed to fit().
data_channels = dict(train=train_data, validation=validation_data)

In [15]:
# Look up the container image URI for the 'image-classification' framework
# in the region the current session is bound to.
training_image = retrieve(
    framework='image-classification',
    region=sess.boto_region_name,
)

In [16]:
# Generic estimator that runs the container in `training_image` on a single
# ml.p3.2xlarge instance and writes its results under `output`.
multilabel_ic = sagemaker.estimator.Estimator(
    image_uri=training_image,
    role=role,
    sagemaker_session=sess,
    instance_type='ml.p3.2xlarge',
    instance_count=1,
    output_path=output,
)

In [19]:
# Hyperparameters for the training job, collected in one mapping so they can
# be inspected or tweaked before being applied to the estimator.
# NOTE(review): num_classes and num_training_samples are hard-coded and must
# match the dataset stored under the training prefix -- confirm when the data changes.
ic_hyperparameters = {
    'num_classes': 334,
    'num_training_samples': 116945,
    'augmentation_type': 'crop_color_transform',
    'epochs': 5,
    'image_shape': "3,96,96",
    'learning_rate': 0.001,
    'mini_batch_size': 256,
    'multi_label': 1,
    'use_weighted_loss': 1,
    'optimizer': 'adam',
}

multilabel_ic.set_hyperparameters(**ic_hyperparameters)

In [20]:
# Start the training job on the train/validation channels, block until it
# finishes, and stream the job's log lines into the cell output.
multilabel_ic.fit(
    inputs=data_channels,
    wait=True,
    logs=True,
)

2021-05-18 03:30:11 Starting - Starting the training job...
2021-05-18 03:30:33 Starting - Launching requested ML instancesProfilerReport-1621308610: InProgress
......
2021-05-18 03:31:33 Starting - Preparing the instances for training.........
2021-05-18 03:33:09 Downloading - Downloading input data
2021-05-18 03:33:09 Training - Downloading the training image.....[34mDocker entrypoint called with argument(s): train[0m
[34m[05/18/2021 03:33:53 INFO 139733071488832] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/image_classification/default-input.json: {'use_pretrained_model': 0, 'num_layers': 152, 'epochs': 30, 'learning_rate': 0.1, 'lr_scheduler_factor': 0.1, 'optimizer': 'sgd', 'momentum': 0, 'weight_decay': 0.0001, 'beta_1': 0.9, 'beta_2': 0.999, 'eps': 1e-08, 'gamma': 0.9, 'mini_batch_size': 32, 'image_shape': '3,224,224', 'precision_dtype': 'float32'}[0m
[34m[05/18/2021 03:33:53 INFO 139733071488832] Merging with provided configuration from /opt/m

[34m[05/18/2021 03:38:24 INFO 139733071488832] Epoch[1] Batch [140]#011Speed: 640.499 samples/sec#011accuracy=0.547297[0m
[34m[05/18/2021 03:38:32 INFO 139733071488832] Epoch[1] Batch [160]#011Speed: 640.512 samples/sec#011accuracy=0.548796[0m
[34m[05/18/2021 03:38:40 INFO 139733071488832] Epoch[1] Batch [180]#011Speed: 641.057 samples/sec#011accuracy=0.549306[0m
[34m[05/18/2021 03:38:48 INFO 139733071488832] Epoch[1] Batch [200]#011Speed: 641.222 samples/sec#011accuracy=0.549920[0m
[34m[05/18/2021 03:38:56 INFO 139733071488832] Epoch[1] Batch [220]#011Speed: 641.223 samples/sec#011accuracy=0.551999[0m
[34m[05/18/2021 03:39:04 INFO 139733071488832] Epoch[1] Batch [240]#011Speed: 640.817 samples/sec#011accuracy=0.554416[0m
[34m[05/18/2021 03:39:12 INFO 139733071488832] Epoch[1] Batch [260]#011Speed: 640.808 samples/sec#011accuracy=0.557612[0m
[34m[05/18/2021 03:39:20 INFO 139733071488832] Epoch[1] Batch [280]#011Speed: 640.868 samples/sec#011accuracy=0.559289[0m
[34m[05

[34m[05/18/2021 03:46:58 INFO 139733071488832] Epoch[3] Batch [440]#011Speed: 641.714 samples/sec#011accuracy=0.610623[0m
[34m[05/18/2021 03:47:04 INFO 139733071488832] Epoch[3] Train-accuracy=0.610851[0m
[34m[05/18/2021 03:47:04 INFO 139733071488832] Epoch[3] Time cost=181.531[0m
[34m[05/18/2021 03:47:19 INFO 139733071488832] Epoch[3] Validation-accuracy=0.628287[0m
[34m[05/18/2021 03:47:20 INFO 139733071488832] Storing the best model with validation accuracy: 0.628287[0m
[34m[05/18/2021 03:47:20 INFO 139733071488832] Saved checkpoint to "/opt/ml/model/image-classification-0004.params"[0m
[34m[05/18/2021 03:47:28 INFO 139733071488832] Epoch[4] Batch [20]#011Speed: 631.143 samples/sec#011accuracy=0.621063[0m
[34m[05/18/2021 03:47:36 INFO 139733071488832] Epoch[4] Batch [40]#011Speed: 638.043 samples/sec#011accuracy=0.617473[0m
[34m[05/18/2021 03:47:44 INFO 139733071488832] Epoch[4] Batch [60]#011Speed: 640.098 samples/sec#011accuracy=0.616857[0m
[34m[05/18/2021 03:47