In [1]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import image_uris
from sagemaker.image_uris import retrieve, config_for_framework

# Create the SageMaker session first and resolve the execution role through
# that same session, rather than letting get_execution_role() build a second,
# implicit session of its own. `sess` is the session reused by later cells.
sess = sagemaker.Session()
role = get_execution_role(sagemaker_session=sess)

In [2]:
# S3 location of the dataset: everything lives under s3://<bucket>/<prefix>/.
bucket = 'sagemaker-multi-label-data'
prefix = '50-label-rs96'

# Common root shared by the input channels and the output location.
_s3_root = f's3://{bucket}/{prefix}'

# Input prefixes keep their trailing slash; the output path has none.
training = f'{_s3_root}/training/'
validation = f'{_s3_root}/validation/'
output = f'{_s3_root}/output'

In [3]:
# The training and validation channels differ only in their S3 location;
# distribution, content type and S3 data type are identical for both.
_channel_options = dict(
    distribution='FullyReplicated',
    content_type='application/x-recordio',
    s3_data_type='S3Prefix',
)

train_data = sagemaker.inputs.TrainingInput(training, **_channel_options)
validation_data = sagemaker.inputs.TrainingInput(validation, **_channel_options)

# Channel name -> input definition, in the form later passed to fit().
data_channels = dict(train=train_data, validation=validation_data)

In [4]:
# Look up the container image URI for the 'image-classification' framework
# in the region the current session is bound to.
training_image = retrieve(
    framework='image-classification',
    region=sess.boto_region_name,
)

In [5]:
# Estimator wrapping the image-classification container: a single
# ml.p2.8xlarge instance, with results written under the `output` S3 path
# and all calls going through the shared session.
multilabel_ic = sagemaker.estimator.Estimator(
    image_uri=training_image,
    role=role,
    instance_count=1,
    instance_type='ml.p2.8xlarge',
    output_path=output,
    sagemaker_session=sess,
)

In [6]:
# Hyperparameters for the multi-label run. Several of these override the
# container defaults reported in the training log (learning_rate 0.1,
# optimizer 'sgd', mini_batch_size 32, image_shape '3,224,224').
_hyperparameters = {
    # Dataset description.
    'num_classes': 50,
    'num_training_samples': 98378,
    'image_shape': '3,96,96',  # presumably channels,height,width -- confirm against the algorithm docs
    'augmentation_type': 'crop_color_transform',
    # Multi-label mode with weighted loss enabled.
    'multi_label': 1,
    'use_weighted_loss': 1,
    # Optimisation schedule.
    'epochs': 30,
    'optimizer': 'adam',
    'learning_rate': 1e-5,
    'mini_batch_size': 256,
}

multilabel_ic.set_hyperparameters(**_hyperparameters)

In [None]:
# Launch the training job on the train/validation channels; with logs
# enabled the job's log output is streamed into the cell output below.
multilabel_ic.fit(
    inputs=data_channels,
    logs=True,
)

2021-05-26 23:45:58 Starting - Starting the training job...
2021-05-26 23:45:59 Starting - Launching requested ML instancesProfilerReport-1622072757: InProgress
.........
2021-05-26 23:47:54 Starting - Preparing the instances for training......
2021-05-26 23:48:57 Downloading - Downloading input data...
2021-05-26 23:49:23 Training - Downloading the training image......
2021-05-26 23:50:20 Training - Training image download completed. Training in progress.[34mDocker entrypoint called with argument(s): train[0m
[34m[05/26/2021 23:50:24 INFO 139821163624256] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/image_classification/default-input.json: {'use_pretrained_model': 0, 'num_layers': 152, 'epochs': 30, 'learning_rate': 0.1, 'lr_scheduler_factor': 0.1, 'optimizer': 'sgd', 'momentum': 0, 'weight_decay': 0.0001, 'beta_1': 0.9, 'beta_2': 0.999, 'eps': 1e-08, 'gamma': 0.9, 'mini_batch_size': 32, 'image_shape': '3,224,224', 'precision_dtype': 'float32'}[0m
[3

[34m[05/26/2021 23:56:16 INFO 139821163624256] Epoch[1] Batch [240]#011Speed: 563.535 samples/sec#011accuracy=0.523261[0m
[34m[05/26/2021 23:56:25 INFO 139821163624256] Epoch[1] Batch [260]#011Speed: 563.614 samples/sec#011accuracy=0.524761[0m
[34m[05/26/2021 23:56:34 INFO 139821163624256] Epoch[1] Batch [280]#011Speed: 563.748 samples/sec#011accuracy=0.526316[0m
[34m[05/26/2021 23:56:43 INFO 139821163624256] Epoch[1] Batch [300]#011Speed: 563.835 samples/sec#011accuracy=0.527659[0m
[34m[05/26/2021 23:56:52 INFO 139821163624256] Epoch[1] Batch [320]#011Speed: 563.868 samples/sec#011accuracy=0.528523[0m
[34m[05/26/2021 23:57:01 INFO 139821163624256] Epoch[1] Batch [340]#011Speed: 563.807 samples/sec#011accuracy=0.529271[0m
[34m[05/26/2021 23:57:10 INFO 139821163624256] Epoch[1] Batch [360]#011Speed: 563.721 samples/sec#011accuracy=0.529811[0m
[34m[05/26/2021 23:57:19 INFO 139821163624256] Epoch[1] Batch [380]#011Speed: 563.843 samples/sec#011accuracy=0.530120[0m
[34m[05

[34m[05/27/2021 00:06:13 INFO 139821163624256] Epoch[4] Batch [220]#011Speed: 563.065 samples/sec#011accuracy=0.564738[0m
[34m[05/27/2021 00:06:23 INFO 139821163624256] Epoch[4] Batch [240]#011Speed: 563.221 samples/sec#011accuracy=0.565153[0m
[34m[05/27/2021 00:06:32 INFO 139821163624256] Epoch[4] Batch [260]#011Speed: 563.353 samples/sec#011accuracy=0.565810[0m
[34m[05/27/2021 00:06:41 INFO 139821163624256] Epoch[4] Batch [280]#011Speed: 563.368 samples/sec#011accuracy=0.566448[0m
[34m[05/27/2021 00:06:50 INFO 139821163624256] Epoch[4] Batch [300]#011Speed: 563.424 samples/sec#011accuracy=0.566973[0m
[34m[05/27/2021 00:06:59 INFO 139821163624256] Epoch[4] Batch [320]#011Speed: 563.488 samples/sec#011accuracy=0.567355[0m
[34m[05/27/2021 00:07:08 INFO 139821163624256] Epoch[4] Batch [340]#011Speed: 563.293 samples/sec#011accuracy=0.567608[0m
[34m[05/27/2021 00:07:17 INFO 139821163624256] Epoch[4] Batch [360]#011Speed: 563.372 samples/sec#011accuracy=0.567770[0m
[34m[05

[34m[05/27/2021 00:15:53 INFO 139821163624256] Epoch[7] Batch [180]#011Speed: 561.420 samples/sec#011accuracy=0.592790[0m
[34m[05/27/2021 00:16:02 INFO 139821163624256] Epoch[7] Batch [200]#011Speed: 561.669 samples/sec#011accuracy=0.592940[0m
[34m[05/27/2021 00:16:11 INFO 139821163624256] Epoch[7] Batch [220]#011Speed: 561.654 samples/sec#011accuracy=0.593251[0m
[34m[05/27/2021 00:16:20 INFO 139821163624256] Epoch[7] Batch [240]#011Speed: 561.638 samples/sec#011accuracy=0.593558[0m
[34m[05/27/2021 00:16:29 INFO 139821163624256] Epoch[7] Batch [260]#011Speed: 561.691 samples/sec#011accuracy=0.594065[0m
[34m[05/27/2021 00:16:38 INFO 139821163624256] Epoch[7] Batch [280]#011Speed: 561.680 samples/sec#011accuracy=0.594530[0m
[34m[05/27/2021 00:16:48 INFO 139821163624256] Epoch[7] Batch [300]#011Speed: 561.697 samples/sec#011accuracy=0.595117[0m
[34m[05/27/2021 00:16:57 INFO 139821163624256] Epoch[7] Batch [320]#011Speed: 561.815 samples/sec#011accuracy=0.595490[0m
[34m[05

[34m[05/27/2021 00:25:33 INFO 139821163624256] Epoch[10] Batch [140]#011Speed: 559.833 samples/sec#011accuracy=0.613886[0m
[34m[05/27/2021 00:25:42 INFO 139821163624256] Epoch[10] Batch [160]#011Speed: 560.280 samples/sec#011accuracy=0.614691[0m
[34m[05/27/2021 00:25:51 INFO 139821163624256] Epoch[10] Batch [180]#011Speed: 560.779 samples/sec#011accuracy=0.615101[0m
[34m[05/27/2021 00:26:00 INFO 139821163624256] Epoch[10] Batch [200]#011Speed: 561.131 samples/sec#011accuracy=0.615219[0m
[34m[05/27/2021 00:26:09 INFO 139821163624256] Epoch[10] Batch [220]#011Speed: 561.208 samples/sec#011accuracy=0.615494[0m
[34m[05/27/2021 00:26:18 INFO 139821163624256] Epoch[10] Batch [240]#011Speed: 561.363 samples/sec#011accuracy=0.615576[0m
[34m[05/27/2021 00:26:27 INFO 139821163624256] Epoch[10] Batch [260]#011Speed: 561.648 samples/sec#011accuracy=0.615863[0m
[34m[05/27/2021 00:26:38 INFO 139821163624256] Epoch[10] Batch [280]#011Speed: 555.209 samples/sec#011accuracy=0.616330[0m


[34m[05/27/2021 00:35:09 INFO 139821163624256] Epoch[13] Batch [80]#011Speed: 560.479 samples/sec#011accuracy=0.626906[0m
[34m[05/27/2021 00:35:18 INFO 139821163624256] Epoch[13] Batch [100]#011Speed: 561.132 samples/sec#011accuracy=0.626259[0m
[34m[05/27/2021 00:51:16 INFO 139821163624256] Epoch[17] Validation-accuracy=0.653345[0m
[34m[05/27/2021 00:51:18 INFO 139821163624256] Storing the best model with validation accuracy: 0.653345[0m
[34m[05/27/2021 00:51:18 INFO 139821163624256] Saved checkpoint to "/opt/ml/model/image-classification-0018.params"[0m
[34m[05/27/2021 00:51:27 INFO 139821163624256] Epoch[18] Batch [20]#011Speed: 555.844 samples/sec#011accuracy=0.643523[0m
[34m[05/27/2021 00:51:36 INFO 139821163624256] Epoch[18] Batch [40]#011Speed: 560.411 samples/sec#011accuracy=0.642732[0m
[34m[05/27/2021 00:51:45 INFO 139821163624256] Epoch[18] Batch [60]#011Speed: 561.666 samples/sec#011accuracy=0.642289[0m
[34m[05/27/2021 00:51:54 INFO 139821163624256] Epoch[18]

[34m[05/27/2021 01:00:49 INFO 139821163624256] Epoch[20] Batch [380]#011Speed: 562.187 samples/sec#011accuracy=0.648731[0m
[34m[05/27/2021 01:00:50 INFO 139821163624256] Epoch[20] Train-accuracy=0.648753[0m
[34m[05/27/2021 01:00:50 INFO 139821163624256] Epoch[20] Time cost=174.396[0m
[34m[05/27/2021 01:01:13 INFO 139821163624256] Epoch[20] Validation-accuracy=0.658321[0m
[34m[05/27/2021 01:01:24 INFO 139821163624256] Epoch[21] Batch [20]#011Speed: 554.376 samples/sec#011accuracy=0.652132[0m
[34m[05/27/2021 01:01:33 INFO 139821163624256] Epoch[21] Batch [40]#011Speed: 559.654 samples/sec#011accuracy=0.651374[0m
[34m[05/27/2021 01:01:42 INFO 139821163624256] Epoch[21] Batch [60]#011Speed: 561.849 samples/sec#011accuracy=0.651183[0m
[34m[05/27/2021 01:01:51 INFO 139821163624256] Epoch[21] Batch [80]#011Speed: 562.431 samples/sec#011accuracy=0.650005[0m
[34m[05/27/2021 01:02:00 INFO 139821163624256] Epoch[21] Batch [100]#011Speed: 563.037 samples/sec#011accuracy=0.649179[