In [None]:
import json
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

# Execution role and SageMaker session shared by the cells below.
role = get_execution_role()
sess = sagemaker.Session()

# Resolve the built-in Object Detection container image for the session's region.
# `sagemaker.image_uris.retrieve` is the SDK v2 replacement for the deprecated
# `get_image_uri` helper; the framework, region and version arguments are the
# same values the old call passed.
training_image = sagemaker.image_uris.retrieve(
    framework="object-detection",
    region=sess.boto_region_name,
    version="latest",
)

### Upload Augmented Manifest files to S3

In [None]:
# S3 bucket and key prefix under which this notebook stores its data.
bucket = "sagemaker-facemasks-object-detection"
prefix = "facemask-detection"

# S3 URI handed to the estimator as its output path.
s3_output_location = f"s3://{bucket}/{prefix}/output"

In [None]:
# Key prefixes (inside `bucket`) for the two data channels.
train_channel = f"{prefix}/train"
validation_channel = f"{prefix}/validation"

# Upload each local augmented manifest under its channel prefix:
# training first, then validation.
for local_manifest, channel_prefix in (
    ("training/augmented.manifest", train_channel),
    ("validation/augmented.manifest", validation_channel),
):
    sess.upload_data(path=local_manifest, bucket=bucket, key_prefix=channel_prefix)

# S3 URIs of the uploaded manifests, used when defining the training inputs.
s3_train_data = f"s3://{bucket}/{train_channel}/augmented.manifest"
s3_validation_data = f"s3://{bucket}/{validation_channel}/augmented.manifest"


### Define Training Inputs

In [None]:
def _augmented_manifest_input(s3_manifest_uri):
    """Return the TrainingInput describing one augmented-manifest channel.

    Both channels share the same settings: fully replicated data, RecordIO
    wrapping, and the 'source-ref' / 'bounding-box' manifest attributes.
    """
    return sagemaker.inputs.TrainingInput(
        s3_manifest_uri,
        distribution="FullyReplicated",
        content_type="application/x-recordio",
        s3_data_type="AugmentedManifestFile",
        attribute_names=['source-ref', 'bounding-box'],
        record_wrapping='RecordIO'
    )


train_data = _augmented_manifest_input(s3_train_data)
validation_data = _augmented_manifest_input(s3_validation_data)

# Channel name -> input definition, as passed to `fit`.
data_channels = {"train": train_data, "validation": validation_data}

In [None]:
def get_training_records(local_path):
    """Load a JSON Lines manifest into a list of parsed records.

    Each non-blank line of the file is parsed as one JSON document. Blank or
    whitespace-only lines (for example a trailing empty line) are skipped
    instead of raising ``json.JSONDecodeError``.

    Args:
        local_path: Path of the local manifest file.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(local_path, 'r') as manifest:
        # Iterate the file lazily rather than materialising every line first.
        return [json.loads(line) for line in manifest if line.strip()]

In [None]:
# Parse the local training manifest, then display the largest number of
# bounding-box annotations found on any single record.
train_set = get_training_records('training/augmented.manifest')
max(map(lambda record: len(record['bounding-box']['annotations']), train_set))

### Estimator and Hyperparameters

In [None]:
# Estimator for the Object Detection image resolved earlier. Data is read in
# Pipe input mode and model artifacts are written to `s3_output_location`.
od_model = sagemaker.estimator.Estimator(
    training_image,
    role,
    instance_count=1,
    instance_type="ml.p3.2xlarge",
    input_mode="Pipe",
    output_path=s3_output_location,
    sagemaker_session=sess
)

num_classes = 3
num_epochs = 100
# Epochs at which `learning_rate` would be multiplied by `lr_scheduler_factor`.
# NOTE: currently NOT applied, because `lr_scheduler_step` is disabled below.
lr_steps = "33,67"

train_records = get_training_records('training/augmented.manifest')
validation_records = get_training_records('validation/augmented.manifest')
num_training_samples = len(train_records)

# `label_width` is the padded label length shared by the training and
# validation data, so it must cover the busiest image in EITHER manifest.
# Each object is described by 5 numbers [class_id, left, top, width, height];
# the algorithm docs say the header length is usually 2, and this notebook
# keeps its existing allowance of 4. Deriving the value from the manifests
# keeps it correct when the dataset changes (it was hard-coded for 115 objects).
values_per_object = 5
label_header_length = 4
max_objects_per_image = max(
    (
        len(record['bounding-box']['annotations'])
        for record in train_records + validation_records
    ),
    default=0,
)
label_width = max_objects_per_image * values_per_object + label_header_length

od_model.set_hyperparameters(
    base_network="resnet-50",
    use_pretrained_model=1,
    num_classes=num_classes,
    mini_batch_size=16,
    epochs=num_epochs,
    learning_rate=0.001,
    # Step schedule is disabled; per the algorithm docs `lr_scheduler_factor`
    # is only used together with `lr_scheduler_step`, so it is inert for now.
    #lr_scheduler_step=lr_steps,
    lr_scheduler_factor=0.1,
    optimizer="adam",
    # Per the algorithm docs, momentum applies to sgd and is ignored by other
    # optimizers; kept so that switching back to sgd needs no further change.
    momentum=0.9,
    # Left at the algorithm defaults:
    #weight_decay=0.0005,
    #overlap_threshold=0.5,
    #nms_threshold=0.45,
    num_training_samples=num_training_samples,
    label_width=label_width
)

### Train Model

In [None]:
# Start the training job on the train/validation channels, with logs set to 'All'.
od_model.fit(
    inputs=data_channels,
    logs='All',
)