In [None]:
import os
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import random
import cv2
from lxml import etree
from datetime import datetime


In [None]:
# We need SageMaker SDK >1.50.12 for imx8qm DLR (https://github.com/aws/sagemaker-python-sdk/blob/master/CHANGELOG.md#v15012-2020-02-17)
!pip install --upgrade sagemaker --quiet
from sagemaker.mxnet.estimator import MXNet
import sagemaker

## Visualize the images

In [None]:
# Root of the dataset, relative to the notebook, and the folder holding the JPEG images.
data_dir = './dataset'
images_dir = os.path.join(data_dir,'VOC2019','JPEGImages')
# glob() returns entries in filesystem order, which differs between machines;
# sorting keeps the "first N images" previews reproducible.
train_images = sorted(glob.glob(images_dir + "/*"))

In [None]:
# Sanity check: report how many paths the glob over the images folder matched.
print("We have {} images".format(len(train_images)))

Let's see what they look like. We use matplotlib to plot 9 images from the dataset to get a feel for them.

In [None]:
# Show the first n_images pictures of the dataset in a roughly square grid.
n_images = 9
cols = int(math.sqrt(n_images))
# add_subplot() needs integer grid dimensions; np.ceil() returns a float,
# which recent matplotlib releases reject, so round up with math.ceil + int.
rows = int(math.ceil(n_images / float(cols)))
fig = plt.figure(figsize=(15,10))
for n, image_path in enumerate(train_images[:n_images]):
    # Draw on the Axes returned by add_subplot instead of the implicit pyplot state.
    ax = fig.add_subplot(rows, cols, n + 1)
    ax.imshow(plt.imread(image_path))
    ax.axis('off')
fig.subplots_adjust(wspace=0.06, hspace=0.06)
plt.show()

In [None]:
# Show a few randomly chosen images with their annotated bounding boxes drawn on top.
n_images = 4
cols = int(math.sqrt(n_images))
# add_subplot() needs integer grid dimensions (np.ceil() would return a float).
rows = int(math.ceil(n_images / float(cols)))
fig = plt.figure(figsize=(15,10))
# Sample without replacement so the same image is not shown twice; the min()
# keeps the cell working when the dataset holds fewer than n_images files.
sampled_images = random.sample(train_images, min(n_images, len(train_images)))
for i, random_image in enumerate(sampled_images):
    # splitext() only strips the extension, so file names containing dots stay intact.
    filename = os.path.splitext(os.path.basename(random_image))[0]
    annotation_file = "{}/VOC2019/Annotations/{}.xml".format(data_dir,filename)
    # OpenCV draws in place and imread() may hand back a read-only array,
    # so work on a writable copy.
    image = np.array(plt.imread(random_image))
    tree = etree.parse(annotation_file)
    # Read name and box from the same <object> element so they can never be
    # paired with the fields of a different object.
    for obj in tree.xpath('/annotation/object'):
        name = obj.findtext('name')
        xmin, ymin, xmax, ymax = (
            int(float(obj.findtext('bndbox/' + tag)))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0,255,0), 3)
        # The label is anchored at the bottom-right corner of the box.
        cv2.putText(image, str(name), (xmax, ymax), cv2.FONT_HERSHEY_PLAIN, 3, (255,0,0), 3)

    ax = fig.add_subplot(rows, cols, i + 1)
    ax.imshow(image)
    ax.axis('off')
fig.subplots_adjust(wspace=0.06, hspace=0.06)
plt.show()

## First run of the estimator with a small number of epochs

In [None]:
# Open a SageMaker session and build the s3:// URI of the bucket returned by
# session.default_bucket(); the paths derived from it below are rooted here.
session = sagemaker.session.Session()
default_s3_bucket = 's3://{}'.format(session.default_bucket())
print('default_s3_bucket: {}'.format(default_s3_bucket))

In [None]:
# IAM role handed to the estimator and to the compilation job.
role = sagemaker.get_execution_role()
print("Using IAM role arn: {}".format(role))

# Descriptive prefix shared by the job names and the S3 keys of this notebook.
job_name_prefix = 'pasta-ssd-voc'
print("job_name_prefix: {}".format(job_name_prefix))

s3_output_path = '{}/{}/output'.format(default_s3_bucket,job_name_prefix)
print("S3 model output path: {}".format(s3_output_path))

# Upload the local dataset folder. Pass the data_dir variable, not the literal
# string 'data_dir', which would name a path that is not the dataset location.
training_images = session.upload_data(data_dir, key_prefix='{}/dataset'.format(job_name_prefix))
print("Training images S3 Uri: {}".format(training_images))

In [None]:
# Fixed hyperparameters for the first, short run (two epochs only); they are
# handed to the estimator that launches train_ssd.py.
static_hyperparameters = dict((
    ('epochs', 2),
    ('num-workers', 12),
    ('network', 'mobilenet1.0'),
    ('data-shape', 512),
))
print("static_hyperparameters: {}".format(static_hyperparameters))


In [None]:
# Estimator for the short first run: one training instance running train_ssd.py
# with the static hyperparameters defined above.
instance_type = "ml.p3.2xlarge"
estimator = MXNet(
    entry_point="train_ssd.py",
    hyperparameters=static_hyperparameters,
    framework_version="1.4.1",
    py_version='py3',
    role=role,
    train_instance_count=1,
    train_instance_type=instance_type,
    base_job_name=job_name_prefix,
    output_path=s3_output_path,
)

In [None]:
estimator.fit({
    "train": training_images
})

### Hyperparameter Tuner job

In [None]:
# Hyperparameters that stay fixed across all tuning jobs. The network is kept
# fixed as well: the categorical 'model' range in the tuner cell is disabled.
static_hyperparameters = dict((
    ('epochs', 50),
    ('num-workers', 12),
    ('network', 'mobilenet1.0'),
    ('data-shape', 512),
))
print("static_hyperparameters: {}".format(static_hyperparameters))

# Regexes used to pull the metrics out of the training log; the first capture
# group of each pattern is the numeric value (optionally signed / in exponent form).
metric_definitions = []
metric_definitions.append({
    'Name': 'validation_mAP',
    'Regex': 'best mAP ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)',
})
metric_definitions.append({
    'Name': 'run_validation_mAP',
    'Regex': 'running mAP ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)',
})

instance_type = "ml.p3.2xlarge"

In [None]:
# Estimator used as the template for every job of the tuning run; identical to
# the first-run estimator except that it picks up the 50-epoch hyperparameters.
instance_type = "ml.p3.2xlarge"
estimator = MXNet(
    "train_ssd.py",
    role=role,
    base_job_name=job_name_prefix,
    output_path=s3_output_path,
    train_instance_type=instance_type,
    train_instance_count=1,
    py_version='py3',
    framework_version="1.4.1",
    hyperparameters=static_hyperparameters,
)

In [None]:
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

# Search space explored by the tuning job.
hyperparameter_ranges = dict(
    # Learning rate: how much the model learns from the current iteration ( < 0.01 ).
    lr=ContinuousParameter(0.0001, 0.002),
    # Weight decay: regularization that forces small weights ( < 0.001 ).
    wd=ContinuousParameter(0.00001, 0.00005),
    # Disabled for now -- ssd_512_resnet50_v1_voc fails with "cudaMalloc failed:
    # out of memory", so stick to mobilenet1.0 until the cause is understood:
    #     'model': CategoricalParameter(["ssd_512_mobilenet1.0_voc", "ssd_512_resnet50_v1_voc"]),
)

In [None]:
# Tuning budget: total number of training jobs to launch, and how many may run
# at the same time (bounded by the account limits for the instance type).
max_jobs, max_parallel_jobs = 4, 2

In [None]:
# Launch the tuning job: maximize the 'validation_mAP' metric parsed from the
# training logs while searching over hyperparameter_ranges.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name='validation_mAP',
    objective_type='Maximize',
    metric_definitions=metric_definitions,
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=max_jobs,
    max_parallel_jobs=max_parallel_jobs,
    base_tuning_job_name=job_name_prefix,
)
tuner.fit(inputs={"train": training_images})

In [None]:
# Keep the name of the tuning job that was just launched; later cells use it
# to build the console link and to wait for the job.
job_name = tuner.latest_tuning_job.job_name
print("Tuning job: %s" % job_name)

In [None]:
# Print a link to the tuning job in the SageMaker console. The region comes from
# the session's public boto_region_name property rather than its private attribute.
print("You can monitor the progress of your jobs here: https://console.aws.amazon.com/sagemaker/home?region={}#/hyper-tuning-jobs/{}".format(session.boto_region_name,job_name))

In [None]:
# Block until the tuning job has finished; the call's return value is shown as the cell output.
session.wait_for_tuning_job(job_name)

In [None]:
# Display the tuning job name again for reference.
job_name

## Compile the model from the best training job

In [None]:
# Ask the tuner for its best training job and display the result.
best_job = tuner.best_training_job()
best_job

In [None]:
# Look up where the best training job stored its model artifact. The boto3
# client call is used directly, and the response is indexed explicitly so a
# missing field raises a KeyError naming it instead of an AttributeError on None.
best_job_description = session.sagemaker_client.describe_training_job(TrainingJobName=best_job)
best_job_model_s3_uri = best_job_description["ModelArtifacts"]["S3ModelArtifacts"]
# Wrap the artifact in a Model; the second positional argument is left as None.
pasta_model = sagemaker.model.Model(best_job_model_s3_uri, None)
# S3 location that receives the compiled model.
compiled_output_path = '{}/{}/output'.format(default_s3_bucket,job_name_prefix)

In [None]:
# Timestamped name so that re-running the cell starts a new compilation job.
compilation_job_name = "{}-{}".format(job_name_prefix, datetime.now().strftime("%Y%m%d-%H%M%S"))

# Compile for the same square input size the network was trained with
# ('data-shape' hyperparameter) instead of a hard-coded 224x224.
# NOTE(review): confirm the edge application feeds images of this size.
input_size = static_hyperparameters['data-shape']

optimized_ic = pasta_model.compile(
    target_instance_family='imx8qm',
    input_shape={'data':[1, 3, input_size, input_size]},  # Batch size 1, 3 channels, square images.
    output_path=compiled_output_path,
    role=role,
    job_name=compilation_job_name,
    framework='mxnet', framework_version='1.2.1')
