In [None]:
import sagemaker
from sagemaker import get_execution_role

# SageMaker session and the default bucket it provides for this account/region.
sess = sagemaker.Session()
bucket = sess.default_bucket()

# IAM role of the notebook; handed to the training and hosting jobs later on.
role = get_execution_role()

# Common S3 key prefix for everything this notebook uploads or produces.
prefix = 'lego-classification'

In [None]:
import boto3
import json
ssm = boto3.client('ssm')

# Kaggle credentials are read from the SSM Parameter Store entry 'kaggleAPI'.
# expected format: {"username":"xx","key":"xxx"}
# WithDecryption=True returns the plaintext when the parameter is stored as a
# SecureString (the right type for an API key); it is ignored for plain String
# parameters, so both storage types work.
kaggle_parameter = ssm.get_parameter(
    Name='kaggleAPI',
    WithDecryption=True
)
kaggleAPI = json.loads(kaggle_parameter["Parameter"]["Value"])

In [None]:
# Install the Kaggle client (quiet mode); the `kaggle` command is used below to download the dataset.
!pip -q install kaggle

In [None]:
from os import environ
environ["KAGGLE_USERNAME"] = kaggleAPI["username"]
environ["KAGGLE_KEY"] = kaggleAPI["key"]

![ -z "lego-brick-images.zip" ] && rm lego-brick-images.zip

!kaggle datasets download --force joosthazelzet/lego-brick-images
!unzip -oq lego-brick-images.zip

In [None]:
!wget 'https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/im2rec.py'

In [None]:
training_image_folder = "./LEGO brick images/train"
validation_image_folder = "./LEGO brick images/valid"

# generate .rec database
!python im2rec.py lego_train "$training_image_folder" --list --recursive --pass-through --pack-label 
!python im2rec.py lego_train "$training_image_folder" --recursive --pass-through --pack-label 
!python im2rec.py lego_test "$validation_image_folder" --list --recursive --pass-through --pack-label 
!python im2rec.py lego_test "$validation_image_folder" --recursive --pass-through --pack-label 

In [None]:
# Upload the RecordIO files to train and validation channels
# S3 key prefixes of the train and validation channels.
train_channel = '{}/train'.format(prefix)
validation_channel = '{}/validation'.format(prefix)

# Upload each RecordIO file under its channel prefix (train first, then validation).
for rec_file, channel_prefix in (
    ('lego_train.rec', train_channel),
    ('lego_test.rec', validation_channel),
):
    sess.upload_data(path=rec_file, bucket=bucket, key_prefix=channel_prefix)

# S3 URIs consumed by the training job: the two input channels and the output location.
s3_uri_template = 's3://{}/{}'
s3_train_data = s3_uri_template.format(bucket, train_channel)
s3_validation_data = s3_uri_template.format(bucket, validation_channel)
s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)

In [None]:
# Both channels use identical input settings; only the S3 location differs.
recordio_channel_settings = dict(
    distribution='FullyReplicated',
    content_type='application/x-recordio',
    s3_data_type='S3Prefix',
)

train_data = sagemaker.session.s3_input(s3_train_data, **recordio_channel_settings)
validation_data = sagemaker.session.s3_input(s3_validation_data, **recordio_channel_settings)

In [None]:
import os

# Derive the dataset statistics passed to the training job as hyperparameters.
#
# Only image files are counted, and a class is a directory that directly
# contains at least one image. Counting every directory entry (the previous
# behaviour) let stray files and hidden folders such as .DS_Store or
# .ipynb_checkpoints inflate num_classes / num_training_samples.
# NOTE(review): the extension list is assumed to match what im2rec.py packs
# by default -- confirm against the downloaded script.
image_extensions = ('.jpeg', '.jpg', '.png')

class_directories = set()
num_training_samples = 0
for dirpath, _, filenames in os.walk(training_image_folder):
    image_count = sum(
        1 for filename in filenames
        if os.path.splitext(filename)[1].lower() in image_extensions
    )
    if image_count:
        class_directories.add(dirpath)
        num_training_samples += image_count

num_classes = len(class_directories)

print("num_classes:{}".format(num_classes))
print("num_training_samples:{}".format(num_training_samples))

In [None]:
# Search-space bounds used when the hyperparameter ranges are built further down.
mini_batch_size_min, mini_batch_size_max = 16, 64

# Learning-rate bounds are deliberately kept as strings, as in the original.
learning_rate_min, learning_rate_max = "0.0001", "1.0"

# Candidate optimizers for the categorical range.
optimizers = [
    'sgd',
    'adam',
    'rmsprop',
    'nag',
]

In [None]:
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner 
from datetime import date

# Needed for a timestamped (not just dated) tuning job name, see job_name below.
from datetime import datetime

# Container image of the built-in image-classification algorithm for this region.
training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version="latest")

# Estimator describing how each individual training job is run.
ic = sagemaker.estimator.Estimator(
    training_image,
    role,
    train_instance_count=1,
    train_instance_type='ml.p3.8xlarge',
    input_mode='File',
    output_path=s3_output_location,
    sagemaker_session=sess,
    base_job_name='lego-classification'
)

layers = 34  # [18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110]
epochs = 1000

# Static hyperparameters shared by every training job.
# mini_batch_size, learning_rate and optimizer are intentionally not set here:
# they are explored by the tuner through hpo_hyperparameter_ranges below.
ic.set_hyperparameters(
    num_layers=layers,
    num_classes=num_classes,
    num_training_samples=num_training_samples,
    image_shape="3,200,200",
    epochs=epochs,
    top_k=5,
    precision_dtype='float32',
    use_pretrained_model=0
)

# maximum number of training jobs
hpo_max_number_of_training_jobs = 50
# maximum number of parallel training jobs
hpo_max_number_of_parallel_jobs = 2
hpo_objective_metric_name = 'validation:accuracy'

hpo_hyperparameter_ranges = {
    'learning_rate': ContinuousParameter(learning_rate_min, learning_rate_max),
    'mini_batch_size': IntegerParameter(mini_batch_size_min, mini_batch_size_max),
    'optimizer': CategoricalParameter(optimizers)
}

# Tuning job names must be unique, so a date-only suffix made a second run on
# the same day fail. The suffix now includes hour and minute; the compact
# format keeps the whole name at 31 characters, inside SageMaker's
# 32-character limit for tuning job names.
job_name = "lego-classification-{}".format(datetime.now().strftime("%y%m%d-%H%M"))

# Random search that maximizes validation accuracy, with automatic early stopping.
tuner_es = HyperparameterTuner(
    ic,
    hpo_objective_metric_name,
    hpo_hyperparameter_ranges,
    objective_type='Maximize',
    max_jobs=hpo_max_number_of_training_jobs,
    max_parallel_jobs=hpo_max_number_of_parallel_jobs,
    early_stopping_type='Auto',
    strategy="Random"
)


In [None]:
# Map channel names to the S3 inputs defined earlier.
data_channels = dict(train=train_data, validation=validation_data)

# run the hyperparameter tuning job
tuner_es.fit(
    data_channels,
    job_name=job_name,
    include_cls_metadata=False,
)

print('Hyperparameter Tuning job name: {}'.format(job_name))

In [None]:
# Block until the hyperparameter tuning job started above has finished.
tuner_es.wait()

In [None]:
from sagemaker import HyperparameterTuningJobAnalytics
# Load the per-training-job results of the tuning job into a DataFrame.
tuner_metrics_es = HyperparameterTuningJobAnalytics(job_name)
tuning_results = tuner_metrics_es.dataframe()

# Display the five jobs with the highest final objective value.
tuning_results.sort_values(by=['FinalObjectiveValue'], ascending=False).head(5)

In [None]:
# Name of the best training job reported by the tuner; used below to deploy the endpoint.
best_training_job_name = tuner_es.best_training_job()

In [None]:
# Display the best training job's name as the cell output.
best_training_job_name

In [None]:
# Hosting configuration: one ml.m4.xlarge instance serving the model produced
# by the best training job, using the same container image as training.
endpoint_settings = dict(
    job_name=best_training_job_name,
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    deployment_image=training_image,
    role=role,
)

endpoint_name = sess.endpoint_from_job(**endpoint_settings)

In [None]:
# Alternative (disabled): attach an estimator to the best training job and
# deploy from it instead of using the endpoint created in the previous cell.
# attached_estimator = sagemaker.estimator.Estimator.attach(best_training_job_name)
# attached_estimator.deploy(initial_instance_count = 1,
#                           instance_type = 'ml.c5.4xlarge')

# Client for sending inference requests to the endpoint created above.
predictor = sagemaker.predictor.RealTimePredictor(endpoint_name)

In [None]:
!mkdir test/
!unzip -o data/lego_photos.zip -d test/

In [None]:
import os
import json
import numpy as np
from IPython.display import display, Image
# Run every test photo through the endpoint and print the predicted class index.
test_dir = 'test'
image_extensions = ('.jpg', '.jpeg', '.png')

# Keep only regular image files, in a stable order. Archive leftovers such as
# sub-directories or metadata files would otherwise crash open() or be sent to
# the endpoint as if they were images.
names = sorted(
    entry for entry in os.listdir(test_dir)
    if os.path.isfile(os.path.join(test_dir, entry))
    and entry.lower().endswith(image_extensions)
)
if not names:
    print("no image files found in '{}'".format(test_dir))

# The request content type is the same for every image, so set it once.
predictor.content_type = 'application/x-image'

for name in names:
    image_path = os.path.join(test_dir, name)
    print("image: {}".format(name))
    display(Image(image_path, width=100))
    with open(image_path, 'rb') as f:
        payload = bytearray(f.read())
    # The response is parsed as JSON; argmax picks the position of its largest value.
    result = json.loads(predictor.predict(payload))
    index = np.argmax(result)
    print("predicted index: {}".format(index))
