## Global vars/imports

In [None]:
# Local working directory: the dataset, the model files and the generated
# training script are all written below it.
base_dir = '/tmp/cars'
model_dir = '{}/model'.format(base_dir)

# Remote locations of the dataset files and of a ready-made model archive.
dataset_dir = 'https://workshopml.spock.cloud/datasets/cars'
pre_trained_model = 'https://workshopml.spock.cloud/models/cars/model.tar.gz'

In [None]:
# kernel conda_mxnet_p36

%matplotlib inline
import json
import logging
import os
import time
import numpy as np
import mxnet as mx
import sagemaker
import cv2
import pandas as pd
import tarfile
import inspect
import random
import boto3
import matplotlib.pyplot as plt

from sagemaker.mxnet import MXNet

from PIL import Image
from sagemaker.predictor import json_serializer, json_deserializer
from collections import namedtuple

In [None]:
# Root logger starts at DEBUG; the local training cell lowers it to INFO once
# training has finished.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Show which MXNet build the kernel picked up.
print(mx.__version__)

## MXNet container functions

In [None]:
# Training method
def train(current_host, hosts, num_cpus, num_gpus, channel_input_dirs, model_dir, hyperparameters, **kwargs):
    """
    Fine-tune an ImageNet-11k ResNet-152 on the cars dataset.

    The function is kept self-contained on purpose: its source is copied
    verbatim into the generated training script, so it may only rely on that
    script's imports (``os``, ``mx``), on ``base_dir`` and on
    ``get_train_data`` / ``get_test_data``.

    :param current_host: name of the host running this call (unused here).
    :param hosts: names of all training hosts (unused here).
    :param num_cpus: number of available CPUs (unused here).
    :param num_gpus: number of available GPUs; 0 selects the CPU context.
    :param channel_input_dirs: dict channel name -> local directory; the
        'training' entry must hold cars_train/cars_test ``.lst`` and ``.rec``.
    :param model_dir: directory that receives the per-epoch checkpoints.
    :param hyperparameters: dict with optional 'batch_size', 'epochs',
        'learning_rate' and 'log_interval' entries.
    :param kwargs: ignored.
    :return: the trained ``mx.mod.Module``.
    :raises RuntimeError: if no evaluation result was recorded, so there is no
        best checkpoint to select.
    """

    # Helper class for keeping track of accuracy
    class EpochCheckpoint(object):
        """Remembers which epoch scored best on the evaluation data."""

        def __init__(self):
            # Start below any reachable accuracy so that the first evaluated
            # epoch is always recorded, even when it scores exactly 0.0.
            self.best_epoch_accuracy = float('-inf')
            self.best_epoch_id = -1

        def __call__(self, param):
            if param.eval_metric is None:
                return
            # get_name_value() yields (name, value) pairs; only the first
            # metric is considered.
            epoch_accuracy = param.eval_metric.get_name_value()[0][1]
            if epoch_accuracy > self.best_epoch_accuracy:
                self.best_epoch_accuracy = epoch_accuracy
                # +1 so the id matches the number used in the checkpoint
                # file name that is renamed at the end of train().
                self.best_epoch_id = param.epoch + 1

    print(hyperparameters)
    # retrieve the hyperparameters we set in notebook (with some defaults)
    batch_size = hyperparameters.get('batch_size', 128)
    epochs = hyperparameters.get('epochs', 100)
    learning_rate = hyperparameters.get('learning_rate', 0.00007)
    log_interval = hyperparameters.get('log_interval', 50)
    data_dir = channel_input_dirs['training']

    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    mx.random.seed(127)
    num_classes = 196

    # Download a pre trained ImageNet 11k
    path = 'http://data.mxnet.io/models/imagenet-11k/'
    for remote_file in ('resnet-152/resnet-152-symbol.json',
                        'resnet-152/resnet-152-0000.params',
                        'synset.txt'):
        mx.test_utils.download(path + remote_file, dirname=base_dir)

    sym, arg_params, aux_params = mx.model.load_checkpoint(base_dir + '/resnet-152', 0)

    # Slice the pre-trained network at the flattened feature layer and attach
    # a fresh classification head for our classes.
    all_layers = sym.get_internals()
    net = all_layers['flatten0_output']
    net = mx.symbol.Dropout(data=net, p=0.7, name='drop1')
    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc1')
    # Distinct name: two nodes called 'drop1' would be ambiguous in the graph.
    net = mx.symbol.Dropout(data=net, p=0.2, name='drop2')
    net = mx.symbol.SVMOutput(data=net, name='svm')

    # Pre-trained weights for everything except the replaced 'fc1' head.
    new_args = {k: v for k, v in arg_params.items() if 'fc1' not in k}

    # Load the new model
    deep_car_mod = mx.mod.Module(symbol=net, context=ctx, label_names=['svm_label'])
    deep_car_mod.bind(for_training=True, data_shapes=[('data', (batch_size, 3, 224, 224))],
                      label_shapes=[('svm_label', (batch_size,))])

    train_data = get_train_data(data_dir, batch_size, (3, 224, 224))
    test_data = get_test_data(data_dir, batch_size, (3, 224, 224))

    model_filename_prefix = os.path.join(model_dir, 'cars_labels_model')
    epoch_checkpoint = EpochCheckpoint()

    # The pre-trained parameters are handed to fit() itself. Setting them
    # beforehand and then calling fit(force_init=True) without arg_params
    # would re-initialise every parameter and throw the pre-trained weights
    # away. With allow_missing=True only the parameters absent from new_args
    # (the new 'fc1' head) are drawn from the Xavier initializer.
    deep_car_mod.fit(train_data,
        eval_data=test_data,
        optimizer='adam',
        optimizer_params={'learning_rate': learning_rate},
        eval_metric='acc',
        initializer=mx.initializer.Xavier(rnd_type="uniform", magnitude=2.34),
        arg_params=new_args,
        aux_params=aux_params,
        allow_missing=True,
        epoch_end_callback=mx.callback.do_checkpoint(model_filename_prefix),
        batch_end_callback=mx.callback.Speedometer(batch_size, log_interval),
        eval_end_callback=epoch_checkpoint,
        num_epoch=epochs)

    if epoch_checkpoint.best_epoch_id < 0:
        raise RuntimeError('No evaluation result was recorded; cannot select the best checkpoint')

    print("Best epoch id: %d - Accuracy: %f" % (epoch_checkpoint.best_epoch_id, epoch_checkpoint.best_epoch_accuracy ) )

    # Mark the winning checkpoint; save() later keeps only this file.
    os.rename(model_filename_prefix + '-%04d.params' % epoch_checkpoint.best_epoch_id,
        model_filename_prefix + '-best.params' )

    return deep_car_mod

In [None]:
# Save the best accuracy model
def save(net, model_dir):
    """
    Keep only the best checkpoint in ``model_dir`` and rename it to epoch 0000.

    Every other ``*params`` file is deleted and
    ``cars_labels_model-best.params`` becomes
    ``cars_labels_model-0000.params``. When no ``-best.params`` file is present
    (empty directory, or this function already ran) nothing is touched, so the
    call is safe to repeat.

    :param net: the trained model (unused; the checkpoints are already on disk).
    :param model_dir: directory holding the checkpoint files.
    """
    best_name = 'cars_labels_model-best.params'
    # model_dir will be empty except on primary container
    files = os.listdir(model_dir)
    if best_name not in files:
        # Without this guard a second call would delete the already renamed
        # -0000.params file and then fail on the missing -best.params.
        return

    for entry in files:
        if entry.endswith('params') and entry != best_name:
            os.remove(os.path.join(model_dir, entry))
    os.rename(os.path.join(model_dir, best_name),
              os.path.join(model_dir, 'cars_labels_model-0000.params'))

In [None]:
# Return the test data iterator
def get_test_data(data_dir, batch_size, data_shape):
    """
    Build the image iterator over the test split.

    :param data_dir: directory containing cars_test.lst and cars_test.rec.
    :param batch_size: number of images per batch.
    :param data_shape: image shape handed to the iterator.
    :return: an ``mx.image.ImageIter`` over the test files.
    """
    lst_path = os.path.join(data_dir, 'cars_test.lst')
    rec_path = os.path.join(data_dir, 'cars_test.rec')
    return mx.image.ImageIter(batch_size=batch_size,
                              data_shape=data_shape,
                              path_imglist=lst_path,
                              path_imgrec=rec_path)


In [None]:
# Return the train data iterator
def get_train_data(data_dir, batch_size, data_shape):
    """
    Build the image iterator over the training split.

    :param data_dir: directory containing cars_train.lst and cars_train.rec.
    :param batch_size: number of images per batch.
    :param data_shape: image shape handed to the iterator.
    :return: an ``mx.image.ImageIter`` over the training files.
    """
    lst_path = os.path.join(data_dir, 'cars_train.lst')
    rec_path = os.path.join(data_dir, 'cars_train.rec')
    return mx.image.ImageIter(batch_size=batch_size,
                              data_shape=data_shape,
                              path_imglist=lst_path,
                              path_imgrec=rec_path)

In [None]:
# Validation method
def test(ctx, net, test_data):
    """
    Compute the accuracy of ``net`` over ``test_data``.

    :param ctx: device context (unused; kept for interface compatibility).
    :param net: a bound module with its parameters set.
    :param test_data: data iterator; it is reset before use.
    :return: the ``(name, value)`` result of ``mx.metric.Accuracy.get()``.
    """
    test_data.reset()
    metric = mx.metric.Accuracy()

    for batch in test_data:
        # Score each batch against its own labels only: predictions must not
        # be carried over from earlier batches, otherwise labels and outputs
        # no longer line up.
        net.forward(batch, is_train=False)
        # NOTE(review): padded samples in the final batch (if the iterator
        # pads) are counted too - confirm whether that matters here.
        metric.update(batch.label, net.get_outputs())

    return metric.get()

In [None]:
# Load the saved model and return it
def model_fn(model_dir):
    """
    Load the checkpoint saved at epoch 0 and return it ready for inference.
    Called once when hosting service starts.

    :param: model_dir The directory where model files are stored.
    :return: a module bound on the CPU for single-image batches.
    """
    checkpoint_prefix = os.path.join(model_dir, 'cars_labels_model')
    symbol, arg_params, aux_params = mx.model.load_checkpoint(checkpoint_prefix, 0)

    module = mx.mod.Module(symbol=symbol, context=mx.cpu(), label_names=[ 'svm_label'])
    # One 3x224x224 image per request.
    input_shapes = [('data', (1, 3, 224, 224))]
    module.bind(for_training=False, data_shapes=input_shapes,
                label_shapes=module._label_shapes)
    module.set_params(arg_params, aux_params, allow_missing=False)
    return module

In [None]:
# Do the prediction and return the top-5 classes
def transform_fn(net, data, input_content_type, output_content_type):
    """
    Transform a request using the model. Called once per request.

    The payload is parsed as JSON, fed through the network and the five
    highest-scoring classes are returned as a JSON list of single-entry
    objects ``{"<class index>": <score>}``, best first. On any failure the
    error is logged with its traceback and an empty list is returned.

    :param net: The model.
    :param data: The request payload (a JSON encoded image array).
    :param input_content_type: The request content type (not inspected).
    :param output_content_type: The (desired) response content type.
    :return: response payload and content type.
    """
    resp = []
    try:
        # we can use content types to vary input/output handling, but
        # here we just assume json for both
        Batch = namedtuple('Batch', ['data'])

        img = mx.nd.array(json.loads(data))
        net.forward(Batch([img]))

        # NOTE(review): the network built in this file ends in SVMOutput, so
        # these are presumably raw class scores rather than probabilities.
        scores = np.squeeze(net.get_outputs()[0][0].asnumpy())

        # indices of the five largest scores, best first
        top5 = np.argsort(scores)[::-1][:5]
        resp = [{"%d" % i: float(scores[i])} for i in top5]

    except Exception as e:
        # Keep the best-effort contract (empty result), but record the
        # traceback so the failure can be diagnosed from the logs.
        logging.exception(e)

    return json.dumps(resp), output_content_type

## Helper functions

In [None]:
# Encoder for converting numpy to json
class NumPyArangeEncoder(json.JSONEncoder):
    """JSON encoder that serialises numpy arrays as (nested) lists."""

    def default(self, obj):
        """Convert ``obj`` when it is an ndarray, else defer to the base encoder."""
        if not isinstance(obj, np.ndarray):
            return super(NumPyArangeEncoder, self).default(obj)
        return obj.tolist()

In [None]:
def predict(val_iter, endpoint_name=None):
    """
    Classify the next image of ``val_iter`` and print truth and predictions.

    Relies on the notebook globals ``object_classes`` (class names by label
    id) and, for local prediction, ``net`` and ``transform_fn``.

    :param val_iter: image iterator; its next batch is consumed and the first
        image of that batch is displayed.
    :param endpoint_name: name of a SageMaker endpoint to call; when None the
        local ``net`` is used through ``transform_fn``.
    """
    content_type = 'application/json'

    batch = val_iter.next()
    img = batch.data[0].asnumpy()
    true_label_id = int(batch.label[0].asnumpy()[0])

    # Display the image (channels moved last) after a JPEG encode/decode
    # round trip.
    hwc_img = img[0].transpose((1, 2, 0))
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 90]
    _, encoded = cv2.imencode('.jpg', hwc_img, encode_param)
    plt.imshow(cv2.imdecode(encoded, 1))
    print("Ground truth [%d] - %s\n" %( true_label_id, object_classes[true_label_id] ) )

    # Convert the image to a Json array
    data = json.dumps(img, cls=NumPyArangeEncoder)
    if endpoint_name is None:
        # Call our model for predicting
        payload, _ = transform_fn(net, data, content_type, content_type)
    else:
        sm = boto3.client('sagemaker-runtime')
        # State the payload format explicitly, matching the local call above,
        # instead of relying on the endpoint's defaults.
        response = sm.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType=content_type,
            Accept=content_type,
            Body=data
        )
        payload = response['Body'].read().decode('utf-8')

    # Each entry is a single-item dict {"<class id>": <score>}.
    for entry in json.loads(payload):
        for class_id, score in entry.items():
            predicted_id = int(class_id)
            print("Predicted [%d] - %s [%s]" % (predicted_id, object_classes[predicted_id], score ) )

## Testing our code locally

## Downloading the dataset

In [None]:
mx.test_utils.download(dataset_dir + '/cars_all_test.lst', fname="cars_test.lst", dirname=base_dir + '/data')
mx.test_utils.download(dataset_dir + '/cars_all_train.lst', fname="cars_train.lst", dirname=base_dir + '/data')
mx.test_utils.download(dataset_dir + '/cars_all_test.rec', fname="cars_test.rec", dirname=base_dir + '/data')
mx.test_utils.download(dataset_dir + '/cars_all_train.rec', fname="cars_train.rec", dirname=base_dir + '/data')
!ls -lat $base_dir/data

In [None]:
# Fetch the JSON file that maps class ids to class names.
mx.test_utils.download(dataset_dir + '/dataset_classes.json', dirname=base_dir + '/data')

In [None]:
# Read the class descriptions; the context manager closes the file again.
with open(base_dir + '/data/dataset_classes.json', 'r') as classes_file:
    classes = json.load(classes_file)

# class id -> class name
class_map = {sample['id']: sample['name'] for sample in classes['samples']}

# Class names indexed by label id. The ids "1".."196" are deliberately sorted
# as strings ("1", "10", "100", ...), not numerically.
# NOTE(review): presumably this matches how labels were numbered in the .lst
# files - confirm before changing the ordering.
object_classes = [class_map[class_id] for class_id in sorted("%s" % n for n in range(1, 197))]

## Training and predicting locally

In [None]:
# Some dummy variables for mocking sagemaker calls
current_host='algo-1'
hosts=['algo-1']
list_cpus=!cat /proc/cpuinfo |grep processor|wc -l
num_cpus=int(list_cpus[0])
list_gpus=!nvidia-smi -L|wc -l
num_gpus=int(list_gpus[0])
print("GPUs: %d, CPUs: %d" % (num_gpus, num_cpus))
channel_input_dirs={
    'training': base_dir + '/data'
}
hyperparameters = {
    "learning_rate": 0.07,
    "batch_size": 32,
    "epochs": 1,
    "log_interval": 10
}

In [None]:
%%time
# Train the model locally; %%time reports how long the cell took
# Start from an empty model directory so only this run's checkpoints are in it
!mkdir -p $model_dir
!rm -f $model_dir/*
# -> open the terminal and execute: watch nvidia-smi
net = train(current_host, hosts, num_cpus, num_gpus, channel_input_dirs, model_dir, hyperparameters)
# Training is done: lower the root logger from DEBUG to INFO
logger.setLevel(logging.INFO)

In [None]:
# Save the model: keep only the best checkpoint, renamed to the epoch-0000
# file that model_fn loads
save(net, model_dir)
!ls -lat $model_dir

In [None]:
# Retrieve the model from disk; `net` now refers to the inference module
# returned by model_fn instead of the training module
net = model_fn(model_dir)

In [None]:
# Single-image iterator over the downloaded test split, built with the same
# helper the training code uses.
val_iter = get_test_data(base_dir + '/data', 1, (3, 224, 224))

In [None]:
# No endpoint name given: the next test image is classified with the local `net`
predict(val_iter)

### Now, let's download an 88% accuracy model and overwrite our previous model

In [None]:
# Stream the pre-trained archive and unpack it directly into the model directory
!curl $pre_trained_model | tar -xz -C $model_dir

In [None]:
# Retrieve the model again, this time from the files just unpacked
# (assumes the archive uses the cars_labels_model checkpoint names - TODO confirm)
net = model_fn(model_dir)

In [None]:
# Classify the next test image with the reloaded local `net`
predict(val_iter)

## Saving the script

In [None]:
# Saving all the functions into a python script.
# It will be sent to a Sagemaker process
# The context manager closes the file even if inspect.getsource fails midway.
with open(base_dir + '/cars.py', 'w') as code:
    # Header: the only imports and globals the copied functions can rely on.
    code.write("""
import json
import logging
import os
import time
import numpy as np
import mxnet as mx

from collections import namedtuple

base_dir="."

""")
    # Append the source of each container function verbatim.
    for obj in [train, save, get_test_data, get_train_data, test, model_fn, transform_fn]:
        code.write(inspect.getsource(obj) + '\n')

## Training your model on Sagemaker (you can skip this section)

In [None]:
# Get the current Sagemaker session (used below to upload the training data)
sagemaker_session = sagemaker.Session()

# Execution role handed to the estimator as `role`
role = sagemaker.get_execution_role()

In [None]:
# Upload the local data directory under the 'data/cars' key prefix; the
# returned value is what gets passed to fit()
inputs = sagemaker_session.upload_data(path=base_dir + '/data/', key_prefix='data/cars')
print('input spec (in this case, just an S3 path): {}'.format(inputs))

In [None]:
# Estimator that runs the generated cars.py on one ml.p3.2xlarge instance.
# NOTE(review): no py_version / framework_version is passed, so the SDK
# defaults apply - confirm they match the environment the script was tested in.
m = MXNet(base_dir + '/cars.py', 
          role=role, 
          train_instance_count=1, 
          train_instance_type="ml.p3.2xlarge",
          hyperparameters={'batch_size': 32, 
                           'epochs': 2,
                           'learning_rate': 0.00007})

In [None]:
# Start the training job on the uploaded data
m.fit(inputs)

In [None]:
# Deploy the freshly trained model to an endpoint.
# NOTE(review): no endpoint_name is given and no visible cell deletes this
# endpoint - the cleanup section only removes `endpoint_name`.
predictor = m.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')

## Deploying the 88% model to a Sagemaker Endpoint

In [None]:
# Get the current Sagemaker session
# (same setup as in the training section, repeated so that this section
# defines everything it needs itself)
sagemaker_session = sagemaker.Session()

# Execution role handed to the model as `role`
role = sagemaker.get_execution_role()

In [None]:
# Stream the pre-trained archive and unpack it directly into the model directory
!curl $pre_trained_model | tar -xz -C $model_dir

In [None]:
# Local artefacts that get shipped to SageMaker.
model_tarball = '{}/model.tar.gz'.format(base_dir)
entry_point = '{}/cars.py'.format(base_dir)
py_version = 'py3'

# The model shares its name with the endpoint.
endpoint_name = 'car-classification'
model_name = endpoint_name

In [None]:
# Create a tarball with the trained model
tarball = tarfile.open(model_tarball, "w:gz" )
for f in os.listdir(model_dir):
    tarball.add(os.path.join(model_dir, f), arcname=f)
tarball.close()

!tar -tzvf $model_tarball

In [None]:
# Upload the tarball to an S3 Bucket (under the 'data/cars' key prefix);
# the returned location is used as the model's model_data
model_data = sagemaker_session.upload_data(path=model_tarball, key_prefix='data/cars')
print(model_data)

In [None]:
# Create an MXNet model (not an estimator) from the uploaded tarball, served
# by the generated cars.py script. Note that `m` is reused: in the training
# section the same name holds the estimator.
m = sagemaker.mxnet.model.MXNetModel(model_data=model_data, role=role, entry_point=entry_point, py_version=py_version)

In [None]:
%%time

# Publishes the model as the endpoint `endpoint_name`. It takes around 8mins
m.deploy(initial_instance_count=1, instance_type='ml.t2.medium', endpoint_name=endpoint_name)

## Calling the endpoint

In [None]:
# Fresh single-image iterator over the test split, built with the same helper
# the training code uses.
val_iter = get_test_data(base_dir + '/data', 1, (3, 224, 224))

In [None]:
# Endpoint name given: the next test image is sent to the deployed endpoint
predict(val_iter, endpoint_name)

## Cleaning

In [None]:
# Remove the local working directory (data, model files, generated script)
!rm -rf $base_dir

In [None]:
%%time
# Delete the endpoint that was deployed under `endpoint_name`.
# NOTE(review): an endpoint created by the optional training section's
# m.deploy(...) is not removed here - confirm whether it needs deleting too.
sagemaker_session.delete_endpoint(endpoint_name)