### Training a Keras CNN on Fashion-MNIST

Fashion-MNIST is a Zalando dataset consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes. It's a drop-in replacement for MNIST.

https://github.com/zalandoresearch/fashion-mnist/

In this notebook, we'll train a simple CNN built with Keras, using the built-in TensorFlow and Apache MXNet containers provided by Amazon SageMaker.

In [None]:
from IPython.display import Image
# Show the Fashion-MNIST sprite sheet as the cell output.
# The path is relative, so it is resolved against the kernel's working directory.
Image("fashion-mnist-sprite.png")

In [None]:
import sagemaker

# A single session object is shared by the SageMaker calls in this notebook.
sess = sagemaker.Session()
# Role handed to the estimators below.
role = sagemaker.get_execution_role()
# Reuse the session created above instead of constructing a second, throwaway one.
bucket_name = sess.default_bucket()

# Key prefix under which the dataset is uploaded in the bucket.
# NOTE(review): "fashon" looks like a typo, but it is kept as-is because it
# determines the S3 key and changing it would move the data.
prefix = 'keras-fashon-mnist'

## Download the Fashion-MNIST dataset

In [None]:
import os
import keras
import numpy as np
from keras.datasets import fashion_mnist

# load_data() yields two (images, labels) pairs; unpack them one at a time.
train_split, val_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_val, y_val = val_split

# Local staging directory for the archives; safe to re-run if it already exists.
os.makedirs("./data", exist_ok=True)

# Each split goes into its own archive with 'image' and 'label' arrays.
# np.savez adds the .npz extension, which is the name the upload step expects.
np.savez(
    './data/training',
    image=x_train,
    label=y_train,
)
np.savez(
    './data/validation',
    image=x_val,
    label=y_val,
)

## Upload Fashion-MNIST data to S3

In [None]:
# Push each local archive to the default bucket under its own sub-prefix.
# upload_data's return value is kept as the input location for training.
training_input_path = sess.upload_data(
    'data/training.npz',
    bucket=bucket_name,
    key_prefix=prefix+'/training',
)
validation_input_path = sess.upload_data(
    'data/validation.npz',
    bucket=bucket_name,
    key_prefix=prefix+'/validation',
)

# Show both locations, one per line.
print(training_input_path, validation_input_path, sep='\n')

In [None]:
# Print the training script (the entry point used by the estimators below) with syntax highlighting.
!pygmentize mnist_keras_tf.py

## Train with TensorFlow on a CPU instance

In [None]:
from sagemaker.tensorflow import TensorFlow

# Fixed hyperparameters for this single training run.
# The keys are passed through to the entry-point script.
cpu_hyperparameters = {
    'epochs': 10,
    'batch-size': 512,
    'learning-rate': 0.01,
}

# Estimator for one CPU training job using spot instances.
# max_run and max_wait are both 3600 (presumably seconds - confirm against the SDK docs).
tf_estimator = TensorFlow(
    entry_point='mnist_keras_tf.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.2xlarge',
    framework_version='1.12',
    py_version='py3',
    script_mode=True,
    use_spot_instances=True,
    max_run=3600,
    max_wait=3600,
    hyperparameters=cpu_hyperparameters,
)

In [None]:
%%time

# Start training: the 'training' and 'validation' inputs point at the S3 locations uploaded earlier.
tf_estimator.fit({'training': training_input_path, 'validation': validation_input_path})

## Deploy

In [None]:
%%time

import time 
tf_endpoint_name = 'keras-tf-fmnist-'+time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

tf_predictor = tf_estimator.deploy(initial_instance_count=1,
                                   instance_type='ml.m5.xlarge')      


## Predict 

In [None]:
# Human-readable class names, ordered so that class_list[label] is the name for an integer label.
class_list = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

In [None]:
%matplotlib inline
import random
import matplotlib.pyplot as plt

num_samples = 5
indices = random.sample(range(x_val.shape[0] - 1), num_samples)
images = x_val[indices]/255
labels = y_val[indices]

for i in range(num_samples):
    plt.subplot(1,num_samples,i+1)
    plt.imshow(images[i].reshape(28, 28), cmap='gray')
    plt.title(class_list[labels[i]])
    plt.axis('off')
    
prediction = tf_predictor.predict(images.reshape(num_samples, 28, 28, 1))['predictions']
prediction = np.array(prediction)
predicted_label = prediction.argmax(axis=1)
print('Predicted labels are: \n {}'.format([class_list[n] for n in predicted_label]))

In [None]:
# Display the dimensions of the validation image array.
x_val.shape

## Clean up

In [None]:
# Tear down the endpoint behind tf_predictor now that the sample predictions are done.
tf_predictor.delete_endpoint()

## Configure Automatic Model Tuning

In [None]:
# Base estimator for the tuning jobs. No hyperparameters are set here because
# the tuner configured in the next cell supplies them for each job.
# This rebinds tf_estimator, replacing the CPU estimator defined earlier.
tf_estimator = TensorFlow(
    entry_point='mnist_keras_tf.py',
    role=role,
    instance_count=1,
    instance_type='ml.p3.2xlarge',
    framework_version='1.12',
    py_version='py3',
    script_mode=True,
    use_spot_instances=True,
    max_run=3600,
    max_wait=3600,
)

In [None]:
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

# Search space: one entry per hyperparameter the tuner may vary.
hyperparameter_ranges = {
    'epochs': IntegerParameter(20, 100),
    'learning-rate': ContinuousParameter(0.001, 0.1, scaling_type='Logarithmic'),
    'batch-size': IntegerParameter(32, 1024),
    'dense-layer': IntegerParameter(128, 1024),
    'dropout': ContinuousParameter(0.2, 0.6),
}

# Objective: maximize the value captured by the regex below from the training log.
objective_metric_name = 'val_acc'
objective_type = 'Maximize'
metric_definitions = [
    {
        'Name': objective_metric_name,
        'Regex': 'val_acc: ([0-9\\.]+)',
    },
]

# Ten jobs in total, at most two running at the same time.
tuner = HyperparameterTuner(
    estimator=tf_estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type=objective_type,
    max_jobs=10,
    max_parallel_jobs=2,
)

In [None]:
%%time
# Start the tuning job with the same 'training' and 'validation' inputs used for the single training run.
tuner.fit({'training': training_input_path, 'validation': validation_input_path})

## Deploy the best model

In [None]:
import time

# Fresh timestamped endpoint name so this deployment does not reuse an earlier name.
tf_endpoint_name = 'keras-tf-fmnist-'+time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

# Deploy from the tuner rather than from a single estimator.
# NOTE(review): accelerator_type requests an Elastic Inference accelerator - confirm
# that this option is still available in the target account and region before running.
tf_predictor = tuner.deploy(
    initial_instance_count=1,
    instance_type='ml.c5.large',
    accelerator_type='ml.eia1.medium',
    endpoint_name=tf_endpoint_name,
)

## Clean up

In [None]:
# Tear down the endpoint created for the best model from the tuning job.
tf_predictor.delete_endpoint()