## Setup

In [1]:
import numpy as np
import pandas as pd
import h5py
import os
import sagemaker
import boto3
import botocore

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from keras.datasets import fashion_mnist
from sagemaker.tensorflow import TensorFlow
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

from cnn import FashionMNISTCNN as fmc

Using TensorFlow backend.


## Train locally

In [2]:
# Unpack the four values returned by the project's FashionMNISTCNN helper (imported as `fmc`)
# into train/validation features and labels.
# NOTE(review): load_data() lives in cnn.py, which is not visible here — presumably it is also
# what produces data/train.hdf5 and data/val.hdf5 used by the upload step; confirm.
X_train, Y_train, X_val, Y_val = fmc.load_data()

# Run the local training script for a single epoch as a quick end-to-end check.
# The leading `!` makes IPython pass the line to the system shell.
!python train_script_local.py --epochs 1

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.





2019-09-11 19:35:16.085581: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Model: "fashionmnistcnn_1"
_________________________________

## Upload data to S3 bucket

In [2]:
# Account-specific settings: the IAM execution role SageMaker assumes and the S3 bucket
# that holds the data and outputs. Change both when running under a different AWS account.
bucket_name = 'sagemaker-fashion-mnist'
role_name = 'arn:aws:iam::406755861890:role/service-role/AmazonSageMaker-ExecutionRole-20190827T145350'

# SageMaker session used for the uploads below.
sess = sagemaker.Session()

In [6]:
# Upload the local HDF5 files to the bucket under the 'data' prefix;
# each call returns the value assigned to the corresponding *_input_path variable.
training_input_path = sess.upload_data(
    path='data/train.hdf5',
    bucket=bucket_name,
    key_prefix='data',
)
validation_input_path = sess.upload_data(
    path='data/val.hdf5',
    bucket=bucket_name,
    key_prefix='data',
)

In [7]:
# Show where the two uploads landed, one path per line.
print(training_input_path, validation_input_path, sep='\n')

s3://sagemaker-fashion-mnist/data/train.hdf5
s3://sagemaker-fashion-mnist/data/val.hdf5


## Train in the cloud with SageMaker

In [5]:
# Local folder for model artifacts, created next to the notebook's working directory.
# exist_ok=True makes the cell safe to re-run.
sm_output_dir = os.path.join(os.getcwd(), 'sagemaker_output')
os.makedirs(name=sm_output_dir, exist_ok=True)

In [3]:
# Push the local output folder to the bucket under the 'sm-output' prefix,
# then echo the returned S3 location.
sm_output_path = sess.upload_data(
    path='sagemaker_output/',
    bucket=bucket_name,
    key_prefix='sm-output',
)
print(sm_output_path)

s3://sagemaker-fashion-mnist/sm-output


In [21]:
# Metrics to extract from the training log: each regex captures the number that follows
# the metric's label.
# The leading \b stops the `acc` pattern from also matching inside `val_acc: ...`
# ('_' is a word character, so there is no word boundary between '_' and 'acc');
# without it the `acc` metric could pick up validation values as well.
metric_definitions = [
    {'Name': 'acc',
     'Regex': '\\bacc: ([0-9\\.]+)'},
    {'Name': 'val_acc',
     'Regex': 'val_acc: ([0-9\\.]+)'},
]

# Hyperparameters passed to the estimator via its `hyperparameters` argument.
hyperparameters = {'epochs': 100, 'batch-size': 128}

# TensorFlow estimator for the cloud training job; arguments are grouped by concern.
tf_estimator = TensorFlow(
    # What to run
    entry_point='train_script_sagemaker.py',
    framework_version='1.13',
    py_version='py3',
    script_mode=True,
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    # Permissions and output location (the S3 path printed by the upload cell)
    role=role_name,
    model_dir=sm_output_path,
    # Hardware: a single ml.m5.xlarge instance
    train_instance_count=1,
    train_instance_type='ml.m5.xlarge',
    train_volume_size=1,
    # Spot capacity; 86400 = 24 * 60 * 60
    train_use_spot_instances=True,
    train_max_wait=86400,
)

In [22]:
# Map each input channel name to the S3 path uploaded for it.
input_paths = dict(training=training_input_path, validation=validation_input_path)

# Start the training job; wait=False asks fit() not to block the notebook on it.
tf_estimator.fit(inputs=input_paths, wait=False)

## Plot training job metrics

Reference notebook for this approach: [xgboost_abalone.ipynb (amazon-sagemaker-examples)](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/xgboost_abalone/xgboost_abalone.ipynb)

In [18]:
# Name of the most recent training job started from `tf_estimator`.
# (The assignment previously had no right-hand side, which is a SyntaxError.)
training_job_name = tf_estimator.latest_training_job.name
print(training_job_name)

tensorflow-training-2019-09-13-18-16-20-207


In [19]:
%matplotlib inline
from sagemaker.analytics import TrainingJobAnalytics

metric_name = 'val_acc'
training_job_name = tf_estimator.latest_training_job.name
metrics_dataframe = TrainingJobAnalytics(training_job_name=training_job_name,
                                         metric_names=[metric_name]).dataframe()



## Download training job output from S3

## Inspect training job results