In [2]:
# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [1]:
from sagemaker.tensorflow import TensorFlow
import numpy as np
import sagemaker

# Remote Execution

In [7]:
# Execution role reported by the SageMaker SDK for this notebook session.
role = sagemaker.get_execution_role()

# Hyperparameters handed to the estimator.
hyperparameters = {
    'steps': 12000,  # 600 steps per epoch, 20 epochs
    'batch_size': 100,
    'learning_rate': 0.001,
    'verbosity': 'INFO',
}

# Training is configured for a CPU-only instance (ml.m4.xlarge). To use a GPU
# instance such as 'ml.p2.xlarge' you may first need to request a quota
# increase via http://aws.amazon.com/contact-us/ec2-request
tf_estimator = TensorFlow(
    entry_point='task.py',
    source_dir='training_job/',
    role=role,
    framework_version='1.12',
    py_version='py3',
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    hyperparameters=hyperparameters,
)

In [8]:
# Start the training job, giving one S3 location per named input channel.
tf_estimator.fit(
    inputs={
        'train': 's3://sagemaker-us-east-2-708267171719/sagemaker/ml-model-migration/data/mnist/train',
        'eval': 's3://sagemaker-us-east-2-708267171719/sagemaker/ml-model-migration/data/mnist/test',
    }
)

ResourceLimitExceeded: An error occurred (ResourceLimitExceeded) when calling the CreateTrainingJob operation: The account-level service limit 'ml.p2.xlarge for training job usage' is 0 Instances, with current utilization of 0 Instances and a request delta of 1 Instances. Please contact AWS support to request an increase for this limit.

# Remote Endpoint Deployment

In [5]:
# Deploy the estimator's model to an endpoint backed by a single
# ml.t2.medium instance and keep the returned predictor for later requests.
mnist_predictor = tf_estimator.deploy(
    instance_type='ml.t2.medium',
    initial_instance_count=1,
)

--------------------------------------------------------------------------------------!

In [6]:
from keras.datasets import mnist
def load_mnist_data():
    """Load MNIST through Keras and prepare the images for the model.

    Images are reshaped to ``(N, 28, 28, 1)``, converted to ``float32`` and
    divided by 255. Labels are passed through unchanged.

    Returns:
        tuple: ``(train_data, test_data)``; each is a dict with the keys
        ``'images'`` and ``'labels'``.
    """
    def _prepare(images):
        # Add a trailing channel axis, cast to float32 (a fresh copy), then
        # scale the copy in place so the loaded arrays are left untouched.
        scaled = np.reshape(images, [-1, 28, 28, 1]).astype(np.float32)
        scaled /= 255
        return scaled

    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
    return (
        {'images': _prepare(train_images), 'labels': train_labels},
        {'images': _prepare(test_images), 'labels': test_labels},
    )

train_data, test_data = load_mnist_data()

# Send test examples 1 through 9 to the endpoint, one image per request.
for ex in range(1, 10):
    # Take one test image and give it a leading batch axis of size 1,
    # i.e. shape (1, 28, 28, 1).
    example = np.expand_dims(test_data['images'][ex], axis=0)
    label = test_data['labels'][ex]
    # The response is a dict whose 'predictions' entry is a list with one row
    # per submitted image; row 0 contains the softmax activations of the network.
    predictions = mnist_predictor.predict(example)
    predicted_label = np.argmax(predictions['predictions'][0])
    print("Example {}: Predicted label: {}  Actual label:{}".format(ex, predicted_label, label))

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
Example 1: Predicted label: 2  Actual label:2
Example 2: Predicted label: 1  Actual label:1
Example 3: Predicted label: 0  Actual label:0
Example 4: Predicted label: 4  Actual label:4
Example 5: Predicted label: 1  Actual label:1
Example 6: Predicted label: 4  Actual label:4
Example 7: Predicted label: 9  Actual label:9
Example 8: Predicted label: 5  Actual label:5
Example 9: Predicted label: 9  Actual label:9
