In [1]:
# upload model to SageMaker - if you don't have a model, use model below:
! wget https://raw.githubusercontent.com/liampearson/Youtube/master/Keras%20to%20aws%20SageMaker/models/model.h5

--2023-01-21 23:31:18--  https://raw.githubusercontent.com/liampearson/Youtube/master/Keras%20to%20aws%20SageMaker/models/model.h5
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 30416 (30K) [application/octet-stream]
Saving to: ‘model.h5’


2023-01-21 23:31:18 (103 MB/s) - ‘model.h5’ saved [30416/30416]



In [3]:
# input model names, 2 options:
# 1. model is only saved as .h5
MODEL_LOCATION = 'model.h5'
# 2. model is saved as .json (actual model) and .h5 (model weights)
JSON_LOCATION = ''
WEIGHTS_LOCATION = ''

# load model (adapted from https://machinelearningmastery.com/save-load-keras-deep-learning-models/)
if MODEL_LOCATION != '':
    from keras.models import load_model
    model = load_model(MODEL_LOCATION)
    print("loaded model from MODEL_LOCATION")
elif JSON_LOCATION!='':
    from keras.models import model_from_json
    json_file = open(JSON_LOCATION, 'r')
    loaded_model_json = json_file.read()
    json_file.close()    
    model = model_from_json(loaded_model_json)
    model.load_weights(WEIGHTS_LOCATION)
    print("loaded model from JSON_LOCATION and WEIGHTS_LOCATION")

2023-01-21 23:37:08.002186: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-21 23:37:28.329927: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-01-21 23:38:47.283285: E tensorflow/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-01-21 23:38:47.283387: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-172-16-56-255.ap-southeast-2.compute.internal): /proc/driver/nvidia/version does not exist
2023-01-21 23:38:47.285336: I tensorflow/core/p

loaded model from MODEL_LOCATION


In [5]:
# convert pre-trained Keras model to AWS format by:
# - converting model to a TensorFlow protobuf file
# - saving it in a certain file structure that AWS expects
# - tarballing and zipping file structure
# Expect INFO statements like No assets to write/save - normal behaviour (so long as you see SavedModel written to...)
from tensorflow.python.saved_model import builder
from tensorflow.python.saved_model.signature_def_utils import predict_signature_def
from tensorflow.python.saved_model import tag_constants
from keras import backend as K
import tarfile

# To fix this error RuntimeError: `build_tensor_info` is not supported in eager execution (https://stackoverflow.com/questions/60924336/run-into-the-following-issue-build-tensor-flow-is-not-supported-in-eager-mode)
import tensorflow as tf
if tf.executing_eagerly():
    tf.compat.v1.disable_eager_execution()

# This is the file structure which AWS expects. Cannot be changed. 
model_version = '1'
export_dir = 'export/Servo/' + model_version

# Build the Protocol Buffer SavedModel at 'export_dir'
builder = builder.SavedModelBuilder(export_dir)

# Create prediction signature to be used by TensorFlow Serving Predict API
signature = predict_signature_def(inputs={"inputs": model.input}, outputs={"score": model.output})

# Save the meta graph and variables
with K.get_session() as sess:
    builder.add_meta_graph_and_variables(
        sess=sess, tags=[tag_constants.SERVING], signature_def_map={"serving_default": signature})
    builder.save()

# Create a tarball/tar file and zip it
with tarfile.open('model.tar.gz', mode='w:gz') as archive:
    archive.add('export', recursive=True)

INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: export/Servo/1/saved_model.pb


In [6]:
# move tarball (tar.gz) file into S3
import sagemaker
sagemaker_session = sagemaker.Session()
inputs = sagemaker_session.upload_data(path='model.tar.gz', key_prefix='model')
print(f"Bucket name is: {sagemaker_session.default_bucket()}")

Bucket name is: sagemaker-ap-southeast-2-914211408554


In [7]:
# create SageMaker model

# first, create an empty train.py file (TensorFlowModel expects this at its 'entry point', but can be empty since we're using pre-trained model)
!touch train.py

# get default IAM role you created when creating this notebook
from sagemaker import get_execution_role
role = get_execution_role()

# create SageMaker model
from sagemaker.tensorflow.model import TensorFlowModel
sagemaker_model = TensorFlowModel(model_data = 's3://' + sagemaker_session.default_bucket() + '/model/model.tar.gz',
                                  role = role,
                                  framework_version = '1.12',
                                  entry_point = 'train.py')

In [8]:
# deploy model and create endpoint (ignore the message `update_endpoint is a no-op in sagemaker>=2`)
predictor = sagemaker_model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')
print(predictor.endpoint)

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


----!

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


sagemaker-tensorflow-serving-2023-01-22-00-05-57-775


In [10]:
# confirm it's working correctly by making a prediction (first locally, then after deploying - only deployed model tested below)

# create a predictor which uses this new endpoint
predictor = sagemaker.tensorflow.model.TensorFlowPredictor(predictor.endpoint, sagemaker_session)

# send data to endpoint (expected output: {'predictions': [[0.999309, 0.000690674, 1.57288e-16]]})
# input format depends on what inputs your model is expecting - used the iris dataset and so can feed it 4 inputs of which it will give 3 probabilities - 1 for each iris type
import numpy as np
predictor.predict(np.asarray([[5.0, 3.5, 1.3, 0.3]]))

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from primary with message "{ "error": "Session was not created with a graph before Run()!" }". See https://ap-southeast-2.console.aws.amazon.com/cloudwatch/home?region=ap-southeast-2#logEventViewer:group=/aws/sagemaker/Endpoints/sagemaker-tensorflow-serving-2023-01-22-00-05-57-775 in account 914211408554 for more information.

In [None]:
# cleanup (https://docs.aws.amazon.com/sagemaker/latest/dg/ex1-cleanup.html)
# stop notebook, delete endpoints/models/buckets/CloudWatch groups