# Deploying a Keras or TensorFlow model using Amazon SageMaker

### Step 1. Set up

In [1]:
import boto3, re
from sagemaker import get_execution_role

role = get_execution_role()

### Step 2. Load the Keras model using the json and weights file

Use the upload feature in Jupyter to upload your `model.json` and your model weights file (`model.h5`).

If you saved your model in the TensorFlow ProtoBuf format, skip to "Step 4. Convert TensorFlow model to a SageMaker readable format".

In [3]:
import tensorflow.keras
from tensorflow.keras.models import model_from_json

In [4]:
# Read the serialized architecture. The context manager closes the file even
# if read() raises, which the manual open()/close() pair did not guarantee.
with open('model.json', 'r') as json_file:
    model_json = json_file.read()

# Rebuild the Keras model from its JSON description; the weights are loaded
# separately from model.h5.
model = model_from_json(model_json)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [5]:
# Load the weights from model.h5 into the model rebuilt from model.json
model.load_weights('model.h5')
# Confirmation message only; it is printed once load_weights has returned
print("Loaded model from disk")

Loaded model from disk


### Step 3. Export the Keras model to the TensorFlow ProtoBuf format

In [6]:
from tensorflow.python.saved_model import builder
from tensorflow.python.saved_model.signature_def_utils import predict_signature_def
from tensorflow.python.saved_model import tag_constants

In [7]:
# The export must live under export/Servo/<model version>/ so that SageMaker
# recognises it as a loadable TensorFlow model (layout shown in Step 4).
model_version = '1'
export_dir = f'export/Servo/{model_version}'

In [8]:
# Build the Protocol Buffer SavedModel at 'export_dir'.
#
# The module is imported under an alias here because this cell binds the name
# `builder` to the SavedModelBuilder *instance* (later cells use that name).
# Calling `builder.SavedModelBuilder(...)` directly only worked on the first
# run: on a re-run `builder` is already the instance, not the module, and the
# attribute lookup fails.
from tensorflow.python.saved_model import builder as saved_model_builder

builder = saved_model_builder.SavedModelBuilder(export_dir)

In [9]:
# Prediction signature consumed by the TensorFlow Serving Predict API:
# the model's input tensor is exposed as "inputs", its output tensor as "score".
serving_inputs = {"inputs": model.input}
serving_outputs = {"score": model.output}
signature = predict_signature_def(inputs=serving_inputs, outputs=serving_outputs)

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.


In [10]:
from tensorflow.keras import backend as K

# Export using the session the Keras backend returns, so the saved variables
# are the ones loaded into `model` above.
# NOTE(review): leaving this `with` block presumably closes that session, so
# `model` may not be usable for local inference afterwards -- confirm.
with K.get_session() as sess:
    # Save the meta graph and variables
    # Tagged for serving, with `signature` registered as "serving_default".
    builder.add_meta_graph_and_variables(
        sess=sess, tags=[tag_constants.SERVING], signature_def_map={"serving_default": signature})
    # Write the SavedModel to the export directory given to the builder.
    builder.save()

INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: export/Servo/1/saved_model.pb


### Step 4. Convert TensorFlow model to a SageMaker readable format

Move the exported TensorFlow model into the directory `export/Servo/`. SageMaker will recognize this as a loadable TensorFlow model. Your directory and file structure should look like:

```
-export/
    |--Servo/
        |--{model version as an integer eg. 1}/
            |--variables/
            |--saved_model.pb
```

####  Tar the entire directory and upload to S3

In [11]:
import tarfile
# Bundle the whole export/ tree into a gzip-compressed tarball.
# add() recurses into directories by default, so export/Servo/<version>/...
# is included without passing recursive=True explicitly.
with tarfile.open('model.tar.gz', 'w:gz') as tar:
    tar.add('export')

In [12]:
import sagemaker

# SageMaker session and the bucket it uses by default
session = sagemaker.Session()
bucket = session.default_bucket()
prefix = 'model'

# Upload the archive under <bucket>/<prefix>/ and keep what upload_data returns.
# The bucket is passed explicitly; it is the same default bucket used when the
# argument is omitted.
inputs = session.upload_data('model.tar.gz', bucket=bucket, key_prefix=prefix)

### Step 5. Deploy the trained model

The `entry_point` file `train.py` passed to `TensorFlowModel` below can be an empty Python file; having to supply it at all is a bug.

In [13]:
!touch train.py

In [14]:
from sagemaker.tensorflow.model import TensorFlowModel
# Use the S3 URI returned by `session.upload_data(...)` rather than rebuilding
# it from `bucket` and a hard-coded 'model/' path. The hard-coded path silently
# pointed at the wrong object whenever `prefix` was changed.
# `entry_point` names the placeholder train.py created in the previous cell.
sagemaker_model = TensorFlowModel(model_data=inputs,
                                  role=role,
                                  framework_version='1.12',
                                  entry_point='train.py')

The Python 2 tensorflow images will be soon deprecated and may not be supported for newer upcoming versions of the tensorflow images.
Please set the argument "py_version='py3'" to use the Python 3 tensorflow image.


In [15]:
%%time
# Deploy `sagemaker_model` to an endpoint backed by one ml.m4.xlarge instance.
# The %%time magic above reports how long the call takes (about 8 minutes in
# the recorded run), so expect this cell to block for a while.
predictor = sagemaker_model.deploy(initial_instance_count=1,
                                   instance_type='ml.m4.xlarge')

---------------------------------------------------------------------------------------------------!CPU times: user 602 ms, sys: 45.1 ms, total: 647 ms
Wall time: 8min 20s


In [16]:
from sagemaker.tensorflow.model import TensorFlowPredictor

# Wrap the deployed endpoint (looked up by name) in a TensorFlowPredictor bound to `session`
predictor = TensorFlowPredictor(predictor.endpoint, session)

### Step 6. Invoke the endpoint

#### Invoke the SageMaker endpoint from the notebook

In [19]:
import re
import json

# Raw strings keep the regex escapes (\., \s, \-, ...) out of Python's own
# string-escape processing; the non-raw originals triggered "invalid escape
# sequence" warnings. The patterns match exactly the same text as before.

# Punctuation that is deleted outright: . ; : ! ' ? , " ( ) [ ]
REPLACE_NO_SPACE = re.compile(r"(\.)|(\;)|(\:)|(\!)|(\')|(\?)|(\,)|(\")|(\()|(\))|(\[)|(\])")
# Doubled HTML line breaks, hyphens and slashes, each replaced by one space
REPLACE_WITH_SPACE = re.compile(r"(<br\s*/><br\s*/>)|(\-)|(\/)")

# Load the token -> integer-id mapping from vocab_dict.json
with open('vocab_dict.json') as vocab_file:
    word_to_id = json.load(vocab_file)

def review_to_words(review):
    """Normalise a review string before tokenisation.

    The text is lower-cased, every match of ``REPLACE_NO_SPACE`` is deleted,
    and every match of ``REPLACE_WITH_SPACE`` is replaced by a single space.

    Args:
        review: the raw review text (a ``str``).

    Returns:
        The cleaned text as one string; splitting it into words is left to
        the caller.
    """
    lowered = review.lower()
    without_punctuation = REPLACE_NO_SPACE.sub("", lowered)
    return REPLACE_WITH_SPACE.sub(" ", without_punctuation)

def preprocess_input(text, vocab_dict, maxlen=100):
    """Encode raw review text as a single-row batch of ``maxlen`` token ids.

    The text is cleaned with ``review_to_words``, split on whitespace, and each
    token is looked up in ``vocab_dict``. Sequences longer than ``maxlen`` keep
    their first ``maxlen`` ids; shorter ones are left-padded with ``0``.

    Args:
        text: the raw review text.
        vocab_dict: mapping from token to integer id.
        maxlen: length of the returned id sequence.

    Returns:
        A nested list ``[[id, id, ...]]`` holding exactly one row of ``maxlen``
        ids. The original returned a flat list when the review had ``maxlen``
        or more tokens but a nested list otherwise, so long reviews were sent
        in a different shape than short ones; both cases are now nested.

    Raises:
        KeyError: if a token is not a key of ``vocab_dict``.
    """
    review = review_to_words(text)
    int_tokens = [vocab_dict[token] for token in review.split()]

    # Truncate first, then left-pad; the padding is empty when the review
    # already has at least `maxlen` tokens.
    int_tokens = int_tokens[:maxlen]
    padding = [0] * (maxlen - len(int_tokens))
    return [padding + int_tokens]

In [20]:
text = "This movie was the Best Movie I have ever seen!"

In [26]:
data = preprocess_input(text, word_to_id)

In [22]:
predictor.predict(data)

{'outputs': {'score': {'dtype': 1,
   'tensor_shape': {'dim': [{'size': 1}, {'size': 1}]},
   'float_val': [0.8134368062019348]}},
 'model_spec': {'name': 'generic_model',
  'version': {'value': 1},
  'signature_name': 'serving_default'}}

#### Invoke the SageMaker endpoint using a boto3 client

In [28]:
predictor.endpoint

'sagemaker-tensorflow-2020-01-09-10-39-46-231'

In [27]:
import json
import boto3
 
# Low-level SageMaker Runtime client. 'sagemaker-runtime' is the canonical
# service name and the one the Lambda handler in Step 7 uses;
# 'runtime.sagemaker' is a legacy alias for the same service.
client = boto3.client('sagemaker-runtime')

# Send the pre-processed review as a JSON document to the deployed endpoint
response = client.invoke_endpoint(EndpointName=predictor.endpoint, Body=json.dumps(data))

# The response body is read, decoded as UTF-8 and parsed as JSON; the score is
# the first entry of outputs -> score -> floatVal.
result = json.loads(response['Body'].read().decode("utf-8"))
pred = result['outputs']['score']['floatVal'][0]
print(pred)

0.8134368062019348


### Step 7: Setting up AWS Lambda and API Gateway

The following code allows an AWS Lambda function to invoke the endpoint:

```python
import json
import re
import boto3

# Pre-processing patterns. Raw strings keep the regex escapes (\., \s, \-, ...)
# out of Python's own string-escape processing; the non-raw originals triggered
# "invalid escape sequence" warnings. The patterns match the same text as before.

# Punctuation that is deleted outright: . ; : ! ' ? , " ( ) [ ]
REPLACE_NO_SPACE = re.compile(r"(\.)|(\;)|(\:)|(\!)|(\')|(\?)|(\,)|(\")|(\()|(\))|(\[)|(\])")
# Doubled HTML line breaks, hyphens and slashes, each replaced by one space
REPLACE_WITH_SPACE = re.compile(r"(<br\s*/><br\s*/>)|(\-)|(\/)")

# Load the token -> integer-id mapping from vocab_dict.json once, at import
# time, so every invocation of the handler reuses the same dictionary.
with open('vocab_dict.json') as vocab_file:
    VOCAB_DICT = json.load(vocab_file)

# Before sending the data to the endpoint, we need to preprocess it and so
# these functions help us do that
def review_to_words(review):
    """Normalise a review string before tokenisation.

    The text is lower-cased, every match of ``REPLACE_NO_SPACE`` is deleted,
    and every match of ``REPLACE_WITH_SPACE`` is replaced by a single space.

    Args:
        review: the raw review text (a ``str``).

    Returns:
        The cleaned text as one string; splitting it into words is left to
        the caller.
    """
    lowered = review.lower()
    without_punctuation = REPLACE_NO_SPACE.sub("", lowered)
    return REPLACE_WITH_SPACE.sub(" ", without_punctuation)

def preprocess_input(text, vocab_dict, maxlen=100):
    """Encode raw review text as a single-row batch of ``maxlen`` token ids.

    The text is cleaned with ``review_to_words``, split on whitespace, and each
    token is looked up in ``vocab_dict``. Sequences longer than ``maxlen`` keep
    their first ``maxlen`` ids; shorter ones are left-padded with ``0``.

    Args:
        text: the raw review text.
        vocab_dict: mapping from token to integer id.
        maxlen: length of the returned id sequence.

    Returns:
        A nested list ``[[id, id, ...]]`` holding exactly one row of ``maxlen``
        ids. The original returned a flat list when the review had ``maxlen``
        or more tokens but a nested list otherwise, so long reviews were sent
        in a different shape than short ones; both cases are now nested.

    Raises:
        KeyError: if a token is not a key of ``vocab_dict``.
    """
    review = review_to_words(text)
    int_tokens = [vocab_dict[token] for token in review.split()]

    # Truncate first, then left-pad; the padding is empty when the review
    # already has at least `maxlen` tokens.
    int_tokens = int_tokens[:maxlen]
    padding = [0] * (maxlen - len(int_tokens))
    return [padding + int_tokens]

def lambda_handler(event, context):
    """Score the review text in ``event['body']`` against the SageMaker endpoint.

    Args:
        event: the invocation event; ``event['body']`` must hold the raw
            review text.
        context: the Lambda context object (unused).

    Returns:
        A proxy-style response dict with ``statusCode`` 200, CORS headers and
        the predicted score serialised as a JSON string in ``body``.

    Raises:
        KeyError: if ``event`` has no ``'body'`` key, or if the review contains
            a token missing from ``VOCAB_DICT``.
    """
    # Imported locally so this handler does not depend on the module's import list.
    import os

    # Preprocess the data
    data = preprocess_input(event['body'], VOCAB_DICT)

    # The SageMaker runtime is what allows us to invoke the endpoint that we've created.
    runtime = boto3.Session().client('sagemaker-runtime')

    # The endpoint name can be overridden with the ENDPOINT_NAME environment
    # variable, so a redeployed endpoint needs no code change; the previous
    # hard-coded name remains the default.
    endpoint_name = os.environ.get('ENDPOINT_NAME',
                                   'sagemaker-tensorflow-2020-01-09-07-58-25-420')

    # Now we use the SageMaker runtime to invoke our endpoint, sending the review we were given
    response = runtime.invoke_endpoint(EndpointName=endpoint_name,
                                       Body=json.dumps(data))

    # The response is an HTTP response whose body contains the result of our inference
    result = json.loads(response['Body'].read().decode("utf-8"))
    pred = result['outputs']['score']['floatVal'][0]

    return {
        'statusCode': 200,
        'headers': {'Content-Type': 'application/json',
                    'Access-Control-Allow-Origin': '*',
                    "Access-Control-Allow-Credentials": True},
        # A Lambda proxy response needs a string body; returning the bare
        # float makes API Gateway reject the response as malformed.
        'body': json.dumps(pred)
    }
    
```

Once you have set up the Lambda function, connect it to API Gateway so that it can be invoked over HTTP.

### Step 8. Clean up

To avoid incurring unnecessary charges, use the AWS Management Console to delete the resources that you created, such as the endpoint, the endpoint configuration, the model, and the S3 bucket.