# Amazon SageMaker inference deployment to Inf1 with AWS Inferentia 
This example demonstrates how to deploy a model for inference on Amazon SageMaker using the SageMaker Python SDK.

This example was tested in an Amazon SageMaker Studio notebook.
Run this notebook with the following Amazon SageMaker Studio conda environment:
`TensorFlow CPU Optimized`

In [None]:
# !pip install --upgrade pip -q
# !pip install --upgrade sagemaker -q

In [None]:
import tarfile
import sagemaker
import tensorflow as tf
import tensorflow.keras as keras
import shutil
import os
import time
import tensorflow.compat.v1.keras as keras
from tensorflow.keras.applications.resnet50 import ResNet50

# Create the SageMaker session and read the values reused by later cells:
# its region, its default bucket and the notebook's execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
region = sess.boto_region_name
role = sagemaker.get_execution_role()

# Report library versions so runs can be compared against the tested setup.
print(f'sagemaker version: {sagemaker.__version__}')
print(f'tensorflow version: {tf.__version__}')

In [None]:
# Export the ImageNet-pretrained Keras ResNet50 as a SavedModel.
saved_model_dir = 'resnet50_saved_model'

# Drop any previous export; errors (e.g. the directory not existing yet)
# are deliberately ignored.
shutil.rmtree(saved_model_dir, ignore_errors=True)

# Set learning phase 0 (documented by Keras as test/inference mode) before
# the model graph is built.
keras.backend.set_learning_phase(0)
model = ResNet50(weights='imagenet')

# Signature maps: key -> graph tensor for the model's first input and output.
signature_inputs = {'input_1:0': model.inputs[0]}
signature_outputs = {'probs/Softmax:0': model.outputs[0]}

tf.saved_model.simple_save(
    session=keras.backend.get_session(),
    export_dir=saved_model_dir,
    inputs=signature_inputs,
    outputs=signature_outputs,
)

In [None]:
# Remove any stale archive. 'model.tar.gz' is a regular file, so os.remove is
# the right call: shutil.rmtree only deletes directory trees and, combined with
# ignore_errors=True, would silently leave an old archive in place.
try:
    os.remove('model.tar.gz')
except FileNotFoundError:
    pass

# Package the contents of the SavedModel directory at the archive root,
# i.e. the equivalent of `tar cvfz model.tar.gz -C resnet50_saved_model .`,
# without depending on the IPython shell escape or a `tar` binary.
with tarfile.open('model.tar.gz', 'w:gz') as archive:
    archive.add('resnet50_saved_model', arcname='.')
    # Mirror tar's verbose flag by listing what was archived.
    print('\n'.join(archive.getnames()))

In [None]:
from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor

# Upload the packaged model through the SageMaker session under `prefix`;
# the returned S3 location is what the model object points at.
prefix = 'keras_inf1_models'
s3_model_path = sess.upload_data(path='model.tar.gz', key_prefix=prefix)

# Describe the TensorFlow 1.15 model; predictions go through TensorFlowPredictor.
model_settings = dict(
    model_data=s3_model_path,
    framework_version='1.15',
    role=role,
    predictor_cls=TensorFlowPredictor,
    sagemaker_session=sess,
)
model = TensorFlowModel(**model_settings)

### Deploy to Inf1 instance with AWS Inferentia

In [None]:
# A UTC timestamp keeps every compilation job name distinct.
compile_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
compiled_model_s3_uri = f's3://{bucket}/{prefix}/compiled_model/'

# Compile the model for the ml_inf1 instance family with a fixed
# [1, 224, 224, 3] input; artifacts are written to `compiled_model_s3_uri`.
inf1_model = model.compile(
    target_instance_family='ml_inf1',
    input_shape={'input_1': [1, 224, 224, 3]},
    output_path=compiled_model_s3_uri,
    role=role,
    framework='tensorflow',
    framework_version='1.15.0',
    job_name=f'inf1-{compile_timestamp}',
)

In [None]:
# Deploy the compiled model on a single ml.inf1.xlarge instance and keep the
# returned predictor for the test cells below.
deploy_settings = {
    'initial_instance_count': 1,
    'instance_type': 'ml.inf1.xlarge',
}
predictor_inf1 = inf1_model.deploy(**deploy_settings)

### Test endpoint

In [None]:
## If you already have a running endpoint, create the predictor from its
## endpoint name instead of deploying again (replace ENDPOINT_NAME below).

# from sagemaker.tensorflow.model import TensorFlowPredictor
# predictor_inf1 = TensorFlowPredictor('ENDPOINT_NAME',
#                                      sagemaker_session=sess)

In [None]:
def image_preprocess(img, reps=1):
    """Turn a PIL image into a batch of ResNet50-ready inputs.

    Args:
        img: A ``PIL.Image.Image`` (any object providing ``convert`` and
            ``resize`` with the same semantics).
        reps: Number of copies of the image stacked along a new leading
            batch axis. ``np.stack`` raises ``ValueError`` when this is
            less than 1.

    Returns:
        The result of ``tf.keras.applications.resnet50.preprocess_input``
        applied to an array of shape ``(reps, 224, 224, 3)``.
    """
    # Force three channels: grayscale, palette or RGBA images would otherwise
    # produce arrays that do not match the [N, 224, 224, 3] model input.
    resized = img.convert('RGB').resize((224, 224))
    batch = np.stack([np.asarray(resized)] * reps)
    return tf.keras.applications.resnet50.preprocess_input(batch)

In [None]:
from PIL import Image 
import numpy as np
import json

# Load the sample picture and turn it into a preprocessed batch holding
# five copies of the same image.
img = image_preprocess(Image.open('kitten.jpg'), 5)

### Invoke the Inf1 (AWS Inferentia) endpoint

In [None]:
# Send the preprocessed batch to the Inf1 endpoint.
response = predictor_inf1.predict(data=img)

# Only the first prediction in the response is inspected.
probs = np.array(response['predictions'][0])

# decode_predictions works on a batch, so add the leading batch axis back.
# The resnet50 module is used here to match the ResNet50 import and the
# resnet50.preprocess_input call used for preprocessing.
tf.keras.applications.resnet50.decode_predictions(np.expand_dims(probs, axis=0), top=5)