# Amazon SageMaker inference deployment to CPUs, GPUs, and EI
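This example demonstrates how to deploy a model to Amazon SageMaker inference endpoints on CPU instances, GPU instances, and CPU instances with Elastic Inference (EI) accelerators, using the SageMaker Python SDK.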

This example was tested in an Amazon SageMaker Studio notebook.
Run this notebook using the following Amazon SageMaker Studio conda environment:
`TensorFlow 2 CPU Optimized`

In [1]:
# !pip install --upgrade pip -q
# !pip install --upgrade sagemaker -q

In [2]:
import tarfile
import sagemaker
import tensorflow as tf
import tensorflow.keras as keras
import shutil
import os
import time
from tensorflow.keras.applications.resnet50 import ResNet50

# Session and execution role consumed by the upload and model-creation cell.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

# Region and default bucket associated with the session.
region = sess.boto_region_name
bucket = sess.default_bucket()

# Report library versions so the run can be reproduced.
print(f'sagemaker version: {sagemaker.__version__}')
print(f'tensorflow version: {tf.__version__}')

sagemaker version: 2.15.1
tensorflow version: 2.1.0


In [3]:
def load_save_resnet50_model(model_path):
    """Export an ImageNet-pretrained ResNet50 in TensorFlow SavedModel format.

    Any existing directory at ``model_path`` is removed before saving.

    Args:
        model_path: Destination directory for the exported model.
    """
    resnet50 = ResNet50(weights='imagenet')
    # Clear a previous export; a missing directory is not an error.
    shutil.rmtree(model_path, ignore_errors=True)
    resnet50.save(model_path, save_format='tf', include_optimizer=False)

# The model is exported into a numbered version sub-directory (here '1')
# underneath `saved_model_dir`.
saved_model_dir = 'resnet50_saved_model'
model_ver = '1'
model_path = os.path.join(saved_model_dir, model_ver)

# Export only when no SavedModel is present yet: a fresh "Restart & Run All"
# then produces the directory that the packaging cell archives, while re-runs
# skip the weight download and export.
if not os.path.exists(os.path.join(model_path, 'saved_model.pb')):
    load_save_resnet50_model(model_path)

In [4]:
shutil.rmtree('model.tar.gz', ignore_errors=True)
!tar cvfz model.tar.gz -C resnet50_saved_model .

./
./1/
./1/variables/
./1/variables/variables.data-00000-of-00001
./1/variables/variables.index
./1/saved_model.pb
./1/assets/


In [5]:
from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor

# Upload the packaged model archive to S3 under the `prefix` key prefix.
prefix = 'keras_models'
s3_model_path = sess.upload_data(path='model.tar.gz', key_prefix=prefix)

# One model definition, reused by every deploy cell that follows.
model = TensorFlowModel(
    model_data=s3_model_path,
    role=role,
    framework_version='1.15',
    predictor_cls=TensorFlowPredictor,
    sagemaker_session=sess,
)

### Deploy to CPU instance

In [6]:
# Single CPU instance, no accelerator attached.
predictor_cpu = model.deploy(
    instance_type='ml.c5.xlarge',
    initial_instance_count=1,
)

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


-------------!

### Deploy using EI

In [7]:
# Single CPU instance with an Elastic Inference accelerator attached.
predictor_ei = model.deploy(
    instance_type='ml.c5.xlarge',
    accelerator_type='ml.eia2.large',
    initial_instance_count=1,
)

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


-------------!

### Deploy to GPU instance

In [8]:
# Single GPU instance.
predictor_gpu = model.deploy(
    instance_type='ml.g4dn.xlarge',
    initial_instance_count=1,
)

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


-------------!

### Test endpoint

In [9]:
## If you have an existing endpoint, create a predictor using the endpoint name

# from sagemaker.tensorflow.model import TensorFlowPredictor
# predictor = TensorFlowPredictor('ENDPOINT_NAME',
#                                sagemaker_session=sess)

In [10]:
def image_preprocess(img, reps=1):
    """Turn a PIL image into a batch of ResNet50-ready inputs.

    Args:
        img: PIL image (any object exposing ``convert`` and ``resize``).
        reps: Number of copies of the image stacked along a new leading axis.

    Returns:
        The stacked batch after ResNet50 ``preprocess_input``; the input to
        that call has shape ``(reps, 224, 224, 3)``.
    """
    # Force three channels so grayscale, palette or RGBA files do not yield
    # an array with the wrong channel count for the model.
    pixels = np.asarray(img.convert('RGB').resize((224, 224)))
    batch = np.stack([pixels] * reps)
    return tf.keras.applications.resnet50.preprocess_input(batch)

In [11]:
from PIL import Image 
import numpy as np
import json

# Keep the PIL image and the preprocessed batch under separate names, and
# release the file handle once the pixels have been read.
with Image.open('kitten.jpg') as kitten:
    # Batch of 5 copies of the same image.
    img = image_preprocess(kitten, 5)

### Invoke CPU endpoint

In [12]:
# Send the batch to the CPU endpoint; only the first returned prediction is decoded.
response = predictor_cpu.predict(data=img)
probs = np.array(response['predictions'][0])
# Add a leading axis of size one before decoding the top-5 classes.
tf.keras.applications.resnet.decode_predictions(probs[np.newaxis, ...], top=5)

[[('n02123159', 'tiger_cat', 0.495739877),
  ('n02123045', 'tabby', 0.434538245),
  ('n02124075', 'Egyptian_cat', 0.0492461845),
  ('n02127052', 'lynx', 0.0143557377),
  ('n02128385', 'leopard', 0.00133766234)]]

### Invoke CPU Instance + EI endpoint

In [13]:
# Send the batch to the EI endpoint; only the first returned prediction is decoded.
response = predictor_ei.predict(data=img)
probs = np.array(response['predictions'][0])
# Add a leading axis of size one before decoding the top-5 classes.
tf.keras.applications.resnet.decode_predictions(probs[np.newaxis, ...], top=5)

[[('n02123159', 'tiger_cat', 0.495739),
  ('n02123045', 'tabby', 0.434539199),
  ('n02124075', 'Egyptian_cat', 0.0492460541),
  ('n02127052', 'lynx', 0.0143557545),
  ('n02128385', 'leopard', 0.00133766781)]]

### Invoke G4 GPU Instance with NVIDIA T4 endpoint

In [14]:
# Send the batch to the GPU endpoint; only the first returned prediction is decoded.
response = predictor_gpu.predict(data=img)
probs = np.array(response['predictions'][0])
# Add a leading axis of size one before decoding the top-5 classes.
tf.keras.applications.resnet.decode_predictions(probs[np.newaxis, ...], top=5)

[[('n02123159', 'tiger_cat', 0.495739311),
  ('n02123045', 'tabby', 0.434538603),
  ('n02124075', 'Egyptian_cat', 0.0492461771),
  ('n02127052', 'lynx', 0.0143557768),
  ('n02128385', 'leopard', 0.00133766851)]]