## Optimize and deploy models with MAX Engine and MAX Serving

In [None]:
import shutil
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
import numpy as np
from tensorflow.keras.applications import ResNet50, EfficientNetB0

In [None]:
def load_save_model(keras_model, saved_model_dir='saved_model'):
    """Build a Keras application model and export it in TF SavedModel format.

    The model is instantiated with ImageNet weights, any existing
    ``saved_model_dir`` tree is deleted, and the model is then written
    (without optimizer state) to ``<saved_model_dir>/1/saved_model/``.

    Args:
        keras_model: Callable that accepts ``weights=`` and returns an object
            exposing ``save`` (e.g. ``ResNet50``).
        saved_model_dir: Root directory of the export; it is wiped first.
    """
    network = keras_model(weights='imagenet')

    # Drop any previous export; a missing directory is not an error.
    shutil.rmtree(saved_model_dir, ignore_errors=True)

    # NOTE(review): the "1" component presumably acts as the model version
    # folder expected by the serving repository layout -- confirm.
    export_path = saved_model_dir + "/1/saved_model/"
    network.save(export_path, include_optimizer=False, save_format='tf')

### Download ResNet50 and EfficientNet models and deploy them with MAX Serving

In [5]:
# Export both ImageNet classifiers into the local model repository.
# NOTE(review): the export order is kept as-is; the hard-coded input names
# used at inference time ("input_1"/"input_2") presumably depend on it.
for builder, target_dir in (
    (ResNet50, 'model-repository/resnet50'),
    (EfficientNetB0, 'model-repository/efficientnet'),
):
    load_save_model(builder, target_dir)

In [None]:
%%sh
# Write the same Triton config.pbtxt for every exported model: CPU instances,
# the "max" backend, and "saved_model" as the default model filename.
for model in resnet50 efficientnet; do
cat > "model-repository/${model}/config.pbtxt" <<EOL
instance_group {
 kind: KIND_CPU
}
default_model_filename: "saved_model"
backend: "max"
EOL
done

# Show the resulting repository layout.
tree model-repository

### Run MAX Serving Container

In [None]:
%%sh
# Start the MAX Serving container and launch tritonserver inside it.
#   --rm            remove the container when it exits
#   --network=host  share the host network (the client cell connects to
#                   localhost:8000)
#   -v ...          mount ./model-repository at /models inside the container
# Explicit model control with '--load-model=*' loads every model found in the
# repository at startup.
# The bind mount and the '*' are quoted so a working directory containing
# spaces, or a stray file matching the glob, cannot change the arguments.
# NOTE: no -d flag is passed, so this runs in the foreground and the cell
# does not return while the server is up.
docker run --rm --network=host --name max-serving \
    -v "$PWD/model-repository/:/models" \
    public.ecr.aws/modular/max-serving-de tritonserver --model-repository=/models \
        --model-control-mode=explicit \
        '--load-model=*'

In [None]:
import tritonclient.http as httpclient
from urllib.request import urlretrieve
from PIL import Image
import matplotlib.pyplot as plt

### Triton client ###
# HTTP client pointed at the serving endpoint on the local host.
client = httpclient.InferenceServerClient(url="localhost:8000")

### Display image ###
# Download the sample picture into the working directory as kitten.jpg.
urlretrieve(
    url='https://s3.amazonaws.com/model-server/inputs/kitten.jpg',
    filename="kitten.jpg",
)
def display_image(file_name="kitten.jpg"):
    """Draw an image file on the current matplotlib axes with the axis hidden.

    Args:
        file_name: Path of the image to show; defaults to the sample picture.
    """
    # Open through a context manager so the file handle is released as soon
    # as the pixels have been handed to matplotlib.
    with Image.open(file_name) as img:
        plt.imshow(img)
    plt.axis('off')
# Preview the downloaded sample picture.
display_image(file_name="kitten.jpg")

In [None]:
### Image pre-processing ###
def image_preprocess(preprocess_fn, img, reps=1):
    """Convert a PIL image into a preprocessed float32 batch of 224x224 RGB frames.

    Args:
        preprocess_fn: Callable applied to the stacked batch (for example a
            Keras ``preprocess_input`` function).
        img: PIL-style image exposing ``convert`` and ``resize``.
        reps: Number of copies of the image stacked along a new leading axis.

    Returns:
        The result of ``preprocess_fn`` applied to the
        ``(reps, 224, 224, 3)`` float32 array.

    Raises:
        ValueError: From ``np.stack`` when ``reps`` is less than 1.
    """
    # Force three channels first: grayscale, palette or RGBA files would
    # otherwise yield an array whose last axis is not 3.
    rgb = img.convert('RGB').resize((224, 224))
    pixels = np.asarray(rgb, dtype=np.float32)
    batch = np.stack([pixels] * reps)
    return preprocess_fn(batch)

### Choose your model ###
# model_name = "resnet50"
model_name = "efficientnet"

# Per-model settings: the Keras applications module that supplies
# preprocess_input / decode_predictions, and the served input tensor name
# (taken from the model's input metadata).
model_settings = {
    "resnet50": (tf.keras.applications.resnet, "input_1"),
    "efficientnet": (tf.keras.applications.efficientnet, "input_2"),
}

# Fail loudly on a typo instead of silently falling back to EfficientNet.
if model_name not in model_settings:
    raise ValueError(
        f"Unknown model_name {model_name!r}; expected one of {sorted(model_settings)}"
    )

keras_module, input_name = model_settings[model_name]
preprocess_fn = keras_module.preprocess_input
decode_fn = keras_module.decode_predictions

### Image to classify / preprocess image ###
# The context manager closes the file once the pixels have been copied into
# the numpy batch; `img` ends up holding the preprocessed array.
with Image.open('kitten.jpg') as source_image:
    img = image_preprocess(preprocess_fn, source_image)

### Inference request format ###
# Describe the FP32 input tensor and attach the preprocessed batch as a
# binary payload.
inputs = httpclient.InferInput(name=input_name, shape=img.shape, datatype="FP32")
inputs.set_data_from_numpy(img, binary_data=True)

# Request the "predictions" output with class_count=1000.
outputs = httpclient.InferRequestedOutput(
    name="predictions",
    binary_data=True,
    class_count=1000,
)

### Submit inference request ###
results = client.infer(
    model_name=model_name,
    inputs=[inputs],
    outputs=[outputs],
)
inference_output = results.as_numpy(name='predictions')

### Process request ###
# Each returned entry is a bytes value; after decoding, its ':'-separated
# fields are read as a score (field 0) and a class index (field 1).
fields = [entry.decode().split(':') for entry in inference_output]
idx = [int(parts[1]) for parts in fields]
probs = [float(parts[0]) for parts in fields]

### Decoding predictions ###
# Reorder the scores by ascending class index before handing them to the
# Keras decoder as a one-row batch.
probs = np.array(probs)[np.argsort(idx)]
display_image()
labels = decode_fn(probs[np.newaxis, :], top=5)
print(model_name, ":")
list(labels[0])