In [None]:
# !pip install sagemaker botocore boto3 awscli --upgrade

In [None]:
import tarfile
import sagemaker
import tensorflow as tf
import tensorflow.keras as keras
import shutil
import os
import json
import numpy as np
import time

# IAM role the notebook runs under; later passed as RoleArn to the recommender jobs.
role = sagemaker.get_execution_role()

# A single SageMaker session supplies both the default S3 bucket and the region.
sess = sagemaker.Session()
bucket = sess.default_bucket()
region = sess.boto_region_name

In [None]:
import boto3

# Low-level SageMaker API client bound to the same region as the SageMaker session.
inference_client = boto3.client("sagemaker", region_name=region)

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50

def load_save_resnet50_model(model_path):
    """Export an ImageNet-pretrained ResNet50 to ``model_path``.

    Any existing directory at ``model_path`` is deleted first (errors while
    deleting are ignored), then the model is written with ``save_format='tf'``
    and without optimizer state.

    Args:
        model_path: Directory the model is saved into.
    """
    pretrained_resnet = ResNet50(weights='imagenet')
    # Start from a clean directory so files from a previous export cannot linger.
    shutil.rmtree(model_path, ignore_errors=True)
    pretrained_resnet.save(
        model_path,
        include_optimizer=False,
        save_format='tf',
    )

# The model is exported to <saved_model_dir>/<model_ver>/.
saved_model_dir, model_ver = 'resnet50_saved_model', '1'
model_path = os.path.join(saved_model_dir, model_ver)

load_save_resnet50_model(model_path)

In [None]:
shutil.rmtree('model.tar.gz', ignore_errors=True)
!tar cvfz model.tar.gz code -C resnet50_saved_model .

### Download the payload 

In [None]:
payload_location = "sample-payload"
shutil.rmtree(payload_location, ignore_errors=True)
os.makedirs(payload_location)

!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/pets/boxer_dog.jpg > ./sample-payload/boxer_dog.jpg
!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/pets/british_blue_shorthair_cat.jpg > ./sample-payload/british_blue_shorthair_cat.jpg
!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/pets/english_cocker_spaniel_dog.jpg > ./sample-payload/english_cocker_spaniel_dog.jpg
!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/pets/shiba_inu_dog.jpg > ./sample-payload/shiba_inu_dog.jpg

### Tar the payload

In [None]:
shutil.rmtree('payload.tar.gz', ignore_errors=True)
!cd ./sample-payload/ && tar czvf ../payload.tar.gz *

In [None]:
prefix = 'inference-recommender'

# Upload both archives under the same key prefix. The returned values are
# passed later as SamplePayloadUrl and ModelDataUrl when the model package is created.
s3_payload_path, s3_model_path = (
    sess.upload_data(path=archive, key_prefix=prefix)
    for archive in ('payload.tar.gz', 'model.tar.gz')
)

## 3. Machine Learning model details

Example ML Domains: `COMPUTER_VISION`, `NATURAL_LANGUAGE_PROCESSING`, `MACHINE_LEARNING` <br>
Example ML Tasks: `IMAGE_CLASSIFICATION`, `CLASSIFICATION`, `REGRESSION`, `OBJECT_DETECTION`, `OTHER` <br>
Example Model name: `resnet50`, `yolov4`, `xgboost` etc

In [None]:
import pandas as pd

# Fetch the model metadata summaries and show them as a table ordered by domain.
list_model_metadata_response = inference_client.list_model_metadata()
model_metadata_summaries = list_model_metadata_response['ModelMetadataSummaries']
pd.DataFrame.from_dict(model_metadata_summaries).sort_values(by=['Domain'])

### Container image URL

If you don’t have an inference container image, you can use one of the open source [deep learning containers (DLCs)](https://github.com/aws/deep-learning-containers) provided by AWS to serve your ML model.

In [None]:
from sagemaker import image_uris

framework_name = "tensorflow"
framework_version = "1.15.4"

# Look up the inference container image for this framework/version in the
# session's region; the instance type is part of the lookup.
image_uri = image_uris.retrieve(
    framework=framework_name,
    version=framework_version,
    py_version="py3",
    region=region,
    instance_type='ml.c5.xlarge',
    image_scope='inference',
)
image_uri

## 4. Register Model Version/Package

Inference Recommender expects the model to be packaged in the model registry. Here, we create a model package group and a model package version. In addition to the container image, model URL, etc., the model package version lets you pass extra information about the model, such as `Domain`, `Task`, `Framework`, `FrameworkVersion`, `NearestModelName`, and `SamplePayloadUrl`.

The `SamplePayloadUrl` and `SupportedContentTypes` parameters are essential for benchmarking the endpoint. We also highly recommend that you specify `Domain`, `Task`, `Framework`, `FrameworkVersion`, and `NearestModelName` for better inference recommendations.

In [None]:
import boto3
import uuid

# A fresh UUID-based name is used for the group on every run.
model_package_group_name = str(uuid.uuid1())

# Create the (initially empty) group that will hold the model package version.
group_request = {
    "ModelPackageGroupName": model_package_group_name,
    "ModelPackageGroupDescription": "description",
}
model_pacakge_group_response = inference_client.create_model_package_group(**group_request)
model_pacakge_group_response

In [None]:
# ML model details
model_name = "resnet50"
ml_domain = "COMPUTER_VISION"
ml_task = "IMAGE_CLASSIFICATION"

# Register a model package version that bundles the container image, the model
# artifact and the sample payload with the metadata Inference Recommender uses.
model_package_version_response = inference_client.create_model_package(
    ModelPackageGroupName      = model_package_group_name,
    ModelPackageDescription    = "InferenceRecommenderExample",
    Domain                     = ml_domain,
    Task                       = ml_task,
    SamplePayloadUrl           = s3_payload_path,
    InferenceSpecification={
        "Containers": [
            {
                "ContainerHostname": "dlc",
                "Image": image_uri,
                "ModelDataUrl": s3_model_path,
                "Framework": "TENSORFLOW",
                # Reuse the version the container image was resolved with so the
                # two cannot drift apart.
                "FrameworkVersion": framework_version,
                "NearestModelName": model_name,
                # Keras ResNet50 uses channels-last input by default, i.e.
                # (batch, height, width, channels), not NCHW.
                "ModelInput": {"DataInputConfig": '{"input_1":[1,224,224,3]}'},
            },
        ],
        "SupportedRealtimeInferenceInstanceTypes": [
            "ml.c5.xlarge",
            "ml.c5.2xlarge",
            "ml.m5.xlarge",
            "ml.m5.2xlarge",
            "ml.m5.4xlarge",
            "ml.inf1.xlarge",
            "ml.inf1.2xlarge",
            "ml.g4dn.xlarge",
            "ml.g4dn.2xlarge",
            "ml.g4dn.4xlarge",
            "ml.p3.2xlarge",
        ],
        "SupportedContentTypes": [
            "application/x-image",
        ],
        "SupportedResponseMIMETypes": [],
    },
)

print(model_package_version_response)

## Create a SageMaker Inference Recommender Default Job

In [None]:
# The job name is a fresh UUID; later cells look the job up via str(default_job).
default_job = uuid.uuid1()

# A Default job only needs the model package version to benchmark.
default_job_input = {
    "ModelPackageVersionArn": model_package_version_response["ModelPackageArn"],
}
default_response = inference_client.create_inference_recommendations_job(
    JobName=str(default_job),
    JobType="Default",
    JobDescription="Job Description",
    RoleArn=role,
    InputConfig=default_job_input,
)

print(default_response)

In [None]:
%%time
import time

# Poll until the job leaves every non-terminal state. A freshly created job
# reports PENDING before IN_PROGRESS, so waiting only on IN_PROGRESS would
# fall straight through without waiting at all.
describe_inf_recommender_response = inference_client.describe_inference_recommendations_job(JobName=str(default_job))
while describe_inf_recommender_response["Status"] in ("PENDING", "IN_PROGRESS", "STOPPING"):
    print(describe_inf_recommender_response["Status"])
    # Sleep before re-querying so no extra delay is added once the job has finished.
    time.sleep(15)
    describe_inf_recommender_response = inference_client.describe_inference_recommendations_job(JobName=str(default_job))

print(f'Inference recommender completed job with status: {describe_inf_recommender_response["Status"]}')
if describe_inf_recommender_response["Status"] == "FAILED":
    # Show why the job failed, when the service reports a reason.
    print(describe_inf_recommender_response.get("FailureReason"))

### Detailing out the result

In [None]:
# Inspect the job created by this notebook run. A hard-coded job name from an
# earlier run would not exist after Restart & Run All or in another account.
job_name = str(default_job)

describe_inf_recommender_response = inference_client.describe_inference_recommendations_job(JobName=job_name)

# Flatten each recommendation (endpoint config + model config + metrics) into one row.
# "InferenceRecommendations" is missing when the job did not complete, so default to no rows.
data = [
    {**x["EndpointConfiguration"], **x["ModelConfiguration"], **x["Metrics"]}
    for x in describe_inf_recommender_response.get("InferenceRecommendations", [])
]
# Non-inplace drop keeps the cell idempotent; errors="ignore" copes with an empty frame.
df = pd.DataFrame(data).drop(columns="VariantName", errors="ignore")
pd.set_option("display.max_colwidth", 400)
df.head()

## 7. Custom Load Test

With an 'Advanced' job, you can provide your production requirements, select instance types, tune environment variables and perform more extensive load tests.

In [None]:
# The job name is a fresh UUID; later cells look the job up via str(advanced_job).
advanced_job = uuid.uuid1()

# Instance types to load-test; each becomes one endpoint configuration.
advanced_instance_types = [
    "ml.c5.xlarge",
    "ml.c5.2xlarge",
    "ml.g4dn.xlarge",
    "ml.g4dn.2xlarge",
    "ml.p3.2xlarge",
]

# Single traffic phase used for the load test.
traffic_phases = [{"InitialNumberOfUsers": 1, "SpawnRate": 3, "DurationInSeconds": 120}]

advanced_input_config = {
    "ModelPackageVersionArn": model_package_version_response["ModelPackageArn"],
    "JobDurationInSeconds": 7200,
    "EndpointConfigurations": [
        {"InstanceType": instance_type} for instance_type in advanced_instance_types
    ],
    "ResourceLimit": {"MaxNumberOfTests": 5, "MaxParallelOfTests": 5},
    "TrafficPattern": {"TrafficType": "PHASES", "Phases": traffic_phases},
}

# Stopping conditions: invocation cap and a P95 model-latency threshold.
advanced_stopping_conditions = {
    "MaxInvocations": 500,
    "ModelLatencyThresholds": [{"Percentile": "P95", "ValueInMilliseconds": 20}],
}

advanced_response = inference_client.create_inference_recommendations_job(
    JobName=str(advanced_job),
    JobType="Advanced",
    JobDescription="JobDescription",
    RoleArn=role,
    InputConfig=advanced_input_config,
    StoppingConditions=advanced_stopping_conditions,
)

print(advanced_response)

In [None]:
%%time
import time

# Poll only while the job is in a non-terminal state. Waiting for
# `!= "COMPLETED"` would spin forever if the job ends as FAILED or STOPPED.
describe_inf_recommender_response = inference_client.describe_inference_recommendations_job(JobName=str(advanced_job))
while describe_inf_recommender_response["Status"] in ("PENDING", "IN_PROGRESS", "STOPPING"):
    print(describe_inf_recommender_response["Status"])
    # Sleep before re-querying so no extra delay is added once the job has finished.
    time.sleep(15)
    describe_inf_recommender_response = inference_client.describe_inference_recommendations_job(JobName=str(advanced_job))

print(f'Inference recommender completed job with status: {describe_inf_recommender_response["Status"]}')
if describe_inf_recommender_response["Status"] == "FAILED":
    # Show why the job failed, when the service reports a reason.
    print(describe_inf_recommender_response.get("FailureReason"))

### Detailing out the result

In [None]:
# Inspect the advanced job created by this notebook run.
job_name = str(advanced_job)

print(f"Job name: {job_name}")
describe_inf_recommender_response = inference_client.describe_inference_recommendations_job(JobName=job_name)

# Flatten each recommendation (endpoint config + model config + metrics) into one row.
# "InferenceRecommendations" is missing when the job did not complete, so default to no rows.
data = [
    {**x["EndpointConfiguration"], **x["ModelConfiguration"], **x["Metrics"]}
    for x in describe_inf_recommender_response.get("InferenceRecommendations", [])
]
# Non-inplace drop keeps the cell idempotent; errors="ignore" copes with an empty frame.
df = pd.DataFrame(data).drop(columns="VariantName", errors="ignore")
pd.set_option("display.max_colwidth", 400)
df.head()