In [None]:
%%bash
# Package the pretrained DenseNet-161 weights into models/densenet161.tar.gz.
# Abort on the first failing command so a broken step is not masked by later ones.
set -euo pipefail

pip install -q torch-model-archiver

# Skip the download when the weights are already present; a plain re-download
# would be saved as densenet161-8d451a50.pth.1 and waste bandwidth.
[ -f densenet161-8d451a50.pth ] || wget -q https://download.pytorch.org/models/densenet161-8d451a50.pth

# The archiver below reads model.py and index_to_name.json from a local
# checkout of pytorch/serve; fetch it only when it is missing.
[ -d serve ] || git clone -q --depth 1 https://github.com/pytorch/serve/

# -p: do not fail when the directory already exists (cell re-run).
mkdir -p models/

# --force: overwrite an archive left behind by a previous run instead of erroring out.
torch-model-archiver --model-name densenet161 --version 1.0 \
    --model-file ./serve/examples/image_classifier/densenet_161/model.py \
    --serialized-file densenet161-8d451a50.pth --export-path models \
    --extra-files ./serve/examples/image_classifier/index_to_name.json \
    --handler image_classifier --archive-format tgz --force

In [None]:
import time
import boto3
import sagemaker

In [None]:
# Deployment settings -- replace these with your own values.
bucket = "lninga-mars"  # S3 bucket that receives the model archives
prefix = "ts-model"     # key prefix inside the bucket; also embedded in resource names
region = "us-west-2"    # region used to build the S3 model URL

# Stem of the uploaded copies (<model_name_prefix>-<n>.tar.gz) and
# stem of the local archive (models/<model_name>.tar.gz).
model_name_prefix = "densenet161"
model_name = "densenet161"

In [None]:
# Optional: fan the single local archive out into 30 numbered copies in S3
# (<prefix>/<model_name_prefix>-0.tar.gz ... <prefix>/<model_name_prefix>-29.tar.gz).
s3 = boto3.client("s3")
local_archive = f"models/{model_name}.tar.gz"
for copy_index in range(30):
    s3.upload_file(
        Filename=local_archive,
        Bucket=bucket,
        Key=f"{prefix}/{model_name_prefix}-{copy_index}.tar.gz",
    )

In [None]:
# SageMaker client used below to create the model, endpoint config and endpoint.
sm_client = boto3.client("sagemaker")

In [None]:
# UTC timestamp that makes the resource names created by this run unique.
run_date = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

# Serving image stored in ECR -- replace with your own. The repository lives in
# us-west-2: https://us-west-2.console.aws.amazon.com/ecr/repositories?region=us-west-2
image_uri = "844664758773.dkr.ecr.us-west-2.amazonaws.com/ts_test:v0"

# S3 location (a prefix, note the trailing slash) that holds the model archives.
model_url = f"https://s3-{region}.amazonaws.com/{bucket}/{prefix}/"

base_name = f"MultiModel-{prefix}-{run_date}"
# NOTE: this rebinds `model_name`, which the configuration cell set to the stem
# of the local archive. Re-run the configuration cell before re-running the
# upload cell, otherwise the upload looks for models/<this name>.tar.gz.
model_name = f"{base_name}-Model"

# Container definition passed to create_model; kept in its own name so that
# `container` is only ever a dict and `image_uri` only ever a string.
container = {
    "Image": image_uri,
    "ModelDataUrl": model_url,
    "Mode": "MultiModel",
}

create_model_response = sm_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=sagemaker.get_execution_role(),
    Containers=[container],
)
print(f"ModelArn: {create_model_response['ModelArn']}")

# Endpoint configuration: a single variant that receives all traffic.
endpoint_config_name = f"{base_name}-EndpointConfig"
create_endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[{
        "InstanceType": "ml.c5.9xlarge",
        "InitialInstanceCount": 1,
        "InitialVariantWeight": 1,
        "ModelName": model_name,
        "VariantName": "AllTraffic",
    }],
)
print(f"EndpointConfigArn: {create_endpoint_config_response['EndpointConfigArn']}")

In [None]:
# Derive the endpoint name from the same per-run base name as the model and
# the endpoint config, so all three resources of one run share a prefix.
endpoint_name = f"{base_name}-Endpoint"

create_endpoint_response = sm_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
)
print(f"EndpointArn: {create_endpoint_response['EndpointArn']}")

# Block until the endpoint reports that it is in service.
print(f"Waiting for {endpoint_name} endpoint to be in service...")
waiter = sm_client.get_waiter("endpoint_in_service")
waiter.wait(EndpointName=endpoint_name)

In [None]:
%%bash
# Download the test image that is sent to the endpoint.
# --fail makes an HTTP error abort with a non-zero exit status instead of
# silently saving the error response body as kitten.jpg.
curl --fail --silent --show-error --location -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg

In [None]:
runtime_sm_client = boto3.client(service_name="sagemaker-runtime")

# num_models must not exceed the number of archives present under the S3
# prefix: the upload cell creates copies 0-29, so cycling over more than 30
# targets would request archives that were never uploaded.
num_models, num_invocations = 30, 2

# Read the test image once; the context manager closes the file handle.
with open("kitten.jpg", "rb") as image_file:
    img = image_file.read()

# Round-robin over the models: the first pass touches every model once, the
# following passes repeat them. Each line printed is
# (model index, request index, latency in seconds, completion timestamp).
for i in range(num_models * num_invocations):
    j = i % num_models
    start_time = time.time()
    response = runtime_sm_client.invoke_endpoint(
        EndpointName=endpoint_name,
        # Path of the archive relative to the model's S3 prefix.
        TargetModel=f"{model_name_prefix}-{j}.tar.gz",
        ContentType='application/x-image',
        Body=img,
    )
    end_time = time.time()
    print(j, i, end_time - start_time, end_time)

In [None]:
# Cleanup: uncomment and run these lines to delete the endpoint, the endpoint
# config and the model created by this notebook. They are left commented out
# so that running all cells does not tear the endpoint down.
#sm_client.delete_endpoint(EndpointName=endpoint_name)
#sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
#sm_client.delete_model(ModelName=model_name)