### 1.  Set up the environment

In [1]:
# After the Docker image has been built, set up the environment: define the S3 bucket and prefix where the model artifacts that the endpoint can invoke are located.

import boto3
from sagemaker import get_execution_role
from time import gmtime, strftime

# SageMaker control-plane client (create model / endpoint config / endpoint)
# and runtime client (invoke the endpoint).
sm_client = boto3.client(service_name="sagemaker")
runtime_sm_client = boto3.client(service_name="sagemaker-runtime")

account_id = boto3.client("sts").get_caller_identity()["Account"]
region = boto3.Session().region_name

# One timestamp shared by every resource name, so the model, endpoint config and
# endpoint created by a single run always carry the same suffix.
timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

bucket = "sagemaker-{}-{}".format(region, account_id)
prefix = "demo-template-model-store"
# Container's ModelDataUrl: the S3 prefix under which all model artifacts live.
# The s3:// form is used instead of the legacy "https://s3-<region>" endpoint,
# which is not available in every region.
model_url = "s3://{}/{}/".format(bucket, prefix)
model_name = "demo-templatemodel" + timestamp  # A namespace for all the model artifacts

image_name = "demo-sagemaker-template-bo-test"  # Inference image name in ECR
image = "{}.dkr.ecr.{}.amazonaws.com/{}:latest".format(account_id, region, image_name)  # Full address of the ECR image

endpoint_config_name = "demo-template-endpointconfig-" + timestamp  # The name of the endpoint config
endpoint_name = "demo-template-endpoint-" + timestamp  # The name of the endpoint

endpoint_instance = "ml.t2.medium"  # The instance type of the endpoint

# IAM role that SageMaker uses when creating the model.
role = get_execution_role()

### 2. Upload model artifacts to S3
SageMaker requires a model file, so we upload dummy models to S3 as placeholders. Each file must be a `tar.gz` archive.

In [2]:
# Make a dummy file for the model
import os
import tarfile
import tempfile


def build_dummy_model_archive(archive_name, content, member_name="model.txt"):
    """Create a gzip-compressed tar archive holding one small text file.

    The placeholder file is staged in a temporary directory, so an existing
    file called ``member_name`` in the working directory is neither overwritten
    nor deleted, and the staging file is removed even if archiving fails.

    Args:
        archive_name: Path of the ``.tar.gz`` archive to create.
        content: Text written into the placeholder file.
        member_name: Name of the file inside the archive.

    Returns:
        ``archive_name``, unchanged.
    """
    with tempfile.TemporaryDirectory() as staging_dir:
        staged_path = os.path.join(staging_dir, member_name)
        with open(staged_path, "w") as staged_file:
            staged_file.write(content)
        with tarfile.open(archive_name, "w:gz") as tar:
            # arcname stores the member at the archive root rather than under
            # the temporary directory's path.
            tar.add(staged_path, arcname=member_name)
    return archive_name


# Archive file name -> text stored in its placeholder model file.
dummy_models = {
    "model1.tar.gz": "I'm model 1.",
    "model2.tar.gz": "I'm model 2.",
}

s3_client = boto3.client('s3')
for archive_name, content in dummy_models.items():
    build_dummy_model_archive(archive_name, content)
    # The object key is "<prefix>/<archive name>", i.e. directly under the
    # prefix that the model's ModelDataUrl points at.
    s3_client.upload_file(archive_name, bucket, f"{prefix}/{archive_name}")

### 3. Import models into hosting

In [3]:
# Register the model with SageMaker hosting. The container is declared in
# "MultiModel" mode and points at the S3 location held in model_url.
container = dict(Image=image, ModelDataUrl=model_url, Mode="MultiModel")

create_model_response = sm_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    Containers=[container],
)

print(f"Model Arn: {create_model_response['ModelArn']}")

Model Arn: arn:aws:sagemaker:us-east-1:318775028588:model/demo-lexrankmodel2021-07-15-04-12-10


### 4. Create endpoint configuration

In [5]:
# A single production variant that serves the model created above on one
# instance of the configured type.
production_variant = {
    "VariantName": "AllTraffic",
    "ModelName": model_name,
    "InstanceType": endpoint_instance,
    "InitialInstanceCount": 1,
}

create_endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)

print(f"Endpoint config Arn: {create_endpoint_config_response['EndpointConfigArn']}")

Endpoint config Arn: arn:aws:sagemaker:us-east-1:318775028588:endpoint-config/demo-lexrank-endpointconfig-2021-07-15-04-12-10


### 5. Create endpoint

In [6]:
%%time
# Kick off endpoint creation from the endpoint config defined earlier.
create_endpoint_response = sm_client.create_endpoint(
    EndpointConfigName=endpoint_config_name,
    EndpointName=endpoint_name,
)
print(f"Endpoint Arn: {create_endpoint_response['EndpointArn']}")

# Report the status right after the create call returns.
resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = resp["EndpointStatus"]
print(f"Endpoint Status: {status}")

# Block on the "endpoint_in_service" waiter before continuing.
print(f"Waiting for {endpoint_name} endpoint to be in service...")
waiter = sm_client.get_waiter("endpoint_in_service")
waiter.wait(EndpointName=endpoint_name)
print("Done")

Endpoint Arn: arn:aws:sagemaker:us-east-1:318775028588:endpoint/demo-lexrank-endpoint-2021-07-15-04-12-10
Endpoint Status: Creating
Waiting for demo-lexrank-endpoint-2021-07-15-04-12-10 endpoint to be in service...
Done
CPU times: user 312 ms, sys: 24.1 ms, total: 337 ms
Wall time: 11min 32s


### 6. Run inference

In [11]:
# invoke the models that we uploaded to S3 previously
# Inference with model1
import json
# Request body sent to the endpoint, serialized as JSON.
data = dict(text="Hello world! I like pizza! How about you?", n_sent=1)

payload = json.dumps(data)

invoke_args = {
    "EndpointName": endpoint_name,
    "ContentType": "application/json",
    # This is the rest of the S3 path where the model artifacts are located.
    "TargetModel": "model1.tar.gz",
    "Body": payload,
}
response = runtime_sm_client.invoke_endpoint(**invoke_args)

# Last expression of the cell: shows the raw response body.
response["Body"].read()


In [None]:
# Inference with model2, using the payload built in the previous cell.
invoke_args = {
    "EndpointName": endpoint_name,
    "ContentType": "application/json",
    # This is the rest of the S3 path where the model artifacts are located.
    "TargetModel": "model2.tar.gz",
    "Body": payload,
}
response = runtime_sm_client.invoke_endpoint(**invoke_args)

# Last expression of the cell: shows the raw response body.
response["Body"].read()