# Serverless endpoint

Reference (AWS documentation): https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints-create.html

In [1]:
# %pip install sagemaker
# %pip install --upgrade botocore

In [2]:
import json

from abcli import fullname
from abcli import string
from roofAI import NAME, VERSION
from roofAI.inference.classes import InferenceClient

import abcli.logging
import logging

# Log through the root logger; handler/format setup presumably comes from the
# `abcli.logging` import above — TODO confirm.
logger = logging.getLogger()

# Version banner: package name/version, abcli's fullname, and the current date.
build_banner = f"{NAME}-{VERSION}-{fullname()}, built on {string.pretty_date()}"
logger.info(build_banner)

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/kamangir/Library/Application Support/sagemaker/config.yaml


roofAI-3.248.1-abcli-7.2554.1-unknown, built on 09 December 2023, 17:49:11


In [3]:
# Name of the SageMaker model to create; the cells below also reuse it as the
# endpoint config name.
model_name = "model-2023-12-03-11-24-39-75649"

---

In [4]:
# Project inference client, used below to create the model. verbose=True
# presumably enables the detailed log output shown beneath this cell — TODO confirm.
inference_client = InferenceClient(verbose=True)

Found credentials in shared credentials file: ~/.aws/credentials


sagemaker.config      - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config      - Not applying SDK defaults from location: /Users/kamangir/Library/Application Support/sagemaker/config.yaml


Couldn't call 'get_role' to get Role ARN from role name shamim to get Role path.
sagemaker_role: defaulting to local.
Defaulting to only available Python version: py3
Defaulting to only supported image scope: cpu.
InferenceClient created.


## Creating the model

In [5]:
import boto3
import sagemaker
# Bind the low-level SageMaker client explicitly to the region of a default
# boto3 session.
region = boto3.Session().region_name
client = boto3.client(
    service_name="sagemaker",
    region_name=region,
)

In [6]:
# The assert stops the notebook here if create_model returns a falsy value.
# NOTE(review): judging by the log output below, create_model appears to delete
# an existing model of the same name before creating it — confirm in InferenceClient.
assert inference_client.create_model(model_name=model_name)

create_model(model-2023-12-03-11-24-39-75649)...
model_exists(model-2023-12-03-11-24-39-75649): {'Models': [{'ModelName': 'model-2023-12-03-11-24-39-75649', 'ModelArn': 'arn:aws:sagemaker:ca-central-1:120429650996:model/model-2023-12-03-11-24-39-75649', 'CreationTime': datetime.datetime(2023, 12, 9, 17, 47, 36, 466000, tzinfo=tzlocal())}], 'ResponseMetadata': {'RequestId': '6116b3bb-0309-445f-a33c-0b5084390b82', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '6116b3bb-0309-445f-a33c-0b5084390b82', 'content-type': 'application/x-amz-json-1.1', 'content-length': '187', 'date': 'Sun, 10 Dec 2023 01:49:13 GMT'}, 'RetryAttempts': 0}}
model-2023-12-03-11-24-39-75649 already exists, will delete first.
delete_model(model-2023-12-03-11-24-39-75649): {'ResponseMetadata': {'RequestId': 'a1b06a05-5ce6-4652-84c3-2ad370c27f12', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'a1b06a05-5ce6-4652-84c3-2ad370c27f12', 'content-type': 'application/x-amz-json-1.1', 'content-length':

## Creating the endpoint configuration

In [7]:
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/list_endpoint_configs.html
# NameContains is a substring filter: it also returns configs whose name merely
# embeds model_name (e.g. "<model_name>-v2"). The following cell deletes by the
# exact name, so only an exact name match may count as "found"; otherwise the
# delete call would target a config that does not exist.
response = client.list_endpoint_configs(NameContains=model_name)

endpoint_config_found = any(
    config["EndpointConfigName"] == model_name
    for config in response["EndpointConfigs"]
)
if endpoint_config_found:
    logger.info("endpoint config already exists, will delete first.")

endpoint config already exists, will delete first.


In [8]:
# Remove the existing endpoint config (flagged by the previous cell) so that it
# can be created afresh below.
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/delete_endpoint_config.html
if endpoint_config_found:
    response = client.delete_endpoint_config(
        EndpointConfigName=model_name,
    )
    logger.info(json.dumps(response, indent=4))

{
    "ResponseMetadata": {
        "RequestId": "ad77b17d-65d4-4bcf-84f9-60db89c57a27",
        "HTTPStatusCode": 200,
        "HTTPHeaders": {
            "x-amzn-requestid": "ad77b17d-65d4-4bcf-84f9-60db89c57a27",
            "content-type": "application/x-amz-json-1.1",
            "content-length": "0",
            "date": "Sun, 10 Dec 2023 01:49:14 GMT"
        },
        "RetryAttempts": 0
    }
}


In [9]:
# https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints-create.html#serverless-endpoints-create-config

# Serverless sizing for the production variant.
serverless_config = {
    "MemorySizeInMB": 2048,
    "MaxConcurrency": 20,
    # "ProvisionedConcurrency": 10,
}

# Single variant that serves the model created above.
production_variant = {
    "ModelName": model_name,
    "VariantName": "AllTraffic",
    "ServerlessConfig": serverless_config,
}

# The endpoint config is given the same name as the model.
response = client.create_endpoint_config(
    EndpointConfigName=model_name,
    ProductionVariants=[production_variant],
)

logger.info(json.dumps(response, indent=4))

{
    "EndpointConfigArn": "arn:aws:sagemaker:ca-central-1:120429650996:endpoint-config/model-2023-12-03-11-24-39-75649",
    "ResponseMetadata": {
        "RequestId": "5783e67b-d928-4a19-ac15-6efbbce75f5a",
        "HTTPStatusCode": 200,
        "HTTPHeaders": {
            "x-amzn-requestid": "5783e67b-d928-4a19-ac15-6efbbce75f5a",
            "content-type": "application/x-amz-json-1.1",
            "content-length": "115",
            "date": "Sun, 10 Dec 2023 01:49:15 GMT"
        },
        "RetryAttempts": 0
    }
}


In [10]:
# Verify that the endpoint config created above now exists.
# NameContains is a substring filter, so keep only the config whose name is
# exactly model_name; a similarly named config must not pass the check.
response = client.list_endpoint_configs(NameContains=model_name)

created_configs = [
    config
    for config in response["EndpointConfigs"]
    if config["EndpointConfigName"] == model_name
]

if created_configs:
    logger.info(created_configs)
else:
    logger.error("endpoint config was not created.")
    # Raise explicitly rather than `assert False`: assert statements are
    # stripped under `python -O`, which would let this failure pass silently.
    raise AssertionError("endpoint config was not created.")

[{'EndpointConfigName': 'model-2023-12-03-11-24-39-75649', 'EndpointConfigArn': 'arn:aws:sagemaker:ca-central-1:120429650996:endpoint-config/model-2023-12-03-11-24-39-75649', 'CreationTime': datetime.datetime(2023, 12, 9, 17, 49, 15, 527000, tzinfo=tzlocal())}]


---

In [11]:
# END