# Serverless endpoint

Creates a SageMaker model and a serverless endpoint configuration for it, following the AWS guide:

https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints-create.html

In [1]:
# Optional one-time setup; uncomment to install or upgrade the dependencies.
# !pip3 install sagemaker
# !pip3 install --upgrade botocore

In [2]:
import json

from abcli import fullname
from abcli import string
from roofAI import NAME, VERSION
from roofAI.inference.classes import InferenceClient, InferenceObject

import abcli.logging
import logging

# Use the root logger so messages from this notebook and from imported
# libraries go through the same handlers.
# NOTE(review): `import abcli.logging` above presumably configures those handlers — confirm.
logger = logging.getLogger()

# Banner identifying the package versions and when this notebook was run.
logger.info(f"{NAME}-{VERSION}-{fullname()}, built on {string.pretty_date()}")

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/kamangir/Library/Application Support/sagemaker/config.yaml


roofAI-3.251.1-abcli-7.2555.1-unknown, built on 09 December 2023, 18:10:55


In [3]:
# Name used for both the SageMaker model and its endpoint config in the cells below.
model_name = "model-2023-12-03-11-24-39-75649"

---

In [4]:
# Project wrapper used below to create and look up SageMaker objects.
# NOTE(review): verbose=True presumably enables the per-call logging seen in
# the later cell outputs — confirm against roofAI.inference.classes.
inference_client = InferenceClient(verbose=True)

Found credentials in shared credentials file: ~/.aws/credentials


sagemaker.config      - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config      - Not applying SDK defaults from location: /Users/kamangir/Library/Application Support/sagemaker/config.yaml


Couldn't call 'get_role' to get Role ARN from role name shamim to get Role path.
sagemaker_role: defaulting to local.
Defaulting to only available Python version: py3
Defaulting to only supported image scope: cpu.
InferenceClient created.


## Creating the model

In [5]:
import boto3
import sagemaker
# Take the region from the default boto3 session and build the low-level
# SageMaker client that the endpoint-config cells below call directly.
region = boto3.Session().region_name
client = boto3.client(service_name="sagemaker", region_name=region)

In [6]:
# Create the SageMaker model. Per the logged output, create() deletes an
# existing model with the same name before re-creating it.
# The call is kept outside the assert on purpose: assert statements are
# stripped under `python -O`, which would silently skip the creation.
model_created = inference_client.create(InferenceObject.MODEL, model_name)
assert model_created, f"failed to create model {model_name}"

create(InferenceObject.MODEL,model-2023-12-03-11-24-39-75649)...
exists(InferenceObject.MODEL,model-2023-12-03-11-24-39-75649): {'Models': [{'ModelName': 'model-2023-12-03-11-24-39-75649', 'ModelArn': 'arn:aws:sagemaker:ca-central-1:120429650996:model/model-2023-12-03-11-24-39-75649', 'CreationTime': datetime.datetime(2023, 12, 9, 18, 3, 13, 586000, tzinfo=tzlocal())}], 'ResponseMetadata': {'RequestId': '9aedef03-0f98-4e01-b4df-efd95a781160', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '9aedef03-0f98-4e01-b4df-efd95a781160', 'content-type': 'application/x-amz-json-1.1', 'content-length': '187', 'date': 'Sun, 10 Dec 2023 02:10:57 GMT'}, 'RetryAttempts': 0}}
model-2023-12-03-11-24-39-75649 already exists, will delete first.
delete_model(model-2023-12-03-11-24-39-75649): {'ResponseMetadata': {'RequestId': 'cd30fc26-9dac-4b9a-a295-e20694a32161', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'cd30fc26-9dac-4b9a-a295-e20694a32161', 'content-type': 'application/x-a

## Creating the endpoint configuration

In [7]:
# Check whether an endpoint config named after the model already exists;
# the next cell deletes it when this is truthy.
endpoint_config_found = inference_client.exists(InferenceObject.ENDPOINT_CONFIG, model_name)

exists(InferenceObject.ENDPOINT_CONFIG,model-2023-12-03-11-24-39-75649): {'EndpointConfigs': [{'EndpointConfigName': 'model-2023-12-03-11-24-39-75649', 'EndpointConfigArn': 'arn:aws:sagemaker:ca-central-1:120429650996:endpoint-config/model-2023-12-03-11-24-39-75649', 'CreationTime': datetime.datetime(2023, 12, 9, 18, 3, 14, 837000, tzinfo=tzlocal())}], 'ResponseMetadata': {'RequestId': 'ebc3a1df-c6cc-4f17-b8f6-295a8edf4f70', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'ebc3a1df-c6cc-4f17-b8f6-295a8edf4f70', 'content-type': 'application/x-amz-json-1.1', 'content-length': '224', 'date': 'Sun, 10 Dec 2023 02:10:58 GMT'}, 'RetryAttempts': 0}}


In [8]:
# Remove a stale endpoint config of the same name so it can be re-created below.
if endpoint_config_found:
    logger.info("endpoint config already exists, will delete first.")

    # API reference:
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/delete_endpoint_config.html
    response = client.delete_endpoint_config(EndpointConfigName=model_name)
    logger.info(json.dumps(response, indent=4))

endpoint config already exists, will delete first.
{
    "ResponseMetadata": {
        "RequestId": "3b413f67-3daf-4a86-b8a4-2e48b193a455",
        "HTTPStatusCode": 200,
        "HTTPHeaders": {
            "x-amzn-requestid": "3b413f67-3daf-4a86-b8a4-2e48b193a455",
            "content-type": "application/x-amz-json-1.1",
            "content-length": "0",
            "date": "Sun, 10 Dec 2023 02:10:58 GMT"
        },
        "RetryAttempts": 0
    }
}


In [9]:
# Serverless endpoint configuration, following
# https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints-create.html#serverless-endpoints-create-config

# Serverless sizing for the variant; provisioned concurrency is left disabled.
serverless_config = {
    "MemorySizeInMB": 2048,
    "MaxConcurrency": 20,
    # "ProvisionedConcurrency": 10,
}

# Single production variant serving the model created above.
production_variant = {
    "ModelName": model_name,
    "VariantName": "AllTraffic",
    "ServerlessConfig": serverless_config,
}

# The endpoint config reuses the model's name.
response = client.create_endpoint_config(
    EndpointConfigName=model_name,
    ProductionVariants=[production_variant],
)

logger.info(json.dumps(response, indent=4))

{
    "EndpointConfigArn": "arn:aws:sagemaker:ca-central-1:120429650996:endpoint-config/model-2023-12-03-11-24-39-75649",
    "ResponseMetadata": {
        "RequestId": "8823858d-3079-40f5-867f-1b0fa2f839bf",
        "HTTPStatusCode": 200,
        "HTTPHeaders": {
            "x-amzn-requestid": "8823858d-3079-40f5-867f-1b0fa2f839bf",
            "content-type": "application/x-amz-json-1.1",
            "content-length": "115",
            "date": "Sun, 10 Dec 2023 02:10:58 GMT"
        },
        "RetryAttempts": 0
    }
}


In [10]:
# Verify that the endpoint config now exists.
# NameContains is a substring filter, so the listing is narrowed to the config
# whose name matches exactly; otherwise a similarly named config (for example
# one with an extra suffix) would make this check pass by mistake.
response = client.list_endpoint_configs(NameContains=model_name)

matching_configs = [
    config
    for config in response["EndpointConfigs"]
    if config["EndpointConfigName"] == model_name
]

if matching_configs:
    logger.info(matching_configs)
else:
    logger.error("endpoint config was not created.")
    # Raised explicitly (rather than `assert False`) so the failure is not
    # stripped when Python runs with -O; the exception type is unchanged.
    raise AssertionError(f"endpoint config {model_name} was not created.")

[{'EndpointConfigName': 'model-2023-12-03-11-24-39-75649', 'EndpointConfigArn': 'arn:aws:sagemaker:ca-central-1:120429650996:endpoint-config/model-2023-12-03-11-24-39-75649', 'CreationTime': datetime.datetime(2023, 12, 9, 18, 10, 59, 336000, tzinfo=tzlocal())}]


---

In [11]:
# END