In [2]:
import boto3   
import datetime

# AWS clients: SageMaker for endpoint management, CloudWatch for invocation metrics.
sm_client = boto3.client('sagemaker')
cw_client = boto3.client('cloudwatch')

# Length, in seconds, of the trailing window used to decide whether an endpoint is idle.
IDLE_TIME_IN_SECONDS = 3600

# Use timezone-aware UTC timestamps: datetime.utcnow() is deprecated since
# Python 3.12 and returns a naive value that is easy to misread as local time.
now = datetime.datetime.now(datetime.timezone.utc)
past = now - datetime.timedelta(seconds=IDLE_TIME_IN_SECONDS)

In [3]:
def is_serverless_endpoint(client, endpoint_name):
    """Report whether an endpoint is served by a serverless production variant.

    Looks up the endpoint, then its endpoint configuration, and checks every
    production variant in that configuration for a ``ServerlessConfig`` entry.

    Args:
        client: SageMaker client exposing ``describe_endpoint`` and
            ``describe_endpoint_config``.
        endpoint_name: Name of the endpoint to inspect.

    Returns:
        True if any production variant carries a ``ServerlessConfig`` key,
        False otherwise (including when the configuration lists no variants).
    """
    endpoint = client.describe_endpoint(EndpointName=endpoint_name)
    endpoint_config = client.describe_endpoint_config(
        EndpointConfigName=endpoint["EndpointConfigName"]
    )
    # Check every variant rather than only the first one; an empty or missing
    # list yields False instead of raising IndexError/KeyError.
    variants = endpoint_config.get("ProductionVariants") or []
    return any("ServerlessConfig" in variant for variant in variants)

In [4]:
# Collect the names of all in-service, non-serverless endpoints, newest first.
# list_endpoints returns results one page at a time, so walk every page with a
# paginator; reading only the first response silently drops later endpoints.
endpoint_pages = sm_client.get_paginator('list_endpoints').paginate(
    SortBy='CreationTime',
    SortOrder='Descending',
    StatusEquals='InService',
)
endpoints = [summary for page in endpoint_pages for summary in page["Endpoints"]]

endpoint_names = []
for each in endpoints:
    name = each["EndpointName"]
    # Serverless endpoints are excluded from the candidate list.
    if is_serverless_endpoint(sm_client, name):
        continue
    endpoint_names.append(name)

print(endpoint_names)

['hf-llm-starcoder-2024-02-19-21-50-38-531', 'Endpoint-20240219-172343']


In [94]:
# Flag endpoints that received no invocations during the trailing idle window.
# The window is recomputed here so it reflects the moment this cell runs rather
# than whenever the configuration cell was last executed.
window_end = datetime.datetime.now(datetime.timezone.utc)
window_start = window_end - datetime.timedelta(seconds=IDLE_TIME_IN_SECONDS)

idle_endpoints = []
for endpoint_name in endpoint_names:
    # Query the endpoint's real variant names: the Invocations metric is keyed
    # by (EndpointName, VariantName), so assuming 'AllTraffic' would make any
    # endpoint with a differently named variant look idle.
    deployed_variants = sm_client.describe_endpoint(
        EndpointName=endpoint_name
    ).get("ProductionVariants") or []
    # Fall back to the conventional default name when no variants are reported.
    variant_names = [variant["VariantName"] for variant in deployed_variants] or ['AllTraffic']

    invocation_total = 0.0
    for variant_name in variant_names:
        response = cw_client.get_metric_statistics(
            Namespace='AWS/SageMaker',
            MetricName='Invocations',
            Dimensions=[
                {
                    'Name': 'EndpointName',
                    'Value': endpoint_name
                },
                {
                    'Name': 'VariantName',
                    'Value': variant_name,
                },
            ],
            StartTime=window_start,
            EndTime=window_end,
            Period=IDLE_TIME_IN_SECONDS,
            Statistics=['Sum']
        )
        # Datapoints are unordered and the window may straddle two period
        # buckets, so add up all of them instead of reading only the first.
        invocation_total += sum(point['Sum'] for point in response['Datapoints'])

    # No datapoints at all also leaves the total at zero.
    if invocation_total == 0.0:
        idle_endpoints.append(endpoint_name)

In [None]:
# Delete each endpoint that was flagged as idle and report it once the call returns.
for idle_endpoint_name in idle_endpoints:
    response = sm_client.delete_endpoint(
        EndpointName=idle_endpoint_name,
    )
    print(f"Deleted endpoint {idle_endpoint_name}")

In [92]:
# Spot check: print the invocation count of the first candidate endpoint over
# the window [past, now] defined in the configuration cell.
if endpoint_names:
    endpoint_name = endpoint_names[0]

    # Look up the endpoint's real variant names instead of assuming 'AllTraffic';
    # fall back to that conventional name when no variants are reported.
    deployed_variants = sm_client.describe_endpoint(
        EndpointName=endpoint_name
    ).get("ProductionVariants") or []
    variant_names = [variant["VariantName"] for variant in deployed_variants] or ['AllTraffic']

    invocation_count = 0.0
    for variant_name in variant_names:
        response = cw_client.get_metric_statistics(
            Namespace='AWS/SageMaker',
            MetricName='Invocations',
            Dimensions=[
                {
                    'Name': 'EndpointName',
                    'Value': endpoint_name
                },
                {
                    'Name': 'VariantName',
                    'Value': variant_name,
                },
            ],
            StartTime=past,
            EndTime=now,
            # Match the period to the window length so the reported number and
            # the message below always describe the same interval.
            Period=IDLE_TIME_IN_SECONDS,
            Statistics=['Sum']
        )
        # An endpoint with no traffic returns no datapoints; summing keeps the
        # count at 0.0 instead of raising IndexError on Datapoints[0].
        invocation_count += sum(point['Sum'] for point in response['Datapoints'])

    print(f'The invocation count for {endpoint_name} in the last {IDLE_TIME_IN_SECONDS // 60} minutes is {invocation_count}')
else:
    print('No candidate endpoints to inspect')

The invocation count for hf-llm-starcoder-2024-02-19-21-50-38-531 in the last 30 minutes is 0.0
