# Asynchronous Inference

_NOTE_: Run this notebook after the `_batch` example, because it reuses the model from that example.

In [None]:
## Deploy the model from saved assets with asynchronous inference enabled

import boto3
import time, random, uuid
from sagemaker import Model
from sagemaker import image_uris
import pandas as pd
import numpy as np


# --- User-supplied settings: replace every INSERT_* placeholder before running ---
s3_bucket = "INSERT_S3_BUCKET"  # bucket that holds the async input and receives the results
sagemaker_role = "INSERT_ARN"  # NOTE(review): not referenced by the cells visible here
model_name = "INSERT_MODEL_NAME"  # passed as ModelName when the endpoint config is created
s3_async_output_prefix = "async-inference-results"  # key prefix for async outputs in s3_bucket
s3_async_input_key = "INSERT_INPUT_KEY"  # key of the request payload object in s3_bucket

# Single source of truth for the region, so the SageMaker client and the image
# lookup below cannot drift apart when the notebook is pointed at another region.
aws_region = "us-east-1"

# Initialize Boto3 SageMaker client
sagemaker_client = boto3.client("sagemaker", region_name=aws_region)
# NOTE(review): the image URI is not used by the cells visible here; kept in
# case other cells depend on it.
xgboost_image_uri = image_uris.retrieve(framework="xgboost", region=aws_region, version="1.7-1")

# Model & Endpoint Configurations
# A random suffix keeps the names unique across repeated runs of this cell.
uniqueID = uuid.uuid4().hex
endpoint_config_name = f"async-endpoint-config-{uniqueID}"
endpoint_name = f"async-endpoint-{uniqueID}"

# Step 1: register an endpoint configuration that has asynchronous inference
# turned on. Results are written under the configured S3 output prefix.
print("Creating asynchronous endpoint configuration...")
production_variant = {
    "VariantName": "XGBoostVariant1",
    "ModelName": model_name,
    "InstanceType": "ml.m5.large",
    "InitialInstanceCount": 1,
}
async_inference_config = {
    "OutputConfig": {
        "S3OutputPath": f"s3://{s3_bucket}/{s3_async_output_prefix}",
    },
}
sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
    AsyncInferenceConfig=async_inference_config,
)

# Step 2: create the endpoint from that configuration. The call returns
# immediately; the endpoint is provisioned in the background.
print("Deploying model as an asynchronous endpoint...")
sagemaker_client.create_endpoint(
    EndpointConfigName=endpoint_config_name,
    EndpointName=endpoint_name,
)

print(f"Asynchronous endpoint '{endpoint_name}' is being created.")

# Wait for endpoint to be ready
# Poll describe_endpoint every 10 seconds until the endpoint reaches one of the
# two statuses this cell treats as final.
print("Waiting for endpoint to be ready...")
terminal_statuses = ("InService", "Failed")
while True:
    response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
    status = response["EndpointStatus"]
    if status in terminal_statuses:
        print(f"Endpoint Status: {status}")
        break
    time.sleep(10)

# Check if deployment was successful
if status != "InService":
    # Surface SageMaker's own explanation instead of discarding it, so the
    # failure can be diagnosed without a second describe_endpoint call.
    failure_reason = response.get("FailureReason", "no failure reason reported")
    raise RuntimeError(
        f"Deployment failed with status: {status} (reason: {failure_reason})"
    )

print(f"Model deployed successfully at endpoint: {endpoint_name}")

## Send an async query to the endpoint

In [None]:
# Invoke Asynchronous Inference
# The request payload is not sent inline: the endpoint reads it from this S3 object.
s3_async_input_uri = f"s3://{s3_bucket}/{s3_async_input_key}"
print(s3_async_input_uri)

# Create the runtime client in the same region as the client that created the
# endpoint. Relying on the ambient default region can target a region where
# the endpoint does not exist.
sagemaker_rt_client = boto3.client(
    "sagemaker-runtime",
    region_name=sagemaker_client.meta.region_name,
)
# NOTE(review): no ContentType is passed; confirm the model container accepts
# the input object as-is (e.g. ContentType="text/csv" for CSV payloads).
response = sagemaker_rt_client.invoke_endpoint_async(
    EndpointName=endpoint_name,
    InputLocation=s3_async_input_uri
)

# Get Inference ID
inference_id = response["InferenceId"]
print(f"Submitted async inference request. InferenceId: {inference_id}")
# The response also states where the result object will be written.
print(f"Results will be written to: {response['OutputLocation']}")

In [None]:
# Wait for the results and display
from urllib.parse import urlparse


def _split_s3_uri(uri):
    """Split an ``s3://bucket/key`` URI into a ``(bucket, key)`` tuple."""
    parsed = urlparse(uri)
    return parsed.netloc, parsed.path.lstrip("/")


# `response` is the invoke_endpoint_async response from the previous cell.
# It carries the exact location of the result object, so read it from there
# instead of guessing the object name from the inference id.
s3_output_uri = response["OutputLocation"]
print(s3_output_uri)
output_bucket_name, output_key = _split_s3_uri(s3_output_uri)

# Location of the error record written if the request fails (may be absent).
s3_failure_uri = response.get("FailureLocation")

s3_resource = boto3.resource("s3")
bucket = s3_resource.Bucket(output_bucket_name)

print("Waiting for async inference results...")

max_wait_seconds = 15 * 60  # give up instead of polling forever
poll_interval_seconds = 5
deadline = time.monotonic() + max_wait_seconds

while True:
    # Prefix must be an object key (no "s3://bucket/" part); require an exact
    # key match so a longer key sharing the prefix is not mistaken for the result.
    if any(obj.key == output_key for obj in bucket.objects.filter(Prefix=output_key)):
        print(f"Output file found: {s3_output_uri}")
        break

    # A failed request never produces the output object; detect the failure
    # record so the loop ends with a clear error.
    if s3_failure_uri:
        failure_bucket_name, failure_key = _split_s3_uri(s3_failure_uri)
        failure_bucket = s3_resource.Bucket(failure_bucket_name)
        if any(
            obj.key == failure_key
            for obj in failure_bucket.objects.filter(Prefix=failure_key)
        ):
            raise RuntimeError(f"Async inference failed; see {s3_failure_uri}")

    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"No async inference output at {s3_output_uri} "
            f"after {max_wait_seconds} seconds"
        )
    time.sleep(poll_interval_seconds)  # Wait and retry

# Download and Read Output File
# A random local file name avoids overwriting the output of an earlier run.
output_filename = f"output_{uuid.uuid4().hex}.csv"
bucket.download_file(output_key, output_filename)

print(f"Downloaded inference output to: {output_filename}")

# Display Results
# The output file carries no header row, so every line is read as data.
output_df = pd.read_csv(output_filename, header=None)
print("Inference Results:")
print(output_df)