In [None]:
import boto3

In [None]:
# Application Auto Scaling client.
# NOTE(review): a client for the same service is created again later as
# `autoscaling_client`, and `client` is not referenced by any visible cell —
# confirm whether this one is still needed.
client = boto3.client(service_name="application-autoscaling")

#### Configure variables

In [None]:
# Placeholders — replace with the names of your endpoint and its variant.
endpoint_name = "your-endpoint-name"
variant_name = "your-variant-name"

# Identifier passed as ResourceId to the Application Auto Scaling calls,
# in the form "endpoint/<endpoint>/variant/<variant>".
resource_id = "/".join(("endpoint", endpoint_name, "variant", variant_name))

#### Initialize boto3 clients

In [None]:
# One client per AWS service, created in the same order as before.
# NOTE(review): `sm_client` is not referenced by any visible cell.
sm_client, autoscaling_client = (
    boto3.client(service_name)
    for service_name in ("sagemaker", "application-autoscaling")
)

#### Register scalable target with min capacity 0 and max capacity 1

In [None]:
# Capacity bounds for the variant's instance count.
scalable_target_request = {
    "ServiceNamespace": "sagemaker",
    "ResourceId": resource_id,
    "ScalableDimension": "sagemaker:variant:DesiredInstanceCount",
    "MinCapacity": 0,  # a lower bound of zero permits scaling in to no instances
    "MaxCapacity": 1,  # never more than a single instance
}

autoscaling_client.register_scalable_target(**scalable_target_request)

#### Set target tracking scaling policy on the `ApproximateBacklogSizePerInstance` metric


In [None]:
# Metric the policy tracks: the endpoint's ApproximateBacklogSizePerInstance, averaged.
backlog_metric_spec = {
    "MetricName": "ApproximateBacklogSizePerInstance",
    "Namespace": "AWS/SageMaker",
    "Dimensions": [{"Name": "EndpointName", "Value": endpoint_name}],
    "Statistic": "Average",
}

# Target-tracking settings: hold the tracked metric at 5.0, with the same
# cooldown value (300) for scale-in and scale-out.
target_tracking_config = {
    "TargetValue": 5.0,
    "CustomizedMetricSpecification": backlog_metric_spec,
    "ScaleInCooldown": 300,
    "ScaleOutCooldown": 300,
}

autoscaling_client.put_scaling_policy(
    PolicyName="AsyncInferenceBacklogScalingPolicy",
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    PolicyType="TargetTrackingScaling",
    TargetTrackingScalingPolicyConfiguration=target_tracking_config,
)

#### Create step scaling policy for instant scale-up from zero

In [None]:
# Step scaling policy: add one instance (ChangeInCapacity +1) whenever the
# alarm attached to it breaches. The response is kept because the alarm below
# needs the policy's ARN.
step_policy_response = autoscaling_client.put_scaling_policy(
    PolicyName="HasBacklogWithoutCapacity-ScalingPolicy",
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    PolicyType="StepScaling",
    StepScalingPolicyConfiguration={
        "AdjustmentType": "ChangeInCapacity",
        "MetricAggregationType": "Average",
        "Cooldown": 300,
        "StepAdjustments": [{"MetricIntervalLowerBound": 0, "ScalingAdjustment": 1}],
    },
)
step_scaling_policy_arn = step_policy_response["PolicyARN"]

# A step scaling policy never runs on its own: unlike target tracking,
# Application Auto Scaling does not create a CloudWatch alarm for it. Without
# the alarm below, the policy above is never triggered and the endpoint stays
# at zero instances even when requests are queued.
# The alarm watches HasBacklogWithoutCapacity for this endpoint and invokes
# the step policy once the metric has been >= 1 for two consecutive
# 60-second periods. put_metric_alarm creates or updates by AlarmName, so
# re-running this cell does not create duplicates.
cloudwatch_client = boto3.client("cloudwatch")
cloudwatch_client.put_metric_alarm(
    AlarmName=f"{endpoint_name}-HasBacklogWithoutCapacity-Alarm",
    MetricName="HasBacklogWithoutCapacity",
    Namespace="AWS/SageMaker",
    Statistic="Average",
    Dimensions=[{"Name": "EndpointName", "Value": endpoint_name}],
    Period=60,
    EvaluationPeriods=2,
    DatapointsToAlarm=2,
    Threshold=1,
    ComparisonOperator="GreaterThanOrEqualToThreshold",
    TreatMissingData="missing",
    AlarmActions=[step_scaling_policy_arn],
)