In [None]:
import boto3
from typing import Final

# Names of the deployed resources; the endpoint and endpoint-config names are
# derived from the model name.
model_name: Final[str] = 'WhisperTranscribeModel'
endpoint_name: Final[str] = f'{model_name}Endpoint'
endpoint_config_name: Final[str] = f'{model_name}EndpointConfig'
# NOTE(review): spelled 'AllTrafic' (single "f"). Presumably this has to match
# the variant name used when the endpoint config was created -- confirm before
# changing it.
variant_name: Final[str] = 'AllTrafic'

# boto3 clients used by the cells below: CloudWatch for the metric alarms,
# Application Auto Scaling for the scalable target and scaling policies.
cw_client: Final = boto3.client('cloudwatch')
as_client: Final = boto3.client('application-autoscaling')

# ResourceId passed to every Application Auto Scaling call in this notebook.
resource_id = '/'.join(('endpoint', endpoint_name, 'variant', variant_name))

In [None]:
# Register the endpoint variant's instance count as a scalable target with a
# capacity range of 0 to 3.
scalable_target_settings = {
    'ServiceNamespace': 'sagemaker',
    'ResourceId': resource_id,
    'ScalableDimension': 'sagemaker:variant:DesiredInstanceCount',
    'MinCapacity': 0,
    'MaxCapacity': 3,
}
as_client.register_scalable_target(**scalable_target_settings)

In [None]:
# Step scaling policy driven by the ApproximateBacklogSizePerInstance alarms.
# Step bounds are offsets from the threshold of the alarm that fired, not
# absolute metric values, so both steps pivot on 0: a breach below the
# threshold removes one instance, a breach above it adds one. (With a pivot
# of 1, a scale-out breach with the metric between 1 and 2 would have selected
# the -1 step.)
response = as_client.put_scaling_policy(
    PolicyName="ApproximateBacklogSizePerInstance-ScalingPolicy",
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    PolicyType="StepScaling",
    StepScalingPolicyConfiguration={
        "AdjustmentType": "ChangeInCapacity",
        "MetricAggregationType": "Average",
        "Cooldown": 300,
        "StepAdjustments": [
            {
                # Metric below the alarm threshold -> scale in by one.
                "MetricIntervalUpperBound": 0,
                "ScalingAdjustment": -1
            },
            {
                # Metric above the alarm threshold -> scale out by one.
                "MetricIntervalLowerBound": 0,
                "ScalingAdjustment": 1
            }
        ]
    }
)
scaling_policy_arn = response['PolicyARN']

# Settings shared by the scale-out and scale-in alarms: both compare the
# 60-second average of the metric for this endpoint against a threshold of 1
# and invoke the scaling policy registered above.
backlog_alarm_settings = dict(
    MetricName='ApproximateBacklogSizePerInstance',
    Namespace='AWS/SageMaker',
    Statistic='Average',
    EvaluationPeriods=1,
    DatapointsToAlarm=1,
    Threshold=1,
    TreatMissingData='missing',
    Dimensions=[
        {'Name': 'EndpointName', 'Value': endpoint_name}
    ],
    Period=60,
    AlarmActions=[scaling_policy_arn]
)

# Scale-out alarm: fires when the average backlog per instance exceeds 1.
cw_client.put_metric_alarm(
    AlarmName='ScaleOutAlarm-ApproximateBacklogSizePerInstance',
    ComparisonOperator='GreaterThanThreshold',
    **backlog_alarm_settings
)

# Scale-in alarm: fires when the average backlog per instance drops below 1.
cw_client.put_metric_alarm(
    AlarmName='ScaleInAlarm-ApproximateBacklogSizePerInstance',
    ComparisonOperator='LessThanThreshold',
    **backlog_alarm_settings
)

In [None]:
# Step scaling policy for the HasBacklogWithoutCapacity alarm: a single step
# that adds one instance whenever the alarm below invokes the policy.
no_capacity_step_settings = {
    "AdjustmentType": "ChangeInCapacity",
    "MetricAggregationType": "Average",
    "Cooldown": 300,
    "StepAdjustments": [
        {"MetricIntervalLowerBound": 0, "ScalingAdjustment": 1},
    ],
}
response = as_client.put_scaling_policy(
    PolicyName="HasBacklogWithoutCapacity-ScalingPolicy",
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    PolicyType="StepScaling",
    StepScalingPolicyConfiguration=no_capacity_step_settings,
)
scaling_policy_arn = response['PolicyARN']

# Metric alarm for HasBacklogWithoutCapacity: invokes the policy above when
# the 60-second average for this endpoint is at or above 1.
no_capacity_alarm_settings = {
    'AlarmName': 'ScaleOutAlarm-HasBacklogWithoutCapacity',
    'MetricName': 'HasBacklogWithoutCapacity',
    'Namespace': 'AWS/SageMaker',
    'Statistic': 'Average',
    'Period': 60,
    'EvaluationPeriods': 1,
    'DatapointsToAlarm': 1,
    'Threshold': 1,
    'ComparisonOperator': 'GreaterThanOrEqualToThreshold',
    'TreatMissingData': 'missing',
    'Dimensions': [{'Name': 'EndpointName', 'Value': endpoint_name}],
    'AlarmActions': [scaling_policy_arn],
}
cw_client.put_metric_alarm(**no_capacity_alarm_settings)

In [None]:
# Tear down the scaling rules created by the cells above.
# The CloudWatch alarms exist only to invoke the scaling policies, so delete
# them first; otherwise they are left behind with AlarmActions that point at
# policy ARNs that no longer exist.
cw_client.delete_alarms(
    AlarmNames=[
        'ScaleOutAlarm-ApproximateBacklogSizePerInstance',
        'ScaleInAlarm-ApproximateBacklogSizePerInstance',
        'ScaleOutAlarm-HasBacklogWithoutCapacity',
    ]
)

# Delete the scaling policies themselves.
as_client.delete_scaling_policy(
    PolicyName="HasBacklogWithoutCapacity-ScalingPolicy",
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount"
)

as_client.delete_scaling_policy(
    PolicyName="ApproximateBacklogSizePerInstance-ScalingPolicy",
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount"
)