## Azure Databricks / Azure Machine Learning Sample - Managed Online Endpoint Autoscaling

Sample notebook showing how to configure rule-based autoscaling (through Azure Monitor autoscale settings) for a deployment on an Azure Machine Learning managed online endpoint. Adapted from the samples at https://learn.microsoft.com/en-us/azure/machine-learning/how-to-autoscale-endpoints?view=azureml-api-2&tabs=python.

#### Import required packages

In [0]:
import numpy as np
import os
from pyspark.sql import SparkSession  
from pyspark.sql.types import StructType, StructField, IntegerType, DoubleType  
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import mlflow
import mlflow.sklearn
from sklearn.pipeline import Pipeline
from sklearn import neighbors
from mlflow.deployments import get_deploy_client
import json
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Environment, ManagedOnlineDeployment, CodeConfiguration
from azure.identity import DefaultAzureCredential
import time
from azure.identity import ClientSecretCredential, DefaultAzureCredential
from azure.mgmt.monitor import MonitorManagementClient
from azure.mgmt.monitor.models import AutoscaleProfile, ScaleRule, MetricTrigger, ScaleAction, Recurrence, RecurrentSchedule
import random 
import datetime 

#### Parse arguments

In [0]:
# Notebook widget values that identify the target Azure ML workspace.
subscription_id, resource_group, workspace = (
    dbutils.widgets.get(widget_name)
    for widget_name in ('subscription_id', 'resource_group', 'workspace')
)

# Notebook widget values that describe the model and its online endpoint.
model_name, endpoint_name, endpoint_description = (
    dbutils.widgets.get(widget_name)
    for widget_name in ('model_name', 'endpoint_name', 'endpoint_description')
)

In [0]:
def get_aml_client(subscription_id, resource_group, workspace):
    """
    Build an MLClient for an Azure Machine Learning (AML) workspace, authenticating as a service principal.

    The service principal's tenant ID, client ID and client secret are read from the
    Databricks secret scope "amlsecretscope". The same three values are also exported as
    the AZURE_TENANT_ID / AZURE_CLIENT_ID / AZURE_CLIENT_SECRET environment variables.

    Args:
        subscription_id (str): The Azure subscription ID.
        resource_group (str): The Azure resource group name.
        workspace (str): The Azure Machine Learning workspace name.

    Returns:
        azure.ai.ml.MLClient: A client bound to the given subscription, resource group and workspace.
    """
    secret_scope = "amlsecretscope"
    sp_tenant = dbutils.secrets.get(scope=secret_scope, key="tenantid")
    sp_client = dbutils.secrets.get(scope=secret_scope, key="clientid")
    sp_secret = dbutils.secrets.get(scope=secret_scope, key="clientsecret")

    # NOTE(review): presumably exported so that components relying on environment-based
    # Azure credentials can authenticate as the same principal - confirm.
    os.environ.update(
        {
            "AZURE_TENANT_ID": sp_tenant,
            "AZURE_CLIENT_ID": sp_client,
            "AZURE_CLIENT_SECRET": sp_secret,
        }
    )

    sp_credential = ClientSecretCredential(sp_tenant, sp_client, sp_secret)
    workspace_client = MLClient(sp_credential, subscription_id, resource_group, workspace)

    print("Establishing connection to Azure ML workspace")
    return workspace_client
    

def get_aml_mlflow_tracking_uri(ml_client):
    """
    Return the MLflow tracking URI of the Azure Machine Learning workspace that `ml_client` is bound to.

    Args:
        ml_client (azure.ai.ml.MLClient): Client referencing the target Azure Machine Learning workspace.

    Returns:
        str: The `mlflow_tracking_uri` of that workspace.
    """
    # Take the workspace name from the client itself rather than from a module-level
    # variable, so the function works for whichever client it is handed.
    ws = ml_client.workspaces.get(ml_client.workspace_name)
    return ws.mlflow_tracking_uri

def get_aml_endpoint(ml_client, endpoint_name):
    """
    Look up an Azure Machine Learning (AML) online endpoint by name.

    Args:
    ml_client (azure.ai.ml.MLClient): Client for the workspace that hosts the endpoint.
    endpoint_name (str): The name of the online endpoint to fetch.

    Returns:
    The object returned by `ml_client.online_endpoints.get(endpoint_name)`.

    Example:
    >>> ml_client = get_aml_client(subscription_id, resource_group, workspace)
    >>> endpoint = get_aml_endpoint(ml_client, "my_endpoint")
    """
    return ml_client.online_endpoints.get(endpoint_name)

def get_aml_deployment(ml_client, endpoint_name, deployment_name):
    """
    Look up a deployment of an Azure Machine Learning (AML) online endpoint.

    Args:
    ml_client (azure.ai.ml.MLClient): Client for the workspace that hosts the endpoint.
    endpoint_name (str): The name of the online endpoint that owns the deployment.
    deployment_name (str): The name of the deployment to fetch.

    Returns:
    The object returned by `ml_client.online_deployments.get(deployment_name, endpoint_name)`.

    Example:
    >>> ml_client = get_aml_client(subscription_id, resource_group, workspace)
    >>> deployment = get_aml_deployment(ml_client, "my_endpoint", "my_deployment")
    """
    # The SDK call takes the deployment name first and the endpoint name second,
    # i.e. the reverse of this function's own parameter order.
    deployment_ops = ml_client.online_deployments
    return deployment_ops.get(deployment_name, endpoint_name)

def get_mon_client(subscription_id):
    """
    Build an Azure Monitor management client for a subscription, authenticating as a service principal.

    The service principal's tenant ID, client ID and client secret are read from the
    Databricks secret scope "amlsecretscope". The same three values are also exported as
    the AZURE_TENANT_ID / AZURE_CLIENT_ID / AZURE_CLIENT_SECRET environment variables.

    Args:
    subscription_id (str): The subscription ID for which to create the Monitor Management Client.

    Returns:
    azure.mgmt.monitor.MonitorManagementClient: The client for the specified subscription ID.

    Example:
    >>> mon_client = get_mon_client("my_subscription_id")
    """
    secret_scope = "amlsecretscope"
    sp_tenant = dbutils.secrets.get(scope=secret_scope, key="tenantid")
    sp_client = dbutils.secrets.get(scope=secret_scope, key="clientid")
    sp_secret = dbutils.secrets.get(scope=secret_scope, key="clientsecret")

    os.environ.update(
        {
            "AZURE_TENANT_ID": sp_tenant,
            "AZURE_CLIENT_ID": sp_client,
            "AZURE_CLIENT_SECRET": sp_secret,
        }
    )

    sp_credential = ClientSecretCredential(sp_tenant, sp_client, sp_secret)
    return MonitorManagementClient(sp_credential, subscription_id)

def create_autoscale_settings_name(endpoint_name, deployment_name):
    """
    Build a name for an autoscale setting from the endpoint name, the deployment name and a random suffix.

    The suffix is a random integer between 0 and 1000 (inclusive), so two calls with the
    same arguments usually, but not necessarily, produce different names.

    Args:
    endpoint_name (str): The name of the online endpoint associated with the deployment.
    deployment_name (str): The name of the deployment the autoscale setting is for.

    Returns:
    str: A name of the form "autoscale-<endpoint_name>-<deployment_name>-<random integer>".

    Example:
    >>> autoscale_settings_name = create_autoscale_settings_name("my_endpoint", "my_deployment")
    """
    random_suffix = random.randint(0, 1000)
    return f"autoscale-{endpoint_name}-{deployment_name}-{random_suffix}"


def create_scale_out_rule(deployment):
    """
    Build the scale-out rule for an Azure Machine Learning online deployment.

    The rule watches the deployment's "CpuUtilizationPercentage" metric (1-minute grain,
    "Average" statistic, "Last" aggregation over a 5-minute window). When the value is
    greater than 70 it increases the instance count by 2, with a 1-hour cooldown.

    Args:
    deployment: The AML online deployment object; its `id` is used as the metric resource URI.

    Returns:
    azure.mgmt.monitor.models.ScaleRule: The configured scale-out rule.

    Example:
    >>> deployment = get_aml_deployment(ml_client, endpoint_name, deployment_name)
    >>> rule_scale_out = create_scale_out_rule(deployment)
    """
    # Condition: CPU utilization above 70.
    high_cpu_trigger = MetricTrigger(
        metric_name="CpuUtilizationPercentage",
        metric_resource_uri=deployment.id,
        time_grain=datetime.timedelta(minutes=1),
        statistic="Average",
        operator="GreaterThan",
        time_aggregation="Last",
        time_window=datetime.timedelta(minutes=5),
        threshold=70,
    )

    # Reaction: add two instances, then wait an hour before scaling again.
    add_instances = ScaleAction(
        direction="Increase",
        type="ChangeCount",
        value=2,
        cooldown=datetime.timedelta(hours=1),
    )

    return ScaleRule(metric_trigger=high_cpu_trigger, scale_action=add_instances)

def create_scale_in_rule(deployment):
    """
    Create a scale-in rule for an Azure Machine Learning deployment based on CPU utilization percentage.

    The rule watches the deployment's "CpuUtilizationPercentage" metric (1-minute grain,
    "Average" statistic, "Last" aggregation over a 5-minute window). When the value is
    less than 30 it decreases the instance count by 1, with a 1-hour cooldown.

    Args:
    deployment: The AML online deployment object; its `id` is used as the metric resource URI.

    Returns:
    azure.mgmt.monitor.models.ScaleRule: The configured scale-in rule object, which decreases the instance count by 1 if CPU utilization percentage is less than 30%.

    Example:
    >>> deployment = get_aml_deployment(ml_client, endpoint_name, deployment_name)
    >>> rule_scale_in = create_scale_in_rule(deployment)
    """
    rule_scale_in = ScaleRule(
        metric_trigger=MetricTrigger(
            metric_name="CpuUtilizationPercentage",
            metric_resource_uri=deployment.id,
            time_grain=datetime.timedelta(minutes=1),
            statistic="Average",
            # Scale in on LOW utilization; the threshold sits well below the scale-out
            # threshold so the two rules do not fight each other.
            operator="LessThan",
            time_aggregation="Last",
            time_window=datetime.timedelta(minutes=5),
            threshold=30,
        ),
        scale_action=ScaleAction(
            # Remove one instance at a time.
            direction="Decrease",
            type="ChangeCount",
            value=1,
            cooldown=datetime.timedelta(hours=1),
        ),
    )
    return rule_scale_in

def create_update_autoscale_profile(
    mon_client,
    endpoint,
    deployment,
    resource_group,
    autoscale_settings_name=None,
    min_instances=1,
    max_instances=5,
    default_instances=1,
):
    """
    Create or update an autoscale setting for an Azure Machine Learning endpoint deployment in a specified resource group.

    The setting contains a single profile named "my-scale-settings" that holds the rules
    built by `create_scale_in_rule` and `create_scale_out_rule` and the given capacity limits.

    Args:
    mon_client (azure.mgmt.monitor.MonitorManagementClient): The Monitor Management Client to interact with Azure Monitor.
    endpoint: The AML online endpoint associated with the deployment; its `name` and `location` are used.
    deployment: The AML online deployment to autoscale; its `name` and `id` are used.
    resource_group (str): The name of the resource group where the deployment resides.
    autoscale_settings_name (str, optional): Name of the autoscale setting to create or update.
        When omitted, a new name is generated with `create_autoscale_settings_name`. Pass the
        name of an existing setting to update it instead of creating another one.
    min_instances (int, optional): Minimum instance count of the profile. Defaults to 1.
    max_instances (int, optional): Maximum instance count of the profile. Defaults to 5.
    default_instances (int, optional): Default instance count of the profile. Defaults to 1.

    Returns:
    The result of `mon_client.autoscale_settings.create_or_update(...)`.

    Raises:
    ValueError: If the capacity values do not satisfy min_instances <= default_instances <= max_instances.

    Example:
    >>> mon_client = get_mon_client(subscription_id)
    >>> endpoint = get_aml_endpoint(ml_client, endpoint_name)
    >>> deployment = get_aml_deployment(ml_client, endpoint_name, deployment_name)
    >>> resource_group = "my_resource_group"
    >>> create_update_autoscale_profile(mon_client, endpoint, deployment, resource_group)
    """
    # Fail before any remote call if the capacity limits are inconsistent.
    if not min_instances <= default_instances <= max_instances:
        raise ValueError(
            "capacity must satisfy min_instances <= default_instances <= max_instances, "
            f"got min={min_instances}, default={default_instances}, max={max_instances}"
        )

    if autoscale_settings_name is None:
        autoscale_settings_name = create_autoscale_settings_name(endpoint.name, deployment.name)
    print(autoscale_settings_name)

    rule_scale_out = create_scale_out_rule(deployment)
    rule_scale_in = create_scale_in_rule(deployment)

    profile = AutoscaleProfile(
        name="my-scale-settings",
        capacity={
            "minimum": min_instances,
            "maximum": max_instances,
            "default": default_instances,
        },
        rules=[
            rule_scale_in,
            rule_scale_out,
        ],
    )

    return mon_client.autoscale_settings.create_or_update(
        resource_group,
        autoscale_settings_name,
        parameters={
            "location": endpoint.location,
            "target_resource_uri": deployment.id,
            "profiles": [profile],
        },
    )

In [0]:
# Endpoint and deployment that receive the autoscale rules.
# NOTE(review): both names are hard-coded; the `endpoint_name` widget value read
# earlier in this notebook is not used here - confirm this is intended.
target_endpoint_name = 'dev-diabetes-endpoint2'
target_deployment_name = 'blue-dev-diabetes-model'

# Connect to the target Azure ML workspace as the service principal
ml_client = get_aml_client(subscription_id, resource_group, workspace)

# Look up the MLflow tracking URI of that workspace
mlflow_tracking_uri = get_aml_mlflow_tracking_uri(ml_client)

# Fetch the target endpoint and the deployment to be autoscaled
aml_endpoint = get_aml_endpoint(ml_client, target_endpoint_name)
aml_deployment = get_aml_deployment(ml_client, target_endpoint_name, target_deployment_name)

# Azure Monitor management client, through which autoscale settings are managed
mon_client = get_mon_client(subscription_id)

# Create or update the autoscale setting for the target deployment
create_update_autoscale_profile(
    mon_client=mon_client,
    endpoint=aml_endpoint,
    deployment=aml_deployment,
    resource_group=resource_group,
)