In [1]:
import uuid
from azure.ai.ml import command
from azure.ai.ml.entities import Data
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    CodeConfiguration,
    Environment,
)

# Workspace handle used by every cell below. The workspace is resolved from a
# config.json file and authentication goes through DefaultAzureCredential.
ml_client = MLClient.from_config(
    credential=DefaultAzureCredential(),
)

Found the config file in: /mnt/batch/tasks/shared/LS_root/mounts/clusters/jcharley4/code/Users/jcharley/config.json


### STEP 1 : CREATE ONLINE (REAL-TIME) ENDPOINT

Online endpoints are endpoints that are used for online (real-time) inferencing. They receive data from clients and can send responses back in real time.

An endpoint is an HTTPS endpoint that clients can call to receive the inferencing (scoring) output of a trained model. It provides:

- Authentication using key- and token-based auth
- SSL termination
- A stable scoring URI (endpoint-name.region.inference.ml.azure.com)

A deployment is a set of resources required for hosting the model that does the actual inferencing. A single endpoint can contain multiple deployments.

Features of the managed online endpoint:

- Test and deploy locally for faster debugging
- Traffic to one deployment can also be mirrored (copied) to another deployment
- Application Insights integration
- Security
  - Authentication: key and Azure ML tokens
- Automatic autoscaling
- Visual Studio Code debugging
- Blue-green deployment: an approach where a new version of a web service is introduced to production by deploying it to a small subset of users/requests before deploying it fully

In [4]:
# Build a unique endpoint name: fixed prefix plus the first 8 hex characters
# of a freshly generated UUID4.
online_endpoint_name = f"taxi-online-{uuid.uuid4().hex[:8]}"
print(online_endpoint_name)

credit-endpoint-a60eb442


In [6]:
# create an online endpoint
from azure.ai.ml.entities import ManagedOnlineEndpoint

# Describe the endpoint: its name comes from the cell above and callers
# authenticate with Azure ML tokens (auth_mode="aml_token").
online_endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="Taxi online endpoint",
    auth_mode="aml_token",
)

# Submit the create/update request.
endpoint_result = ml_client.online_endpoints.begin_create_or_update(
    online_endpoint,
)

# Current azure-ai-ml releases return a poller from begin_* calls instead of
# blocking; the recorded output of this cell shows an older release returning
# the ManagedOnlineEndpoint directly. Wait on the poller when there is one so
# the deployment cell never runs against an endpoint that is still being
# provisioned.
if hasattr(endpoint_result, "result"):
    endpoint_result = endpoint_result.result()

# Last expression of the cell: display the created endpoint.
endpoint_result


ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://credit-endpoint-a60eb442.westeurope.inference.ml.azure.com/score', 'swagger_uri': 'https://credit-endpoint-a60eb442.westeurope.inference.ml.azure.com/swagger.json', 'name': 'credit-endpoint-a60eb442', 'description': 'Taxi online endpoint', 'tags': {}, 'properties': {'azureml.onlineendpointid': '/subscriptions/66914bb5-9cb2-4f6d-a84d-8ff900446b22/resourcegroups/learning/providers/microsoft.machinelearningservices/workspaces/test_learn/onlineendpoints/credit-endpoint-a60eb442', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/66914bb5-9cb2-4f6d-a84d-8ff900446b22/providers/Microsoft.MachineLearningServices/locations/westeurope/mfeOperationsStatus/oe:75310cf9-c772-4f21-9638-4129bab98f70:7beb4ba4-9ede-4046-a337-dca70797bc5f?api-version=2022-02-01-preview'}, 'id': '/subscriptions/66914bb5-9cb2-4f6d-a84d-8ff900446b22/resourceGroups/Learning/providers/Micro

### STEP 2 : CREATE DEPLOYMENT TO (REAL-TIME) ENDPOINT



To create a deployment to an online endpoint, you need to specify the following elements:

- Model files (or a registered model in your workspace)
- Scoring script: the code needed to do scoring/inferencing
- Environment: a Docker image with Conda dependencies, or a Dockerfile
- Compute instance and scale settings

Note that if you're deploying MLflow models, there's no need to provide a scoring script and execution environment, as both are autogenerated.

We can create an online deployment with CLI v2 or SDK v2 using the following syntax:

In [7]:
# Two ways of pointing at the most recent registered "taxi-model".

# Option 1: enumerate the registered versions and keep the highest number.
latest_model_version = max(
    int(registered.version)
    for registered in ml_client.models.list(name="taxi-model")
)
print(latest_model_version)

# Option 2: reference the model through the "@latest" label instead of an
# explicit version number. This is the value stored in `model`.
model = "taxi-model@latest"

2


In [8]:
# create online deployment
from azure.ai.ml.entities import ManagedOnlineDeployment, Model, Environment

# Describe the "blue" deployment: one Standard_DS2_v2 instance serving `model`
# behind the endpoint created earlier. No scoring script or environment is
# passed here.
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    model=model,
    instance_type="Standard_DS2_v2",
    instance_count=1,
)

# Submit the deployment request.
deployment_result = ml_client.online_deployments.begin_create_or_update(
    deployment=blue_deployment,
)

# Current azure-ai-ml releases return a poller from begin_* calls; older
# releases blocked until completion (the recorded output shows
# "Done (9m 50s)"). Wait explicitly when a poller is returned so the scoring
# cells below only run once the deployment is ready.
if hasattr(deployment_result, "result"):
    deployment_result = deployment_result.result()

# Last expression of the cell: display the resulting deployment.
deployment_result


Check: endpoint credit-endpoint-a60eb442 exists
Creating/updating online deployment blue Done (9m 50s)


...........................................................................................................

### STEP 3 : CONSUME THE MODEL

In [10]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally turn off server certificate verification for HTTPS clients.

    When ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment variable
    is unset or empty, and the ``ssl`` module exposes
    ``_create_unverified_context``, that factory is installed as
    ``ssl._create_default_https_context``. In every other case nothing is
    changed. The function always returns ``None``.
    """
    if not allowed:
        return
    # An explicit PYTHONHTTPSVERIFY setting takes precedence: leave ssl alone.
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        ssl._create_default_https_context = unverified_factory

# NOTE(review): this switches off TLS certificate verification for HTTPS
# requests made from this kernel, and the request below carries a bearer
# token. It is only needed for a scoring service with a self-signed
# certificate - confirm it is really required for this endpoint.
allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {
    "input_data": [
        [2.86,40.66551971,-73.98258972,1,40.69801331,-73.97357178,0,2,1,1,19,21,3,56,1,1,19,21,21,57],
        [3.98,40.68072128,-73.931633,1,40.6909523,-73.99185181,0,2,0,1,4,21,44,11,0,1,4,21,59,35],
    ]
}

body = json.dumps(data).encode("utf-8")

# Resolve the scoring URI from the endpoint created in this notebook instead
# of hard-coding it: the endpoint name carries a random suffix, so a pasted
# URL goes stale on every fresh run.
url = ml_client.online_endpoints.get(name=online_endpoint_name).scoring_uri

# Never paste the primary/secondary key or AMLToken into the notebook; supply
# it through the AZUREML_ENDPOINT_API_KEY environment variable instead.
api_key = os.environ.get("AZUREML_ENDPOINT_API_KEY", "")
if not api_key:
    raise ValueError(
        "A key should be provided to invoke the endpoint: "
        "set the AZUREML_ENDPOINT_API_KEY environment variable"
    )

# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + api_key,
    'azureml-model-deployment': 'blue',
}

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))


b'[12.015454016944316, 14.8676752120793]'


In [None]:
# Second option: consume the endpoint through the SDK.
# Target the endpoint created earlier in this notebook rather than a
# hard-coded name, and name the "blue" deployment explicitly: no cell shown
# here assigns endpoint traffic, and the REST call above likewise routes to
# "blue" through the azureml-model-deployment header.
ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    deployment_name="blue",
    request_file="../../data/taxi-request.json",
)
