In [10]:
# import required libraries
from azure.ai.ml import MLClient, command, Input, Output, load_component
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Data, Environment
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.dsl import pipeline

In [6]:
# Coordinates of the Azure ML workspace this notebook targets.
# Replace these three values with your own before running the notebook.
workspace = "mldemo"
resource_group = "mldemorg"
subscription_id = "8480def5-8f7a-4285-99f7-295b61d7b22a"

In [3]:
# Build the workspace client used by every cell below.
# DefaultAzureCredential supplies the credential; the remaining arguments
# identify the workspace configured in the previous cell.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

# Online Endpoint

Online endpoints are endpoints that are used for online (real-time) inferencing. They receive data from clients and can send responses back in real time.

An **endpoint** is an HTTPS endpoint that clients can call to receive the inferencing (scoring) output of a trained model. It provides:
* Authentication using "key & token" based auth
* SSL termination
* A stable scoring URI (endpoint-name.region.inference.ml.azure.com)

A **deployment** is a set of resources required for hosting the model that does the actual inferencing.
A single endpoint can contain multiple deployments.

Features of the managed online endpoint:

* **Test and deploy locally** for faster debugging
* Traffic to one deployment can also be **mirrored** (copied) to another deployment.
* **Application Insights integration**
* Security
* Authentication: Key and Azure ML Tokens
* Automatic Autoscaling
* Visual Studio Code debugging

**Blue-green deployment**: an approach where a new version of a web service is introduced to production by rolling it out to a small subset of users/requests before rolling it out fully.

<center>
<img src="../imgs/endpoint_concept.png" width = "500px" alt="Online Endpoint Concept cli vs sdk">
</center>

## 1. Create Online Endpoint

We can create an **online endpoint** with cli v2 or sdk v2 using the following syntax:

<center>
<img src="../imgs/create_online_endpoint.png" width = "700px" alt="Create Online Endpoint cli vs sdk">
</center>

In [None]:
from azure.ai.ml.entities import ManagedOnlineEndpoint

# Define the managed online endpoint; auth_mode="aml_token" selects
# Azure ML token based authentication for callers.
online_endpoint = ManagedOnlineEndpoint(
    name="taxi-online-endpoint-3",
    description="Taxi online endpoint",
    auth_mode="aml_token",
)

# begin_create_or_update only *starts* a long-running operation and returns a
# poller. Block on .result() so the endpoint actually exists before the
# deployment cell below runs (required for "Restart & Run All" to succeed).
ml_client.online_endpoints.begin_create_or_update(online_endpoint).result()


## 2. Create Online Deployment

To create a deployment for an online endpoint, you need to specify the following elements:

* Model files (or specify a registered model in your workspace)
* Scoring script - code needed to do scoring/inferencing
* Environment - a Docker image with Conda dependencies, or a dockerfile
* Compute instance & scale settings

Note that if you're deploying **MLflow models**, there's no need to provide **a scoring script** and execution **environment**, as both are autogenerated.

We can create an **online deployment** with cli v2 or sdk v2 using the following syntax:

<center>
<img src="../imgs/create_online_deployment.png" width = "700px" alt="Create Online Deployment cli vs sdk">
</center>

In [None]:
# create online deployment
from azure.ai.ml.entities import ManagedOnlineDeployment, Model, Environment

# Reference the registered model by name, using the "latest" label.
model = "taxi-model@latest"

# "blue" deployment: one Standard_DS2_v2 instance serving the model behind
# the endpoint created in the previous section.
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name="taxi-online-endpoint-3",
    model=model,
    instance_type="Standard_DS2_v2",
    instance_count=1,
)

# The call returns a poller for a long-running operation; wait for it to
# finish so traffic is only allocated to a deployment that is fully provisioned.
ml_client.online_deployments.begin_create_or_update(
    deployment=blue_deployment
).result()


## 3. Allocate Traffic

In [None]:
# allocate traffic
# Route 100% of the endpoint's traffic to the "blue" deployment.
online_endpoint.traffic = {"blue": 100}

# Push the updated endpoint definition and wait for the long-running update to
# complete, so the invoke cell below hits an endpoint that already routes traffic.
ml_client.online_endpoints.begin_create_or_update(online_endpoint).result()

## 4. Invoke and Test Endpoint

We can invoke the **online deployment** with cli v2 or sdk v2 using the following syntax:

<center>
<img src="../imgs/invoke_online_endpoint.png" width = "700px" alt="Invoke online endpoint cli vs sdk">
</center>

In [None]:
# invoke and test endpoint
# The endpoint name must match the endpoint created and deployed to above
# ("taxi-online-endpoint-3"); the request payload is read from a JSON file.
ml_client.online_endpoints.invoke(
    endpoint_name="taxi-online-endpoint-3",
    request_file="../../data/taxi-request.json",
)


# Batch Endpoint

**Batch endpoints** are endpoints that are used to do batch inferencing on large volumes of data over a period of time. 

**Batch endpoints** receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis.

<center>
<img src="../imgs/endpoint_batch_concept.png" width = "700px" alt="Concept batch endpoint">
</center>

## 1. Create Batch Compute Cluster

In [None]:
# create the compute cluster that the batch deployment will run on
from azure.ai.ml.entities import AmlCompute

# Cluster scales between 0 and 3 STANDARD_DS3_V2 nodes.
my_cluster = AmlCompute(
    name="batch-cluster",
    type="amlcompute",
    size="STANDARD_DS3_V2",
    min_instances=0,
    max_instances=3,
    location="westeurope",
)

# Provisioning is a long-running operation; wait for it so the batch
# deployment cell can reference "batch-cluster" safely.
ml_client.compute.begin_create_or_update(my_cluster).result()

## 2. Create Batch Endpoint

We can create the **batch endpoint** with cli v2 or sdk v2 using the following syntax:


<center>
<img src="../imgs/create_batch_endpoint.png" width = "700px" alt="Create batch endpoint cli vs sdk">
</center>

In [None]:
# create batch endpoint
from azure.ai.ml.entities import BatchEndpoint

# Define the batch endpoint; the tag records which model it is meant to serve.
batch_endpoint = BatchEndpoint(
    name="taxi-batch-endpoint-2",
    description="Taxi batch endpoint",
    tags={"model": "taxi-model@latest"},
)

# Wait for the long-running create operation to finish so the endpoint exists
# before a deployment is attached to it in the next section.
ml_client.batch_endpoints.begin_create_or_update(batch_endpoint).result()


## 3. Create Batch Deployment

We can create the **batch deployment** with cli v2 or sdk v2 using the following syntax:

<center>
<img src="../imgs/create_batch_deployment.png" width = "700px" alt="Create batch deployment cli vs sdk">
</center>

Note that if you're deploying **MLflow models**, there's no need to provide **a scoring script** and execution **environment**, as both are autogenerated.

In [None]:
# create batch deployment
from azure.ai.ml.entities import BatchDeployment, Model, Environment
from azure.ai.ml.constants import BatchDeploymentOutputAction

# Reference the registered model by name, using the "latest" label.
model = "taxi-model@latest"

# Deployment that runs on "batch-cluster" and appends the predictions
# row by row into a single output file, predictions.csv.
batch_deployment = BatchDeployment(
    name="taxi-batch-dp",
    description="this is a sample batch deployment",
    endpoint_name="taxi-batch-endpoint-2",
    model=model,
    compute="batch-cluster",
    instance_count=2,
    max_concurrency_per_instance=2,
    mini_batch_size=10,
    output_action=BatchDeploymentOutputAction.APPEND_ROW,
    output_file_name="predictions.csv",
)

# Wait for the long-running create operation to finish so the deployment
# exists before it is set as the endpoint default or invoked.
ml_client.batch_deployments.begin_create_or_update(batch_deployment).result()


Set deployment as the default deployment in the endpoint:

In [None]:
# Fetch the current endpoint definition from the workspace and point its
# default deployment at the batch deployment created above.
batch_endpoint = ml_client.batch_endpoints.get("taxi-batch-endpoint-2")
batch_endpoint.defaults.deployment_name = batch_deployment.name

# Wait for the update to complete so later invocations see the new default.
ml_client.batch_endpoints.begin_create_or_update(batch_endpoint).result()

## 4. Invoke and Test Endpoint

We can invoke the **batch deployment** with cli v2 or sdk v2 using the following syntax:

<center>
<img src="../imgs/invoke_batch_deployment.png" width = "700px" alt="Invoke batch deployment cli vs sdk">
</center>

In [None]:
# invoke and test endpoint
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes, InputOutputModes

# Data to score: a single CSV file, passed to the job in download mode.
# Named batch_input (not `input`) to avoid shadowing the Python builtin.
batch_input = Input(
    path="../../data/taxi-batch.csv",
    type=AssetTypes.URI_FILE,
    mode=InputOutputModes.DOWNLOAD,
)

# invoke the endpoint for batch scoring job
# The endpoint name must match the batch endpoint created above
# ("taxi-batch-endpoint-2"), which is where "taxi-batch-dp" was deployed.
ml_client.batch_endpoints.invoke(
    endpoint_name="taxi-batch-endpoint-2",
    input=batch_input,
    deployment_name="taxi-batch-dp",
)
