## Deploy a model to an online endpoint, using Azure Machine Learning Python SDK v2.

For reference, [click here](https://learn.microsoft.com/en-us/azure/machine-learning/tutorial-deploy-model?view=azureml-api-2)

#### Prerequisites 

In [None]:
! pip install azure-ai-ml

In [2]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    OnlineRequestSettings
)
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

In [3]:
# enter details of your AML workspace
subscription_id = "<SUBSCRIPTION_ID>"
resource_group = "<RESOURCE_GROUP>"
workspace_name = "<AML_WORKSPACE_NAME>"

# authenticate
credential = DefaultAzureCredential()

# Get a handle to the workspace
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

In [4]:
# Configure an environment

env = Environment(
    conda_file="conda_dep_opti.yml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    )

# configure an inference configuration with a code folder and scoring script
code_config = CodeConfiguration(
        code="padchest_score_code",
        scoring_script="score_opti.py"
    )   

In [None]:
# Configure/Register a model in AzureML

model_path="../outputs/az-register-models"

model = Model(
    path=model_path,
    type=AssetTypes.CUSTOM_MODEL,
    name="padchest-pt-onnx-ov-v2sdk",
    version="1",
    description="SDKv2-az-register-models with PT, ONNX and OV models of padchest"
)
ml_client.models.create_or_update(model)

In [6]:
endpoint_name = "padchest-opti-endpt-sdk-v2"
# Define and setup an endpoint
endpoint = ManagedOnlineEndpoint(
    name = endpoint_name, 
    description="padchest-opti-sdk-v2-endpoint",
    auth_mode="key"
)
# create an ONLINE endpoint
poller = ml_client.online_endpoints.begin_create_or_update(endpoint)
poller.wait()

### Define Deployment
See VM SKUs that are supported for Azure Machine Learning managed online endpoints [here](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list?view=azureml-api-2)

In [8]:

req_settings = OnlineRequestSettings(request_timeout_ms=36000)

# Define a deployment
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=endpoint_name,
    model=model,
    environment=env,
    code_configuration=code_config,
    instance_type="Standard_F2s_v2", #Standard_FX4mds, Standard_F2s_v2
    instance_count=1,
    request_settings=req_settings
)

# create the deployment:
poller = ml_client.begin_create_or_update(blue_deployment)


Check: endpoint padchest-opti-endpt-sdk-v2 exists


............................................................................................

In [9]:
# blue deployment takes 100% traffic
endpoint.traffic = {"blue": 100}
ml_client.begin_create_or_update(endpoint)

<azure.core.polling._poller.LROPoller at 0x7fad5c75eb80>

In [10]:
deployment_logs = ml_client.online_deployments.get_logs(
    name="blue", endpoint_name=endpoint_name, lines=50
)

In [11]:
# Get the details for online endpoint
deployed_endpoint = ml_client.online_endpoints.get(name=endpoint_name)

# existing traffic details
print(deployed_endpoint.traffic)

# Get the scoring URI
print(deployed_endpoint.scoring_uri)

auth_key = ml_client.online_endpoints.get_keys(endpoint_name).primary_key
print(f"Authkye:{auth_key}")

{'blue': 100}
https://padchest-opti-endpt-sdk-v2.eastus.inference.ml.azure.com/score
Authkye:m3C7n2b1kYVwDiDfiJj9LA42d1lRBTLQ


In [None]:
import requests

test_file = "./sample_dicom.dcm"
files = {'image': open(test_file, 'rb').read()}

# resp = requests.post(scoring_uri, input_data, headers=headers)
scoring_uri = deployed_endpoint.scoring_uri

# Send the DICOM as a raw HTTP request and obtain results from endpoint.
response = requests.post(scoring_uri, headers={"Authorization": f"Bearer {auth_key}"},files=files, timeout=60)
print("output:", response.content)

In [13]:
import json
output_dict = json.loads(response.content)

pt_metrics = output_dict['pt_summary']
ipex_metrics = output_dict['ipex_summary']
ov_metrics = output_dict['ov_summary']

print(f"PyTorch Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['PyTorch']}")
print(f"\tTop Labels:\t{pt_metrics['pt_result']['top_labels']}")
print(f"\tTop Probabilities:\t{pt_metrics['pt_result']['top_probabilities']}")
print(f"\tAvg Latency:\t{pt_metrics['avg_latency']:.4f} sec")
print(f"\tFPS:\t{pt_metrics['fps']:.2f}")

print(f"\nIPEX Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['IPEX']}")
print(f"\tTop Labels:\t{ipex_metrics['ipex_result']['top_labels']}")
print(f"\tTop Probabilities:\t{ipex_metrics['ipex_result']['top_probabilities']}")
print(f"\tAvg Latency:\t{ipex_metrics['avg_latency']:.4f} sec")
print(f"\tFPS:\t{ipex_metrics['fps']:.2f}")

print(f"\nOpenVINO Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['OpenVINO']}")
print(f"\tTop Labels:\t{ov_metrics['ov_result']['top_labels']}")
print(f"\tTop Probabilities:\t{ov_metrics['ov_result']['top_probabilities']}")
print(f"\tAvg Latency:\t{ov_metrics['avg_latency']:.4f} sec")
print(f"\tFPS:\t{ov_metrics['fps']:.2f}")

# Calculate the FPS speedup with IPEX compared to PyTorch
ipex_fps_speedup = ipex_metrics['fps'] / pt_metrics['fps']
print(f"\nSpeedup with IPEX: {ipex_fps_speedup:.2f}x")

# Calculate the FPS speedup with OpenVINO compared to PyTorch
ov_fps_speedup = ov_metrics['fps'] / pt_metrics['fps']
print(f"\nSpeedup with OpenVINO: {ov_fps_speedup:.2f}x")

PyTorch Metrics:
	Framework Version:	1.13.1+cpu
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0875 sec
	FPS:	11.43

IPEX Metrics:
	Framework Version:	1.13.100
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0687 sec
	FPS:	14.55

OpenVINO Metrics:
	Framework Version:	2022.3.0-9052-9752fafe8eb-releases/2022/3
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0444 sec
	FPS:	22.53

Speedup with IPEX: 1.27x

Speedup with OpenVINO: 1.97x


In [14]:
#Print System info
lscpu_out=output_dict['system_info']['lscpu_out'].encode().decode('unicode_escape')
print(f"\nSystem Info:\n{lscpu_out}")

mem_out_gb=output_dict['system_info']['mem_out_gb'].encode().decode('unicode_escape')
print(f"\nSystem Memory Info (GB):\n{mem_out_gb}")

os_out=output_dict['system_info']['os'].encode().decode('unicode_escape')
print(f"\nSystem OS:\n{os_out}")


System Info:
Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Byte Order:                      Little Endian
Address sizes:                   46 bits physical, 48 bits virtual
CPU(s):                          2
On-line CPU(s) list:             0,1
Thread(s) per core:              2
Core(s) per socket:              1
Socket(s):                       1
NUMA node(s):                    1
Vendor ID:                       GenuineIntel
CPU family:                      6
Model:                           85
Model name:                      Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Stepping:                        7
CPU MHz:                         2593.905
BogoMIPS:                        5187.81
Virtualization:                  VT-x
Hypervisor vendor:               Microsoft
Virtualization type:             full
L1d cache:                       32 KiB
L1i cache:                       32 KiB
L2 cache:                        1 MiB
L3 cache:          

#### Delete endpoint

In [None]:
#ml_client.online_endpoints.begin_delete(name=endpoint_name)

..