## Deploy a model to an online endpoint, using Azure Machine Learning Python SDK v2.

For reference, see the official tutorial: [Deploy a model as an online endpoint](https://learn.microsoft.com/en-us/azure/machine-learning/tutorial-deploy-model?view=azureml-api-2).

#### Prerequisites 

In [4]:
# Uncomment to install the Azure ML Python SDK v2 (the azure-ai-ml package) if it is missing
# ! pip install azure-ai-ml

In [1]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    OnlineRequestSettings
)
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

In [2]:
# Azure ML workspace coordinates -- replace the placeholders with your own values
subscription_id = "<SUBSCRIPTION_ID>"
resource_group = "<RESOURCE_GROUP>"
workspace = "<AML_WORKSPACE_NAME>"

# Build the workspace client that the later cells use for every service call
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

In [None]:
# Name of the managed online endpoint; later cells reuse this variable
online_endpoint_name = "padchest-optimized-ipex-ov-sdk-v2"

# Describe the endpoint; callers authenticate with a key
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    auth_mode="key",
    description="online deployment of: padchest-ipex-sdk-v2-2",
)

# Submit the create/update request and block until the long-running operation ends
poller = ml_client.online_endpoints.begin_create_or_update(endpoint)
poller.wait()

In [None]:
# Describe the local model folder as a custom model asset and register it

folder_model_path = "./outputs/az-register-models"

file_model = Model(
    name="padchest-opti-sdk-v2-endpoint",
    version="1",
    type=AssetTypes.CUSTOM_MODEL,
    path=folder_model_path,
    description="SDKv2-az-register-models with PT, ONNX and OV models of padchest",
)
ml_client.models.create_or_update(file_model)

In [41]:
# Environment: base container image plus the conda dependencies in conda_dep_opti.yml
env = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file="conda_dep_opti.yml",
)

# Inference code: the folder holding the scoring code and the scoring script's file name
code_config = CodeConfiguration(
    code="padchest_score_code",
    scoring_script="score_opti.py",
)

### Define Deployment
See VM SKUs that are supported for Azure Machine Learning managed online endpoints [here](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list?view=azureml-api-2)

### Define Deployment
See VM SKUs that are supported for Azure Machine Learning managed online endpoints [here](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list?view=azureml-api-2)

In [None]:

# Allow each scoring request up to 90 seconds before the endpoint times it out
req_settings = OnlineRequestSettings(request_timeout_ms=90000)

# Define the "blue" deployment: model + environment + scoring code on a single instance
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    model=file_model,
    environment=env,
    code_configuration=code_config,
    instance_type="Standard_FX4mds",  # alternatives: Standard_FX12mds, Standard_FX24mds
    instance_count=1,
    request_settings=req_settings,
)

# Create the deployment and block until provisioning has finished. Without the
# wait, the following cells (traffic routing, log retrieval, scoring) can run
# against a deployment that does not exist yet.
poller = ml_client.online_deployments.begin_create_or_update(blue_deployment)
poller.wait()


In [None]:
# Route 100% of the endpoint's traffic to the "blue" deployment
endpoint.traffic = {"blue": 100}

# Block until the traffic update has been applied so that later cells read the
# endpoint in its final state instead of racing the long-running operation
poller = ml_client.online_endpoints.begin_create_or_update(endpoint)
poller.wait()

In [None]:
# Fetch the last 50 log lines of the "blue" deployment and display them
deployment_logs = ml_client.online_deployments.get_logs(
    endpoint_name=online_endpoint_name,
    name="blue",
    lines=50,
)
deployment_logs

In [None]:
# Fetch the current state of the online endpoint
deployed_endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)

# Current traffic split across deployments
print(deployed_endpoint.traffic)

# URI that scoring requests are sent to
print(deployed_endpoint.scoring_uri)

# Primary key; the scoring cell sends it as the bearer token.
# NOTE: printing it stores the secret in the notebook output -- clear the
# cell output before sharing or committing this notebook.
auth_key = ml_client.online_endpoints.get_keys(online_endpoint_name).primary_key
print(f"Auth key: {auth_key}")

In [None]:
# Visualize the sample DICOM image that will be sent to the endpoint

import pydicom
import matplotlib.pyplot as plt

# Visualize converted DICOM file from the corresponding PNG file
test_file = "./sample_dicom.dcm"

# dcmread replaces read_file, which is deprecated and removed in pydicom 3.0
dcm = pydicom.dcmread(test_file)
print(dcm)
plt.imshow(dcm.pixel_array, cmap=plt.cm.bone)

In [None]:
import requests

test_file = "./sample_dicom.dcm"

# Read the DICOM bytes inside a context manager so the file handle is closed
# as soon as the payload has been loaded
with open(test_file, 'rb') as dicom_file:
    files = {'image': dicom_file.read()}

scoring_uri = deployed_endpoint.scoring_uri

# Send the DICOM as a multipart form field named 'image', authenticating with
# the endpoint key, and print the raw response body.
response = requests.post(
    scoring_uri,
    headers={"Authorization": f"Bearer {auth_key}"},
    files=files,
    timeout=60,
)
print("output:", response.content)

In [56]:
import json

# Parse the endpoint's JSON response body
output_dict = json.loads(response.content)

pt_metrics = output_dict['pt_summary']
pt_graph_metrics = output_dict['pt_graph_summary']
ipex_metrics = output_dict['ipex_eager_summary']
ipex_graph_metrics = output_dict['ipex_graph_summary']
ov_metrics = output_dict['ov_summary']

# One entry per framework: (heading to print, summary dict, key of its result sub-dict)
metric_sections = [
    ("Stock PyTorch Metrics:", pt_metrics, 'pt_result'),
    ("PyTorch Graph Mode Metrics:", pt_graph_metrics, 'pt_graph_result'),
    ("\nIPEX Eager Metrics:", ipex_metrics, 'ipex_result'),
    ("\nIPEX Graph Mode Metrics:", ipex_graph_metrics, 'ipex_graph_result'),
    ("\nOpenVINO Metrics:", ov_metrics, 'ov_result'),
]

# Every summary has the same layout, so a single loop prints all of them
for heading, metrics, result_key in metric_sections:
    print(heading)
    print(f"\tFramework Version:\t{metrics['fwk_version']}")
    print(f"\tTop Labels:\t{metrics[result_key]['top_labels']}")
    print(f"\tTop Probabilities:\t{metrics[result_key]['top_probabilities']}")
    print(f"\tAvg Latency:\t{metrics['avg_latency']:.4f} sec")
    print(f"\tFPS:\t{metrics['fps']:.2f}")


# FPS speedup of IPEX (graph mode) compared to stock PyTorch
ipex_fps_speedup = ipex_graph_metrics['fps'] / pt_metrics['fps']
print(f"\nSpeedup with IPEX: {ipex_fps_speedup:.2f}x")

# FPS speedup of OpenVINO compared to stock PyTorch
ov_fps_speedup = ov_metrics['fps'] / pt_metrics['fps']
print(f"\nSpeedup with OV: {ov_fps_speedup:.2f}x")

# FPS speedup of stock PyTorch graph mode compared to stock PyTorch.
# Kept in its own variable so the OpenVINO figure above is not overwritten.
pt_graph_fps_speedup = pt_graph_metrics['fps'] / pt_metrics['fps']
print(f"\nSpeedup with stock graph mode: {pt_graph_fps_speedup:.2f}x")

Stock PyTorch Metrics:
	Framework Version:	PyTorch: 1.13.1+cpu
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0506 sec
	FPS:	19.76
PyTorch Graph Mode Metrics:
	Framework Version:	PyTorch: 1.13.1+cpu
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0429 sec
	FPS:	23.32

IPEX Eager Metrics:
	Framework Version:	IPEX: 1.13.100
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0585 sec
	FPS:	17.09

IPEX Graph Mode Metrics:
	Framework Version:	IPEX: 1.13.100
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0273 sec
	FPS:	36.69

OpenVINO Metrics:
	Framework Version:	OpenVINO: 2023.0.0-10926-b4452d56304-releases/2023/0
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0208 sec
	FPS:	4

In [57]:
# Print the system information returned in the scoring response.
# Each value is passed through .encode().decode('unicode_escape'), which
# interprets backslash escape sequences contained in the text before printing.
system_info = output_dict['system_info']

lscpu_out = system_info['lscpu_out'].encode().decode('unicode_escape')
print(f"\nSystem Info:\n{lscpu_out}")

mem_out_gb = system_info['mem_out_gb'].encode().decode('unicode_escape')
print(f"\nSystem Memory Info (GB):\n{mem_out_gb}")

os_out = system_info['os'].encode().decode('unicode_escape')
print(f"\nSystem OS:\n{os_out}")


System Info:
Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Byte Order:                      Little Endian
Address sizes:                   46 bits physical, 48 bits virtual
CPU(s):                          4
On-line CPU(s) list:             0-3
Thread(s) per core:              2
Core(s) per socket:              2
Socket(s):                       1
NUMA node(s):                    1
Vendor ID:                       GenuineIntel
CPU family:                      6
Model:                           85
Model name:                      Intel(R) Xeon(R) Gold 6246R CPU @ 3.40GHz
Stepping:                        7
CPU MHz:                         3392.031
BogoMIPS:                        6784.06
Virtualization:                  VT-x
Hypervisor vendor:               Microsoft
Virtualization type:             full
L1d cache:                       64 KiB
L1i cache:                       64 KiB
L2 cache:                        2 MiB
L3 cache:               

#### Delete endpoint

In [None]:
# Uncomment to delete the endpoint once it is no longer needed
#ml_client.online_endpoints.begin_delete(name=online_endpoint_name)

..