## Deploy a model to a local endpoint using the Azure Machine Learning Python SDK v2

In [None]:
! pip install azure-ai-ml

In [1]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
)
from azure.identity import DefaultAzureCredential

In [2]:
# Workspace coordinates: replace each placeholder with your own value.
subscription_id, resource_group, workspace_name = (
    "<SUBSCRIPTION_ID>",
    "<RESOURCE_GROUP>",
    "<AML_WORKSPACE_NAME>",
)

# Credential object handed to the client below.
credential = DefaultAzureCredential()

# Client handle bound to the workspace identified by the values above.
ml_client = MLClient(
    workspace_name=workspace_name,
    resource_group_name=resource_group,
    subscription_id=subscription_id,
    credential=credential,
)

In [29]:
# Inference environment: a base Docker image plus the conda specification
# used to build the environment inside it.
env = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file="conda_dep_opti.yml",
)

# Inference code: the code folder and the scoring script to run.
code_config = CodeConfiguration(
    scoring_script="score_opti.py",
    code="padchest_score_code",
)

In [33]:
# Location of the model artifacts, relative to this notebook.
model_path = "../outputs/az-register-models"

# Model entity describing the folder above.
model = Model(
    name="padchest-pt-onnx-ov",
    version="1",
    type="custom_model",
    path=model_path,
    description="A folder az-register-models with PT, ONNX and OV models of padchest",
)


In [9]:
# Name shared by the endpoint and by every later call that looks it up.
endpoint_name = "padchest-pt-ipex-ov-local-sdk-v2"

# Endpoint definition using key-based authentication.
endpoint = ManagedOnlineEndpoint(
    auth_mode="key",
    description="this is local: padchest-pt-ipex-ov-local-sdk-v2",
    name=endpoint_name,
)

# local=True creates (or updates) the endpoint locally.
# NOTE(review): with local=True the call appears to finish before returning,
# so the result may not be a poller despite the variable name -- confirm
# before calling .wait() on it.
poller = ml_client.online_endpoints.begin_create_or_update(endpoint, local=True)

Updating local endpoint (padchest-pt-ipex-ov-local-sdk-v2) .Done (0m 5s)


### Define Deployment
See VM SKUs that are supported for Azure Machine Learning managed online endpoints [here](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list?view=azureml-api-2)

- For local deployments, pass the `local=True` parameter.

In [34]:

# Describe the "blue" deployment: the model, environment and scoring code
# that the endpoint named above should serve.
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=endpoint_name,
    model=model,
    environment=env,
    code_configuration=code_config,
    instance_type="Standard_F2s_v2",
    instance_count=1,
)

# Create the deployment locally. The deployment-specific operations client is
# used (as for get_logs and for endpoint creation elsewhere in this notebook)
# instead of the generic MLClient.begin_create_or_update, so that local=True
# is passed to the method that explicitly accepts it.
# The returned deployment object is shown as the cell output.
ml_client.online_deployments.begin_create_or_update(blue_deployment, local=True)


Updating local deployment (padchest-pt-ipex-ov-local-sdk-v2 / blue) .
Building Docker image from Dockerfile
Step 1/6 : FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest
 ---> a663ea825a6c
Step 2/6 : RUN mkdir -p /var/azureml-app/
 ---> Using cache
 ---> 11cb60119f42
Step 3/6 : WORKDIR /var/azureml-app/
 ---> Using cache
 ---> 880258a5c0d4
Step 4/6 : COPY conda.yml /var/azureml-app/
 ---> Using cache
 ---> a6df009a097b
Step 5/6 : RUN conda env create -n inf-conda-env --file conda.yml
 ---> Using cache
 ---> b215e53bc271
Step 6/6 : CMD ["conda", "run", "--no-capture-output", "-n", "inf-conda-env", "runsvdir", "/var/runit"]
 ---> Using cache
 ---> 957b35d935b2
Successfully built 957b35d935b2
Successfully tagged padchest-pt-ipex-ov-local-sdk-v2:blue

Starting up endpoint...Done (0m 20s)


ManagedOnlineDeployment({'private_network_connection': None, 'provisioning_state': 'Succeeded', 'endpoint_name': 'padchest-pt-ipex-ov-local-sdk-v2', 'type': 'Managed', 'name': 'blue', 'description': None, 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/mnt/batch/tasks/shared/LS_root/mounts/clusters/ravi-nuance/code/Users/ravi.panchumarthy/01.PadChest/Medical-Imaging-AI-RSNA-2021-Demo/4.2.Deploy the model(optimized)_sdk_v2'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7f1d640b7b50>, 'model': Model({'job_name': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'padchest-pt-onnx-ov', 'description': 'A folder az-register-models with PT, ONNX and OV models of padchest', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/mnt/batch/t

In [35]:
# Route 100% of the endpoint's traffic to the "blue" deployment.
endpoint.traffic = {"blue": 100}

# Push the updated endpoint through the same endpoint operations client that
# created it, so local=True is handled by the method that explicitly accepts it.
# The returned endpoint object is shown as the cell output.
ml_client.online_endpoints.begin_create_or_update(endpoint, local=True)

Updating local endpoint (padchest-pt-ipex-ov-local-sdk-v2) .Done (0m 5s)


ManagedOnlineEndpoint({'public_network_access': None, 'provisioning_state': 'Succeeded', 'scoring_uri': 'http://localhost:32815/score', 'openapi_uri': None, 'name': 'padchest-pt-ipex-ov-local-sdk-v2', 'description': 'this is local: padchest-pt-ipex-ov-local-sdk-v2', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/mnt/batch/tasks/shared/LS_root/mounts/clusters/ravi-nuance/code/Users/ravi.panchumarthy/01.PadChest/Medical-Imaging-AI-RSNA-2021-Demo/4.2.Deploy the model(optimized)_sdk_v2'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7f1d4d689fa0>, 'auth_mode': 'key', 'location': 'local', 'identity': None, 'traffic': {'blue': 100}, 'mirror_traffic': {}, 'kind': None})

In [36]:
# Look the local endpoint up by name; the result is shown as the cell output.
ml_client.online_endpoints.get(
    name=endpoint_name,
    local=True,
)

ManagedOnlineEndpoint({'public_network_access': None, 'provisioning_state': 'Succeeded', 'scoring_uri': 'http://localhost:32815/score', 'openapi_uri': None, 'name': 'padchest-pt-ipex-ov-local-sdk-v2', 'description': 'this is local: padchest-pt-ipex-ov-local-sdk-v2', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/mnt/batch/tasks/shared/LS_root/mounts/clusters/ravi-nuance/code/Users/ravi.panchumarthy/01.PadChest/Medical-Imaging-AI-RSNA-2021-Demo/4.2.Deploy the model(optimized)_sdk_v2'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7f1d4d689bb0>, 'auth_mode': 'key', 'location': 'local', 'identity': None, 'traffic': {'blue': 100}, 'mirror_traffic': {}, 'kind': None})

In [None]:
# Request 500 lines of logs from the local "blue" deployment.
ml_client.online_deployments.get_logs(
    name="blue",
    endpoint_name=endpoint_name,
    lines=500,
    local=True,
)

In [39]:
# Fetch the local endpoint again and keep it for the scoring cells.
endpoint_deployed = ml_client.online_endpoints.get(local=True, name=endpoint_name)

# Show the current traffic split, then the scoring URI, one per line.
print(endpoint_deployed.traffic, endpoint_deployed.scoring_uri, sep="\n")

{'blue': 100}
http://localhost:32815/score


In [None]:
import requests

# DICOM file sent to the endpoint as the test payload.
test_file = "./sample_dicom.dcm"

# Read the payload inside a context manager so the file handle is closed as
# soon as the bytes are in memory, instead of being left open.
with open(test_file, "rb") as dicom_file:
    files = {"image": dicom_file.read()}

# Scoring URI reported by the deployed local endpoint.
scoring_uri = endpoint_deployed.scoring_uri

# POST the DICOM bytes to the endpoint as the "image" file field.
response = requests.post(scoring_uri, files=files)

# Stop here on an HTTP error status, so a failed request surfaces as an HTTP
# error rather than as a confusing JSON parsing failure in the next cell.
response.raise_for_status()
print("output:", response.content)

In [41]:
import json

# Parse the JSON body returned by the scoring request.
output_dict = json.loads(response.content)

# Per-framework summaries from the response.
pt_metrics = output_dict['pt_summary']
ipex_metrics = output_dict['ipex_summary']
ov_metrics = output_dict['ov_summary']


def print_framework_metrics(framework, metrics, result_key, fwk_versions):
    """Print the metrics section for a single framework.

    Args:
        framework: Section heading; also the key looked up in ``fwk_versions``.
        metrics: Summary dict holding ``avg_latency``, ``fps`` and the entry
            named by ``result_key``.
        result_key: Key in ``metrics`` whose value holds ``top_labels`` and
            ``top_probabilities``.
        fwk_versions: Mapping from framework name to its version string.
    """
    print(f"{framework} Metrics:")
    print(f"\tFramework Version:\t{fwk_versions[framework]}")
    print(f"\tTop Labels:\t{metrics[result_key]['top_labels']}")
    print(f"\tTop Probabilities:\t{metrics[result_key]['top_probabilities']}")
    print(f"\tAvg Latency:\t{metrics['avg_latency']:.4f} sec")
    print(f"\tFPS:\t{metrics['fps']:.2f}")


# One row per framework: (heading / version key, summary dict, result key).
framework_sections = (
    ("PyTorch", pt_metrics, "pt_result"),
    ("IPEX", ipex_metrics, "ipex_result"),
    ("OpenVINO", ov_metrics, "ov_result"),
)

for section_index, (framework, metrics, result_key) in enumerate(framework_sections):
    if section_index:
        # Blank line between sections; the first section has none before it.
        print()
    print_framework_metrics(
        framework,
        metrics,
        result_key,
        output_dict['system_info']['fwk_versions'],
    )

# FPS speedup of IPEX relative to the PyTorch baseline.
ipex_fps_speedup = ipex_metrics['fps'] / pt_metrics['fps']
print(f"\nSpeedup with IPEX: {ipex_fps_speedup:.2f}x")

# FPS speedup of OpenVINO relative to the PyTorch baseline.
ov_fps_speedup = ov_metrics['fps'] / pt_metrics['fps']
print(f"\nSpeedup with OpenVINO: {ov_fps_speedup:.2f}x")

PyTorch Metrics:
	Framework Version:	1.13.1+cpu
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0469 sec
	FPS:	21.33

IPEX Metrics:
	Framework Version:	1.13.100
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0333 sec
	FPS:	30.03

OpenVINO Metrics:
	Framework Version:	2022.3.0-9052-9752fafe8eb-releases/2022/3
	Top Labels:	['Pneumonia', 'Infiltration', 'Effusion']
	Top Probabilities:	[49.63, 32.22, 3.29]
	Avg Latency:	0.0219 sec
	FPS:	45.73

Speedup with IPEX: 1.41x

Speedup with OpenVINO: 2.14x


In [44]:
def unescape_text(text):
    """Expand literal backslash escape sequences in ``text``.

    The text is encoded as Latin-1 with the ``backslashreplace`` error handler
    before being decoded with ``unicode_escape``. The ``unicode_escape`` codec
    reads raw bytes as Latin-1, so encoding as UTF-8 first would garble every
    non-ASCII character; this form leaves such characters intact.

    Args:
        text: String that may contain literal escape sequences.

    Returns:
        The string with its escape sequences expanded.
    """
    return text.encode("latin-1", "backslashreplace").decode("unicode_escape")


# Print system info reported by the scoring response.
lscpu_out = unescape_text(output_dict['system_info']['lscpu_out'])
print(f"\nSystem Info:\n{lscpu_out}")

mem_out_gb = unescape_text(output_dict['system_info']['mem_out_gb'])
print(f"\nSystem Memory Info (GB):\n{mem_out_gb}")

os_out = unescape_text(output_dict['system_info']['os'])
print(f"\nSystem OS:\n{os_out}")


System Info:
Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Byte Order:                      Little Endian
Address sizes:                   46 bits physical, 48 bits virtual
CPU(s):                          8
On-line CPU(s) list:             0-7
Thread(s) per core:              2
Core(s) per socket:              4
Socket(s):                       1
NUMA node(s):                    1
Vendor ID:                       GenuineIntel
CPU family:                      6
Model:                           85
Model name:                      Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Stepping:                        7
CPU MHz:                         2593.906
BogoMIPS:                        5187.81
Virtualization:                  VT-x
Hypervisor vendor:               Microsoft
Virtualization type:             full
L1d cache:                       128 KiB
L1i cache:                       128 KiB
L2 cache:                        4 MiB
L3 cache:        

### Delete endpoint

In [None]:
# The endpoint was created with local=True, so it has to be deleted with the
# same flag. Without it the call targets an endpoint of this name in the
# Azure workspace rather than the local one.
ml_client.online_endpoints.begin_delete(name=endpoint_name, local=True)