### Deploy the model as a web service hosted on Azure Container Instances (ACI). 

1. Create the scoring script.
1. Prepare an inference configuration.
1. Deploy the previously trained model to the cloud.
1. Consume data sample and test the web service.

###  1. Create the scoring script.

Create the scoring script, called `score_opti.py`, which the web service calls to run the model on each request.  
You must include two required functions in the scoring script:
* The `init()` function, which typically loads the model into a global object. 
    * This function is run only once when the Docker container is started. 
* The `run(input_data)` function uses the model to predict a value based on the input data. 
    * Inputs and outputs to the run typically use JSON for serialization and de-serialization, but other formats are supported.

TIP: See the documentation on how to [deploy a model to Azure Container Instances](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-azure-container-instance/) and on [advanced entry script authoring](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-advanced-entry-script#binary-data).

In [None]:
%%writefile score_opti.py
from azureml.contrib.services.aml_request import AMLRequest, rawhttp
from azureml.contrib.services.aml_response import AMLResponse
import json, os, io
import numpy as np
import torch
import intel_extension_for_pytorch as ipex
import torchxrayvision as xrv
from torchvision import transforms
from torchxrayvision.datasets import normalize
import pydicom

import time
from openvino.runtime import Core
from openvino.runtime import get_version

def init():
    """Load every model variant once and publish them as module globals.

    Sets the following globals, which ``run()`` reads:

    * ``bench_time`` -- how long, in seconds, each backend is benchmarked.
    * ``target_device`` -- the OpenVINO device name used for compilation.
    * ``modelx`` -- the PyTorch model in eval mode.
    * ``ipex_modelx`` -- the same model optimized by IPEX.
    * ``ov_compiled_model`` -- the ONNX export compiled by OpenVINO.

    Raises:
        RuntimeError: if the ``AZUREML_MODEL_DIR`` environment variable is
            not set.
    """
    global bench_time, target_device, modelx, ipex_modelx, ov_compiled_model

    bench_time = 10  # benchmark time in sec
    target_device = "CPU"

    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
    # For multiple models, it points to the folder containing all deployed models (./azureml-models)
    model_root = os.getenv('AZUREML_MODEL_DIR')
    if model_root is None:
        # Fail with a readable message instead of the TypeError that
        # os.path.join(None, ...) would raise.
        raise RuntimeError("AZUREML_MODEL_DIR is not set; cannot locate the registered models")
    model_dir = os.path.join(model_root, 'az-register-models')

    # Initial PyTorch model
    model_path = os.path.join(model_dir, 'pc-densenet-densenet-best.pt')
    # map_location keeps the load working on this CPU-only service even if the
    # checkpoint was saved from a CUDA device.
    modelx = torch.load(model_path, map_location="cpu")
    modelx.eval()

    # Initial PyTorch IPEX model
    ipex_modelx = ipex.optimize(modelx)

    # Initialize OpenVINO Runtime, then read and compile the ONNX export.
    core = Core()
    onnx_path = os.path.join(model_dir, 'pc-densenet-densenet-best.onnx')
    ov_model = core.read_model(onnx_path)
    ov_compiled_model = core.compile_model(model=ov_model, device_name=target_device)



# TIP:  To accept raw data, use the AMLRequest class in your entry script and add the @rawhttp decorator to the run() function
#       more details in: https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-advanced-entry-script
# Note that despite the fact that we trained our model on PNGs, we would like to simulate
# a scenario closer to the real world here and accept DICOMs into our score script. Here's how:
@rawhttp
def run(request):
    """Score one raw HTTP request and benchmark the three inference backends.

    A GET request is answered with the request path. A POST request must
    carry a DICOM file under the ``image`` file field; the image is
    preprocessed and then run repeatedly through the PyTorch model, the
    IPEX-optimized model and the OpenVINO compiled model, each for
    ``bench_time`` seconds. The function relies on the globals set by
    ``init()``.

    Args:
        request: the raw request object handed over by the ``@rawhttp``
            decorator.

    Returns:
        For a successful POST, a dict with the keys ``pt_summary``,
        ``ipex_summary``, ``ov_summary`` and ``system_info``. For a GET, an
        ``AMLResponse`` with the request path. If scoring fails, an
        ``AMLResponse`` whose body is ``{"error": <message>}``. For any other
        HTTP method, an ``AMLResponse`` with the body ``"bad request"``.
    """
    import traceback

    if request.method == 'GET':
        # For this example, just return the URL for GETs.
        return AMLResponse(str.encode(request.full_path), 200)

    if request.method != 'POST':
        # NOTE(review): status 500 is kept so existing clients see no change;
        # 405 would describe an unsupported method more accurately.
        return AMLResponse("bad request", 500)

    # For labels definition see file: '3.Build a model/trainingscripts/padchest_config.py'
    pathologies_labels = (
        'Air Trapping', 'Aortic Atheromatosis', 'Aortic Elongation', 'Atelectasis',
        'Bronchiectasis', 'Cardiomegaly', 'Consolidation', 'Costophrenic Angle Blunting', 'Edema', 'Effusion',
        'Emphysema', 'Fibrosis', 'Flattened Diaphragm', 'Fracture', 'Granuloma', 'Hemidiaphragm Elevation',
        'Hernia', 'Hilar Enlargement', 'Infiltration', 'Mass', 'Nodule', 'Pleural_Thickening',
        'Pneumonia', 'Pneumothorax', 'Scoliosis', 'Tuberculosis',
    )

    # Decode output and get predictions
    def get_top_predictions(index, probability, num_predictions=3):
        """Return the labels and percentages of the highest-scoring classes.

        ``index`` is the argsort of the batched model output (row 0 is used)
        and ``probability`` holds the per-class probabilities of that row.
        """
        # argsort is ascending, so walk row 0 from the end.
        best_first = index[0][::-1][:num_predictions]
        top_labels = [pathologies_labels[i] for i in best_first]
        # Plain Python floats: NumPy float32 scalars cannot be serialized by
        # the standard json module.
        top_probs = [round(float(probability[i]) * 100, 2) for i in best_first]
        return {"top_labels": top_labels, "top_probabilities": top_probs}

    def predictions_from_logits(logits):
        """Turn a NumPy array of raw outputs (batch first) into top predictions."""
        index = np.argsort(logits)
        probability = torch.nn.functional.softmax(torch.from_numpy(logits[0]), dim=0).numpy()
        return get_top_predictions(index, probability)

    def timed_inference(infer):
        """Call ``infer()`` repeatedly for ``bench_time`` seconds.

        Returns the last output and the list of per-call latencies. The call
        is made at least once, so the output is always defined.
        """
        latency_arr = []
        deadline = time.perf_counter() + int(bench_time)
        while True:
            start_time = time.perf_counter()
            output = infer()
            latency_arr.append(time.perf_counter() - start_time)
            if time.perf_counter() >= deadline:
                break
        return output, latency_arr

    def build_summary(label, fwk_version, result_key, result, latency_arr):
        """Print the latency line for ``label`` and assemble its summary dict."""
        avg_latency = np.array(latency_arr).mean()
        fps = 1 / avg_latency
        print(f"{label} Avg Latency: {avg_latency:.4f} sec, FPS: {fps:.2f}")
        return {
            "fwk_version": fwk_version,
            result_key: result,
            "avg_latency": float(avg_latency),
            "fps": float(fps),
        }

    def benchmark_pt(test_image):
        """Benchmark the stock PyTorch model on ``test_image``."""
        print(f"\n==== Benchmarking PyTorch inference with Fake Data for {bench_time}sec on CPU ====")
        print(f"Input shape: {test_image.shape}")

        # Inference only: disable autograd so the baseline is measured under
        # the same conditions as the IPEX run.
        with torch.no_grad():
            pt_output, latency_arr = timed_inference(lambda: modelx(test_image))

        pt_result = predictions_from_logits(pt_output.detach().cpu().numpy())
        return build_summary("PyTorch", f"PyTorch: {torch.__version__}",
                             "pt_result", pt_result, latency_arr)

    def benchmark_ipex(test_image):
        """Benchmark the IPEX-optimized, traced and frozen model on ``test_image``."""
        # Trace and freeze before the timed window starts, so tracing time
        # neither shortens nor skips the benchmark loop.
        with torch.no_grad():
            traced_model = torch.jit.trace(ipex_modelx, test_image)
            traced_model = torch.jit.freeze(traced_model)

        print(f"\n==== Benchmarking IPEX inference with Fake Data for {bench_time}sec on CPU ====")
        print(f"Input shape: {test_image.shape}")

        with torch.no_grad():
            ipex_output, latency_arr = timed_inference(lambda: traced_model(test_image))

        ipex_result = predictions_from_logits(ipex_output.detach().cpu().numpy())
        return build_summary("IPEX", f"IPEX: {ipex.__version__}",
                             "ipex_result", ipex_result, latency_arr)

    def benchmark_ov(test_image):
        """Benchmark the OpenVINO compiled model on ``test_image``."""
        # Handle used to pick the first output out of the inference result.
        output_layer = ov_compiled_model.output(0)

        print(f"\n==== Benchmarking OpenVINO {bench_time}sec on {target_device} ====")
        print(f"Input shape: {test_image.shape}")

        ov_output, latency_arr = timed_inference(lambda: ov_compiled_model([test_image]))

        ov_result = predictions_from_logits(ov_output[output_layer])
        return build_summary("OpenVINO", f"OpenVINO: {get_version()}",
                             "ov_result", ov_result, latency_arr)

    # Read DICOM and apply photometric transformations
    def read_and_rescale_image(filepath):
        """Read a DICOM file, apply rescale and windowing, and normalize it."""
        # dcmread is the supported reader name; read_file was removed in pydicom 3.0.
        dcm = pydicom.dcmread(filepath)
        # NOTE(review): falling back to slope 1 / intercept 0 assumes a file
        # without these tags needs no rescaling -- confirm for your data.
        slope = float(getattr(dcm, "RescaleSlope", 1.0))
        intercept = float(getattr(dcm, "RescaleIntercept", 0.0))
        image = dcm.pixel_array * slope + intercept

        # Clamp intensities to the display window stored in the file.
        half_width = dcm.WindowWidth // 2
        image = np.clip(image, dcm.WindowCenter - half_width, dcm.WindowCenter + half_width)
        # Scales 16bit to [-1024 1024]
        return normalize(image, maxval=65535, reshape=True)

    # Get System information
    def get_system_info():
        """Collect CPU, memory and OS details of the host by running shell tools."""
        import subprocess

        def capture(command):
            return subprocess.check_output(command).decode('utf-8')

        lscpu_out = capture(['lscpu'])
        print(lscpu_out)
        mem_out = capture(['free', '-g'])
        print(mem_out)
        os_release = capture(['cat', '/etc/os-release'])
        kernel_out = capture(['uname', '-a'])
        python_path = capture(['which', 'python'])
        os_out = os_release + " \n" + kernel_out + "\n" + python_path
        print(os_out)

        return {
            "lscpu_out": lscpu_out,
            "mem_out_gb": mem_out,
            "os": os_out
        }

    try:
        #
        # Start Processing
        #
        uploaded_file = request.files["image"]

        # Note that user can define this to be any other type of image
        input_image = read_and_rescale_image(uploaded_file)

        preprocess = transforms.Compose([
            xrv.datasets.XRayCenterCrop(),
            xrv.datasets.XRayResizer(224)
        ])
        input_image = preprocess(input_image)
        # Add the leading batch axis expected by the models.
        input_batch = torch.from_numpy(input_image[np.newaxis, ...])

        # Benchmark PyTorch
        pt_summary = benchmark_pt(input_batch)
        print(f"PyTorch Output: {pt_summary}")

        # Benchmark IPEX
        ipex_summary = benchmark_ipex(input_batch)
        print(f"IPEX Output: {ipex_summary}")

        # Benchmark OpenVINO
        ov_summary = benchmark_ov(input_batch)
        print(f"OpenVINO Output: {ov_summary}")

        sys_info = get_system_info()

        return {
            "pt_summary": pt_summary,
            "ipex_summary": ipex_summary,
            "ov_summary": ov_summary,
            "system_info": sys_info,
        }

    except Exception as e:
        # Keep the full traceback in the container logs; the client only
        # receives the message.
        traceback.print_exc()
        # NOTE(review): errors are still reported with status 200 so existing
        # clients see no change; a 4xx/5xx status would be more conventional.
        return AMLResponse(json.dumps({"error": str(e)}), 200)



### 2. Prepare an inference configuration.
   * Create an environment object
   * Create inference configuration to deploy the model as a web service using:
      * The scoring file (`score_opti.py`)
         *  Use [AMLRequest](https://docs.microsoft.com/en-us/python/api/azureml-contrib-services/azureml.contrib.services.aml_request?view=azure-ml-py) and [AMLResponse](https://docs.microsoft.com/en-us/python/api/azureml-contrib-services/azureml.contrib.services.aml_response.amlresponse?view=azure-ml-py) classes to access RAW data

Create environment object from an environment specification YAML file.
See Documentation [HERE](https://learn.microsoft.com/en-us/python/api/azureml-core/azureml.core.environment.environment?view=azure-ml-py#azureml-core-environment-environment-from-conda-specification)

In [None]:
%%writefile conda_dep_opti.yml
# Conda environment specification for the inference container that runs
# score_opti.py.
channels:
  - anaconda
  - defaults
dependencies:
    # Every package below is installed with pip.
    - pip:
        - azureml-defaults
        - azure-ml-api-sdk
        - torchxrayvision
        - pydicom
        - openvino-dev
        # CPU-only PyTorch stack; the "+cpu" builds are presumably resolved
        # from the extra index listed at the end of this list.
        - torch==1.13.1+cpu
        - torchvision==0.14.1+cpu
        - intel_extension_for_pytorch==1.13.100
        # pip options: PyPI stays the primary index, the PyTorch CPU wheel
        # index is added as a secondary one.
        - "--index-url https://pypi.org/simple/"
        - "--extra-index-url https://download.pytorch.org/whl/cpu"
# NOTE(review): torchxrayvision, pydicom and openvino-dev are unpinned, and no
# python or pip version is listed as a conda dependency. Consider pinning them
# so rebuilds stay reproducible -- confirm which versions the scoring script
# was validated against.

In [None]:
from azureml.core.environment import Environment

# Build a lightweight environment for inference.
# An Environment defines the Python packages, environment variables and Docker
# settings used in machine learning experiments, including data preparation,
# training and deployment to a web service.
# Environment documentation: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.environment.environment?view=azure-ml-py
# Conda dependencies documentation: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.conda_dependencies.condadependencies?view=azure-ml-py
# Conda YAML documentation: https://learn.microsoft.com/en-us/python/api/azureml-core/azureml.core.environment.environment?view=azure-ml-py#azureml-core-environment-environment-from-conda-specification

# Create the environment object from the environment specification YAML file.
himms_env_yml = Environment.from_conda_specification(
    name='himms_env_opti',
    file_path='conda_dep_opti.yml',
)

In [None]:
%%time
import uuid

from azureml.core import Workspace
from azureml.core.environment import CondaDependencies, Environment
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice, Webservice

# Connect to the workspace described by the config file.
# The workspace is the top-level resource for Azure Machine Learning and the
# central place for all the artifacts you create with it.
# Documentation: https://docs.microsoft.com/en-us/azure/machine-learning/concept-workspace
ws = Workspace.from_config(path='../')
print(f"Workspace: {ws.name}")

# Look up the registered model.
# A model is the result of an Azure Machine Learning training run or of some
# other training process outside of Azure. Once registered in a workspace it
# is identified by a name and a version, and the Model class can package it
# (with Docker) and deploy it as a real-time inference endpoint.
# Set the version number to match the model version registered in your workspace.
# Documentation: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py
model = Model(workspace=ws, name='padchest-pt-onnx-ov', version=1)

# Describe how the model is served.
# The inference configuration points to the scoring script (entry_script) and
# to the environment, and is used to locate the resources the deployment needs.
# Documentation: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.inferenceconfig?view=azure-ml-py
inference_config = InferenceConfig(
    entry_script="score_opti.py",
    environment=himms_env_yml,
)

### 3. Deploy in ACI
   Deploy the model as an ACI web service. Note that this step may take about 2-5 minutes to complete.

In [None]:
# Container resources for the endpoint.
# AciWebservice represents a machine learning model deployed as a web service
# endpoint on Azure Container Instances. The service is given 2 CPU cores and
# 4 GB of memory; inference runs on CPU.
# Documentation: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice.aciwebservice?view=azure-ml-py
aci_config = AciWebservice.deploy_configuration(cpu_cores=2, memory_gb=4)

service_name = 'padchest-opti-sdk-v1'

# Deploy.
# The model is packaged (using Docker behind the scenes) as a real-time
# endpoint that later serves inference requests. overwrite=True is passed so
# that a re-run of this cell targets the same service name.
# Documentation: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py
service = Model.deploy(
    ws,
    service_name,
    [model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True,
)

# Block until the deployment finishes, streaming its progress.
service.wait_for_deployment(show_output=True)


In [None]:
# # [Optional] Get deployment service Logs
# print(service.get_logs())

### 4. Consume data sample and test the web service.
We demonstrate how to consume DICOM images:
* We trained our model from PNG files with 16 bits pixel depth. 
* To test the web service, we will send a DICOM file (16 bits).
    * We will apply the image normalization implemented in the scoring script.

To try out the model you would need a sample DICOM image. In order to obtain one, we recommend that you use one of the PADCHEST images you trained on and use the provided `png2dcm.py` script to generate a DICOM file out of it. You can also try using your own DICOM!

In [None]:
# # Assuming the "sample.png" file exists, the following command generates the "sample_dicom.dcm" file.
!python png2dcm.py

In [None]:
import pydicom
import matplotlib.pylab as plt

# Visualize the DICOM file converted from the corresponding PNG file.
test_file = "./sample_dicom.dcm"
# dcmread is the supported reader name; read_file was removed in pydicom 3.0.
dcm = pydicom.dcmread(test_file)
print(dcm)
plt.imshow(dcm.pixel_array, cmap=plt.cm.bone)

Now that the model is deployed we can get the scoring web service's HTTP endpoint, which accepts REST client calls. 

In [None]:
import requests
from azureml.core.webservice import Webservice
import numpy as np

# Webservice constructor is used to retrieve a cloud representation of a Webservice
# object associated with the provided Workspace
# Documentation: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py
service = Webservice(name=service_name, workspace=ws)

# Get the web service HTTP endpoint.
# This endpoint can be shared with anyone who wants to test the web service or integrate it into an application.
uri = service.scoring_uri
print(uri)

# Read the DICOM bytes inside a context manager so the file handle is closed
# as soon as the payload is in memory.
with open(test_file, 'rb') as dicom_file:
    files = {'image': dicom_file.read()}

# Send the DICOM as a raw HTTP request and obtain results from endpoint.
# The scoring script benchmarks three backends in sequence, so the call takes
# a while to return.
response = requests.post(uri, files=files)
print("output:", response.content)

In [None]:
import json

# Parse the JSON body returned by the scoring endpoint.
output_dict = json.loads(response.content)

pt_metrics = output_dict['pt_summary']
ipex_metrics = output_dict['ipex_summary']
ov_metrics = output_dict['ov_summary']

# One report section per backend: (heading, summary dict, key of its predictions).
report_sections = (
    ("PyTorch Metrics:", pt_metrics, 'pt_result'),
    ("\nIPEX Metrics:", ipex_metrics, 'ipex_result'),
    ("\nOpenVINO Metrics:", ov_metrics, 'ov_result'),
)
for heading, metrics, result_key in report_sections:
    print(heading)
    print(f"\tFramework Version:\t{metrics['fwk_version']}")
    print(f"\tTop Labels:\t{metrics[result_key]['top_labels']}")
    print(f"\tTop Probabilities:\t{metrics[result_key]['top_probabilities']}")
    print(f"\tAvg Latency:\t{metrics['avg_latency']:.4f} sec")
    print(f"\tFPS:\t{metrics['fps']:.2f}")

# FPS speedup of each optimized backend relative to stock PyTorch.
baseline_fps = pt_metrics['fps']

ipex_fps_speedup = ipex_metrics['fps'] / baseline_fps
print(f"\nSpeedup with IPEX: {ipex_fps_speedup:.2f}x")

ov_fps_speedup = ov_metrics['fps'] / baseline_fps
print(f"\nSpeedup with OpenVINO: {ov_fps_speedup:.2f}x")



In [None]:
# json.loads has already turned the escaped newlines into real ones, so the
# strings are printed as they are. Re-decoding them with 'unicode_escape'
# would garble any non-ASCII character and reinterpret literal backslashes.
system_info = output_dict['system_info']

lscpu_out = system_info['lscpu_out']
print(f"\nSystem Info:\n{lscpu_out}")

mem_out_gb = system_info['mem_out_gb']
print(f"\nSystem Memory Info:\n{mem_out_gb}")

os_out = system_info['os']
print(f"\nSystem OS:\n{os_out}")

### Delete Service Endpoint
After testing the service, you can uncomment the following and execute the cell to delete the endpoint.

In [None]:
#service.delete()