# Deploying ML models using ACI and AKS

Inference is performed with the serialized model, deployed to a container instance and to an auto-scaling cluster.

1. deploy a REST API service on an Azure container instance using Azure ML

2. deploy a REST API service on an auto-scaling cluster using Kubernetes (for container orchestration) using Azure ML

3. deploy on an Azure container instance using MLflow and an open source ML framework

# Deploying ML model as web service with Azure Container Instance

In [None]:
import numpy as np 
import azureml.core

# Print the version string exposed by the imported azureml.core package,
# so the SDK version used by this notebook is visible in the output.
print("Azure ML SDK Version: ", azureml.core.VERSION)

In [None]:
# initialize workspace

from azureml.core import Workspace
from azureml.core.model import Model

# Build the workspace handle from the local Azure ML config file, then echo
# its name, resource group and location, one per line.
ws = Workspace.from_config()
for workspace_detail in (ws.name, ws.resource_group, ws.location):
    print(workspace_detail)

## Scoring script

In [None]:
%%writefile score.py
import json
import numpy as np
import os
import pickle
import joblib
import onnxruntime
import time
from azureml.core.model import Model
from azureml.monitoring import ModelDataCollector
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType

def init():
    """Load the scaler and the ONNX model and create the data collectors.

    Fills the module-level globals used by ``run``: ``scaler``, ``model``,
    ``input_name``, ``label_name``, ``inputs_dc`` and ``prediction_dc``.
    Both artifacts are resolved relative to the directory named by the
    ``AZUREML_MODEL_DIR`` environment variable.
    """
    global model, scaler, input_name, label_name, inputs_dc, prediction_dc

    model_root = os.getenv('AZUREML_MODEL_DIR')

    # Restore the serialized scaler object with joblib.
    scaler = joblib.load(
        os.path.join(model_root, 'model-scaler/1/model-scaler.pkl')
    )

    # Open an ONNX Runtime session on the classifier and remember the names
    # of its first input and first output for later inference calls.
    onnx_path = os.path.join(model_root, 'support-vector-classifier/2/svc.onnx')
    model = onnxruntime.InferenceSession(onnx_path, None)
    input_name = model.get_inputs()[0].name
    label_name = model.get_outputs()[0].name

    # Collectors used to monitor the model's input and output data.
    inputs_dc = ModelDataCollector(
        "Support vector classifier model",
        designation="inputs",
        feature_names=["feat1", "feat2", "feat3", "feat4", "feat5", "feat6", "feat7"],
    )
    prediction_dc = ModelDataCollector(
        "Support vector classifier model",
        designation="predictions",
        feature_names=["weatherprediction"],
    )

    
@input_schema('data', NumpyParameterType(np.array([[34.927778, 0.24, 7.3899, 83, 16.1000, 1016.51, 1]])))
@output_schema(NumpyParameterType(np.array([0])))
def run(data):
    """Score one sample with the scaler and ONNX model loaded by ``init``.

    Args:
        data: NumPy array holding exactly seven feature values; it is
            reshaped to ``(1, 7)`` before scaling.

    Returns:
        The model output converted to a plain Python list, or the error
        message as a string if scaling, inference or data collection fails.
    """
    try:
        # Apply the scaler exactly as it was restored in init(). Calling
        # fit_transform here would refit it on this single sample and
        # discard the statistics learned at training time.
        scaled = scaler.transform(data.reshape(1, 7))
        inputs_dc.collect(scaled)

        # Model inference: request only the label output.
        result = model.run([label_name], {input_name: scaled.astype(np.float32)})[0]
        # Record the model output through the predictions collector
        # (the original note says this ends up in Azure Blob storage).
        prediction_dc.collect(result)
        return result.tolist()
    except Exception as e:
        # Service boundary: report the failure to the caller instead of
        # calling .tolist() on a string, which raised AttributeError before.
        error = str(e)
        print(error)
        return error

In [1]:
# Environment 

In [None]:
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig

# Fetch the "AzureML-Minimal" environment from the workspace and clone it
# under the name 'myenv' so that extra packages can be added to the copy.
# (A bare Environment(name="myenv") call used to precede this line; its
# result was never used, so it has been removed.)
env = Environment.get(workspace=ws, name="AzureML-Minimal").clone('myenv')

In [None]:
# Specify the pip packages to add to the environment's conda dependencies
pip_packages = (
    "numpy",
    "onnxruntime",
    "joblib",
    "azureml-core",
    "azureml-monitoring",
    "azureml-defaults",
    "scikit-learn==0.20.3",
    "inference-schema",
    "inference-schema[numpy-support]",
)
for package_spec in pip_packages:
    env.python.conda_dependencies.add_pip_package(package_spec)

# Tie the scoring entry script to the environment it should run in
inference_config = InferenceConfig(
    environment=env,
    entry_script='score.py',
)

## Deployment Configuration


In [None]:
from azureml.core.webservice import AciWebservice

# ACI deployment settings: one CPU core, 1 GB of memory, and model data
# collection switched on.
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    collect_model_data=True,
)

## Deploy web service

In [None]:
# score.py loads 'model-scaler/1/model-scaler.pkl' and
# 'support-vector-classifier/2/svc.onnx' from AZUREML_MODEL_DIR, so request
# exactly those registered versions. Without a version the newest one is
# deployed, and the hard-coded paths in score.py would no longer exist.
model1 = Model(ws, 'model-scaler', version=1)
model2 = Model(ws, 'support-vector-classifier', version=2)

service_name = 'weather-aci-prediction'

In [None]:
# Deploy both registered models as one web service, replacing any existing
# service with the same name, then block until deployment finishes.
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model1, model2],
    inference_config=inference_config,
    deployment_config=deployment_config,
    overwrite=True,
)
service.wait_for_deployment(show_output=True)
print(service.state)

In [None]:
# Print the logs retrieved from the deployed service (useful when the
# deployment state printed above is not what was expected).
print(service.get_logs())

## Application Insights 

In [None]:
# Update the deployed service with Application Insights enabled.
service.update(enable_app_insights=True)

## Testing the web service

In [None]:
# Print the service's scoring URI.
print(service.scoring_uri)

In [None]:
# Print the service's Swagger URI.
# NOTE(review): presumably generated from the input/output schema decorators on run() in score.py — confirm.
print(service.swagger_uri)

In [None]:
# Bare expression: as the last statement of a notebook cell, its value (the
# service state) is shown as the cell output.
service.state

## Testing with input from user (from score.py)

In [None]:
import json


# run() in score.py accepts a single 'data' argument, so the request body
# carries only that key: one row of seven feature values (the same sample
# used in the input schema of score.py).
input_payload = json.dumps({
    'data': [[34.927778, 0.24, 7.3899, 83, 16.1000, 1016.51, 1]],
})

# Send the JSON payload to the deployed service and show its response.
output = service.run(input_payload)

print(output)

In [None]:
# then delete the service 
# service.delete()