# Deploy a scikit-learn model to AKS (Azure Kubernetes Service)

In [2]:
# Import the Workspace class and create the workspace (or connect to it if it already exists).
# exist_ok=True makes Workspace.create succeed when the workspace already exists instead of raising an error.

from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication
# Azure coordinates of the target workspace. The signed-in account must be
# Owner or Contributor on both the subscription and the resource group.
subscription_id = "be1b220b-fcad-4ce7-b323-6732ecc06c14"
resource_group = "pocdlkdevwu2rgtraning"
workspace_name = "commamlws01"  # your workspace name
workspace_region = "westus2"  # your region

# Always run a fresh interactive login against the given tenant.
interactive_auth = InteractiveLoginAuthentication(
    force=True,
    tenant_id="be413eec-6262-4083-97c8-8c2a817c2fe1",
)

# exist_ok=True: succeed (and return the workspace) when it already exists.
ws = Workspace.create(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
    location=workspace_region,
    auth=interactive_auth,
    exist_ok=True,
)



## Create Image from a model

#### Download Model

In [5]:
from azureml.core.model import Model
from sklearn.externals import joblib
import os
import shutil

# Download the registered model to a folder; it is used during inference.
model_name = "ml.joblib"

# Spark ML can only load a model from an HDFS/DBFS location, not from a local
# file, so the download target is a path under /dbfs.
model_name_dbfs_client_path = "/dbfs/mnt/models/"+model_name

model = Model(name = model_name,workspace = ws)

# Remove a stale local copy left behind by a previous run.
# shutil.rmtree only accepts directories and raises on a regular file, so a
# plain file has to be deleted with os.remove instead.
# NOTE(review): this cleans the relative path `model_name`, not the DBFS
# download target used below -- confirm whether that folder should be cleared too.
if os.path.isdir(model_name):
    shutil.rmtree(model_name)
elif os.path.isfile(model_name):
    os.remove(model_name)

# exist_ok=True lets the download overwrite an existing target.
model.download(model_name_dbfs_client_path, exist_ok=True)
print(model_name_dbfs_client_path)

Check the files on the driver node.

The ```file:``` prefix points to the driver node's local file system.

#### Create Score.py

In [8]:
%%writefile score.py
import json
import numpy as np
import pandas as pd
import os
from sklearn.externals import joblib
from azureml.core.model import Model
import logging

def init():
  """Load the scoring artifacts into the module-level globals ``model`` and ``ft``.

  Resolves the directory of the registered model ``ml.joblib`` through
  ``Model.get_model_path`` and loads two joblib files from it:

  * ``ml.joblib`` into the global ``model`` (``run`` calls ``model.predict``)
  * ``ft.joblib`` into the global ``ft`` (``run`` calls ``ft.transform``)

  Raises:
    FileNotFoundError: if either file is missing from the model directory.
  """
  global model
  global ft

  # logging.INFO is the level constant. logging.info is the logging function
  # and is rejected as an invalid level when basicConfig applies it.
  logging.basicConfig(level=logging.INFO)

  model_name = 'ml.joblib'
  ft_file = 'ft.joblib'
  ml_file = 'ml.joblib'

  # Retrieve the path to the model directory using the model name (resolved once).
  model_path = Model.get_model_path(model_name)
  print(model_path)

  # Load model. The existence check runs before the load so that a missing
  # file is reported explicitly rather than only by the loader's own error.
  ml_path = os.path.join(model_path, ml_file)
  if not os.path.exists(ml_path):
    print('NOT found')
    raise FileNotFoundError('Model file not found: ' + ml_path)
  print("found ml file from "+ml_file)
  model = joblib.load(ml_path)

  # Load transformation, with the same check-then-load order.
  ft_path = os.path.join(model_path, ft_file)
  if not os.path.exists(ft_path):
    print('NOT found')
    raise FileNotFoundError('Transformation file not found: ' + ft_path)
  print('Found ft file from '+ft_file)
  ft = joblib.load(ft_path)

def run(input_json_raw):
  """Score one request and return the predictions as a JSON array string.

  Args:
    input_json_raw: JSON text of the request. Either the double-encoded form
      (a JSON string whose content is the output of
      ``DataFrame.to_json(orient='split')``) or that split-oriented JSON
      object itself.

  Returns:
    ``json.dumps`` of ``model.predict(...)`` converted with ``tolist()``.

  Uses the globals ``ft`` and ``model`` that ``init`` populates.
  """
  from io import StringIO  # local import: only this function needs it

  payload = json.loads(input_json_raw)
  if not isinstance(payload, str):
    # The caller sent the split-oriented object directly (single-encoded);
    # turn it back into JSON text so both request forms share one code path.
    payload = json.dumps(payload)

  # orient='split' will keep the order of columns.
  # The text is wrapped in StringIO because passing literal JSON to
  # pd.read_json is deprecated in recent pandas releases.
  pdInput = pd.read_json(StringIO(payload), orient='split')
  # Transform data
  x = ft.transform(pdInput)
  # Get score
  predictions = model.predict(x)
  return json.dumps(predictions.tolist())

#### Create env.yml

In [10]:
from azureml.core.conda_dependencies import CondaDependencies 

# Conda environment definition for the scoring image.
myenv = CondaDependencies()
# score.py imports joblib through `sklearn.externals`, which scikit-learn
# deprecated in 0.21 and removed in 0.23. An unpinned install can resolve to a
# release where that import fails, so keep scikit-learn below 0.23.
myenv.add_conda_package("scikit-learn<0.23")
myenv.add_conda_package("pandas")

# Write the environment file that the image configuration references.
with open("myenv.yml","w") as f:
    f.write(myenv.serialize_to_string())

# Read it back and print it so the generated file can be inspected.
with open("myenv.yml","r") as f:
    print(f.read())

#### Create Image

It may take 5 mins

In [12]:
from azureml.core.image import ContainerImage, Image

# Entry script of the image and the runtime it executes under.
runtime = "python"
scoring_script = "score.py"

# Configure the image
image_settings = dict(
    execution_script=scoring_script,
    runtime=runtime,
    conda_file="myenv.yml",
    description="Crime Prediction",
    tags={"Regression": "RandomForestRegressor"},
)
image_config = ContainerImage.image_configuration(**image_settings)

# Create image; `models` takes the registered Model object(s) to package.
image = Image.create(
    workspace=ws,
    name="crime-pred",
    models=[model],
    image_config=image_config,
)

# Block until the image build finishes, streaming its output.
image.wait_for_creation(show_output=True)

### Create AKS Cluster

It may take 11 mins

In [14]:
from azureml.core.compute import AksCompute, ComputeTarget

from azureml.core.compute_target import ComputeTargetException

# Use the default configuration (you can also provide parameters to customize this)
prov_config = AksCompute.provisioning_configuration()

aks_name = 'aml-aks-1'

try:
    # Reuse the cluster when it already exists in the workspace, so that
    # re-running this cell does not try to create the same name twice.
    aks_target = ComputeTarget(workspace = ws, name = aks_name)
    print('Found existing AKS cluster ' + aks_name + ', reusing it')
except ComputeTargetException:
    # Not found: create the cluster
    aks_target = ComputeTarget.create(workspace = ws,
                                      name = aks_name,
                                      provisioning_configuration = prov_config)
    # Wait for the create process to complete
    aks_target.wait_for_completion(show_output = True)

print(aks_target.provisioning_state)
print(aks_target.provisioning_errors)

### Deploy from image

In [16]:
from azureml.core.webservice import Webservice, AksWebservice

# Per-replica resources and autoscaling bounds for the AKS web service.
aks_config = AksWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    autoscale_enabled=True,
    autoscale_min_replicas=1,
    autoscale_max_replicas=4,
    autoscale_target_utilization=60,
)

aks_service_name ='crime-pred-service-1'

# Deploy the previously built image onto the AKS cluster.
deployment_args = dict(
    workspace=ws,
    name=aks_service_name,
    image=image,
    deployment_config=aks_config,
    deployment_target=aks_target,
)
service = Webservice.deploy_from_image(**deployment_args)

# Block until the deployment finishes, then report the service state.
service.wait_for_deployment(show_output=True)
print(service.state)

#### (Optional) Update the service with the image for the latest model

In [18]:
# Point the running service at `image`, wait for the update to finish
# (streaming its output), and print the resulting service state.
service.update(image=image)
service.wait_for_deployment(show_output = True)
print(service.state)

#### Inferencing with AKS

In [20]:
import json

# Take up to 100 rows of the crime dataset, discard rows that contain nulls,
# and remove the crime_count and year columns before scoring.
test_data = (
    spark.sql("select * from crime_dataset limit 100")
    .na.drop()
    .drop('crime_count')
    .drop('year')
)

In [21]:
# Convert the Spark sample to pandas and serialise it for the web service.
tjson = test_data.toPandas()
# orient='split' keeps the column order. The text is JSON-encoded a second
# time because score.py's run() calls json.loads() before pd.read_json().
split_payload = tjson.to_json(orient='split')
test_json_raw = json.dumps(split_payload)

In [22]:
import pandas as pd
from io import StringIO

# Send the sample to the deployed service.
out = service.run(input_data=test_json_raw)
# Passing literal JSON text to pd.read_json is deprecated in recent pandas,
# so a string response is wrapped in StringIO; anything else is passed through.
pd.read_json(StringIO(out) if isinstance(out, str) else out).T.squeeze()

In [23]:
# Display the service's scoring endpoint URI.
service.scoring_uri

In [24]:
# Display the service's authentication keys.
# NOTE: the keys end up in the cell output -- clear the output before sharing
# or committing this notebook.
service.get_keys()

In [25]:
# service.get_logs()
# Look the service up by name in the workspace and print its logs; unlike the
# commented-out call above, this does not need the `service` variable.
print(ws.webservices['crime-pred-service-1'].get_logs())


#### (Optional) Debug the transformation and prediction functions locally in Azure Databricks

In [27]:
# File names of the two joblib artifacts inside the downloaded model folder.
ft_file = 'ft.joblib'
ml_file = 'ml.joblib'

# The download places a folder named after the model inside the target path.
artifact_dir = model_name_dbfs_client_path+"/"+model_name
ft_path = os.path.join(artifact_dir, ft_file)
ml_path = os.path.join(artifact_dir, ml_file)

# Report every artifact that is missing (transformer first, then model).
for candidate in (ft_path, ml_path):
  if not os.path.exists(candidate):
    print(candidate + ' is NOT exists')

#dbutils.fs.ls("/mnt/models/"+model_name+"/"+model_name)
print(ft_path)
print(ml_path)

# Load the model and the feature transformer for local experiments.
ml = joblib.load(ml_path)
ft = joblib.load(ft_path)

End of notebook