## MLOps with Azure ML Pipelines

ML Pipeline - write the deployment step's files (conda environment file, ACI deployment script, scoring script) to a local folder

In [7]:
# Notebook-wide settings; the cells below read these names.
registered_env_name = 'experiment_env'        # same value as the `name:` field of the conda spec written below
experiment_folder = 'devOps_deploy_pipeline'  # local folder that receives the generated step files
dataset_prefix_name = 'exp'
cluster_name = 'mm-cluster'                   # compute cluster looked up (or created) below

Import required packages

In [8]:
# Import required packages
from azureml.core import Workspace, Experiment, Datastore, Environment, Dataset
from azureml.core.compute import ComputeTarget, AmlCompute, DataFactoryCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
from azureml.pipeline.core import Pipeline, PipelineParameter, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import PipelineParameter, PipelineData
from azureml.data.output_dataset_config import OutputTabularDatasetConfig, OutputDatasetConfig, OutputFileDatasetConfig
from azureml.data.datapath import DataPath
from azureml.data.data_reference import DataReference
from azureml.data.sql_data_reference import SqlDataReference
from azureml.pipeline.steps import DataTransferStep
import logging

In [9]:
# Connect to AML Workspace
ws = Workspace.from_config()

# Get the default datastore
default_ds = ws.get_default_datastore()

# Select AML Compute Cluster
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException


try:
    # Check for existing compute target
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If it doesn't already exist, create it
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        pipeline_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        # Show the cause, then re-raise: continuing would leave `pipeline_cluster`
        # undefined and surface later as an unrelated NameError.
        print(f"Failed to create compute cluster '{cluster_name}': {ex}")
        raise

Found existing cluster, use it.


In [10]:
import os

# Make sure the folder for the pipeline step files exists.
# exist_ok=True means no error is raised when the folder is already there.
os.makedirs(name=experiment_folder, exist_ok=True)
print(experiment_folder)

devOps_deploy_pipeline


In [11]:
# Path of the conda specification inside the experiment folder.
# (An earlier variant pointed at '../configuration/environment.yml' instead.)
conda_yml_file = f'./{experiment_folder}/environment.yml'

In [12]:
%%writefile $conda_yml_file
# Conda environment specification, written to the path held in conda_yml_file.
# The environment name matches the value of registered_env_name in this notebook.
name: experiment_env
dependencies:
# The Python interpreter version is pinned.
- python=3.6.2
- scikit-learn
- ipykernel
- matplotlib
# pandas is imported by score.py.
- pandas
- pip
# Packages below are installed with pip rather than conda.
- pip:
  - azureml-defaults
  - pyarrow
  # azureml-monitoring: score.py imports ModelDataCollector from azureml.monitoring.
  - azureml-monitoring
  - azureml-interpret
  - inference-schema
  # joblib: score.py uses joblib.load to load the model file.
  - joblib
  - azure-ml-api-sdk

Overwriting ./devOps_deploy_pipeline/environment.yml


In [13]:
%%writefile ./$experiment_folder/deployACI.py

import argparse
from azureml.core import Workspace, Environment
from azureml.core.model import Model
from azureml.core.run import Run
from azureml.core.model import InferenceConfig
from azureml.core.webservice import Webservice, AciWebservice
from azureml.exceptions import WebserviceException

parser = argparse.ArgumentParser(description='Deploy arg parser')
parser.add_argument('--scoring_file_output', type=str, help='File storing the scoring url')
parser.add_argument('--deploy_file', type=str, help='File storing if model should be deployed')
parser.add_argument('--environment_name', type=str,help='Environment name')
parser.add_argument('--service_name', type=str,help='service name')
parser.add_argument('--model_name', type=str,help='model name')



args = parser.parse_args()
scoring_url_file = args.scoring_file_output
deploy_file      = args.deploy_file
environment_name = args.environment_name
service_name     = args.service_name
model_name       = args.model_name


run = Run.get_context()

#Get associated AML workspace
ws = run.experiment.workspace

model = Model(ws, model_name)
env = Environment.get(ws, environment_name)
inference_config = InferenceConfig(entry_script='score.py', environment=env)

# Deploy model
aci_config = AciWebservice.deploy_configuration(
            cpu_cores = 1, 
            memory_gb = 2, 
            tags = {'model': 'diabetes remote training'},
            auth_enabled=True,
            enable_app_insights=True,
            collect_model_data=True)

try:
    service = Webservice(ws, name=service_name)
    if service:
        service.delete()
except WebserviceException as e:
         print()

service = Model.deploy(ws, service_name, [model], inference_config, aci_config)
service.wait_for_deployment(True)
    

# Output scoring url
print(service.scoring_uri)
with open(scoring_url_file, 'w+') as f:
    f.write(service.scoring_uri)



Writing ./devOps_deploy_pipeline/deployACI.py


In [14]:
%%writefile ./$experiment_folder/score.py

import json
import joblib
import numpy as np
from azureml.core.model import Model
from azureml.monitoring import ModelDataCollector
import time
import os
import pandas as pd


#version 2
# Called when the service is loaded
def init():
    """Load the model and create the data collectors when the service starts.

    Populates three module-level globals that ``run`` relies on:
    ``model`` (loaded with joblib from the path of the registered model
    'diabetes_model_remote'), ``inputs_dc`` and ``prediction_dc``
    (ModelDataCollector instances for inputs and predictions).
    """
    global model, inputs_dc, prediction_dc

    # Print statement for appinsights custom traces:
    started_at = time.strftime("%H:%M:%S")
    print ("model initialized" + started_at)

    # Get the path to the deployed model file and load it
    model_file = Model.get_model_path('diabetes_model_remote')
    print(model_file)
    model = joblib.load(model_file)

    # Column names attached to the collected input rows
    input_columns = [
        'Pregnancies',
        'PlasmaGlucose',
        'DiastolicBloodPressure',
        'TricepsThickness',
        'SerumInsulin',
        'BMI',
        'DiabetesPedigree',
        'Age',
    ]
    inputs_dc = ModelDataCollector("best_model", designation="inputs", feature_names=input_columns)
    prediction_dc = ModelDataCollector("best_model", designation="predictions", feature_names=["Diabetic"])



# Called when a request is received
def run(raw_data):
    """Score one request and return the predicted class names as a JSON string.

    ``raw_data`` is a JSON document whose ``data`` entry holds the rows to
    score. Each prediction is converted to an int and mapped to
    'not-diabetic' (0) or 'diabetic' (1). The inputs and the predicted
    classes are handed to the module-level data collectors.

    If anything raises, the exception message is printed (with a timestamp
    appended) and returned as a plain string instead of the JSON list.
    """
    try:
        payload = json.loads(raw_data)
        rows = payload['data']

        # Get a prediction from the model
        raw_predictions = model.predict(rows)
        print ("Prediction created" + time.strftime("%H:%M:%S"))

        # Get the corresponding classname for each prediction (0 or 1)
        classnames = ['not-diabetic', 'diabetic']
        predicted_classes = [classnames[int(value)] for value in raw_predictions]

        # Log the input and output data to appinsights:
        print(json.dumps({"input": raw_data, "output": predicted_classes}))

        inputs_dc.collect(rows)  # this call is saving our input data into Azure Blob
        prediction_dc.collect(predicted_classes)  # this call is saving our prediction data into Azure Blob

        # Return the predictions as JSON
        return json.dumps(predicted_classes)
    except Exception as exc:
        message = str(exc)
        print (message + time.strftime("%H:%M:%S"))
        return message

Writing ./devOps_deploy_pipeline/score.py
