# Experiment with parameters for a Ridge Regression Model on the Diabetes Dataset in an Azure ML Pipeline

This notebook is for experimenting with different parameters to train a ridge regression model on the Diabetes dataset.

In [11]:
import os
import requests
import tempfile
import azureml.core
from azureml.core import Workspace, Experiment, Datastore
from azureml.widgets import RunDetails

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.34.0


In [105]:
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core.graph import PipelineParameter

print("Pipeline SDK-specific imports completed")

ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

Pipeline SDK-specific imports completed
leonamltestcn
leonamljci
chinaeast2
f7ccde76-13d3-4104-a67c-a396e59e872b


In [13]:
import os, shutil

# Create a folder for the experiment files
model_register_folder = 'model'
os.makedirs(model_register_folder, exist_ok=True)


In [111]:
#Define data transfer folder between different steps
from azureml.pipeline.core import Pipeline, PipelineData, TrainingOutput
model_folder = PipelineData("model_release_folder", datastore=ws.get_default_datastore())

# Generate Pipeline step execution script

In [110]:
%%writefile $model_register_folder/prepare.py
from azureml.core import Run
import pandas as pd
import shutil
import joblib
import os
import json
import urllib.request

# Get the experiment run context
run = Run.get_context()

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument('--output_folder', type=str, dest='output_folder', default="model", help='output folder')
parser.add_argument("--pipeline_param", type=str, help="pipeline parameter")

args = parser.parse_args()
output_folder = args.output_folder

#  Get input model release config file
print("pipeline Argument :" + args.pipeline_param)
config_file = args.pipeline_param

# Save the parameters, model, score, cona etc. files to the outputs folder
os.makedirs(output_folder, exist_ok=True)
print("%s created" % args.output_folder)

# Create a folder for the model release files
model_register_folder = 'model'
os.makedirs(model_register_folder, exist_ok=True)

urllib.request.urlretrieve(config_file, "model/parameters.json")

with open("model/parameters.json") as f:
        pars = json.load(f)
try:
    register_args_model = pars["model"]
    print(register_args_model)
except KeyError:
    print("Could not load registration values from file")
    register_args_model =""

try:
    register_args_score = pars["scoring"]
    print(register_args_score)
except KeyError:
    print("Could not load registration values from file")
    register_args_score =""

try:
    register_args_conda = pars["conda"]
    print(register_args_conda)
except KeyError:
    print("Could not load registration values from file")
    register_args_conda =""

try:
    register_args_modelname = pars["modelname"]
    print(register_args_modelname)
except KeyError:
    print("Could not load registration values from file")
    register_args_modelname = "model.pkl"

urllib.request.urlretrieve(register_args_model, "model/"+ register_args_modelname)

urllib.request.urlretrieve(register_args_conda, os.path.join(model_register_folder, 'conda_dependencies.yml'))

urllib.request.urlretrieve(register_args_score, os.path.join(model_register_folder, 'score.py'))


#list all file in model and all file in output_folder
local_dir_list = os.listdir(model_register_folder)
 
print("Files and directories in '", model_register_folder, "' :")
 
# prints all files
print(local_dir_list)


print("saving model release files")
#shutil.copytree(model_register_folder, output_folder)

# iterating over all the files in
# the source directory
for fname in local_dir_list:
     
    # copying the files to the
    # destination directory
    shutil.copy2(os.path.join(model_register_folder,fname), output_folder)

#list all file in output_folder
dir_list = os.listdir(output_folder)
 
print("Files and directories in '", output_folder, "' :")
 
# prints all files
print(dir_list)

print("delete temp model folder")
#shutil.rmtree(model_register_folder)

print("model release files saved....." + output_folder)

run.complete()

Overwriting model/prepare.py


In [101]:
%%writefile $model_register_folder/register_deploy.py
# Import libraries
import argparse
import joblib
import sklearn
from azureml.core import Workspace, Model, Run
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument('--model_folder', type=str, dest='model_folder', default="model", help='model location')
args = parser.parse_args()
model_folder = args.model_folder

# Get the experiment run context
run = Run.get_context()

# load the model release files
model_file = model_folder + "/sklearn_regression_model.pkl"
conda_file = model_folder + "/conda_dependencies.yml"
score_file = model_folder + "/score.py"

#model_file = "sklearn_regression_model.pkl"
#conda_file = "conda_dependencies.yml"
#score_file = "score.py"

print("Loading model: " + model_file)
print("Loading conda: " + conda_file)
print("Loading score: " + score_file)

fileconent = open(conda_file, "r").read()
print("show yml: \n" + fileconent)

model = joblib.load(model_file)

model_deployed = Model.register(workspace=run.experiment.workspace,
               model_path = model_file,
               model_name = 'test_sklearn_model_pipeline',
               tags={'Training context':'Pipeline'})

#Deploy to ACI test

from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice


service_name = 'mytestpipelinesrv3'

environment = Environment.from_conda_specification(name="myenv", file_path=conda_file)

#environment = Environment('my-sklearn-environment2')
#environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[
#    'pip==20.2.4'],
#    pip_packages=[
#    'azureml-defaults',
#    'inference-schema[numpy-support]',
#    'joblib',
#    'numpy',
#    'scikit-learn=={}'.format(sklearn.__version__)
#])


print("Env is get ready")


inference_config = InferenceConfig(entry_script=score_file, environment = environment)
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

print("start deployment" + model_deployed.name)
service = Model.deploy(workspace=run.experiment.workspace,
                       name=service_name,
                       models=[model_deployed],
                       inference_config=inference_config,
                       deployment_config=aci_config,
                       overwrite=True)

service.wait_for_deployment(show_output=True)

#print(service.get_logs())

print(service.scoring_uri)

run.complete()

Overwriting model/register_deploy.py


# Setup computer resource for running Pipeline

In [94]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "amlcluster"

# Verify that cluster exists
try:
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If not, create it
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4,
                                                           idle_seconds_before_scaledown=5400)
    pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    pipeline_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.


# Set build pipeline and pipeline running environment

In [54]:
# Use a RunConfiguration to specify some additional requirements for this step.
from azureml.core.runconfig import RunConfiguration
from azureml.core.runconfig import DockerConfiguration 
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

# create a new runconfig object
run_config = RunConfiguration()

# enable Docker 
#run_config.environment.docker.enabled = True
run_config.docker = DockerConfiguration(use_docker = True)

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

# use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_config.environment.python.user_managed_dependencies = False

# specify CondaDependencies obj
run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn','pandas'],
                                                                            pip_packages=['azureml-sdk'], python_version='3.8.13')

In [36]:
#Need to be removed
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration
from azureml.core.runconfig import DockerConfiguration

# Create a Python environment for the experiment
diabetes_env = Environment("diabetes-pipeline-env")
diabetes_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies
diabetes_env.docker.enabled = True # Use a docker container

# Create a set of package dependencies
diabetes_packages = CondaDependencies.create(conda_packages=['scikit-learn','pandas'],
                                             pip_packages=['azureml-sdk'])

# Add the dependencies to the environment
diabetes_env.python.conda_dependencies = diabetes_packages

# Register the environment (just in case you want to use it again)
diabetes_env.register(workspace=ws)
registered_env = Environment.get(ws, 'diabetes-pipeline-env')

# Create a new runconfig object for the pipeline
pipeline_run_config = RunConfiguration()

# Use the compute you created above. 
pipeline_run_config.target = pipeline_cluster

# enable Docker 
#run_config.environment.docker.enabled = True
pipeline_run_config.docker = DockerConfiguration(use_docker = True)

# Assign the environment to the run configuration
pipeline_run_config.environment = registered_env

print ("Run configuration created.")

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


# Construct Pipeline input parameter and pipelin steps

In [119]:
# We will use this later in publishing pipeline
pipeline_param = PipelineParameter(name="config_pipeline_arg", default_value="")
print("pipeline parameter created")

pipeline parameter created


In [121]:
# Syntax
# PythonScriptStep(
#     script_name, 
#     name=None, 
#     arguments=None, 
#     compute_target=None, 
#     runconfig=None, 
#     inputs=None, 
#     outputs=None, 
#     params=None, 
#     source_directory=None, 
#     allow_reuse=True, 
#     version=None, 
#     hash_paths=None)
# This returns a Step
prepare_step = PythonScriptStep(name="prepare",
                         script_name="prepare.py",
                         arguments=['--output_folder', model_folder,"--pipeline_param", pipeline_param], 
                         compute_target=pipeline_cluster, 
                         source_directory=model_register_folder,
                         outputs=[model_folder],
                         runconfig = run_config,
                         allow_reuse=True)
print("prepare_step created")

prepare_step created


In [122]:
# Step 2, run the model registration script
register_step = PythonScriptStep(name = "Register Model",
                                source_directory = model_register_folder,
                                script_name = "register_deploy.py",
                                arguments = ['--model_folder', model_folder],
                                inputs=[model_folder],
                                compute_target = pipeline_cluster,
                                runconfig = run_config,
                                allow_reuse = True)
print("register_step created")


register_step created


In [130]:
#Test input parameter for pipeline
config_file = "https://raw.githubusercontent.com/leonlj/mlopstmptest/main/parameters.json"

## Debug pipeline, run pipeline with experiement

In [123]:
#for temporary pipeline parameter testing
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

# Construct the pipeline
pipeline_steps = [prepare_step, register_step]
pipeline = Pipeline(workspace = ws, steps=pipeline_steps)
print("Pipeline is built.")

# Create an experiment and run the pipeline
experiment = Experiment(workspace = ws, name = 'testparam-model-release-pipeline')
pipeline_run_with_param = experiment.submit(pipeline, pipeline_parameters={'config_pipeline_arg': config_file}, regenerate_outputs=True)
print("Pipeline submitted for execution.")

RunDetails(pipeline_run_with_param).show()
pipeline_run_with_param.wait_for_completion()

Pipeline is built.
Created step prepare [3300efcc][158237fd-5552-44b7-87c8-b24516d1b35e], (This step will run and generate new outputs)
Created step Register Model [421749f1][2b06def4-13ba-4f42-8341-21b847f34dca], (This step will run and generate new outputs)
Submitted PipelineRun e6c20870-5483-49cb-a39b-5601ed72ebd1
Link to Azure Machine Learning Portal: https://studio.ml.azure.cn/runs/e6c20870-5483-49cb-a39b-5601ed72ebd1?wsid=/subscriptions/f7ccde76-13d3-4104-a67c-a396e59e872b/resourcegroups/leonamljci/workspaces/leonamltestcn&tid=fb79b746-da69-43ae-8666-e506470b969c
Pipeline submitted for execution.


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: e6c20870-5483-49cb-a39b-5601ed72ebd1
Link to Azure Machine Learning Portal: https://studio.ml.azure.cn/runs/e6c20870-5483-49cb-a39b-5601ed72ebd1?wsid=/subscriptions/f7ccde76-13d3-4104-a67c-a396e59e872b/resourcegroups/leonamljci/workspaces/leonamltestcn&tid=fb79b746-da69-43ae-8666-e506470b969c
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 0a418f9c-9f01-4caf-a82a-d9de4723d533
Link to Azure Machine Learning Portal: https://studio.ml.azure.cn/runs/0a418f9c-9f01-4caf-a82a-d9de4723d533?wsid=/subscriptions/f7ccde76-13d3-4104-a67c-a396e59e872b/resourcegroups/leonamljci/workspaces/leonamltestcn&tid=fb79b746-da69-43ae-8666-e506470b969c
StepRun( prepare ) Status: Queued

Streaming azureml-logs/55_azureml-execution-tvmps_3b63de77aeb24fe9dd894ab12eef23d19f60dee55488f36f2d63518a0a5067c7_d.txt
2022-12-02T03:32:23Z Executing 'Copy ACR Details file' on 10.0.0.4
2022-12-02T03:32:23Z Copy ACR Details file succeeded on 10.0.0.4. Output: 
>>>   
>>>   
Login Succeed

'Finished'

## Run published pipeline
**Publish the pipeline**

In [124]:
published_pipeline = pipeline.publish(name="Mode_Release_Pipeline", description="Use Pipeline for model release", continue_on_step_failure=True)
published_pipeline

Name,Id,Status,Endpoint
Mode_Release_Pipeline,b963a087-2d67-4237-9b35-6c269f222fb5,Active,REST Endpoint


### Get published pipeline

You can get the published pipeline using **pipeline id**.

To get all the published pipelines for a given workspace(ws): 
```css
all_pub_pipelines = PublishedPipeline.get_all(ws)

In [125]:
from azureml.pipeline.core import PublishedPipeline

pipeline_id = published_pipeline.id # use your published pipeline id
published_pipeline = PublishedPipeline.get(ws, pipeline_id)
published_pipeline

Name,Id,Status,Endpoint
Mode_Release_Pipeline,b963a087-2d67-4237-9b35-6c269f222fb5,Active,REST Endpoint


### Run published pipeline using its REST endpoint
[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace.

In [126]:
from azureml.core.authentication import InteractiveLoginAuthentication
import requests

auth = InteractiveLoginAuthentication()
aad_token = auth.get_authentication_header()

rest_endpoint1 = published_pipeline.endpoint

print("You can perform HTTP POST on URL {} to trigger this pipeline".format(rest_endpoint1))

# specify the param when running the pipeline
response = requests.post(rest_endpoint1, 
                         headers=aad_token, 
                         json={"ExperimentName": "My_Pipeline1",
                               "RunSource": "SDK",
                               "ParameterAssignments": {"config_pipeline_arg": config_file}})

You can perform HTTP POST on URL https://chinaeast2.api.ml.azure.cn/pipelines/v1.0/subscriptions/f7ccde76-13d3-4104-a67c-a396e59e872b/resourceGroups/leonamljci/providers/Microsoft.MachineLearningServices/workspaces/leonamltestcn/PipelineRuns/PipelineSubmit/b963a087-2d67-4237-9b35-6c269f222fb5 to trigger this pipeline


In [127]:
try:
    response.raise_for_status()
except Exception as ex:    
    raise Exception('Received bad response from the endpoint: {}\n'
                    'Response Code: {}\n'
                    'Headers: {}\n'
                    'Content: {}'.format(rest_endpoint1, response.status_code, response.headers, response.content)) from ex

run_id = response.json().get('Id')
print('Submitted pipeline run: ', run_id)

Submitted pipeline run:  1d6b6f63-c4ab-4312-acd5-db1eb2fa7d4d


In [128]:
from azureml.pipeline.core import PipelineRun
published_pipeline_run_via_rest = PipelineRun(ws.experiments["My_Pipeline1"], run_id)
RunDetails(published_pipeline_run_via_rest).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [129]:
published_pipeline_run_via_rest.wait_for_completion()

PipelineRunId: 1d6b6f63-c4ab-4312-acd5-db1eb2fa7d4d
Link to Azure Machine Learning Portal: https://studio.ml.azure.cn/runs/1d6b6f63-c4ab-4312-acd5-db1eb2fa7d4d?wsid=/subscriptions/f7ccde76-13d3-4104-a67c-a396e59e872b/resourcegroups/leonamljci/workspaces/leonamltestcn&tid=fb79b746-da69-43ae-8666-e506470b969c

PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '1d6b6f63-c4ab-4312-acd5-db1eb2fa7d4d', 'status': 'Completed', 'startTimeUtc': '2022-12-02T06:34:02.233731Z', 'endTimeUtc': '2022-12-02T06:34:03.644159Z', 'services': {}, 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'HTTP', 'azureml.parameters': '{"config_pipeline_arg":"https://raw.githubusercontent.com/leonlj/mlopstmptest/main/parameters.json"}', 'azureml.continue_on_step_failure': 'True', 'azureml.continue_on_failed_optional_input': 'True', 'azureml.pipelineid': 'b963a087-2d67-4237-9b35-6c269f222fb5', 'azureml.pipelineComponent': 'pipelinerun', 'azureml.pipelines.stag

'Finished'