# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.

# Get ML Workspace

In [2]:
import azureml.core
from azureml.core import Workspace, Experiment, Datastore, Environment, Dataset
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.core.authentication import ServicePrincipalAuthentication

import os
from dotenv import load_dotenv

True

In [3]:
load_dotenv() 

True

In [4]:
# Initilize Workspace
tenant_id = os.environ['AML_TENANT_ID']
client_id = os.environ['AML_PRINCIPAL_ID'] 
client_secret = os.environ['AML_PRINCIPAL_PASS']

sp = ServicePrincipalAuthentication(tenant_id=tenant_id,
                                    service_principal_id=client_id,
                                    service_principal_password=client_secret)

ws_name = os.environ['AML_WORKSPACE_NAME']
subscription_id = os.environ['AML_SUBSCRIPTION_ID']
resource_group = os.environ['AML_RESOURCE_GROUP']

ws = Workspace.get(name=ws_name,
                   auth=sp,
                   subscription_id=subscription_id,
                   resource_group=resource_group)

print(ws.get_details())

{'id': '/subscriptions/f501f470-c695-4681-856a-988d86851132/resourceGroups/rg-mlops-template-dev-001/providers/Microsoft.MachineLearningServices/workspaces/amlmlopsdevhk01', 'name': 'amlmlopsdevhk01', 'identity': {'principal_id': 'fb4fe568-db82-40f2-864c-4d66bf2a5723', 'tenant_id': '72f988bf-86f1-41af-91ab-2d7cd011db47', 'type': 'SystemAssigned'}, 'location': 'westeurope', 'type': 'Microsoft.MachineLearningServices/workspaces', 'tags': {}, 'sku': 'Basic', 'workspaceid': 'a4081a5d-2399-414d-9cb9-ca047cbd5787', 'sdkTelemetryAppInsightsKey': '9ac578de-874f-4fea-85bc-7e4cefd0d47f', 'description': '', 'friendlyName': '', 'containerRegistry': '/subscriptions/f501f470-c695-4681-856a-988d86851132/resourcegroups/rg-mlops-template-dev-001/providers/microsoft.containerregistry/registries/crmlopsdevhk01', 'keyVault': '/subscriptions/f501f470-c695-4681-856a-988d86851132/resourcegroups/rg-mlops-template-dev-001/providers/microsoft.keyvault/vaults/kvmlopsdevhk01', 'applicationInsights': '/subscriptio

# Get Compute Cluster

In [5]:
# Retrieve or create an Aml compute
min_nodes = int(os.environ['AML_MIN_NODES'])
max_nodes = int(os.environ['AML_MAX_NODES'])

aml_compute_target = os.environ['AML_COMPUTE_NAME']
try:
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("found existing compute target.")
except ComputeTargetException:
    print("creating new compute target")
    
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
                                                                min_nodes = min_nodes, 
                                                                max_nodes = max_nodes)    
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

found existing compute target.


# Create Run COnfiguration

In [6]:
# Default datastore (Azure blob storage)
def_blob_store = ws.get_default_datastore()

# source directory
source_directory = '../src'
    
print(f'Source code is in {source_directory} directory.')

Source code is in ../src directory.


In [8]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

# create a new runconfig object
run_config = RunConfiguration()

# environment
env = Environment('op-env')

# enable Docker 
env.docker.enabled = True
# set Docker base image to the default CPU-based image
env.docker.base_image = DEFAULT_CPU_IMAGE
# use conda_dependencies.yml to create a conda environment in the Docker image for execution
env.python.user_managed_dependencies = False
# specify CondaDependencies obj
env.python.conda_dependencies = CondaDependencies.create(conda_packages=['or-tools'])

# set environment
run_config.environment = env

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


# Step 1: Reduce the search space of the problem

In [None]:
# Model input
model_input_path = os.environ['MODEL_INPUT_PATH'] 
model_input = Dataset.File.from_files(path=(def_blob_store, model_input_path))
model_input_param = PipelineParameter(name="model_input", default_value=model_input)
model_input_consumption = DatasetConsumptionConfig("model_input", model_input_param)

# Naming the intermediate data 
model_result_partial = PipelineData("model_result_partial",datastore=def_blob_store)
model_input_reduced = PipelineData("model_input_reduced",datastore=def_blob_store)

reduce_step = PythonScriptStep(
    script_name="reduce.py", 
    arguments=["--model_input", model_input_consumption, "--model_result_partial", model_result_partial, "--model_input_reduced", model_input_reduced],
    inputs=[model_input_consumption],
    outputs=[model_result_partial, model_input_reduced],
    compute_target=aml_compute, 
    source_directory=source_directory,
    runconfig=run_config
)

# Step 2: Partition the problem

In [None]:
# Naming the intermediate data 
model_input_list = PipelineData("model_input_list",datastore=def_blob_store)

parition_step = PythonScriptStep(
    script_name="partiition.py", 
    arguments=["--model_input_reduced", model_input_reduced, "--model_input_list", model_input_list],
    inputs=[model_input_reduced],
    outputs=[model_input_list],
    compute_target=aml_compute, 
    source_directory=source_directory,
    runconfig=run_config
)

# Step 3: Solve individual problem

In [None]:
# Naming the intermediate data 
model_result_list = PipelineData("model_result_list",datastore=def_blob_store)

parallel_run_config = ParallelRunConfig(
    source_directory=source_directory,
    entry_script='solve.py',
    mini_batch_size="1",
    error_threshold=1,
    output_action="append_row",
    environment=env,
    compute_target=aml_compute,
    node_count=max_nodes)

solve_step = ParallelRunStep(
    name="solve",
    parallel_run_config=parallel_run_config,
    inputs=[model_input_list],
    output=model_result_list,
    allow_reuse=True
)

# Step 4: Merge the result

In [None]:
# Naming the intermediate data 
model_result_final = PipelineData("model_result_final",datastore=def_blob_store)

merge_step = PythonScriptStep(
    script_name="merge.py", 
    arguments=["--model_input", model_input, "--model_result_partial", model_result_partial, "--model_result_list", model_result_list, "--model_result_final", model_result_final],
    inputs=[model_input, model_result_partial, model_result_list],
    outputs=[model_result_final],
    compute_target=aml_compute, 
    source_directory=source_directory,
    runconfig=run_config
)

# Create the Pipeline

In [None]:
pipeline = Pipeline(workspace=ws, steps=[reduce_step, parition_step, solve_step, merge_step])
print("Pipeline is built")

pipeline_run = Experiment(ws, 'optimization_example').submit(pipeline)
print("Pipeline is submitted for execution")

RunDetails(pipeline_run).show()

pipeline_run.wait_for_completion(show_output=True)