# 1.0 Load libraries

In [33]:
import os
from dotenv import load_dotenv, find_dotenv

import azureml.core
from azureml.core.authentication import AzureCliAuthentication
from azureml.core import Workspace, Experiment, Datastore, Environment, Dataset
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails

from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

from azureml.pipeline.core import Pipeline, PipelineParameter, PipelineData
from azureml.pipeline.steps import PythonScriptStep

from azureml.data.data_reference import DataReference

from azureml.contrib.pipeline.steps import ParallelRunStep, ParallelRunConfig

# 1.1 Setup some environment
## 1.1.1 Load variables

In [42]:
load_dotenv(find_dotenv('../.env'))

ws_name = os.environ['AML_WORKSPACE_NAME']
subscription_id = os.environ['AML_SUBSCRIPTION_ID']
resource_group = os.environ['AML_RESOURCE_GROUP']
tenant_id = os.environ['AML_TENANT_ID']
min_nodes = int(os.environ['AML_MIN_NODES'])
max_nodes = int(os.environ['AML_MAX_NODES'])

aml_compute_target = os.environ['AML_COMPUTE_NAME']
model_input_path = os.environ['MODEL_INPUT_PATH'] 

order_file = os.environ['MODEL_INPUT_ORDER_FILE']
distance_file = os.environ['MODEL_INPUT_DISTANCE_FILE']

print('---- Check Azure setting ----')
print(f'AML Workspace name       : {ws_name}')
print(f'Subscription ID          : {subscription_id}')
print(f'Resource group           : {resource_group}')
print(f'tenant id                : {tenant_id}')
print(f'min nodes of AML compute : {min_nodes}')
print(f'max nodes of AML compute : {max_nodes}')
print(f'AML compute target       : {aml_compute_target}')
print(f'Input path for models    : {model_input_path}')
print(f'Model input order file   : {order_file}')
print(f'Model input distance file: {distance_file}')


---- Check Azure setting ----
AML Workspace name       : amlrouteoptimization
Subscription ID          : 0b3f04a9-6375-4341-a513-dd53731a99a4
Resource group           : dstoolkit-route-optimization
tenant id                : 72f988bf-86f1-41af-91ab-2d7cd011db47
min nodes of AML compute : 0
max nodes of AML compute : 4
AML compute target       : aml-compute-cluster
Input path for models    : models
Model input order file   : model_input_orders
Model input distance file: model_input_distance


# 1.1.2 Azure authentication and Load Azure ML Workspace

In [35]:
#!az login
!az login --use-device-code

[93mTo sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code PD8N3EE2F to authenticate.[0m
^C


In [36]:
cli_auth = AzureCliAuthentication()
ws =  Workspace.get(name=ws_name
                    ,subscription_id=subscription_id
                    ,resource_group=resource_group
                    ,auth=cli_auth)

print(ws.get_details())

{'id': '/subscriptions/0b3f04a9-6375-4341-a513-dd53731a99a4/resourceGroups/dstoolkit-route-optimization/providers/Microsoft.MachineLearningServices/workspaces/amlrouteoptimization', 'name': 'amlrouteoptimization', 'identity': {'principal_id': '6a398b87-cd4d-4ff3-b66f-344edbdd9bd5', 'tenant_id': '72f988bf-86f1-41af-91ab-2d7cd011db47', 'type': 'SystemAssigned'}, 'location': 'australiaeast', 'type': 'Microsoft.MachineLearningServices/workspaces', 'tags': {}, 'sku': 'Basic', 'workspaceid': 'bb87308f-2811-4bdc-8c73-339822d22a39', 'sdkTelemetryAppInsightsKey': 'bd22fe07-ca7e-4337-bd98-3d5116261c1f', 'description': '', 'friendlyName': 'amlrouteoptimization', 'keyVault': '/subscriptions/0b3f04a9-6375-4341-a513-dd53731a99a4/resourceGroups/dstoolkit-route-optimization/providers/Microsoft.Keyvault/vaults/amlrouteoptimi7471905268', 'applicationInsights': '/subscriptions/0b3f04a9-6375-4341-a513-dd53731a99a4/resourceGroups/dstoolkit-route-optimization/providers/Microsoft.insights/components/amlroute

# 1.1.3 Get Compute Cluster

In [37]:
# Retrieve or create an Aml compute
aml_compute_target = os.environ['AML_COMPUTE_NAME']
try:
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("found existing compute target.")
except ComputeTargetException:
    print("creating new compute target")
    
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
                                                                min_nodes = min_nodes, 
                                                                max_nodes = max_nodes)    
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

found existing compute target.


# 1.1.4 Create Run Configuration

In [38]:
# Default datastore (Azure blob storage)
def_blob_store = ws.get_default_datastore()

# source directory
source_directory = '../src'
    
print(f'Source code is in {source_directory} directory.')

Source code is in ../src directory.


In [39]:
# create a new runconfig object
run_config = RunConfiguration()

# environment
env = Environment('op-env')

# enable Docker 
env.docker.enabled = True
# set Docker base image to the default CPU-based image
env.docker.base_image = DEFAULT_CPU_IMAGE
# use conda_dependencies.yml to create a conda environment in the Docker image for execution
env.python.user_managed_dependencies = False
# specify CondaDependencies obj
env.python.conda_dependencies = CondaDependencies.create(conda_packages=['or-tools'])

# set environment
run_config.environment = env

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


# 1.2 Set up Azure ML Pipeline
## 1.2.1 Reduce the search space of the problem

In [43]:
model_input = DataReference(
    datastore=def_blob_store,
    data_reference_name="order",
    path_on_datastore=order_file)

# The pair-wise distance between cities
distance = DataReference(
    datastore=def_blob_store,
    data_reference_name="distance",
    path_on_datastore=distance_file)

# Naming the intermediate data 
model_result_partial = PipelineData("model_result_partial",datastore=def_blob_store)
model_input_reduced = PipelineData("model_input_reduced",datastore=def_blob_store)

reduce_step = PythonScriptStep(
    script_name="reduce.py", 
    arguments=["--model_input", model_input
            , "--model_result_partial", model_result_partial
            , "--model_input_reduced", model_input_reduced],
    inputs=[model_input],
    outputs=[model_result_partial
            , model_input_reduced],
    compute_target=aml_compute, 
    source_directory=source_directory,
    runconfig=run_config
)

# 1.2.2 Partition the problem

In [44]:
# Naming the intermediate data 
model_input_list = PipelineData("model_input_list",datastore=def_blob_store)

parition_step = PythonScriptStep(
    script_name="partiition.py", 
    arguments=["--model_input_reduced", model_input_reduced
            , "--model_input_list", model_input_list],
    inputs=[model_input_reduced],
    outputs=[model_input_list],
    compute_target=aml_compute, 
    source_directory=source_directory,
    runconfig=run_config
)

## 1.2.3 Solve individual problem

In [45]:
# Naming the intermediate data 
model_result_list = PipelineData("model_result_list", datastore=def_blob_store)

parallel_run_config = ParallelRunConfig(
    source_directory=source_directory,
    entry_script='solve.py',
    mini_batch_size="1",
    error_threshold=1,
    output_action="append_row",
    environment=env,
    compute_target=aml_compute,
    node_count=max_nodes)

In [46]:
solve_step = ParallelRunStep(
    name="solve",
    inputs=[model_input_list],    ## Which class to be used?
    output=model_result_list,
    parallel_run_config=parallel_run_config,
    allow_reuse=False
)

Exception: Step input must be of any type: dict_keys([<class 'azureml.data.tabular_dataset.TabularDataset'>, <class 'azureml.pipeline.core.pipeline_output_dataset.PipelineOutputTabularDataset'>, <class 'azureml.data.file_dataset.FileDataset'>, <class 'azureml.pipeline.core.pipeline_output_dataset.PipelineOutputFileDataset'>]), found <class 'azureml.pipeline.core.builder.PipelineData'>

## 1.2.4 Merge the results

In [None]:
# Naming the intermediate data 
model_result_final = PipelineData("model_result_final",datastore=def_blob_store)

merge_step = PythonScriptStep(
    script_name="merge.py", 
    arguments=["--model_input", model_input, "--model_result_partial", model_result_partial, "--model_result_list", model_result_list, "--model_result_final", model_result_final],
    inputs=[model_input, model_result_partial, model_result_list],
    outputs=[model_result_final],
    compute_target=aml_compute, 
    source_directory=source_directory,
    runconfig=run_config
)

# 1.3 Create the Pipeline

In [None]:
pipeline = Pipeline(workspace=ws, steps=[reduce_step, parition_step, solve_step, merge_step])
print("Pipeline is built")

pipeline_run = Experiment(ws, 'optimization_example').submit(pipeline)
print("Pipeline is submitted for execution")

RunDetails(pipeline_run).show()

pipeline_run.wait_for_completion(show_output=True)