# 1.0 Load libraries

In [None]:
import os
from dotenv import load_dotenv, find_dotenv

import azureml.core
from azureml.core.authentication import AzureCliAuthentication
from azureml.core import Workspace, Experiment, Datastore, Environment, Dataset
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails

from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

from azureml.pipeline.core import Pipeline, PipelineParameter, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data import OutputFileDatasetConfig

from azureml.data.data_reference import DataReference

from azureml.contrib.pipeline.steps import ParallelRunStep, ParallelRunConfig

# 1.1 Set up the environment
## 1.1.1 Load variables

In [None]:
# Locate ../.env with find_dotenv and load its entries into the environment.
load_dotenv(find_dotenv('../.env'))

# Azure ML workspace and compute settings. os.environ is indexed directly, so a
# missing variable raises KeyError in this cell instead of failing later on.
ws_name = os.environ['AML_WORKSPACE_NAME']
subscription_id = os.environ['AML_SUBSCRIPTION_ID']
resource_group = os.environ['AML_RESOURCE_GROUP']
tenant_id = os.environ['AML_TENANT_ID']
# Node counts arrive as strings and are converted to int (ValueError if malformed).
min_nodes = int(os.environ['AML_MIN_NODES'])
max_nodes = int(os.environ['AML_MAX_NODES'])
aml_compute_target = os.environ['AML_COMPUTE_NAME']

# Locations of the model input and output data.
model_input_path = os.environ['MODEL_INPUT_PATH']
order_file = os.environ['MODEL_INPUT_ORDER_FILE']
distance_file = os.environ['MODEL_INPUT_DISTANCE_FILE']
model_output_path = os.environ['MODEL_OUTPUT_PATH']

# Echo every setting so a wrong .env is spotted before anything is submitted.
print('---- Check Azure setting ----')
print(f'AML Workspace name       : {ws_name}')
print(f'Subscription ID          : {subscription_id}')
print(f'Resource group           : {resource_group}')
print(f'tenant id                : {tenant_id}')
print(f'min nodes of AML compute : {min_nodes}')
print(f'max nodes of AML compute : {max_nodes}')
print(f'AML compute target       : {aml_compute_target}')
print(f'Input path for models    : {model_input_path}')
print(f'Model input order file   : {order_file}')
print(f'Model input distance file: {distance_file}')
# This line shows MODEL_OUTPUT_PATH; it was previously mislabeled "Input output".
print(f'Output path for models   : {model_output_path}')

## 1.1.2 Authenticate with Azure and load the Azure ML workspace

In [None]:
# Sign in through the Azure CLI using the device-code flow (IPython shell magic).
# The following cell authenticates with AzureCliAuthentication, which presumably
# relies on this CLI login -- confirm.
# Alternative without a device code:
#!az login
!az login --use-device-code

In [None]:
# Authentication object based on the Azure CLI.
cli_auth = AzureCliAuthentication()

# Look up the existing workspace identified by the settings loaded earlier.
ws = Workspace.get(
    name=ws_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
    auth=cli_auth,
)

# Print the workspace details as a quick check that the lookup succeeded.
print(ws.get_details())

## 1.1.3 Get Compute Cluster

In [None]:
# Reuse the AmlCompute cluster named by AML_COMPUTE_NAME, or create it when the
# lookup fails.
aml_compute_target = os.environ['AML_COMPUTE_NAME']
try:
    aml_compute = AmlCompute(ws, aml_compute_target)
except ComputeTargetException:
    # Lookup failed (typically because the cluster does not exist yet):
    # provision a new one sized by the min/max node settings.
    print("Creating new compute target")

    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",
        min_nodes=min_nodes,
        max_nodes=max_nodes,
    )
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    # Block until provisioning finishes, for at most 20 minutes.
    aml_compute.wait_for_completion(
        show_output=True,
        min_node_count=None,
        timeout_in_minutes=20,
    )
else:
    print("Found existing compute target.")

## 1.1.4 Create Run Configuration

In [None]:
# Default datastore (Azure blob storage)
def_blob_store = ws.get_default_datastore()

# source directory
source_directory = '../src/'
print(f'Source code is in {source_directory} directory.')

In [None]:
# create a new runconfig object
run_config = RunConfiguration()

# environment
env = Environment('op-env')

# enable Docker 
env.docker.enabled = True
# set Docker base image to the default CPU-based image
env.docker.base_image = DEFAULT_CPU_IMAGE
# use conda_dependencies.yml to create a conda environment in the Docker image for execution
env.python.user_managed_dependencies = False
# specify CondaDependencies obj
env.python.conda_dependencies = CondaDependencies.create(
        conda_packages=['pandas']
        ,pip_packages=['ortools'
                        ,'azureml-sdk']
    )

# set environment
run_config.environment = env

# 1.2 Set up Azure ML Pipeline
## 1.2.1 Reduce the search space of the problem

In [None]:
# Order file: a pipeline parameter whose default points at
# model_input/order_small.csv on the default datastore, paired with a mount
# binding.
# NOTE(review): the paths here are hard-coded although MODEL_INPUT_PATH,
# MODEL_INPUT_ORDER_FILE and MODEL_INPUT_DISTANCE_FILE are read from the
# environment earlier -- confirm which source is intended.
orderpath = DataPath(
    datastore=def_blob_store,
    path_on_datastore='model_input/order_small.csv',
)
order_input = (
    PipelineParameter(name="order_data", default_value=orderpath),
    DataPathComputeBinding(mode='mount'),
)

# Distance file: same pattern, defaulting to model_input/distance.csv.
datapath = DataPath(
    datastore=def_blob_store,
    path_on_datastore='model_input/distance.csv',
)
distance_input = (
    PipelineParameter(name="distance_data", default_value=datapath),
    DataPathComputeBinding(mode='mount'),
)

# Intermediate outputs of the reduce step.
# NOTE(review): both outputs use the same destination folder 'reduced_result/';
# verify that their files are not meant to be kept apart.
model_result_partial = OutputFileDatasetConfig(
    destination=(def_blob_store, 'reduced_result/'),
)
model_input_reduced = OutputFileDatasetConfig(
    destination=(def_blob_store, 'reduced_result/'),
)

In [None]:
# Reduce step: runs reduce.py with the order and distance files as inputs and
# the two intermediate datasets as outputs. allow_reuse=True lets Azure ML
# reuse the result of an earlier identical run.
reduce_step = PythonScriptStep(
    script_name="reduce.py",
    source_directory=source_directory,
    compute_target=aml_compute,
    runconfig=run_config,
    inputs=[order_input, distance_input],
    outputs=[model_result_partial, model_input_reduced],
    arguments=[
        "--model_input", order_input,
        "--distance", distance_input,
        "--model_result_partial", model_result_partial,
        "--model_input_reduced", model_input_reduced,
    ],
    allow_reuse=True,
)

In [None]:
# Pipeline containing only the reduce step.
pipeline = Pipeline(workspace=ws, steps=[reduce_step])
print("Pipeline is built")

# Submit it under a dedicated experiment.
pipeline_run = Experiment(
    workspace=ws,
    name='optimization_example_reduce_kyiwasak',
).submit(pipeline)
print("Pipeline is submitted for execution")

# Uncomment to follow the run in the notebook widget:
#RunDetails(pipeline_run).show()

# Block until the run finishes, without streaming its logs.
pipeline_run.wait_for_completion(show_output=False)

## 1.2.2 Partition the problem

In [None]:
# Output of this step: the partitioned model inputs, written under
# 'partition_result/' on the default datastore.
model_input_list = OutputFileDatasetConfig(destination=(def_blob_store, 'partition_result/'))

# model_input_reduced is produced by reduce_step. An OutputFileDatasetConfig
# describes an output; to consume it in a later step it has to be turned into
# an input with as_input(). The same object is used in both `arguments` and
# `inputs` so they refer to one named input.
model_input_reduced_in = model_input_reduced.as_input(name='model_input_reduced')

# Partition step: runs partition.py on the reduced model input and the
# distance file.
# NOTE: the variable keeps its original spelling ("parition") because later
# cells refer to it by that name.
parition_step = PythonScriptStep(
    script_name="partition.py",
    arguments=[
        "--model_input_reduced", model_input_reduced_in,
        "--distance", distance_input,
        "--model_input_list", model_input_list,
    ],
    inputs=[model_input_reduced_in, distance_input],
    outputs=[model_input_list],
    compute_target=aml_compute,
    source_directory=source_directory,
    runconfig=run_config,
)

In [None]:
# Pipeline with the reduce and partition steps.
pipeline = Pipeline(
    workspace=ws,
    steps=[reduce_step, parition_step],
)
print("Pipeline is built")

# Submit it under a dedicated experiment.
pipeline_run = Experiment(
    workspace=ws,
    name='optimization_example_partition_kyiwasak',
).submit(pipeline)
print("Pipeline is submitted for execution")

# Block until the run finishes, streaming its logs.
pipeline_run.wait_for_completion(show_output=True)

## 1.2.3 (Under Construction) Solve individual problem

In [None]:
# Output of the solve step, written under 'solve_result/' on the default
# datastore. An earlier PipelineData-based variant is kept for reference:
#model_result_list = PipelineData("model_result_list", datastore=def_blob_store).as_dataset()

model_result_list = OutputFileDatasetConfig(destination=(def_blob_store, 'solve_result/'))

# Parallel-run settings: solve.py is the entry script, each mini batch holds
# one item, results are appended row by row, and the job uses up to max_nodes
# nodes of the cluster.
parallel_run_config = ParallelRunConfig(
    source_directory=source_directory,
    entry_script='solve.py',
    mini_batch_size="1",
    error_threshold=1,
    output_action="append_row",
    environment=env,
    compute_target=aml_compute,
    node_count=max_nodes)

# model_input_list is produced by the partition step. An
# OutputFileDatasetConfig describes an output, so it is converted with
# as_input() before being handed to this step as an input.
model_input_list_in = model_input_list.as_input(name='model_input_list')

# NOTE(review): distance_input is a (PipelineParameter, DataPathComputeBinding)
# pair. ParallelRunStep documents dataset-based `inputs` and a separate
# `side_inputs` parameter for reference data -- confirm where the distance
# file belongs before this cell is taken out of "Under Construction".
solve_step = ParallelRunStep(
    name="solve",
    inputs=[model_input_list_in, distance_input],
    output=model_result_list,
    parallel_run_config=parallel_run_config,
    allow_reuse=False
)

In [None]:
# Pipeline with the reduce, partition and solve steps.
pipeline = Pipeline(
    workspace=ws,
    steps=[reduce_step, parition_step, solve_step],
)
print("Pipeline is built")

# Submit it under a dedicated experiment.
pipeline_run = Experiment(
    workspace=ws,
    name='optimization_example_solve_kyiwasak',
).submit(pipeline)
print("Pipeline is submitted for execution")

# Show the run widget in the notebook, then block until the run finishes.
RunDetails(pipeline_run).show()

pipeline_run.wait_for_completion(show_output=True)

## 1.2.4 (Under Construction) Merge the results

In [None]:
# Final merged result, stored on the default datastore.
model_result_final = PipelineData("model_result_final",datastore=def_blob_store)

# Outputs of earlier steps are OutputFileDatasetConfig objects; they are
# converted with as_input() so this step consumes them instead of declaring
# them as its own outputs. Each converted object is reused in both
# `arguments` and `inputs`.
model_result_partial_in = model_result_partial.as_input(name='model_result_partial')
model_result_list_in = model_result_list.as_input(name='model_result_list')

# Merge step: runs merge.py on the original model input, the partial result of
# the reduce step and the results of the solve step.
# "--model_input" previously referenced an undefined name `model_input`
# (NameError). It now receives order_input, the value reduce_step passes under
# the same flag -- NOTE(review): confirm this is the input merge.py expects.
merge_step = PythonScriptStep(
    script_name="merge.py",
    arguments=[
        "--model_input", order_input,
        "--model_result_partial", model_result_partial_in,
        "--model_result_list", model_result_list_in,
        "--model_result_final", model_result_final,
    ],
    inputs=[order_input, model_result_partial_in, model_result_list_in],
    outputs=[model_result_final],
    compute_target=aml_compute,
    source_directory=source_directory,
    runconfig=run_config,
)

# 1.3 (Under Construction) Create the Pipeline

In [None]:
# Complete pipeline: reduce, partition, solve and merge.
pipeline = Pipeline(
    workspace=ws,
    steps=[reduce_step, parition_step, solve_step, merge_step],
)
print("Pipeline is built")

# Submit it under the main experiment.
pipeline_run = Experiment(
    workspace=ws,
    name='optimization_example_kyiwasak',
).submit(pipeline)
print("Pipeline is submitted for execution")

# Show the run widget in the notebook, then block until the run finishes.
RunDetails(pipeline_run).show()

pipeline_run.wait_for_completion(show_output=True)