In [None]:
from azureml.core import Workspace 

# Load the workspace handle from the local configuration.
ws = Workspace.from_config()

# Build every summary line first, then emit them in a single print call so the
# output is one line per workspace attribute.
workspace_summary = [
    'Workspace Name: ' + ws.name,
    'Azure Region: ' + ws.location,
    'Subscription Id: ' + ws.subscription_id,
    'Resource Group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

In [None]:
from azureml.core import Experiment

# Experiment under which the pipeline run is submitted later in the notebook.
experiment = Experiment(workspace=ws, name='forecasting_pipeline')

In [None]:
from azureml.core import Datastore

# Default datastore of the workspace; later cells use it to back the pipeline's
# intermediate output and read its storage account name/key from it.
dstore = ws.get_default_datastore()

In [None]:
from azureml.core.dataset import Dataset

# Look up the dataset registered in the workspace under the name 'oj_data_small'.
small_dataset = Dataset.get_by_name(workspace=ws, name='oj_data_small')

# Named input handed to the parallel run step further down.
small_dataset_input = small_dataset.as_named_input('forecast_10_models')

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Environment used by the parallel forecasting step.
forecast_env = Environment(name="many_models_environment")

# Install scikit-learn under its real distribution name. The 'sklearn' name on
# PyPI is only a deprecated placeholder whose installation is intentionally
# failed, which would break the environment build.
forecast_conda_deps = CondaDependencies.create(pip_packages=['scikit-learn', 'pmdarima'])
forecast_env.python.conda_dependencies = forecast_conda_deps

In [None]:
from azureml.contrib.pipeline.steps import ParallelRunStep, ParallelRunConfig 
from azureml.core.compute import AmlCompute

# Sizing knobs for the parallel run.
process_count_per_node = 8
node_count = 5
timeout = 500  # passed as run_invocation_timeout below

# Record the sizing choices as tags; they are attached to the run at submit time.
tags = {
    'node_count': node_count,
    'process_count_per_node': process_count_per_node,
    'timeout': timeout,
}

# Handle to the compute target named "cpucluster" in the workspace.
compute = AmlCompute(ws, "cpucluster")

parallel_run_config = ParallelRunConfig(
    # What to run and in which environment
    source_directory='./scripts',
    entry_script='forecast.py',
    environment=forecast_env,
    # Where to run it and how wide
    compute_target=compute,
    node_count=node_count,
    process_count_per_node=process_count_per_node,
    # Batching, limits and output handling
    mini_batch_size='1',
    run_invocation_timeout=timeout,
    error_threshold=10,
    output_action='append_row',
)

In [None]:
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep

# Intermediate pipeline output, stored on the default datastore; the parallel
# step writes to it and the copy step reads from it.
output_dir = PipelineData(name='output_dir', datastore=dstore)

# Command-line arguments forwarded to the entry script.
forecast_arguments = [
    '--forecast_horizon', 8,
    '--starting_date', '1992-10-01',
]

parallelrun_step = ParallelRunStep(
    name="many-models-forecasting",
    parallel_run_config=parallel_run_config,
    inputs=[small_dataset_input],
    output=output_dir,
    arguments=forecast_arguments,
)

# Register a "predictions" blob container as a datastore in the workspace.
# It reuses the storage account name and key of the default datastore.
output_dstore = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name="predictions",
    # Storage account credentials taken from the default datastore
    account_name=dstore.account_name,
    account_key=dstore.account_key,
    # Target container
    container_name="predictions",
    create_if_not_exists=True,
)

# Reference to the predictions datastore so it can be passed as a step input.
output_dref = DataReference(datastore=output_dstore)

# Arguments for copy_predictions.py: the parallel step's output and the
# destination reference.
copy_arguments = [
    '--parallel_run_step_output', output_dir,
    '--output_dir', output_dref,
]

upload_predictions_step = PythonScriptStep(
    name="copy_predictions",
    source_directory='./scripts',
    script_name="copy_predictions.py",
    compute_target=compute,
    inputs=[output_dref, output_dir],
    arguments=copy_arguments,
)

In [None]:
from azureml.pipeline.core import Pipeline

# Steps that make up the pipeline: forecasting first in the list, then the copy
# step that consumes its output.
pipeline_steps = [parallelrun_step, upload_predictions_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)

# Submit under the experiment, attaching the sizing tags defined earlier.
run = experiment.submit(pipeline, tags=tags)

In [None]:
# Wait for the submitted pipeline run to complete; show_output=True requests
# that the run's output be shown while waiting.
run.wait_for_completion(show_output=True)