In [None]:
from azureml.core import Workspace 

# Connect to the Azure ML workspace resolved by Workspace.from_config().
ws = Workspace.from_config()

# Echo the workspace identity, one field per line, as a quick sanity check.
print(
    '\n'.join([
        'Workspace Name: ' + ws.name,
        'Azure Region: ' + ws.location,
        'Subscription Id: ' + ws.subscription_id,
        'Resource Group: ' + ws.resource_group,
    ])
)

In [None]:
from azureml.core.compute import AmlCompute, ComputeTarget

# Handle to the 'cluster20nodes' AmlCompute target in this workspace; the
# later cells use it as the compute target for the parallel run and run config.
compute = AmlCompute(workspace=ws, name='cluster20nodes')

In [None]:
from azureml.core import Experiment

# Experiment under which the pipeline run is submitted and the schedule is created.
experiment = Experiment(workspace=ws, name='forecasting-pipeline')

In [None]:
from azureml.core import Datastore

# Default datastore of the workspace; passed later as the datastore backing the pipeline's output.
dstore = ws.get_default_datastore()

In [None]:
from azureml.core.dataset import Dataset

# Look up the dataset named 'oj_data_small' in the workspace, then wrap it as
# a named input ('forecast_10_models') so it can be handed to the pipeline step.
small_dataset = Dataset.get_by_name(workspace=ws, name='oj_data_small')
small_dataset_input = small_dataset.as_named_input('forecast_10_models')

In [None]:
from azureml.core import Environment
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
from azureml.core.conda_dependencies import CondaDependencies

# Environment handed to the parallel run config, i.e. where forecast.py executes.
forecast_env = Environment(name="many_models_environment")

# 'scikit-learn' is the real PyPI distribution. The 'sklearn' name is a
# deprecated placeholder whose installation now fails, which would break the
# environment build, so the canonical package name is used instead.
forecast_conda_deps = CondaDependencies.create(pip_packages=['scikit-learn', 'pmdarima'])
forecast_env.python.conda_dependencies = forecast_conda_deps

# Run inside Docker, based on the SDK's default CPU image.
forecast_env.docker.enabled = True
forecast_env.docker.base_image = DEFAULT_CPU_IMAGE

In [None]:
from azureml.contrib.pipeline.steps import ParallelRunStep, ParallelRunConfig 

# Sizing knobs consumed by the ParallelRunConfig built right after this.
process_count_per_node = 8
node_count = 5
timeout = 500  # forwarded as run_invocation_timeout

# The same settings, keyed by name, are attached as tags when the pipeline
# run is submitted so each run records the configuration it used.
tags = {
    'node_count': node_count,
    'process_count_per_node': process_count_per_node,
    'timeout': timeout,
}

# Configuration for the parallel forecasting step, grouped by concern.
parallel_run_config = ParallelRunConfig(
    # What to run.
    source_directory='./scripts',
    entry_script='forecast.py',
    environment=forecast_env,
    # Where to run it and how wide to fan out.
    compute_target=compute,
    node_count=node_count,
    process_count_per_node=process_count_per_node,
    # Batching, time limit and failure tolerance.
    mini_batch_size='1',
    run_invocation_timeout=timeout,
    error_threshold=10,
    # How per-batch results are collected.
    output_action='append_row',
)

In [None]:
from azureml.pipeline.core import PipelineData

# NOTE(review): datasetname is not read by any cell visible here; kept in case
# a later cell depends on it.
datasetname = 'stores_filedatasets'

# Pipeline output location for the forecasts, backed by the default datastore.
output_dir = PipelineData(name='forecasting_output', datastore=dstore)

# The single pipeline step: runs the parallel config over the named dataset
# input and forwards the forecast settings as arguments to the entry script.
parallelrun_step = ParallelRunStep(
    name="many-models-forecasting",
    parallel_run_config=parallel_run_config,
    inputs=[small_dataset_input],
    output=output_dir,
    arguments=[
        '--forecast_horizon', 8,
        '--starting_date', '1992-10-01',
    ],
)

In [None]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.runconfig import CondaDependencies

# Package set for the run configuration: pandas from conda, azureml-pipeline-core from pip.
cd = CondaDependencies.create(conda_packages=['pandas'], pip_packages=['azureml-pipeline-core'])

# Python run configuration pointed at the same cluster as the parallel step.
# NOTE(review): run_config is not referenced by any later cell visible here —
# confirm it is still needed.
run_config = RunConfiguration(framework="python")
run_config.environment.python.conda_dependencies = cd
run_config.target = compute

In [None]:
from azureml.pipeline.core import Pipeline

# Assemble the one-step pipeline and submit it under the experiment, tagging
# the run with the parallel-run sizing settings.
pipeline = Pipeline(workspace=ws, steps=[parallelrun_step])
run = experiment.submit(config=pipeline, tags=tags)

In [None]:
# run.wait_for_completion(show_output=True)

In [None]:
# Publish the pipeline; the resulting published pipeline's id is what the
# schedule is created against.
published_pipeline = pipeline.publish(
    name='forecast_many_models',
    version='1',
    description='forecast many models and log the run',
    continue_on_step_failure=False,
)

In [None]:
from azureml.pipeline.core import Schedule, ScheduleRecurrence
    
# Id of the published pipeline that the schedule will trigger.
forecast_pipeline_id = published_pipeline.id

# Weekly recurrence (frequency="Week", interval=1) anchored at
# 2020-01-01 13:00. No time_zone is passed to ScheduleRecurrence.
recurrence = ScheduleRecurrence(frequency="Week", interval=1, start_time="2020-01-01T13:00:00")

# The description previously claimed a monthly cadence ("first day of every
# month"), contradicting the weekly recurrence above. The text is corrected
# to describe what the schedule actually does; the cadence itself is unchanged.
recurring_schedule = Schedule.create(
    ws,
    name="Forecasting-Pipeline-Recurring-Schedule",
    description="Schedule forecasting Pipeline to run every week starting Jan 1, 2020 at 1PM",
    pipeline_id=forecast_pipeline_id,
    experiment_name=experiment.name,
    recurrence=recurrence,
)