In [None]:
from azureml.core import Workspace

# Connect to the workspace described by the saved configuration.
ws = Workspace.from_config()

# Query the workspace details. The return value is not captured and, because
# this is not the last expression of the cell, the notebook does not render it.
ws.get_details()

# Default datastore of the workspace; used later as the pipeline output store.
dstore = ws.get_default_datastore()

# Print a short workspace summary, one attribute per line.
workspace_summary = [
    'Workspace Name: ' + ws.name,
    'Azure Region: ' + ws.location,
    'Subscription Id: ' + ws.subscription_id,
    'Resource Group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

In [None]:
from azureml.core import Experiment

# Experiment under which the training pipeline run is submitted.
experiment = Experiment(workspace=ws, name='github_training_pipeline')

# Confirm which experiment is in use.
print('Experiment name: ' + experiment.name)

In [None]:
from azureml.core.dataset import Dataset

# Name of the registered dataset; also recorded later in the run tags.
dataset_name = 'oj_data_small'

# Look the dataset up in the workspace by name.
small_dataset = Dataset.get_by_name(workspace=ws, name=dataset_name)

# Named input handed to the parallel run step.
small_dataset_input = small_dataset.as_named_input('oj_dataset')

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Environment used by the training step.
train_env = Environment(name="many_models_environment")

# pip dependencies for the training script. 'scikit-learn' is the actual PyPI
# distribution name; 'sklearn' is a deprecated placeholder package that pip
# can no longer install, which would break the environment build.
train_conda_deps = CondaDependencies.create(pip_packages=['scikit-learn', 'pmdarima'])
train_env.python.conda_dependencies = train_conda_deps

In [None]:
from azureml.contrib.pipeline.steps import ParallelRunConfig
from azureml.core.compute import AmlCompute

# Compute target and parallelism settings for the training step.
compute = AmlCompute(workspace=ws, name="cpucluster")
node_count = 5
process_count_per_node = 8
timeout = 500

# Tags passed along when the pipeline is submitted; they record the run settings.
tags = {
    'dataset_name': dataset_name,
    'node_count': node_count,
    'process_count_per_node': process_count_per_node,
    'timeout': timeout,
}

# Configuration for running scripts/train.py in parallel on the compute above,
# inside the training environment.
parallel_run_config = ParallelRunConfig(
    compute_target=compute,
    node_count=node_count,
    process_count_per_node=process_count_per_node,
    environment=train_env,
    source_directory='./scripts',
    entry_script='train.py',
    mini_batch_size="1",
    run_invocation_timeout=timeout,
    error_threshold=10,
    output_action="append_row",
)

In [None]:
from azureml.pipeline.core import PipelineData

# Pipeline output location for the training step, stored on the default datastore.
output_dir = PipelineData(name="training_output", datastore=dstore)

In [None]:
from azureml.contrib.pipeline.steps import ParallelRunStep

# Arguments handed to the step, written as flag/value pairs.
training_arguments = [
    '--target_column', 'Quantity',
    '--n_test_periods', 6,
    '--timestamp_column', 'WeekStarting',
    '--stepwise_training', True,
]

# Training step: consumes the named dataset input and writes to output_dir.
parallel_run_step = ParallelRunStep(
    name="many-models-training",
    parallel_run_config=parallel_run_config,
    inputs=[small_dataset_input],
    output=output_dir,
    arguments=training_arguments,
)

In [None]:
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

# Single-step pipeline wrapping the parallel training step.
training_steps = [parallel_run_step]
pipeline = Pipeline(workspace=ws, steps=training_steps)

# Submit the pipeline under the experiment, attaching the tags built earlier.
run = experiment.submit(pipeline, tags=tags)

# Show the run-details widget for the submitted run.
run_details_widget = RunDetails(run)
run_details_widget.show()

In [None]:
# Optional: uncomment to block until the pipeline run completes and show its output.
# run.wait_for_completion(show_output=True)

In [None]:
# Publish the pipeline; the published pipeline's id is used to attach a schedule.
published_pipeline = pipeline.publish(
    name='train_many_models',
    description='train many models',
    version='1',
    continue_on_step_failure=False,
)

In [None]:
from azureml.pipeline.core import Schedule, ScheduleRecurrence
    
# Id of the published training pipeline that the schedule will trigger.
training_pipeline_id = published_pipeline.id

# Monthly recurrence (interval of 1) anchored at the given start time.
recurrence = ScheduleRecurrence(
    frequency="Month",
    interval=1,
    start_time="2020-01-01T09:00:00",
)

# Register the recurring schedule for the published pipeline under the experiment.
recurring_schedule = Schedule.create(
    ws,
    name="training_pipeline_recurring_schedule",
    description="Schedule Training Pipeline to run on the first day of every month starting Jan 1, 2020 at 9AM",
    pipeline_id=training_pipeline_id,
    experiment_name=experiment.name,
    recurrence=recurrence,
)