In [1]:
import azureml.core
from azureml.core import Workspace, Datastore, Dataset

# Load the workspace from the local configuration and fetch its default datastore.
mlonazure_ws = Workspace.from_config()
mlonazure_ds = mlonazure_ws.get_default_datastore()

# Print a short environment summary, one labelled value per line.
print('\n'.join([
    'Workspace Name: ' + mlonazure_ws.name,
    'Resource Group: ' + mlonazure_ws.resource_group,
    'Default Storage Account Name: ' + mlonazure_ds.account_name,
    'AzureML Core Version: ' + azureml.core.VERSION,
]))

Workspace Name: mlonazure-ws
Resource Group: mlonazure-rg
Default Storage Account Name: mlonazurews7669880942
AzureML Core Version: 1.3.0


In [2]:
from azureml.core.compute import AmlCompute

# Look up the existing AmlCompute target in the workspace by its name.
compute_cluster_name = 'amlcluster2c-4gb'
compute_target = AmlCompute(mlonazure_ws, compute_cluster_name)

print('Compute Name: {}'.format(compute_target.name))

Compute Name: amlcluster2c-4gb


In [3]:
# Prerequisite: run "../00. Get Data from OpenDatasets.ipynb" first; that notebook
# fetches the data and registers the two datasets looked up below.
datasetName_train = 'OJSales_Train_Subset'
datasetName_validate = 'OJSales_Validate_Subset'

# Fetch the latest registered version of each dataset from the workspace.
data_train_ds = Dataset.get_by_name(
    workspace=mlonazure_ws,
    name=datasetName_train,
    version='latest')
data_validate_ds = Dataset.get_by_name(
    workspace=mlonazure_ws,
    name=datasetName_validate,
    version='latest')

In [40]:
from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.core import PipelineParameter
import logging

# Columns of the training dataset used as the label and as the time axis.
target_column_name = 'Quantity'
dateColumn = 'WeekStarting'

# Pipeline-level parameter for the forecast horizon.
# NOTE(review): this parameter defaults to 30 while the AutoMLConfig below is built
# with mymaxhorizon = 6 -- confirm which horizon is intended.
max_horizon_param = PipelineParameter(name='max_horizon', default_value=30)

mymaxhorizon = 6
training_data = data_train_ds

# Extra settings unpacked into AutoMLConfig as keyword arguments.
automl_settings = {"max_horizon": mymaxhorizon}

automl_config = AutoMLConfig(
    # What to train and on which data
    task='forecasting',
    training_data=training_data,
    label_column_name=target_column_name,
    time_column_name=dateColumn,
    grain_column_names=['Store', 'Brand'],
    # drop_column_names=['', ''],
    featurization='auto',
    n_cross_validations=3,
    primary_metric='r2_score',
    # Where and how long to run
    compute_target=compute_target,
    iteration_timeout_minutes=120,
    experiment_timeout_hours=10,
    # 100 is the max, but it's one iteration per node, so this should be set
    # to the cluster's max node count.
    max_concurrent_iterations=20,
    max_cores_per_iteration=-1,
    enable_dnn=False,
    enable_early_stopping=True,
    model_explainability=True,
    # Local project folder and logging
    path='./project',
    debug_log='automl_debuglog.log',
    verbosity=logging.INFO,
    **automl_settings
)

In [41]:
from azureml.pipeline.core import PipelineData, TrainingOutput, PipelineDataset
from azureml.pipeline.steps import AutoMLStep

# Outputs of the AutoML step, all stored on the workspace's default datastore.

# Metrics produced by the AutoML run.
metrics_data = PipelineData(
    name='metrics_data',
    datastore=mlonazure_ds,
    pipeline_output_name='metrics_output_name',
    training_output=TrainingOutput(type='Metrics'))

# Model output with no explicit metric given.
model_data_bestmodel = PipelineData(
    name='model_data_bestmodel',
    datastore=mlonazure_ds,
    pipeline_output_name='best_model_output_name',
    training_output=TrainingOutput(type='Model'))

# Model output tied to the normalized RMSE metric
# (presumably the best model according to that metric -- TODO confirm).
model_data_normalizedrmse = PipelineData(
    name='model_data_normalizedrmse',
    datastore=mlonazure_ds,
    pipeline_output_name='nrmse_model_output_name',
    training_output=TrainingOutput(
        type='Model',
        metric='normalized_root_mean_squared_error'))

automl_step = AutoMLStep(
    name='TrainOJSalesData_AutoML',
    automl_config=automl_config,
    inputs=None,
    outputs=[metrics_data, model_data_bestmodel, model_data_normalizedrmse],
    allow_reuse=False,  # always re-run the step
    version=None,
    # NOTE(review): the previous comment said this was set to False so that the step
    # verifies the parameters, but the value passed is True -- confirm the intent.
    passthru_automl_config=True,
    script_repl_params={
        'mymaxhorizon': max_horizon_param,
        'max_concurrent_iterations': '10',
        'max_horizon': max_horizon_param,
    })

In [42]:
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep

# Settings for the model-registration step.
source_directory = 'Scripts'
model_name = 'OJForecastingModel'
model_description = 'Built using AutoML'

# Step that runs registermodel.py from the source directory; the AutoML model output
# is declared as an input and also passed to the script via --model_path.
pyscriptStepRegisterModel = PythonScriptStep(
    name="pystepRegisterModel",
    script_name="registermodel.py",
    source_directory=source_directory,
    compute_target=compute_target,
    inputs=[model_data_bestmodel],
    arguments=[
        "--model_path", model_data_bestmodel,
        "--model_name", model_name,
        "--model_description", model_description,
    ],
    allow_reuse=False)

print("pyscriptStepRegisterModel Script Step Created")

pyscriptStepRegisterModel Script Step Created


In [43]:
from azureml.pipeline.core import Pipeline

# Assemble the pipeline from the AutoML training step and the model-registration step.
amlpipeline = Pipeline(
    workspace=mlonazure_ws,
    description='ForecastOJSales Training Pipeline!',
    steps=[automl_step, pyscriptStepRegisterModel])

In [44]:
from azureml.core import Experiment 

# Experiment under which the pipeline run is recorded.
experiment_name = 'OJ_Sales_Quantity_Prediction_Pipeline'
experiment = Experiment(workspace=mlonazure_ws, name=experiment_name)

# Tag the experiment, applying the tags one at a time in a fixed order.
for tag_key, tag_value in (('Author', 'mlonazure'),
                           ('Department', 'Finance'),
                           ('Executor', 'Manual')):
    experiment.tag(tag_key, tag_value)

# Submit the pipeline, then wait until the run finishes.
exp_pipelinerun = experiment.submit(amlpipeline)
exp_pipelinerun.wait_for_completion()

Created step TrainOJSalesData_AutoML [78aeac88][47abeaff-3ec8-42f3-88c0-16aa8401ed86], (This step will run and generate new outputs)
Created step pystepRegisterModel [0e5f35d9][31dbdf24-c178-4c64-ac52-5c98be8a6d02], (This step will run and generate new outputs)
Submitted PipelineRun 558de722-7775-4789-ad30-556f29af582e
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/OJ_Sales_Quantity_Prediction_Pipeline/runs/558de722-7775-4789-ad30-556f29af582e?wsid=/subscriptions/286d2a8f-e90b-4e3f-b4d7-08971a2ab3fe/resourcegroups/mlonazure-rg/workspaces/mlonazure-ws
PipelineRunId: 558de722-7775-4789-ad30-556f29af582e
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/OJ_Sales_Quantity_Prediction_Pipeline/runs/558de722-7775-4789-ad30-556f29af582e?wsid=/subscriptions/286d2a8f-e90b-4e3f-b4d7-08971a2ab3fe/resourcegroups/mlonazure-rg/workspaces/mlonazure-ws
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 1ee2fdfb-4e72-41d4-af5c-239c16107

ExperimentExecutionException: ExperimentExecutionException:
	Message: The output streaming for the run interrupted.
But the run is still executing on the compute target. 
Details for canceling the run can be found here: https://aka.ms/aml-docs-cancel-run
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "The output streaming for the run interrupted.\nBut the run is still executing on the compute target. \nDetails for canceling the run can be found here: https://aka.ms/aml-docs-cancel-run"
    }
}

In [None]:
# Publish the pipeline under a fixed name so it is available for re-running.
# Pipeline.publish accepts `continue_on_failure`; `continue_on_step_failure` is the
# keyword used when submitting a run, and passing it here raises TypeError.
amlpipeline.publish(
        name='OJSalesTrainingPipeline', 
        description='AML Pipeline for automation', 
        version=None, 
        continue_on_failure=False)