In [77]:
import azureml.core
from azureml.core import Workspace, Datastore, Dataset, Experiment

mlonazure_ws = Workspace.from_config()

mlonazure_ds = mlonazure_ws.get_default_datastore()

print('Workspace Name: ' + mlonazure_ws.name, 
      'Resource Group: ' + mlonazure_ws.resource_group,
      'Default Storage Account Name: ' + mlonazure_ds.account_name,
      'AzureML Core Version: ' + azureml.core.VERSION,
      sep = '\n')

Workspace Name: mlonazure-ws
Resource Group: mlonazure-rg
Default Storage Account Name: mlonazurews7669880942
AzureML Core Version: 1.15.0


In [78]:
from azureml.core.compute import AmlCompute

compute_target = AmlCompute(mlonazure_ws, 'amlcluster2c-4gb')

print('Compute Name:', compute_target.name)

Compute Name: amlcluster2c-4gb


In [79]:
#Note that you need to run ../00. Get Data from OpenDatasets.ipynb to get and register this dataset.
datasetName_train = 'OJSales_Train_Subset'
datasetName_validate = 'OJSales_Validate_Subset'

data_train_ds = Dataset.get_by_name(mlonazure_ws,datasetName_train,version='latest')
data_validate_ds = Dataset.get_by_name(mlonazure_ws,datasetName_validate,version='latest')

In [80]:
pdtrain = data_train_ds.to_pandas_dataframe()

In [81]:
pdtrain.head(10)

Unnamed: 0,Column1,WeekStarting,Store,Brand,Quantity,Advert,Price,Revenue
0,1990-06-14,1990-06-14,1000,dominicks,12003,1,2.59,31087.77
1,1990-06-21,1990-06-21,1000,dominicks,10239,1,2.39,24471.21
2,1990-06-28,1990-06-28,1000,dominicks,17917,1,2.48,44434.16
3,1990-07-05,1990-07-05,1000,dominicks,14218,1,2.33,33127.94
4,1990-07-12,1990-07-12,1000,dominicks,15925,1,2.01,32009.25
5,1990-07-19,1990-07-19,1000,dominicks,17850,1,2.17,38734.5
6,1990-07-26,1990-07-26,1000,dominicks,10576,1,1.97,20834.72
7,1990-08-02,1990-08-02,1000,dominicks,9912,1,2.26,22401.12
8,1990-08-09,1990-08-09,1000,dominicks,9571,1,2.11,20194.81
9,1990-08-16,1990-08-16,1000,dominicks,15748,1,2.42,38110.16


In [82]:
from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
from azureml.automl.core.forecasting_parameters import ForecastingParameters

import logging

target_column_name = 'Quantity'
dateColumn = 'WeekStarting'

training_data = data_train_ds

forecastingParams = ForecastingParameters(
    time_column_name=dateColumn,
    forecast_horizon=24,
    time_series_id_column_names=['Store', 'Brand'])

automl_config = AutoMLConfig(task='forecasting',
                             path = './project',
                             debug_log='automl_debuglog.log',
                             primary_metric='r2_score',
                             iteration_timeout_minutes = 20,
                             experiment_timeout_hours=1,
                             featurization='auto',
                             max_concurrent_iterations=15, #100 is the max but its one per node so max should be set to the max nodes.
                             max_cores_per_iteration=-1,
                             enable_dnn=False,
                             enable_early_stopping=True,
                             n_cross_validations=3,
                             verbosity = logging.INFO,                             
                             compute_target=compute_target,
                             training_data=data_train_ds,
                             label_column_name=target_column_name,
                             forecasting_parameters = forecastingParams,
                             model_explainability=True)

In [83]:
from azureml.core import Experiment 

experiment_name = 'CETA_DemandForecasting_Training'
experiment = Experiment(workspace = mlonazure_ws, name = experiment_name)

experiment.tag('Author', 'mlonazure')
experiment.tag('Industry', 'Retail')
experiment.tag('Executor', 'Manual')

exp_pipelinerun = experiment.submit(automl_config)

exp_pipelinerun.wait_for_completion()

Running on remote.


{'runId': 'AutoML_7fdc4b39-2110-43c5-9d17-b954d53e58e8',
 'target': 'amlcluster2c-4gb',
 'status': 'Completed',
 'startTimeUtc': '2020-10-29T22:59:42.932499Z',
 'endTimeUtc': '2020-10-29T23:20:16.154417Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'r2_score',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '3',
  'target': 'amlcluster2c-4gb',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"7e5f39f1-37c9-48e3-b8eb-5fa1355c890f\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"MyDatasets/OJSales_Subset/OJSales_Train_Subset.csv\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"mlonazure-rg\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"286d2a8f-e90b-4e3f-b4d7-0