#### This notebook is heavily based off: https://github.com/Azure/azureml-examples/blob/main/v1/python-sdk/tutorials/automl-with-azureml/forecasting-hierarchical-timeseries/auto-ml-forecasting-hierarchical-timeseries.ipynb
#### Use the two in conjunction

#### Install required packages 

In [None]:
pip install azureml-train-automl --user
pip install azure-ai-ml azureml-core azure-identity azureml-mlflow mlflow
pip install azureml-dataset-runtime --upgrade
pip install azureml-contrib-automl-pipeline-steps
pip install pyOpenSSL
pip install cryptography==38.0.4 --user

In [1]:
# Import required libraries
import os
from azure.identity import DefaultAzureCredential
from azure.identity import AzureCliCredential
from azure.ai.ml import automl, Input, MLClient, command

from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Data


#### Authenticate via AzureCLI separately and then connect to workspace

In [29]:
import azureml.core
from azureml.core import Workspace, Datastore
import pandas as pd

# Set up your workspace
ws = Workspace.from_config()
ws.get_details()

# Set up your datastores
dstore = ws.get_default_datastore()

output = {}
output["SDK version"] = azureml.core.VERSION
output["Subscription ID"] = ws.subscription_id
output["Workspace"] = ws.name
output["Resource Group"] = ws.resource_group
output["Location"] = ws.location
output["Default datastore name"] = dstore.name
output["SDK Version"] = azureml.core.VERSION
pd.set_option("display.max_colwidth", None)
outputDf = pd.DataFrame(data=output, index=[""])
outputDf.T

Unnamed: 0,Unnamed: 1
SDK version,1.49.0
Subscription ID,d9412b06-e31c-4c66-b2c5-5e77beb91bc1
Workspace,demo-ws
Resource Group,demo-rg
Location,uksouth
Default datastore name,workspaceblobstore
SDK Version,1.49.0


#### Create experiment

In [3]:
from azureml.core import Experiment

experiment = Experiment(ws, "automl-hts")

print("Experiment name: " + experiment.name)

Experiment name: automl-hts


In [4]:
datastore_path = "hts-sample"

In [5]:
datastore = ws.get_default_datastore()
datastore

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-8343b37d-a835-4e2d-8040-a061b9b90fba",
  "account_name": "demows8714441228",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

#### Set up data for train and test, MLTables are created from the csvs below

In [6]:
from azureml.data.dataset_factory import TabularDatasetFactory

registered_train = TabularDatasetFactory.register_pandas_dataframe(
    pd.read_csv("Data/hts-sample-train.csv"),
    target=(datastore, "hts-sample"),
    name="hts-sales-train",
)
registered_inference = TabularDatasetFactory.register_pandas_dataframe(
    pd.read_csv("Data/hts-sample-test.csv"),
    target=(datastore, "hts-sample"),
    name="hts-sales-test",
)

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to hts-sample/221ef6a1-4d9c-43e3-8140-a50ac15ae85c/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.
Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to hts-sample/0cf1ef30-6a50-4448-b7f8-a304809a0ebd/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


#### Find or create/configure compute

In [7]:
from azureml.core.compute import ComputeTarget, AmlCompute

# Name your cluster
compute_name = "hts-compute"


if compute_name in ws.compute_targets:
    compute_target = ws.compute_targets[compute_name]
    if compute_target and type(compute_target) is AmlCompute:
        print("Found compute target: " + compute_name)
else:
    print("Creating a new compute target...")
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_DS12_V2", max_nodes=6
    )
    # Create the compute target
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # Can poll for a minimum number of nodes and for a specific timeout.
    # If no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=2
    )

    # For a more detailed view of current cluster status, use the 'status' property
    print(compute_target.status.serialize())

Found compute target: hts-compute


#### Configure Forecasting parameters, AutoML settings and HTS parameters

In [8]:
from azureml.train.automl.runtime._hts.hts_parameters import HTSTrainParameters
from azureml.automl.core.forecasting_parameters import ForecastingParameters
from azureml.train.automl.automlconfig import AutoMLConfig


model_explainability = True

engineered_explanations = False
# Define your hierarchy. Adjust the settings below based on your dataset.
hierarchy = ["state", "store_id", "product_category", "SKU"]
training_level = "SKU"

# Set your forecast parameters. Adjust the settings below based on your dataset.
time_column_name = "date"
label_column_name = "quantity"
forecast_horizon = 7

forecasting_parameters = ForecastingParameters(
    time_column_name=time_column_name,
    forecast_horizon=forecast_horizon,
)

automl_settings = AutoMLConfig(
    task="forecasting",
    primary_metric="normalized_root_mean_squared_error",
    experiment_timeout_hours=1,
    label_column_name=label_column_name,
    track_child_runs=False,
    forecasting_parameters=forecasting_parameters,
    pipeline_fetch_max_batch_size=15,
    model_explainability=model_explainability,
    n_cross_validations="auto",  # Feel free to set to a small integer (>=2) if runtime is an issue.
    cv_step_size="auto",
    # The following settings are specific to this sample and should be adjusted according to your own needs.
    iteration_timeout_minutes=10,
    iterations=15,
)

hts_parameters = HTSTrainParameters(
    automl_settings=automl_settings,
    hierarchy_column_names=hierarchy,
    training_level=training_level,
    enable_engineered_explanations=engineered_explanations,
)

#### Configure training pipeline steps

In [14]:
from azureml.contrib.automl.pipeline.steps import AutoMLPipelineBuilder


training_pipeline_steps = AutoMLPipelineBuilder.get_many_models_train_steps(
    experiment=experiment,
    train_data=registered_train,
    compute_target=compute_target,
    node_count=1,
    process_count_per_node=8,
    train_pipeline_parameters=hts_parameters,
    run_invocation_timeout=3900,
)



A partitioned tabular dataset will be created with the name training after hts_raw_partitioned_1677581889. You may use it for future training.
Aggregation dataset is created with the name hts_agg_1677581889


In [15]:
from azureml.pipeline.core import Pipeline

training_pipeline = Pipeline(ws, steps=training_pipeline_steps)

In [16]:
training_run = experiment.submit(training_pipeline)

Created step hts-training-dataset-partition [301bfc38][01d2c796-bebb-41f0-b61c-f6ece57c735b], (This step will run and generate new outputs)
Created step hts-hierarchy-builder [d8dd7a66][dc328a63-75b6-4f5b-9e3b-680e70781c46], (This step will run and generate new outputs)
Created step hts-data-aggregation [5bb3f585][ac445ab4-9942-4492-90b9-cbbffbe6ead2], (This step will run and generate new outputs)
Created step hts-automl-training [49a77619][7a908da2-c3ea-48c0-861a-fc57bc6d97a5], (This step will run and generate new outputs)
Created step hts-proportions-calculation [7f015aed][d4e65533-027e-4cc1-b8dd-6f9f0b3ad536], (This step will run and generate new outputs)
Created step hts-explanation-allocation [09a19425][a0479697-9620-46f5-939e-95f775cebd09], (This step will run and generate new outputs)
Submitted PipelineRun e2be9091-7f3b-435a-b1a8-a98071a09ec3
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/e2be9091-7f3b-435a-b1a8-a98071a09ec3?wsid=/subscriptions/d9412b06-e31c-4c

In [17]:
training_run.wait_for_completion(show_output=False)

PipelineRunId: e2be9091-7f3b-435a-b1a8-a98071a09ec3
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/e2be9091-7f3b-435a-b1a8-a98071a09ec3?wsid=/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/workspaces/demo-ws&tid=dd1f2c2d-fea2-4bb0-a462-dfeb75d6a2e7


'Finished'

#### Download model explanations locally

In [18]:
if model_explainability:
    expl_output = training_run.get_pipeline_output("explanations")
    expl_output.download("training_explanations")
else:
    print(
        "Model explanations are available only if model_explainability is set to True."
    )

In [19]:
import os

if model_explainability:
    explanations_dirrectory = os.listdir(
        os.path.join("training_explanations", "azureml")
    )
    if len(explanations_dirrectory) > 1:
        print(
            "Warning! The directory contains multiple explanations, only the first one will be displayed."
        )
    print("The explanations are located at {}.".format(explanations_dirrectory[0]))
    # Now we will list all the explanations.
    explanation_path = os.path.join(
        "training_explanations",
        "azureml",
        explanations_dirrectory[0],
        "training_explanations",
    )
    print("Available explanations")
    print("==============================")
    print("\n".join(os.listdir(explanation_path)))
else:
    print(
        "Model explanations are available only if model_explainability is set to True."
    )

The explanations are located at ec07a45e-83fb-462e-ad06-272949cdf215.
Available explanations
raw_explanations_AUTOML_TOP_LEVEL.csv
raw_explanations_product_category.csv
raw_explanations_SKU.csv
raw_explanations_state.csv
raw_explanations_store_id.csv


In [20]:
from IPython.display import display

explanation_type = "raw"
level = "state"

if model_explainability:
    display(
        pd.read_csv(
            os.path.join(explanation_path, "{}_explanations_{}.csv").format(
                explanation_type, level
            )
        )
    )

Unnamed: 0,state,date,price,quantity
0,CA,2.88,1.28,0.05
1,FL,4.88,4.37,49.47
2,WA,2.8,2.05,1.34


#### Setup inference params and compute needed

In [24]:
from azureml.train.automl.runtime._hts.hts_parameters import HTSInferenceParameters

inference_parameters = HTSInferenceParameters(
    hierarchy_forecast_level="store_id",  # The setting is specific to this dataset and should be changed based on your dataset.
    allocation_method="proportions_of_historical_average",
)

steps = AutoMLPipelineBuilder.get_many_models_batch_inference_steps(
    experiment=experiment,
    inference_data=registered_inference,
    compute_target=compute_target,
    inference_pipeline_parameters=inference_parameters,
    node_count=1,
    process_count_per_node=8,
)

The training run used for inference is e2be9091-7f3b-435a-b1a8-a98071a09ec3.




A partitioned tabular dataset will be created with the name inference after hts_raw_partitioned_1677588191. You may use it for future inference.


In [25]:
from azureml.pipeline.core import Pipeline

inference_pipeline = Pipeline(ws, steps=steps)

In [26]:
inference_run = experiment.submit(inference_pipeline)
inference_run.wait_for_completion(show_output=False)

Created step hts-inference-dataset-partition [965b7a5c][366371b1-d192-4ec2-a65f-240ceea02472], (This step will run and generate new outputs)
Created step hts-forecast-parallel [e1b25894][68806cc3-e75a-4b71-bd6d-43791a85026d], (This step will run and generate new outputs)
Created step hts-forecast-allocation [10e9942c][4c21f674-4b55-477e-8e93-8830e6963709], (This step will run and generate new outputs)
Submitted PipelineRun 120437da-5147-4254-aea0-4564dc5c7dcd
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/120437da-5147-4254-aea0-4564dc5c7dcd?wsid=/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/workspaces/demo-ws&tid=dd1f2c2d-fea2-4bb0-a462-dfeb75d6a2e7
PipelineRunId: 120437da-5147-4254-aea0-4564dc5c7dcd
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/120437da-5147-4254-aea0-4564dc5c7dcd?wsid=/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/workspaces/demo-ws&tid=dd1f2c2d-fea2-4bb0-a462-dfeb75d6a2e7


'Finished'

#### download inference output

In [27]:
forecasts = inference_run.get_pipeline_output("forecasts")
forecasts.download("forecast_results")

2

#### Try the run with different params

In [28]:
inference_run = experiment.submit(
    inference_pipeline, pipeline_parameters={"hierarchy_forecast_level": "state"}
)
inference_run.wait_for_completion(show_output=False)

Submitted PipelineRun 79ccc978-b32f-44b9-81ac-e26e950a902e
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/79ccc978-b32f-44b9-81ac-e26e950a902e?wsid=/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/workspaces/demo-ws&tid=dd1f2c2d-fea2-4bb0-a462-dfeb75d6a2e7
PipelineRunId: 79ccc978-b32f-44b9-81ac-e26e950a902e
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/79ccc978-b32f-44b9-81ac-e26e950a902e?wsid=/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/workspaces/demo-ws&tid=dd1f2c2d-fea2-4bb0-a462-dfeb75d6a2e7


'Finished'