In [13]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

from azure.ai.ml import MLClient, Input, command, Output
from azure.ai.ml.dsl import pipeline
from azure.ai.ml.automl import classification, regression
from azure.ai.ml.constants import AssetTypes

# TODO(metang): why does AutoML require such a deep, private-looking package path (note the `_job` segment) for this import?
from azure.ai.ml.entities._job.automl.tabular import TabularFeaturizationSettings

In [14]:
# Coordinates of the Azure ML workspace this notebook targets.
# NOTE(review): these are hard-coded; consider loading them from configuration.
sub_id = "b746917e-ceb7-4ae0-81e6-3ccd893cb0de"
rg = "dpv2"
workspace = "dpv2-wks"

# Build the workspace client, authenticating via DefaultAzureCredential.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=sub_id,
    resource_group_name=rg,
    workspace_name=workspace,
)

In [18]:
# Two-step pipeline definition:
#   1. an AutoML classification step trained on `training_data` and validated on
#      `validation_data`, predicting column "y" and optimising accuracy;
#   2. a command step that runs register.py on the best model produced by step 1.
#
# Parameters:
#   training_data   -- forwarded unchanged to the AutoML step as its training data.
#   validation_data -- forwarded unchanged to the AutoML step as its validation data.
#
# The function has no return statement, so no pipeline-level outputs are declared here.
# NOTE(review): the DSL may derive node names from the local variable names below --
# confirm before renaming `classification_step` / `register_step`.
@pipeline()
def automl_pipe(training_data, validation_data):
    # Step 1: configure the AutoML classification task.
    classification_step = classification(
        training_data=training_data,
        validation_data=validation_data,
        target_column_name="y",
        primary_metric="accuracy",
        featurization=TabularFeaturizationSettings(mode="auto"),
        # Declare `best_model` explicitly as an mlflow_model output so the
        # register step below can reference it.
        outputs=dict(
            best_model = Output(type="mlflow_model")
        )
    )

    # Cap the sweep at 20 trials and turn off both ensemble options.
    classification_step.set_limits(max_trials=20)
    classification_step.set_training(
        enable_stack_ensemble=False,
        enable_vote_ensemble=False
    )

    # Step 2: describe a command that invokes register.py with the model path
    # and a base name for the model.
    # presumably register.py registers the model in the workspace -- the script
    # is not visible here; verify.
    register_func = command(
        inputs=dict(
            model_input_path=Input(type="mlflow_model"),
            model_base_name="bankmarketing_best_model",
        ),
        code="./register.py",
        # The ${{inputs.*}} placeholders refer to the entries of `inputs` above.
        command="python register.py "
        + "--model_input_path ${{inputs.model_input_path}} "
        + "--model_base_name ${{inputs.model_base_name}}",
        environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:1",
    )

    # Wire the AutoML step's `best_model` output into the register command.
    register_step = register_func(model_input_path=classification_step.outputs.best_model)

# Instantiate the pipeline, binding both inputs to local MLTable folders.
# Author's open question (unresolved): why is a plain string (e.g. "mlflow_model")
# used for `type` elsewhere in this notebook instead of an AssetTypes constant,
# while AssetTypes.MLTABLE is used here?
pipeline_job = automl_pipe(
    training_data=Input(
        type=AssetTypes.MLTABLE,
        path="./training-mltable-folder/",
    ),
    validation_data=Input(
        type=AssetTypes.MLTABLE,
        path="./validation-mltable-folder/",
    ),
)

# Default compute target for the pipeline job.
pipeline_job.settings.default_compute = "cpu-cluster"


In [19]:
# Submit the pipeline job to the workspace under the "automl_demo" experiment.
submit_job = ml_client.jobs.create_or_update(
    pipeline_job,
    experiment_name="automl_demo",
)

In [20]:
# Stream the submitted job's logs into the cell output.
ml_client.jobs.stream(name=submit_job.name)

RunId: bold_carpet_tz3fr3d015
Web View: https://ml.azure.com/runs/bold_carpet_tz3fr3d015?wsid=/subscriptions/b746917e-ceb7-4ae0-81e6-3ccd893cb0de/resourcegroups/dpv2/workspaces/dpv2-wks

Streaming logs/azureml/executionlogs.txt

[2022-10-06 08:38:15Z] Submitting 1 runs, first five are: 8d55429f:84684799-dcb7-4fb9-ac57-c0315d37aec6
[2022-10-06 08:51:10Z] Completing processing run id 84684799-dcb7-4fb9-ac57-c0315d37aec6.
[2022-10-06 08:51:10Z] Submitting 1 runs, first five are: 5894141d:5c41a6c0-307d-465f-8769-f84911a168e8
[2022-10-06 08:51:58Z] Completing processing run id 5c41a6c0-307d-465f-8769-f84911a168e8.

Execution Summary
RunId: bold_carpet_tz3fr3d015
Web View: https://ml.azure.com/runs/bold_carpet_tz3fr3d015?wsid=/subscriptions/b746917e-ceb7-4ae0-81e6-3ccd893cb0de/resourcegroups/dpv2/workspaces/dpv2-wks

