In [1]:
from azure.ai.ml import MLClient, Input, Output
from azure.identity import DefaultAzureCredential
from azure.ai.ml.dsl import pipeline
from dotenv import load_dotenv

# Identify the Azure subscription, resource group and workspace used by this notebook.
SUBSCRIPTION_ID = "e5615bfe-b43b-41ce-bccb-b78867c2ce63"
RESOURCE_GROUP = "rg-dp100-demo-001"
WORKSPACE_NAME = "mlw-dp100-demo"

# Load variables from python.env before the credential object is created.
# NOTE(review): presumably this file carries the settings DefaultAzureCredential reads - confirm.
load_dotenv("python.env")

# Build the credential first, then the workspace client that every later cell reuses.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential,
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WORKSPACE_NAME,
)

In [3]:
# Look up a compute target that is already attached to the workspace and print its settings.
cluster_name = "vmcluster-ml-dev"
compute_target = ml_client.compute.get(cluster_name)
print(compute_target)

enable_node_public_ip: true
id: /subscriptions/e5615bfe-b43b-41ce-bccb-b78867c2ce63/resourceGroups/rg-dp100-demo-001/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-demo/computes/vmcluster-ml-dev
idle_time_before_scale_down: 120
location: japaneast
max_instances: 2
min_instances: 0
name: vmcluster-ml-dev
provisioning_state: Succeeded
size: STANDARD_D2_V3
ssh_public_access_enabled: false
tier: dedicated
type: amlcompute



In [4]:
# Show the component definitions that the pipeline cell below imports.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("src/components.py", encoding="utf-8") as fin:
    print(fin.read())

from pathlib import Path
from random import randint
from uuid import uuid4

# mldesigner package contains the command_component which can be used to define component from a python function
from mldesigner import command_component, Input, Output


@command_component()
def train_model(
    training_data: Input(type="uri_file"),
    max_epochs: int,
    model_output: Output(type="uri_folder"),
    learning_rate=0.02,
) -> str:
    """A dummy train component.

    Args:
        training_data: a file contains training data
        max_epochs: max epochs
        learning_rate: learning rate
        model_output: target folder to save model output
    """

    lines = [
        f"Training data path: {training_data}",
        f"Max epochs: {max_epochs}",
        f"Learning rate: {learning_rate}",
        f"Model output path: {model_output}",
    ]

    for line in lines:
        print(line)

    # Do the train and save the trained model as a file into the output folder.
    # Here only output 

In [10]:
# import the components as functions
from src.components import train_model, score_data, eval_model


# Datastore URI used below as a custom location for step-level and pipeline-level outputs.
# NOTE(review): `${{name}}` looks like a placeholder resolved by Azure ML at run time
# (presumably to the job name) - confirm against the service documentation.
custom_path = "azureml://datastores/workspaceblobstore/paths/custom_path/${{name}}/"

# Define a pipeline from the imported components.
# The function name is reflected on the AML pipeline as the overall component.
# Local variables such as train_with_sample_data become the component nodes in the
# pipeline, so renaming them changes the node names.
# default_compute is set to the cluster name looked up in an earlier cell.
@pipeline(default_compute=cluster_name)
def pipeline_with_python_function_components(input_data, test_data, learning_rate):
    """E2E dummy train-score-eval pipeline with components defined via python function components"""

    # Calling a component object as a function applies the given inputs and parameters
    # and creates a node in the pipeline.
    # Training node: max_epochs is fixed at 5; learning_rate is forwarded from the pipeline input.
    train_with_sample_data = train_model(training_data=input_data, 
                                         max_epochs=5, 
                                         learning_rate=learning_rate
                                         )
    # Scoring node: consumes the training node's model_output and its `output`.
    # NOTE(review): `outputs.output` is presumably the str return value of train_model - confirm.
    score_with_sample_data = score_data(model_input=train_with_sample_data.outputs.model_output,
                                        test_data=test_data,
                                        model_file=train_with_sample_data.outputs.output,
                                        )
    # Example of how to change the path of an output at step level.
    # Note: if the output is promoted to pipeline level, the path has to be changed
    # at pipeline job level instead.
    score_with_sample_data.outputs.score_output = Output(
        type="uri_folder", mode="rw_mount", path=custom_path
    )
    # Evaluation node: consumes the scoring node's score_output and its `output`.
    eval_with_sample_data = eval_model(
        scoring_result=score_with_sample_data.outputs.score_output,
        scoring_file=score_with_sample_data.outputs.output,
    )

    # Pipeline outputs: the evaluation result and the trained model output.
    return {
        "eval_output": eval_with_sample_data.outputs.eval_output,
        "model_output": train_with_sample_data.outputs.model_output,
    }


# The same Titanic CSV is used as both the training and the test input.
titanic_csv_uri = "wasbs://demo@dprepdata.blob.core.windows.net/Titanic.csv"
training_input = Input(path=titanic_csv_uri, type="uri_file")
scoring_input = Input(path=titanic_csv_uri, type="uri_file")

# Instantiate the pipeline with concrete inputs and a learning rate.
pipeline_job = pipeline_with_python_function_components(
    input_data=training_input,
    test_data=scoring_input,
    learning_rate=0.1,
)

# Example of how to change the path of an output at pipeline level.
model_output_target = Output(type="uri_folder", mode="rw_mount", path=custom_path)
pipeline_job.outputs.model_output = model_output_target

In [11]:
# Submit the pipeline job to the workspace under a named experiment.
# The returned job object replaces the local definition and is shown as the cell output.
EXPERIMENT_NAME = "titanic_pipeline_samples"
pipeline_job = ml_client.jobs.create_or_update(
    pipeline_job,
    experiment_name=EXPERIMENT_NAME,
)
pipeline_job

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading src (0.01 MBs): 100%|##

Experiment,Name,Type,Status,Details Page
titanic_pipeline_samples,yellow_lychee_v9v60xkdjk,pipeline,Preparing,Link to Azure Machine Learning studio


In [12]:
# Stream the logs of the submitted job; this waits until the job completes.
submitted_job_name = pipeline_job.name
ml_client.jobs.stream(submitted_job_name)

RunId: yellow_lychee_v9v60xkdjk
Web View: https://ml.azure.com/runs/yellow_lychee_v9v60xkdjk?wsid=/subscriptions/e5615bfe-b43b-41ce-bccb-b78867c2ce63/resourcegroups/rg-dp100-demo-001/workspaces/mlw-dp100-demo

Streaming logs/azureml/executionlogs.txt

[2024-01-14 15:57:29Z] Submitting 1 runs, first five are: 8786216d:d21b9ab2-601c-4b26-b1a8-650c0a967f6d
[2024-01-14 16:04:29Z] Completing processing run id d21b9ab2-601c-4b26-b1a8-650c0a967f6d.
[2024-01-14 16:04:31Z] Submitting 1 runs, first five are: 2d58c513:85573ac7-880a-4487-abaf-014a189ec80a
[2024-01-14 16:06:26Z] Completing processing run id 85573ac7-880a-4487-abaf-014a189ec80a.
[2024-01-14 16:06:27Z] Submitting 1 runs, first five are: 92c4cad2:57eb887a-a054-487b-91ce-3b44674196c6
[2024-01-14 16:07:18Z] Completing processing run id 57eb887a-a054-487b-91ce-3b44674196c6.

Execution Summary
RunId: yellow_lychee_v9v60xkdjk
Web View: https://ml.azure.com/runs/yellow_lychee_v9v60xkdjk?wsid=/subscriptions/e5615bfe-b43b-41ce-bccb-b78867c2ce