# Verify the azure-ai-ml installation

In [1]:
pip show azure-ai-ml


Name: azure-ai-ml
Version: 1.17.0
Summary: Microsoft Azure Machine Learning Client Library for Python
Home-page: https://github.com/Azure/azure-sdk-for-python
Author: Microsoft Corporation
Author-email: azuresdkengsysadmins@microsoft.com
License: MIT License
Location: /anaconda/envs/azureml_py38/lib/python3.9/site-packages
Requires: azure-common, azure-core, azure-mgmt-core, azure-storage-blob, azure-storage-file-datalake, azure-storage-file-share, colorama, isodate, jsonschema, marshmallow, msrest, opencensus-ext-azure, opencensus-ext-logging, pydash, pyjwt, pyyaml, strictyaml, tqdm, typing-extensions
Required-by: 
Note: you may need to restart the kernel to use updated packages.


Create an `MLClient` handle to interact with the workspace

In [2]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Workspace coordinates -- replace the placeholder values with your own.
SUBSCRIPTION = "your-subscription"
RESOURCE_GROUP = "your-resource-group-name"
WS_NAME = "your-workspace-name"

# Credential object used to authenticate the client.
credential = DefaultAzureCredential()

# Client handle bound to the workspace identified by the constants above.
ml_client = MLClient(
    subscription_id=SUBSCRIPTION,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WS_NAME,
    credential=credential,
)

In [3]:
# Sanity check: fetch the workspace through the client and print its
# location and resource group.
ws = ml_client.workspaces.get(WS_NAME)
print(f"{ws.location} : {ws.resource_group}")

westeurope : DP100-course


In [4]:
# Fetch version "2" of the "airline-prep-data" data asset from the workspace,
# then print the URI it points to.
airline_prepped_data = ml_client.data.get(
    "airline-prep-data",
    version="2",
)
print(f"Data asset URI: {airline_prepped_data.path}")

Data asset URI: azureml://subscriptions/4088dff7-3800-4da9-abf9-00ca052f929a/resourcegroups/dp100-course/workspaces/dp100-courseproject/datastores/blob_training_data/paths/UI/2024-06-27_155152_UTC/prep_data.csv


In [5]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Reuse the URI of the data asset fetched earlier (`airline_prepped_data`)
# instead of repeating the subscription-specific azureml:// path as a literal,
# so the notebook is not tied to one subscription/workspace.
my_path = airline_prepped_data.path

# Describe a URI_FILE data asset that points at that path.
my_data = Data(
    name="airline_prep_data",
    version="2",
    description="Data asset created for pipeline",
    type=AssetTypes.URI_FILE,
    path=my_path,
)

# Create (or update) the asset in the workspace. The call is the cell's last
# expression, so the returned Data object is displayed.
ml_client.data.create_or_update(my_data)

Data({'path': 'azureml://subscriptions/4088dff7-3800-4da9-abf9-00ca052f929a/resourcegroups/dp100-course/workspaces/dp100-courseproject/datastores/blob_training_data/paths/UI/2024-06-27_155152_UTC/prep_data.csv', 'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_file', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'airline_prep_data', 'description': 'Data asset created for pipeline', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/4088dff7-3800-4da9-abf9-00ca052f929a/resourceGroups/dp100-course/providers/Microsoft.MachineLearningServices/workspaces/dp100-courseproject/data/airline_prep_data/versions/2', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/notebooks-compute/code/Users/sanelatasnik', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7faffd33ae50>, 'serialize': <msrest.serialization.Serializer

In [7]:
import os

# Folders that hold the training and prediction source code.
train_src_dir = "./train-src"
predict_src_dir = "./predict-src"

# Create both folders; exist_ok=True makes the cell safe to re-run.
for src_dir in (train_src_dir, predict_src_dir):
    os.makedirs(src_dir, exist_ok=True)

In [8]:

from azure.ai.ml import MLClient, Input
from azure.ai.ml.dsl import pipeline
from azure.ai.ml import load_component
from azure.ai.ml.sweep import Choice
from azure.ai.ml.sweep import TruncationSelectionPolicy

# Load the training and prediction components from their YAML definitions.
train_component_func, score_component_func = (
    load_component(source=spec_path)
    for spec_path in ("./train.yml", "./predict.yml")
)

# define a pipeline
@pipeline()
def pipeline_with_hyperparameter_sweep():
    """Tune hyperparameters"""
    # NOTE(review): the local variable names and the docstring text are kept
    # exactly as they were -- the azure-ai-ml DSL presumably uses them for
    # pipeline node names and the job description; confirm before renaming.

    # Training step: the data comes from the asset fetched earlier in the
    # notebook (`airline_prepped_data`) rather than a hard-coded,
    # subscription-specific URI. The two hyperparameters are declared as
    # discrete search spaces for the sweep below.
    train_model = train_component_func(
        data=Input(
            type="uri_file",
            path=airline_prepped_data.path,
        ),
        n_estimators=Choice(values=[10, 50, 100]),
        max_depth=Choice(values=[1, 5, 10]),
    )

    # Turn the training step into a sweep that randomly samples the search
    # space and maximizes "training_accuracy_score".
    sweep_step = train_model.sweep(
        primary_metric="training_accuracy_score",
        goal="maximize",
        sampling_algorithm="random",
        compute="serverless",
    )
    # Cap the sweep at 2 trials, run one at a time.
    # timeout is presumably in seconds -- TODO confirm.
    sweep_step.set_limits(max_total_trials=2, max_concurrent_trials=1, timeout=1800)

    # Early-termination policy for the sweep trials.
    sweep_step.early_termination = TruncationSelectionPolicy(
        evaluation_interval=1,
        truncation_percentage=20,
        delay_evaluation=3,
    )

    # Scoring step consumes the model and test data emitted by the sweep.
    score_data = score_component_func(
        model=sweep_step.outputs.model_output,
        test_data=sweep_step.outputs.test_data,
    )

# Instantiate the pipeline job from the definition above.
pipeline_job = pipeline_with_hyperparameter_sweep()

# Use serverless compute as the pipeline-level default compute.
pipeline_job.settings.default_compute = "serverless"

In [9]:
# Submit the pipeline to the workspace under the "pipeline_samples-sweep"
# experiment. The returned job replaces the local definition in `pipeline_job`.
pipeline_job = ml_client.jobs.create_or_update(
    pipeline_job,
    experiment_name="pipeline_samples-sweep",
)

# Bare expression so the notebook renders the submitted job's summary.
pipeline_job

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Experiment,Name,Type,Status,Details Page
pipeline_samples-sweep,stoic_yak_8xg7vj7srp,pipeline,NotStarted,Link to Azure Machine Learning studio


In [10]:
# Stream the logs of the submitted job, identified by its name.
ml_client.jobs.stream(pipeline_job.name)

RunId: stoic_yak_8xg7vj7srp
Web View: https://ml.azure.com/runs/stoic_yak_8xg7vj7srp?wsid=/subscriptions/4088dff7-3800-4da9-abf9-00ca052f929a/resourcegroups/dp100-course/workspaces/dp100-courseproject

Streaming logs/azureml/executionlogs.txt

[2024-07-01 19:04:07Z] Submitting 1 runs, first five are: 52383773:f40e2e4d-90cc-49c7-ac86-24e07526573e
