# AML Workspace & Artifacts navigation

## Init & Login

In [1]:
## Check core SDK version number
import azureml.core
# import mlflow
import os

from azureml.core import (Datastore, Dataset, Environment, Experiment, ScriptRunConfig,
                          Workspace)
from azureml.core.authentication import InteractiveLoginAuthentication
from IPython.display import display



# Report which azureml-core SDK version is installed in this kernel.
print("[INFO] SDK version:", azureml.core.VERSION)

## Only needed when the workspace lives in a different tenant (typically the customer's tenant)
# interactive_auth = InteractiveLoginAuthentication(tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47")

# Build the workspace handle via Workspace.from_config(); every later cell reads `ws`.
ws = Workspace.from_config()
# sep=' @ ' is inserted between *all* arguments, including right after the label,
# which is why the printed line starts with "[SUCCESS] LOGGED IN:  @ <name> @ ...".
print("[SUCCESS] LOGGED IN: ",ws.name, ws.resource_group, ws.location, ws.subscription_id, sep=' @ ')

[INFO] SDK version: 1.17.0
[SUCCESS] LOGGED IN:  @ mlops-demo @ mlops-rg @ westeurope @ 6ee947fa-0d77-4915-bf68-4a83a8bec2a4


## Experiments

In [2]:
 ws.experiments

{'mlopspython': Experiment(Name: mlopspython,
 Workspace: mlops-demo),
 'automlstep-classification': Experiment(Name: automlstep-classification,
 Workspace: mlops-demo),
 'train-on-amlcompute-automl': Experiment(Name: train-on-amlcompute-automl,
 Workspace: mlops-demo),
 'covid-test': Experiment(Name: covid-test,
 Workspace: mlops-demo),
 'cvd_od_yolo_mma': Experiment(Name: cvd_od_yolo_mma,
 Workspace: mlops-demo),
 'car_price_prediction': Experiment(Name: car_price_prediction,
 Workspace: mlops-demo),
 'MSIssues': Experiment(Name: MSIssues,
 Workspace: mlops-demo),
 'NYCTaxi_Tutorial_Pipelines': Experiment(Name: NYCTaxi_Tutorial_Pipelines,
 Workspace: mlops-demo),
 'diabetes-training-pipeline': Experiment(Name: diabetes-training-pipeline,
 Workspace: mlops-demo),
 'batch_prediction_pipeline': Experiment(Name: batch_prediction_pipeline,
 Workspace: mlops-demo),
 'experiment_with_mlflow_from_databricks': Experiment(Name: experiment_with_mlflow_from_databricks,
 Workspace: mlops-demo),
 

In [3]:
# Look up a single experiment by name in the workspace's experiments mapping.
# `experiment` is reused by the following cells.
experiment = ws.experiments["HOL-train-on-compute-simple"]
# Display the object's raw instance attributes (private fields included).
experiment.__dict__

{'_workspace': Workspace.create(name='mlops-demo', subscription_id='6ee947fa-0d77-4915-bf68-4a83a8bec2a4', resource_group='mlops-rg'),
 '_name': 'HOL-train-on-compute-simple',
 '_workspace_client': <azureml._restclient.workspace_client.WorkspaceClient at 0x7fd85ed1ea20>,
 '_id': 'd99992f3-bd1d-45f4-9ec1-bbc493027300',
 '_archived_time': None,
 '_experiment_dto': <azureml._restclient.models.experiment_dto.ExperimentDto at 0x7fd85ed1e240>,
 '_tags': {},
 '_identity': 'Experiment#HOL-train-on-compute-simple',
 '_portal_url': 'https://ml.azure.com',
 '_workspace_url': 'https://ml.azure.com?wsid=/subscriptions/6ee947fa-0d77-4915-bf68-4a83a8bec2a4/resourcegroups/mlops-rg/workspaces/mlops-demo',
 '_experiment_url': 'https://ml.azure.com/experiments/HOL-train-on-compute-simple?wsid=/subscriptions/6ee947fa-0d77-4915-bf68-4a83a8bec2a4/resourcegroups/mlops-rg/workspaces/mlops-demo'}

In [5]:
# Print every run of the selected experiment.
# After the loop `run` stays bound to the last run that was yielded; the
# `properties = run.get_properties()` cell further down reads that leftover value.
for run in experiment.get_runs():
    print(run)

Run(Experiment: HOL-train-on-compute-simple,
Id: HOL-train-on-compute-simple_1603961113_5e9ecdce,
Type: azureml.scriptrun,
Status: Completed)
Run(Experiment: HOL-train-on-compute-simple,
Id: HOL-train-on-compute-simple_1603915170_b67764a3,
Type: azureml.scriptrun,
Status: Completed)


In [7]:
# `run` is not defined in this cell: it is whatever an earlier `for run in ...`
# loop left bound (the experiment-runs loop above when executed top to bottom).
properties = run.get_properties()

In [18]:
properties

{'_azureml.ComputeTargetType': 'amlcompute',
 'ContentSnapshotId': '6032c74d-9ce1-445d-8681-d96056488900',
 'azureml.git.repository_uri': 'git@github.com:michalmar/azure-labs.git',
 'mlflow.source.git.repoURL': 'git@github.com:michalmar/azure-labs.git',
 'azureml.git.branch': 'master',
 'mlflow.source.git.branch': 'master',
 'azureml.git.commit': '14b2066b1b1660a76afa7e03e1fcddc361a6b8d5',
 'mlflow.source.git.commit': '14b2066b1b1660a76afa7e03e1fcddc361a6b8d5',
 'azureml.git.dirty': 'False',
 'ProcessInfoFile': 'azureml-logs/process_info.json',
 'ProcessStatusFile': 'azureml-logs/process_status.json'}

In [15]:
import pandas as pd

# Collect the float-valued metrics of every run in `experiment`, keyed by run number.
metricslist = {}
for run in experiment.get_runs():
    # Only metrics whose value is a float are kept; anything else is left out of the table.
    metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}
    # NOTE(review): `_run_number` is a private attribute; the public `Run.number`
    # property presumably returns the same value — confirm before switching.
    metricslist[run._run_number] = metrics

# One column per run, one row per metric, columns ordered by run number.
# `axis` is passed by keyword: pandas 2.x accepts sort_index arguments by keyword
# only, so the positional form `sort_index(1)` no longer works there.
rundata = pd.DataFrame(metricslist).sort_index(axis=1)
rundata

Unnamed: 0,1,2
learning_rate,0.05,0.05
mse,27.32622,27.32622


## AutoML

You can retrieve an existing AutoML parent run by specifying its run ID:

In [6]:
from azureml.train.automl.run import AutoMLRun

# The AutoML parent run below belongs to the 'train-on-amlcompute-automl' experiment,
# whereas `experiment` still points at 'HOL-train-on-compute-simple' when the notebook
# is executed top to bottom. Look the AutoML experiment up explicitly (under its own
# name, so `experiment` is left untouched) instead of relying on leftover kernel state.
automl_experiment = ws.experiments["train-on-amlcompute-automl"]
parent_run = AutoMLRun(automl_experiment, run_id = 'AutoML_581b9e5d-584d-419b-9d4f-3a9f3fda1bee')

In [7]:
parent_run

Experiment,Id,Type,Status,Details Page,Docs Page
train-on-amlcompute-automl,AutoML_581b9e5d-584d-419b-9d4f-3a9f3fda1bee,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [8]:
parent_run.__dict__

{'_jasmine_client': <azureml._restclient.jasmine_client.JasmineClient at 0x7f5bdc1349e8>,
 '_experiment': Experiment(Name: train-on-amlcompute-automl,
 Workspace: mlops-demo),
 '_run_id': 'AutoML_581b9e5d-584d-419b-9d4f-3a9f3fda1bee',
 '_identity': 'AutoMLRun#AutoML_581b9e5d-584d-419b-9d4f-3a9f3fda1bee',
 '_portal_url': 'https://ml.azure.com',
 '_workspace_url': 'https://ml.azure.com?wsid=/subscriptions/6ee947fa-0d77-4915-bf68-4a83a8bec2a4/resourcegroups/mlops-rg/workspaces/mlops-demo',
 '_experiment_url': 'https://ml.azure.com/experiments/train-on-amlcompute-automl?wsid=/subscriptions/6ee947fa-0d77-4915-bf68-4a83a8bec2a4/resourcegroups/mlops-rg/workspaces/mlops-demo',
 '_run_details_url': 'https://ml.azure.com/experiments/train-on-amlcompute-automl/runs/AutoML_581b9e5d-584d-419b-9d4f-3a9f3fda1bee?wsid=/subscriptions/6ee947fa-0d77-4915-bf68-4a83a8bec2a4/resourcegroups/mlops-rg/workspaces/mlops-demo',
 '_client': <azureml._run_impl.run_history_facade.RunHistoryFacade at 0x7f5b625b7be0>,

In the same way that you would get a model’s name for a specific child run, you can also collect the metrics of all the child runs:

In [9]:
import pandas as pd

# Gather the float-valued metrics of each child run of `parent_run`,
# keyed by the child's 'iteration' property.
children = list(parent_run.get_children())
metricslist = {}
for run in children:
    properties = run.get_properties()
    # Only metrics whose value is a float are kept; anything else is left out of the table.
    metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}
    # int() turns the 'iteration' property into a number so columns sort numerically
    # (presumably the property is stored as a string — confirm).
    metricslist[int(properties['iteration'])] = metrics

# One column per iteration, one row per metric, columns ordered by iteration.
# `axis` is passed by keyword: pandas 2.x accepts sort_index arguments by keyword
# only, so the positional form `sort_index(1)` no longer works there.
rundata = pd.DataFrame(metricslist).sort_index(axis=1)
rundata

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
explained_variance,0.86,0.85,0.85,0.84,0.84,0.83,0.84,0.78,0.75,0.85,0.02,0.8,0.79,0.7,0.85,0.83,0.42,0.71,0.86,0.86
mean_absolute_error,2.07,2.25,2.21,2.26,2.33,2.51,2.31,2.64,2.84,2.23,7.18,2.58,2.52,3.11,2.23,2.19,4.89,3.37,2.08,2.08
mean_absolute_percentage_error,19.59,21.19,20.67,21.27,22.24,24.21,21.94,24.92,26.83,20.92,61.67,23.95,21.74,26.5,20.92,20.12,42.24,31.1,19.69,19.87
median_absolute_error,1.24,1.5,1.45,1.51,1.63,1.86,1.59,1.73,1.96,1.47,5.84,1.62,1.47,1.64,1.47,1.22,3.15,2.41,1.27,1.28
normalized_mean_absolute_error,0.01,0.02,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.05,0.02,0.02,0.02,0.02,0.01,0.03,0.02,0.01,0.01
normalized_median_absolute_error,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.04,0.01,0.01,0.01,0.01,0.01,0.02,0.02,0.01,0.01
normalized_root_mean_squared_error,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.04,0.03,0.07,0.03,0.03,0.04,0.03,0.03,0.05,0.04,0.03,0.03
normalized_root_mean_squared_log_error,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.05,0.05,0.04,0.11,0.04,0.04,0.05,0.04,0.04,0.08,0.06,0.04,0.04
r2_score,0.86,0.85,0.85,0.84,0.84,0.83,0.84,0.78,0.75,0.85,0.02,0.8,0.79,0.7,0.85,0.83,0.42,0.71,0.86,0.86
root_mean_squared_error,3.97,4.13,4.1,4.13,4.19,4.36,4.17,4.92,5.27,4.12,10.4,4.73,4.75,5.78,4.12,4.32,8.02,5.65,3.96,3.95


In [10]:
# Unpack the two values returned by get_output() when no iteration is given.
# NOTE(review): per the variable names these are the best child run and its model — confirm against the SDK docs.
best_run, best_model = parent_run.get_output()

In [11]:
best_model

RegressionPipeline(pipeline=Pipeline(memory=None,
     steps=[('datatransformer', DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
        feature_sweeping_config=None, feature_sweeping_timeout=None,
        featurization_config=None, force_text_dnn=None,
        is_cross_validation=None, is_onnx_compatible=None, logger=None,
        obser...   random_state=None, selection='cyclic', tol=0.0001, verbose=0),
            training_cv_folds=5))]),
          stddev=None)

In [12]:
# Same call as for the best run, but requesting a specific iteration (3).
third_run, third_model = parent_run.get_output(iteration = 3)
# Plain-text repr of the returned model object.
print(third_model)

RegressionPipeline(pipeline=Pipeline(memory=None,
     steps=[('datatransformer', DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
        feature_sweeping_config=None, feature_sweeping_timeout=None,
        featurization_config=None, force_text_dnn=None,
        is_cross_validation=None, is_onnx_compatible=None, logger=None,
        obser...      precompute=False, random_state=None, selection='cyclic', tol=0.0001,
      warm_start=False))]),
          stddev=None)


In [13]:
from pprint import pprint

def print_model(model, prefix=""):
    """Print every step of a pipeline-like model, descending into ensembles.

    For each entry in ``model.steps`` the step name is printed (preceded by
    ``prefix``), followed by one of:

    * a step exposing both ``estimators`` and ``weights``: the member names and
      the weights, then every member printed recursively;
    * a step exposing both ``_base_learners`` and ``_meta_learner``: the meta
      learner, then every base learner printed recursively;
    * any other step: the dictionary returned by its ``get_params()``.

    Args:
        model: Object with a ``steps`` sequence of ``(name, component)`` entries.
        prefix: Text prepended to each step name printed at this level.
    """
    def _descend(members):
        # Each member is a (name, sub_model) entry; its name becomes the nested prefix.
        for member in members:
            print_model(member[1], member[0] + ' - ')

    for entry in model.steps:
        print(prefix + entry[0])
        component = entry[1]

        # Weighted ensemble: list member names with their weights, then recurse.
        if hasattr(component, 'estimators') and hasattr(component, 'weights'):
            members = component.estimators
            summary = {'estimators': [member[0] for member in members],
                       'weights': component.weights}
            pprint(summary)
            print()
            _descend(members)
            continue

        # Stacked ensemble: show the meta learner, then recurse into the base learners.
        if hasattr(component, '_base_learners') and hasattr(component, '_meta_learner'):
            print("\nMeta Learner")
            pprint(component._meta_learner)
            print()
            _descend(component._base_learners)
            continue

        # Plain step: dump its parameters.
        pprint(component.get_params())
        print()

In [14]:
print_model(third_model)

datatransformer
{'enable_dnn': None,
 'enable_feature_sweeping': None,
 'feature_sweeping_config': None,
 'feature_sweeping_timeout': None,
 'featurization_config': None,
 'force_text_dnn': None,
 'is_cross_validation': None,
 'is_onnx_compatible': None,
 'logger': None,
 'observer': None,
 'task': None,
 'working_dir': None}

RobustScaler
{'copy': True,
 'quantile_range': [25, 75],
 'with_centering': True,
 'with_scaling': False}

ElasticNet
{'alpha': 0.8422631578947368,
 'copy_X': True,
 'fit_intercept': True,
 'l1_ratio': 1,
 'max_iter': 1000,
 'normalize': False,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}



### AzureML Pipeline data

In [28]:
from azureml.core.run import Run

# ID of the run to open.
# NOTE(review): presumably a pipeline parent run, given the section title — confirm.
run_id = "010d9d91-42e6-4071-9417-cd713b0b25f3"
# Rebinds `experiment` and `parent_run`, replacing the values set by earlier cells.
experiment = ws.experiments["car_price_prediction"]
parent_run = Run(experiment, run_id)

In [30]:
# Materialise the child runs of `parent_run` into a list; the download cell below iterates it.
children = list(parent_run.get_children())
# Reset the metrics accumulator. The code that would fill it and build the
# table is disabled below, so `metricslist` stays empty after this cell.
metricslist = {}
# for run in children:
#     properties = run.get_properties()
#     metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}
#     metricslist[int(properties['iteration'])] = metrics

# rundata = pd.DataFrame(metricslist).sort_index(1)
# rundata

In [47]:
# ID of the child run whose files are listed and whose model file is downloaded.
TARGET_RUN_ID = '17823edf-2660-41c8-82c1-b3873f5eb79e'

for run in children:
    # NOTE(review): `properties` is not read in this cell; the call is kept in case
    # another cell relies on the binding — drop it if nothing does.
    properties = run.get_properties()
    # One separator line is printed per child run, matched or not.
    print("-"*20)
    # Compare via the public `Run.id` property rather than the private `_run_id` field.
    if run.id == TARGET_RUN_ID:
        # Show the files recorded for this run, then save its 'Trained_model'
        # file to 'Trained_model.pkl' in the current working directory.
        print(run.get_file_names())
        run.download_file('Trained_model', output_file_path='Trained_model.pkl')

--------------------
--------------------
--------------------
['Trained_model', 'azureml-logs/55_azureml-execution-tvmps_232d526c849e9eee7bcecdce6780a0a0c04b4387b1d517963d9786c49c0ade92_p.txt', 'azureml-logs/65_job_prep-tvmps_232d526c849e9eee7bcecdce6780a0a0c04b4387b1d517963d9786c49c0ade92_p.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_232d526c849e9eee7bcecdce6780a0a0c04b4387b1d517963d9786c49c0ade92_p.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/106_azureml.log', 'logs/azureml/executionlogs.txt', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'logs/azureml/stderrlogs.txt', 'logs/azureml/stdoutlogs.txt', 'module_statistics/error_info.json']
--------------------
--------------------
--------------------
--------------------
