In [1]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.16.0 to work with agogemls


In [2]:
import os
# Create a folder for the pipeline step files
experiment_folder = 'titanic_automl_pipeline'
os.makedirs(experiment_folder, exist_ok=True)

In [3]:
%%writefile $experiment_folder/titanic_automl_dataprep.py

from azureml.core import Run
from azureml.train.automl import AutoMLConfig
import pandas
import numpy as np 
import pyarrow as pa
import pyarrow.parquet as pq
import argparse
import os

# Get the experiment run context
run = Run.get_context()

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument('--output_folder', type=str, dest='output_folder', help='output folder')
args = parser.parse_args()
output_folder = args.output_folder

# load the titanic dataset
print("Loading Data...")
train_df = run.input_datasets['Titanic'].to_pandas_dataframe()

train_df = train_df.drop(['PassengerId'], axis=1)

# Save prepared data
os.makedirs(os.path.dirname(output_folder), exist_ok=True)
pq.write_table(pa.Table.from_pandas(train_df), output_folder)

Overwriting titanic_automl_pipeline/titanic_automl_dataprep.py


In [4]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core import Environment 

automl_env = Environment("automl_env")
automl_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies
automl_env.docker.enabled = True # Use a docker container

packages = CondaDependencies.create(
    conda_packages=['pandas','scikit-learn'], 
    pip_packages=['azureml-sdk[automl]', 'azureml-dataprep[fuse,pandas]'], 
    pin_sdk_version=False)

automl_env.python.conda_dependencies = packages

aml_run_config = RunConfiguration()

cluster_name = "d12compute"
aml_run_config.target = cluster_name

# Assign the environment to the run configuration
aml_run_config.environment = automl_env

In [5]:
from azureml.core import Dataset
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep

titanic_ds = Dataset.get_by_name(ws, 'Titanic')

datastore = ws.get_default_datastore()
prepped_data_path = PipelineData("processed_data",datastore=datastore).as_dataset()

cluster_name = "d12compute"
compute_target = ws.compute_targets[cluster_name]

dataprep_step = PythonScriptStep(
    name="Data preparation", 
    script_name="titanic_automl_dataprep.py", 
    source_directory=experiment_folder,
    compute_target=compute_target, 
    runconfig=aml_run_config,
    arguments=["--output_folder", prepped_data_path],
    inputs=[titanic_ds.as_named_input('Titanic')],
    outputs=[prepped_data_path],
    allow_reuse=True
)

In [6]:
prepped_data = prepped_data_path.parse_parquet_files(file_extension=None)

from azureml.pipeline.core import TrainingOutput
from azureml.pipeline.core import PipelineData

datastore = ws.get_default_datastore()

metrics_data = PipelineData(name='metrics_data',
                            datastore=datastore,
                            pipeline_output_name='metrics_output',
                            training_output=TrainingOutput(type='Metrics'))

model_data = PipelineData(name='best_model_data',
                          datastore=datastore,
                          pipeline_output_name='model_output',
                          training_output=TrainingOutput(type='Model'))

In [7]:
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep

automl_settings = {
    "iteration_timeout_minutes" : 10,
    "iterations" : 5,
    "experiment_timeout_hours" : 0.25,
    "primary_metric" : 'AUC_weighted'
}

automl_config = AutoMLConfig(task = 'classification',
                             path = experiment_folder,
                             debug_log = 'automated_ml_errors.log',
                             compute_target = compute_target,
                             run_configuration = aml_run_config,
                             featurization = 'auto',
                             training_data = prepped_data,
                             label_column_name = 'Survived',
                             **automl_settings)

train_step = AutoMLStep(name='AutoML Classification',
    automl_config=automl_config,
    outputs=[metrics_data,model_data],
    enable_default_model_output=False,
    enable_default_metrics_output=False,
    allow_reuse=True)

In [8]:
%%writefile $experiment_folder/register_model.py
from azureml.core.model import Model, Dataset
from azureml.core.run import Run, _OfflineRun
from azureml.core import Workspace
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model_name", required=True)
parser.add_argument("--model_path", required=True)
args = parser.parse_args()

print(f"model_name : {args.model_name}")
print(f"model_path: {args.model_path}")

run = Run.get_context()
ws = Workspace.from_config() if type(run) == _OfflineRun else run.experiment.workspace

model = Model.register(workspace=ws,
                       model_path=args.model_path,
                       model_name=args.model_name)

print("Registered version {0} of model {1}".format(model.version, model.name))

Overwriting titanic_automl_pipeline/register_model.py


In [9]:
from azureml.pipeline.core.graph import PipelineParameter

# The model name with which to register the trained model in the workspace.
model_name = PipelineParameter("model_name", default_value="TitanicAutoML")

register_step = PythonScriptStep(script_name="register_model.py",
                                 name="Register model",
                                 source_directory=experiment_folder,
                                 allow_reuse=False,
                                 arguments=["--model_name", model_name, "--model_path", model_data],
                                 inputs=[model_data],
                                 compute_target=compute_target,
                                 runconfig=aml_run_config)

In [10]:
from azureml.pipeline.core import Pipeline
from azureml.core import Experiment

pipeline = Pipeline(ws, [dataprep_step, train_step, register_step])

experiment = Experiment(workspace=ws, name='titanic_automl_pipeline')

run = experiment.submit(pipeline, show_output=True, regenerate_outputs=True)
run.wait_for_completion()

Created step Data preparation [65cfaf04][ace8e2ef-60f3-4749-8d18-775389fb000e], (This step will run and generate new outputs)
Created step AutoML Classification [0053e8d3][1a5524e3-e1d4-4a72-81c2-ce3a8a4ec9ba], (This step will run and generate new outputs)
Created step Register model [819a88b4][c49a5e30-6658-4f73-8e35-16100266e957], (This step will run and generate new outputs)
Submitted PipelineRun 51b1d042-6cab-44be-8c1b-600041df7ac1
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/titanic_automl_pipeline/runs/51b1d042-6cab-44be-8c1b-600041df7ac1?wsid=/subscriptions/da21a094-26a3-472f-991b-e2b11979af40/resourcegroups/agoge/workspaces/agogemls
PipelineRunId: 51b1d042-6cab-44be-8c1b-600041df7ac1
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/titanic_automl_pipeline/runs/51b1d042-6cab-44be-8c1b-600041df7ac1?wsid=/subscriptions/da21a094-26a3-472f-991b-e2b11979af40/resourcegroups/agoge/workspaces/agogemls
PipelineRun Status: NotStarted
Pipel


Streaming azureml-logs/75_job_post-tvmps_c3c42b1d407ce7aa5f1f6fdc60e187febe91cf473ed923fd5cd91bd427a4b902_d.txt
Entering job release. Current time:2020-10-23T20:01:50.880231
Failure while loading azureml_run_type_providers. Failed to load entrypoint automl = azureml.train.automl.run:AutoMLRun._from_run_dto with exception (azureml-dataprep 2.4.0 (/azureml-envs/azureml_663500cfa4b9cc072dcb8ef4745bc3e1/lib/python3.6/site-packages), Requirement.parse('azureml-dataprep<2.4.0a,>=2.3.0a'), {'azureml-dataset-runtime'}).
Starting job release. Current time:2020-10-23T20:01:52.270708
Logging experiment finalizing status in history service.
Starting the daemon thread to refresh tokens in background for process with pid = 371
[2020-10-23T20:01:52.272097] job release stage : upload_datastore starting...
[{}] job release stage : start importing azureml.history._tracking in run_history_release.
[2020-10-23T20:01:52.275271] job release stage : execute_job_release starting...
[2020-10-23T20:01:52.28223




StepRunId: 84c2b86d-9781-41b0-91cd-80ffb632dcf1
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/titanic_automl_pipeline/runs/84c2b86d-9781-41b0-91cd-80ffb632dcf1?wsid=/subscriptions/da21a094-26a3-472f-991b-e2b11979af40/resourcegroups/agoge/workspaces/agogemls
StepRun( AutoML Classification ) Status: Queued
StepRun( AutoML Classification ) Status: Running

StepRun(AutoML Classification) Execution Summary
StepRun( AutoML Classification ) Status: Finished




StepRunId: d9ebf1bd-8391-4ee0-bf55-2c2044b7d5b4
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/titanic_automl_pipeline/runs/d9ebf1bd-8391-4ee0-bf55-2c2044b7d5b4?wsid=/subscriptions/da21a094-26a3-472f-991b-e2b11979af40/resourcegroups/agoge/workspaces/agogemls
StepRun( Register model ) Status: Queued



Streaming azureml-logs/55_azureml-execution-tvmps_c3c42b1d407ce7aa5f1f6fdc60e187febe91cf473ed923fd5cd91bd427a4b902_d.txt
2020-10-23T20:15:52Z Starting output-watcher...
2020-10-23T20:15:52Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
StepRun( Register model ) Status: Running
2020-10-23T20:15:53Z Executing 'Copy ACR Details file' on 10.0.0.6
2020-10-23T20:15:53Z Copy ACR Details file succeeded on 10.0.0.6. Output: 
>>>   
>>>   
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_c71c2dd00420ca5ef079938de3d158a6
Digest: sha256:5bfe9272c0773cd28bf60c7bf52483caf3574b141e5c57f03616146a2fb16c2c
Status: Image is up to date for 8bc346f9f7404e63952f4e472e3c4b3e.azurecr.io/azureml/azureml_c71c2dd00420ca5ef079938de3d158a6:latest
35fb53b33fb42856605b138ed7334c9796ed8c920cd623f0a9f31994ad3305c3
2020/10/23 20:15:55 setuptask.go:390: Starting App Insight Logger for task:  containerSetup
2020/10/23 20:15:55 logger.go:297: Version: 3.0.01381.0008 Branch: .

2020-10-23T20:16:11Z job exited with code 0
2020-10-23T20:16:11Z Executing 'JobRelease task' on 10.0.0.6
2020-10-23T20:16:16Z JobRelease task succeeded on 10.0.0.6. Output: 
>>>   2020/10/23 20:16:12 setuptask.go:390: Starting App Insight Logger for task:  jobRelease
>>>   2020/10/23 20:16:12 logger.go:297: Version: 3.0.01381.0008 Branch: .SourceBranch Commit: 9725c87
>>>   2020/10/23 20:16:12 logger.go:297: runSpecialJobTask: os.GetEnv constants.StdouterrDir: /mnt/batch/tasks/shared/LS_root/jobs/agogemls/azureml/d9ebf1bd-8391-4ee0-bf55-2c2044b7d5b4/mounts/workspaceblobstore/azureml/d9ebf1bd-8391-4ee0-bf55-2c2044b7d5b4/azureml_compute_logs
>>>   2020/10/23 20:16:12 logger.go:297: runSpecialJobTask: Raw cmd for postprocessing is passed is: export AZ_BATCHAI_RUN_STATUS='SUCCEEDED';export AZ_BATCHAI_LOG_UPLOAD_FAILED='false';/azureml-envs/azureml_f99ad24afe6497c240474d96867e1719/bin/python $AZ_BATCHAI_JOB_MOUNT_ROOT/workspaceblobstore/azureml/d9ebf1bd-8391-4ee0-bf55-2c2044b7d5b4/azureml-s



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '51b1d042-6cab-44be-8c1b-600041df7ac1', 'status': 'Completed', 'startTimeUtc': '2020-10-23T19:59:05.315971Z', 'endTimeUtc': '2020-10-23T20:16:31.614834Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{"model_name":"TitanicAutoML"}'}, 'inputDatasets': [], 'outputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://agogemls1641700925.blob.core.windows.net/azureml/ExperimentRun/dcid.51b1d042-6cab-44be-8c1b-600041df7ac1/logs/azureml/executionlogs.txt?sv=2019-02-02&sr=b&sig=qh5YVbqQUzv4bB2aDb4Y5cIGpWRWor8FBURhmffbgro%3D&st=2020-10-23T19%3A49%3A28Z&se=2020-10-24T03%3A59%3A28Z&sp=r', 'logs/azureml/stderrlogs.txt': 'https://agogemls1641700925.blob.core.windows.net/azureml/ExperimentRun/dcid.51b1d042-6cab-44be-8c1b-600041df7ac1/logs/azureml/stderrlogs.txt?sv=2019-02-02&sr=b&sig=YLD1JwIHb3WmmNgho8y9BwWorfjVZqpiwVVhgNDuAHI%3D&st=2020-1

'Finished'

In [5]:
metrics_output_port = run.get_pipeline_output('metrics_output')
metrics_output_port.download('.', show_progress=True)

NameError: name 'run' is not defined

In [9]:
import json
import pandas

metrics_filename = "/mnt/batch/tasks/shared/LS_root/mounts/clusters/d12/code/users/marcin.szeliga/Titanic/titanic_automl_pipeline/azureml/84c2b86d-9781-41b0-91cd-80ffb632dcf1/metrics_data"
with open(metrics_filename) as f:
   metrics_output_result = f.read()
   
deserialized_metrics_output = json.loads(metrics_output_result)
df = pandas.DataFrame(deserialized_metrics_output)
df

Unnamed: 0,84c2b86d-9781-41b0-91cd-80ffb632dcf1_2,84c2b86d-9781-41b0-91cd-80ffb632dcf1_1,84c2b86d-9781-41b0-91cd-80ffb632dcf1_0,84c2b86d-9781-41b0-91cd-80ffb632dcf1_3,84c2b86d-9781-41b0-91cd-80ffb632dcf1_4
accuracy,[0.7822721598002497],[0.8114357053682895],[0.8226342072409487],[0.8249063670411985],[0.8271535580524345]
weighted_accuracy,[0.7993546876147485],[0.8272922308943723],[0.8401804293095866],[0.8425669320329565],[0.8450896226748789]
AUC_weighted,[0.8223131948829036],[0.8616129428134338],[0.8596816567387477],[0.8642864902136613],[0.8631395666738195]
recall_score_micro,[0.7822721598002497],[0.8114357053682895],[0.8226342072409487],[0.8249063670411985],[0.8271535580524345]
average_precision_score_micro,[0.8249884026874194],[0.8569943727187834],[0.8564750480554808],[0.8592012705193633],[0.8531383868295661]
AUC_macro,[0.8223131948829037],[0.8616129428134338],[0.8596816567387477],[0.8642864902136616],[0.8631395666738193]
precision_score_weighted,[0.7861474438916974],[0.8136736476157264],[0.8226627383643459],[0.8276450855067768],[0.8291227836115889]
f1_score_micro,[0.7822721598002496],[0.8114357053682897],[0.8226342072409487],[0.8249063670411985],[0.8271535580524343]
f1_score_macro,[0.7635483547071399],[0.7948636533379357],[0.8062247829157666],[0.8077676792927777],[0.8101304607121632]
recall_score_weighted,[0.7822721598002497],[0.8114357053682895],[0.8226342072409487],[0.8249063670411985],[0.8271535580524345]
