In [1]:
import os
import azureml.core
from azureml.core import Workspace, Experiment, Datastore
from azureml.widgets import RunDetails

# Report which azureml-core release this kernel has loaded.
sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

SDK version: 1.22.0


In [2]:
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep

# Reaching this line means the pipeline imports above resolved without error.
import_status = "Pipeline SDK-specific imports completed"
print(import_status)

Pipeline SDK-specific imports completed


In [3]:

# Obtain the workspace handle via Workspace.from_config().
ws = Workspace.from_config()

# Print the identifying fields one per line.
# NOTE: this includes the subscription id; clear the cell output before
# sharing the notebook.
workspace_details = (ws.name, ws.resource_group, ws.location, ws.subscription_id)
for detail in workspace_details:
    print(detail)

maynard_vscode
maynard_vscode
northeurope
ae0accf0-cf61-4cac-954a-b3979188ad7c


In [4]:
# Grab the workspace's default datastore and print its string description.
def_blob_store = ws.get_default_datastore()
datastore_summary = str(def_blob_store)
print(datastore_summary)

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-b482fff2-25ca-4c52-b29e-27ca5bb428c4",
  "account_name": "maynardvssaudipxv55vl3xi",
  "protocol": "https",
  "endpoint": "core.windows.net"
}


In [5]:
# Show just the name of the default datastore.
name_message = "Blobstore's name: {}".format(def_blob_store.name)
print(name_message)

Blobstore's name: workspaceblobstore


In [6]:
# Upload the local pickle to the "20newsgroups" target path of the default
# datastore; overwrite=True is passed so an existing copy is replaced.
local_files = ["./20news.pkl"]
def_blob_store.upload_files(
    local_files,
    target_path="20newsgroups",
    overwrite=True,
)
print("Upload call completed")

Uploading an estimated of 1 files
Uploading ./20news.pkl
Uploaded ./20news.pkl, 1 files out of an estimated total of 1
Uploaded 1 files
Upload call completed


In [7]:
# Print one line per entry yielded by iterating the workspace's compute
# targets (the captured output shows these are the target names).
cts = ws.compute_targets
for target_name in cts:
    print(target_name)

cpu-cluster
jupyter-compute


In [8]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the compute target the pipeline steps will run on.
aml_compute_target = "jupyter-compute"

try:
    # Look the target up by name; a ComputeTargetException is handled below
    # by provisioning a new cluster under that name.
    aml_compute = AmlCompute(ws, aml_compute_target)
except ComputeTargetException:
    print("creating new compute target")

    cluster_settings = {
        "vm_size": "STANDARD_D2_V2",
        "min_nodes": 1,
        "max_nodes": 4,
    }
    provisioning_config = AmlCompute.provisioning_configuration(**cluster_settings)
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    # Wait for provisioning, with a 20-minute timeout and progress output.
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
else:
    print("found existing compute target.")

print("Azure Machine Learning Compute attached")

found existing compute target.
Azure Machine Learning Compute attached


In [9]:

# Folder that is handed to the training step as its source directory.
source_directory = './train'
resolved_source = os.path.realpath(source_directory)
print('Source directory for the step is {}.'.format(resolved_source))

# PythonScriptStep signature, for reference (the call returns a Step):
#   PythonScriptStep(script_name, name=None, arguments=None,
#                    compute_target=None, runconfig=None, inputs=None,
#                    outputs=None, params=None, source_directory=None,
#                    allow_reuse=True, version=None, hash_paths=None)
step1 = PythonScriptStep(
    script_name="train.py",
    name="train_step",
    source_directory=source_directory,
    compute_target=aml_compute,
    allow_reuse=True,
)
print("Step1 created")

Source directory for the step is /mnt/batch/tasks/shared/LS_root/mounts/clusters/jupyter-compute/code/Users/katarzyna.lenczewska/amlpipelines/train.
Step1 created


## Running a few steps in parallel

In [10]:
# Imports needed only for the custom run configuration built in this cell.
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE, RunConfiguration

# --- compare step: uses its own source directory ---
source_directory = './compare'
resolved_source = os.path.realpath(source_directory)
print('Source directory for the step is {}.'.format(resolved_source))

# Same compute target as the other steps; no custom run configuration.
step2 = PythonScriptStep(
    script_name="compare.py",
    name="compare_step",
    source_directory=source_directory,
    compute_target=aml_compute,
)

# --- extract step: runs with a custom RunConfiguration ---
run_config = RunConfiguration()
step_env = run_config.environment

# Enable Docker and start from the default CPU base image.
step_env.docker.enabled = True
step_env.docker.base_image = DEFAULT_CPU_IMAGE

# Dependencies are not user-managed: the conda specification below
# (scikit-learn) describes the environment to build for execution.
step_env.python.user_managed_dependencies = False
step_env.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])

# Yet another source directory for this step.
source_directory = './extract'
resolved_source = os.path.realpath(source_directory)
print('Source directory for the step is {}.'.format(resolved_source))

step3 = PythonScriptStep(
    script_name="extract.py",
    name="extract_step",
    source_directory=source_directory,
    compute_target=aml_compute,
    runconfig=run_config,
)

# All three steps, with no ordering declared between them in this cell.
steps = [step1, step2, step3]
print("Step lists created")

Source directory for the step is /mnt/batch/tasks/shared/LS_root/mounts/clusters/jupyter-compute/code/Users/katarzyna.lenczewska/amlpipelines/compare.
Source directory for the step is /mnt/batch/tasks/shared/LS_root/mounts/clusters/jupyter-compute/code/Users/katarzyna.lenczewska/amlpipelines/extract.
Step lists created


## Build the pipeline

In [11]:
# Pipeline constructor signature, for reference:
#   Pipeline(workspace, steps, description=None,
#            default_datastore_name=None, default_source_directory=None,
#            resolve_closure=True, _workflow_provider=None,
#            _service_endpoint=None)

# Build the pipeline from the list of steps in `steps`.
pipeline1 = Pipeline(steps=steps, workspace=ws)
print ("Pipeline is built")

Pipeline is built


## Validate the pipeline

In [12]:
# Pipeline.validate() hands back a list of the errors it found, so inspect
# the result instead of discarding it; otherwise problems pass unnoticed.
validation_errors = pipeline1.validate()
if validation_errors:
    print("Pipeline validation found {} error(s):".format(len(validation_errors)))
    for validation_error in validation_errors:
        print(" -", validation_error)
print("Pipeline validation complete")

Step train_step is ready to be created [6a37762a]
Step compare_step is ready to be created [1e403e6b]Step extract_step is ready to be created [5b4e9379]

Pipeline validation complete


## Submit the pipeline

In [14]:
# Submit syntax, for reference:
#   submit(experiment_name, pipeline_parameters=None,
#          continue_on_step_failure=False, regenerate_outputs=False)

# Submit the pipeline under the 'Hello_World1' experiment;
# regenerate_outputs=False is passed explicitly.
experiment1 = Experiment(ws, 'Hello_World1')
pipeline_run1 = experiment1.submit(pipeline1, regenerate_outputs=False)
print("Pipeline is submitted for execution")

Created step train_step [6a37762a][5661186b-1849-448e-a702-0dfe55de651a], (This step will run and generate new outputs)
Created step compare_step [1e403e6b][28119922-d27f-4115-a8aa-7f0b93fdd065], (This step will run and generate new outputs)
Created step extract_step [5b4e9379][2cf85295-d91e-4c62-b9cf-fce096a29501], (This step will run and generate new outputs)
Submitted PipelineRun 8d45e66b-0edd-4b73-9e65-94fa59b71934
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/Hello_World1/runs/8d45e66b-0edd-4b73-9e65-94fa59b71934?wsid=/subscriptions/ae0accf0-cf61-4cac-954a-b3979188ad7c/resourcegroups/maynard_vscode/workspaces/maynard_vscode
Pipeline is submitted for execution


## Examine the pipeline run

In [15]:
# Display the RunDetails widget for the first pipeline run.
run1_widget = RunDetails(pipeline_run1)
run1_widget.show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [17]:
# Report the status of every child run of the first pipeline run.
step_runs = pipeline_run1.get_children()
for child_run in step_runs:
    run_status = child_run.get_status()
    print('Script:', child_run.name, 'status:', run_status)

    # Only failed steps get their job log printed; loosen this test to see
    # the log of steps that succeeded as well.
    if run_status != "Failed":
        continue
    print('job log:', child_run.get_job_log())

Script: extract_step status: Finished
Script: compare_step status: Finished
Script: train_step status: Finished


## Running a few steps in sequence

In [18]:
# Declare explicit ordering between the steps: step2 after step1, and
# step3 after step2.
step2.run_after(step1)
step3.run_after(step2)

# Try a loop (a dependency cycle) by uncommenting the next line:
#step2.run_after(step3)

# Now, construct the pipeline using the steps.
# Only the "final step" of the chain is passed in; the Pipeline takes care of
# the "transitive closure" and figures out the implicit or explicit
# dependencies.
# https://www.geeksforgeeks.org/transitive-closure-of-a-graph/
pipeline2 = Pipeline(workspace=ws, steps=[step3])
print ("Pipeline is built")

# Pipeline.validate() hands back a list of the errors it found, so inspect
# the result instead of discarding it; otherwise problems pass unnoticed.
validation_errors = pipeline2.validate()
if validation_errors:
    print("Pipeline validation found {} error(s):".format(len(validation_errors)))
    for validation_error in validation_errors:
        print(" -", validation_error)
print("Simple validation complete")

Pipeline is built
Step extract_step is ready to be created [2cb01abc]
Step compare_step is ready to be created [6f577f21]
Simple validation complete


In [19]:
# Submit the sequential pipeline under the 'Hello_World2' experiment.
experiment2 = Experiment(ws, 'Hello_World2')
pipeline_run2 = experiment2.submit(pipeline2)
print("Pipeline is submitted for execution")

Created step extract_step [2cb01abc][2b9f5aa3-47cb-40b8-ab9d-4d4aaba5d931], (This step will run and generate new outputs)
Created step compare_step [6f577f21][85ebc211-2e75-46ff-af29-669d51ce7c7c], (This step will run and generate new outputs)
Created step train_step [771b0600][5661186b-1849-448e-a702-0dfe55de651a], (This step is eligible to reuse a previous run's output)
Submitted PipelineRun 48437c50-272e-4abe-abe8-0c19014d1c1c
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/Hello_World2/runs/48437c50-272e-4abe-abe8-0c19014d1c1c?wsid=/subscriptions/ae0accf0-cf61-4cac-954a-b3979188ad7c/resourcegroups/maynard_vscode/workspaces/maynard_vscode
Pipeline is submitted for execution


In [20]:
# Display the RunDetails widget for the second (sequential) pipeline run.
run2_widget = RunDetails(pipeline_run2)
run2_widget.show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …