In [None]:
# First import the configuration notebook

#### Import the required modules
`Workspace`, `Experiment`, `Datastore`, `RunDetails`

In [None]:
import os
import azureml.core
from azureml.core import Workspace, Experiment, Datastore
from azureml.widgets import RunDetails

# Show which SDK version is installed. The label and the version are passed as
# two separate print() arguments (a comma is required between them).
print("azure-ML SKD version :", azureml.core.VERSION)

#### Import Pipeline modules

In [None]:
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep


In [None]:
# Initialize the Workspace from the configuration produced by the configuration notebook
ws = Workspace.from_config()

# Show the identifying details of the workspace, one value per line
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)


In [None]:
# Define a default datastore.
# The datastore is looked up explicitly by name; "workspaceblobstore" must be
# used as is. A preceding ws.get_default_datastore() call is not needed because
# its result would be overwritten by this assignment anyway.
def_blob_store = Datastore(ws, "workspaceblobstore")
print("Name of blobstore : {}".format(def_blob_store.name))


#### Uploading data to the default datastore (Azure Blob storage)

In [None]:
# Upload the local pickle file to the "20newsgroups" path of the datastore,
# overwriting any copy that already exists there.
local_files = ["./20news.pkl"]
def_blob_store.upload_files(local_files, target_path="20newsgroups", overwrite=True)
print("Upload is now done!")

#### Compute targets

In [None]:
# List all available compute targets in your workspace:
# print every entry yielded by iterating over ws.compute_targets.

cts = ws.compute_targets
for compute_entry in cts:
    print(compute_entry)

In [None]:
# Retrieve an existing compute target or create a new one
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

aml_compute_target = "cpu-cluster"
try:
    # Try to look up the cluster by name in the workspace
    aml_compute = AmlCompute(ws, aml_compute_target)
except ComputeTargetException:
    # The lookup raised ComputeTargetException, so provision a new cluster
    print("creating new compute target")

    cluster_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",
        min_nodes=1,
        max_nodes=4,
    )
    aml_compute = ComputeTarget.create(ws, aml_compute_target, cluster_config)
    # Wait for provisioning, showing progress output, with a 20-minute timeout
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
else:
    print("found existing compute target.")

print("Azure Machine Learning Compute attached")

### Creating a step in Pipeline

A Step is a unit of execution. Step typically needs a target of execution (compute target), a script to execute, and may require script arguments and inputs, and can produce outputs.

Keep each step's script and its dependencies in a separate folder, and pass that folder as the step's source directory, to avoid unnecessary re-runs.


In [None]:
# Create the PythonScriptStep that runs train.py from the ./train folder

source_directory = './train'
resolved_source_dir = os.path.realpath(source_directory)
print('Source directory for step is : {}'.format(resolved_source_dir))

# allow_reuse is switched on for this step
step1 = PythonScriptStep(
    name="train_step",
    script_name="train.py",
    source_directory=source_directory,
    compute_target=aml_compute,
    allow_reuse=True,
)
print("Step1 creation complete.")

#### Run multiple steps in parallel


In [None]:
# This step uses its own source directory: ./compare

source_directory = './compare'
resolved_source_dir = os.path.realpath(source_directory)
print('Source directory for step is: {}'.format(resolved_source_dir))

# Define the step that runs compare.py on the same compute target

step2 = PythonScriptStep(
    name="compare_step",
    script_name="compare.py",
    source_directory=source_directory,
    compute_target=aml_compute,
)

# Use RunConfiguration to declare conda dependencies and the environment they are installed into
# https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py

from azureml.core.runconfig import DEFAULT_CPU_IMAGE, RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# Create a new run config object and work on its environment section
run_config = RunConfiguration()
step_environment = run_config.environment

# Enable the Docker runtime and base it on the default CPU image
step_environment.docker.enabled = True
step_environment.docker.base_image = DEFAULT_CPU_IMAGE

# Dependencies are not user-managed: a conda environment is created in the Docker image for execution
step_environment.python.user_managed_dependencies = False

# Conda packages required by the script (scikit-learn)
step_environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])

# This step also gets its own source directory: ./extract

source_directory = './extract'
resolved_source_dir = os.path.realpath(source_directory)
print('Source directory for this step is : {}'.format(resolved_source_dir))

# Create the step that runs extract.py with the custom run configuration

step3 = PythonScriptStep(
    name="extract_step",
    script_name="extract.py",
    source_directory=source_directory,
    compute_target=aml_compute,
    runconfig=run_config,
)

# Collect the steps that the pipeline will run

steps = [step1, step2, step3]
print("Step lists done!")


### Building the Pipeline

All steps run in parallel once we submit. When submit is called, a PipelineRun is created, which in turn creates StepRun objects for each step in the workflow.

In [None]:
# Build a new pipeline in the workspace from the list of steps

pipeline1 = Pipeline(steps=steps, workspace=ws)
print("Pipeline is created!")

#### Validate the pipeline built in the last step before submitting it.

Validation runs checks such as detecting circular dependencies and verifying parameters.

In [None]:
# validate() returns a list of the errors it found in the pipeline; keep that
# result and report it, so a pipeline with problems is not announced as valid.
validation_errors = pipeline1.validate()
if validation_errors:
    print("Pipeline validation found {} error(s):".format(len(validation_errors)))
    for validation_error in validation_errors:
        print(validation_error)
else:
    print("Pipeline validation completed.")

### Submit the Pipeline.

Submitting involves creating an Experiment object and providing the built pipeline for submission.

In [None]:
# Get the 'Hello_world' experiment in the workspace and submit the pipeline to it
experiment = Experiment(ws, 'Hello_world')
pipeline_run1 = experiment.submit(pipeline1, regenerate_outputs=False)
print("Pipeline is submitted for execution!")

#### Examine the pipeline run

In [None]:
# Use the RunDetails widget (imported from azureml.widgets) to display the pipeline run.
# The class name is case-sensitive: RunDetails, not runDetails.

RunDetails(pipeline_run1).show()


Job logs, metrics, stdout, and stderr files

In [None]:
# Report the status of every child run of the pipeline run.
step_runs = pipeline_run1.get_children()
for step_run in step_runs:
    # Query the status on the loop variable itself (one consistent name throughout)
    status = step_run.get_status()
    print('Script :' , step_run.name , 'status :',status)

    # If the job failed, fetch its job log to see the details
    if status == "Failed":
        joblog = step_run.get_job_log()
        print('job log :' , joblog)