In [1]:
import azureml.core

# Record the installed SDK version so results can be tied to a specific release.
print("SDK version: {}".format(azureml.core.VERSION))

SDK version: 1.0.17


In [2]:
import os

In [3]:
from azureml.core import Workspace, Datastore

# Workspace connection details are read from a config file kept one level
# above the notebook directory.
workspace_config_path = "../config2.json"
ws = Workspace.from_config(path=workspace_config_path)
print("Workspace %s is loaded" % ws.name)

Found the config file in: /Users/sleebapaul/AML_MSFT/config2.json
Workspace Auria is loaded


In [4]:
# Show the container name of the workspace's default datastore.
ws.get_default_datastore().container_name

'azureml-blobstore-963d160e-9bdd-40a8-852c-1fc06dfe7c7a'

In [30]:
# List every datastore registered in the workspace with its type, container
# and storage account, one datastore per line.
datastores = ws.datastores
for store_name in datastores:
    store = datastores[store_name]
    print(store_name, store.datastore_type, store.container_name, store.account_name)

workspaceblobstore AzureBlob azureml-blobstore-963d160e-9bdd-40a8-852c-1fc06dfe7c7a auria5226806917
workspacefilestore AzureFile azureml-filestore-963d160e-9bdd-40a8-852c-1fc06dfe7c7a auria5226806917
videos AzureBlob videos happypathspublic
models AzureBlob styletransfer pipelinedata


### Upload only once

In [None]:
# Local directory holding the text files that will be uploaded to the datastore.
# NOTE(review): this is a machine-specific absolute path; change it before
# running the notebook anywhere else.
text_dir = "/Users/sleebapaul/Desktop/coco/text"

# os.listdir returns every directory entry, including sub-directories, so keep
# regular files only. Sorting makes the order (and the sample printed below)
# the same on every run.
all_files = sorted(
    entry for entry in os.listdir(text_dir)
    if os.path.isfile(os.path.join(text_dir, entry))
)

# Full local paths, in the form expected by the upload cell that follows.
new_files = [os.path.join(text_dir, entry) for entry in all_files]

print("Found {} files in {}".format(len(new_files), text_dir))
if new_files:
    # Show one sample name and its full path as a sanity check. The guard
    # avoids an IndexError when the directory is empty.
    print(all_files[0])
    print(new_files[0])

In [None]:
# Upload the local text files collected in `new_files` to the workspace file
# store under coco/text, replacing any files already uploaded there.
ds = Datastore.get(ws, datastore_name='workspacefilestore')

text_upload_options = {
    "target_path": 'coco/text',
    "overwrite": True,
    "show_progress": True,
}
ds.upload_files(new_files, **text_upload_options)

In [None]:
# Upload the three model checkpoint files to the workspace blob store under
# the attnGAN folder, replacing any copies already uploaded there.
ds = Datastore.get(ws, datastore_name='workspaceblobstore')

downloads_dir = "/Users/sleebapaul/Downloads/"
checkpoint_names = ["coco_AttnGAN2.pth", "image_encoder100.pth", "text_encoder100.pth"]
checkpoint_paths = [downloads_dir + checkpoint for checkpoint in checkpoint_names]

ds.upload_files(checkpoint_paths,
                target_path='attnGAN',
                overwrite=True,
                show_progress=True)

## Enabling widget
The widget will help visualize the pipeline and monitor the progress of the steps.

In [6]:
# Install the azureml.widgets notebook extension for the current user, then
# enable it. Both are shell commands run through IPython's `!` escape.
!jupyter nbextension install --py --user azureml.widgets
!jupyter nbextension enable --py --user azureml.widgets

Installing /usr/local/lib/python3.7/site-packages/azureml/widgets/static -> azureml_widgets
Up to date: /Users/sleebapaul/Library/Jupyter/nbextensions/azureml_widgets/index.js
Up to date: /Users/sleebapaul/Library/Jupyter/nbextensions/azureml_widgets/extension.js
Up to date: /Users/sleebapaul/Library/Jupyter/nbextensions/azureml_widgets/packages/labextension/azureml_widgets-1.0.0.tgz
- Validating: [32mOK[0m

    To initialize this nbextension in the browser every time the notebook (or other app) loads:
    
          jupyter nbextension enable azureml.widgets --user --py
    
Enabling notebook extension azureml_widgets/extension...
      - Validating: [32mOK[0m


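### Retrieve or create the compute targets
The data and model files were uploaded to the workspace datastores above. Next we list the compute targets attached to the workspace and retrieve the CPU and GPU clusters, creating any that do not exist yet. We will use these clusters later to run the pipeline.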

In [7]:
# Print the name of every compute target attached to the workspace.
compute_target_names = list(ws.compute_targets)
for target_name in compute_target_names:
    print(target_name)

cpucluster
gpuclusterNCv2
gpuclusterNCv3


In [8]:
from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException


def get_or_create_cluster(workspace, cluster_name, vm_size, min_nodes=1, max_nodes=4):
    """Return the AmlCompute cluster called `cluster_name`, creating it if needed.

    Args:
        workspace: Workspace that owns (or will own) the cluster.
        cluster_name: Name of the compute target to look up or create.
        vm_size: VM size used only when the cluster has to be created.
        min_nodes: Minimum node count used only on creation.
        max_nodes: Maximum node count used only on creation.

    Returns:
        The existing or newly provisioned AmlCompute object.

    Only ComputeTargetException (raised when the lookup fails) triggers
    creation; any other error, such as an authentication or network failure,
    propagates instead of being silently treated as "cluster missing".
    """
    try:
        cluster = AmlCompute(workspace, cluster_name)
    except ComputeTargetException:
        provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                    min_nodes=min_nodes,
                                                                    max_nodes=max_nodes)
        cluster = AmlCompute.create(workspace, cluster_name, provisioning_config)
        # Block until provisioning finishes (or the 20 minute timeout expires).
        cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
        print("Created new compute target: {}".format(cluster.name))
    else:
        print("Found existing compute target: {}".format(cluster.name))
    return cluster


# CPU Cluster
cpu_compute_target = "cpucluster"
cpu_compute = get_or_create_cluster(ws, cpu_compute_target, "STANDARD_D2_V2")

# GPU Clusters
# Both GPU clusters are ensured to exist, in this order. `gpu_compute` is
# assigned twice on purpose so that it ends up pointing at the NCv3 cluster,
# which is the one the pipeline step uses.
gpu_compute_target = "gpuclusterNCv2"
gpu_compute = get_or_create_cluster(ws, gpu_compute_target, "STANDARD_NC6S_V2")

gpu_compute_target = "gpuclusterNCv3"
gpu_compute = get_or_create_cluster(ws, gpu_compute_target, "STANDARD_NC6S_V3")

Found existing compute target: cpucluster
Found existing compute target: gpuclusterNCv2
Found existing compute target: gpuclusterNCv3


In [9]:
# Confirm which cluster `gpu_compute` refers to; the previous cell assigns it
# more than once, so the last assignment wins.
gpu_compute.name

'gpuclusterNCv3'

## Create a Pipeline
A Step is a unit of execution. A step typically needs an execution target (compute target) and a script to execute; it may also require script arguments and inputs, and it can produce outputs. A step can also take a number of other parameters. Azure Machine Learning Pipelines currently provides these built-in steps: [PythonScriptStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.python_script_step.pythonscriptstep?view=azure-ml-py), [EstimatorStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py), [MpiStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.mpi_step.mpistep?view=azure-ml-py), [AdlaStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.adla_step.adlastep?view=azure-ml-py), [DataTransferStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.data_transfer_step.datatransferstep?view=azure-ml-py), [DatabricksStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.databricks_step.databricksstep?view=azure-ml-py), and [HyperDriveStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.hyper_drive_step.hyperdrivestep?view=azure-ml-py).

### Define a Pipeline Step with inputs and outputs 
#### Modeling input data
A step in the pipeline can take data as input. This data can be a data source that lives in one of the accessible data locations, or intermediate data produced by a previous step in the pipeline. Data that already exists is typically called a data source and is represented by a [DataReference](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.data_reference.datareference?view=azure-ml-py) object. A DataReference can point to either a file or a directory.

In [40]:
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import PipelineData
# PipelineParameter is used at the bottom of this cell, so it has to be
# imported here; otherwise the cell raises NameError on a fresh kernel.
from azureml.pipeline.core.graph import PipelineParameter

# Model files live in the workspace blob store.
# NOTE(review): the upload cell earlier in this notebook writes the .pth files
# with target_path='attnGAN', while the references below point at the
# datastore root. Confirm the files exist at the root, or prefix these paths
# with "attnGAN/".
bs = Datastore.get(ws, datastore_name='workspaceblobstore')
coco_AttnGAN2 = DataReference(
    datastore=bs,
    data_reference_name="coco_AttnGAN2",
    path_on_datastore="coco_AttnGAN2.pth")

# Defined for completeness; it is not passed as an input to the step that is
# built in the next cell.
image_encoder100 = DataReference(
    datastore=bs,
    data_reference_name="image_encoder100",
    path_on_datastore="image_encoder100.pth")

text_encoder100 = DataReference(
    datastore=bs,
    data_reference_name="text_encoder100",
    path_on_datastore="text_encoder100.pth")

# The coco folder lives in the workspace file store.
fs = Datastore.get(ws, datastore_name='workspacefilestore')
data_dir = DataReference(
    datastore=fs,
    data_reference_name="coco",
    path_on_datastore="coco")

# Output of the pipeline step, stored on the file store.
output_dir = PipelineData(name="generated_images",
                          datastore=fs,
                          output_path_on_compute="generated_images")

# Text prompt, exposed as a pipeline parameter so it can be overridden each
# time the pipeline is submitted.
input_text = PipelineParameter(name="input_text", default_value="sky beach evening")

print("DataReference object created")

DataReference object created


In [43]:
from azureml.core.runconfig import CondaDependencies, RunConfiguration
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import MpiStep

# Folder containing gen_art.py and the rest of the code shipped with the step.
project_folder = "../attnGAN/"

# Number of nodes for the MPI job, overridable at submission time.
nodecount_param = PipelineParameter(name="nodecount", default_value=3)

# Conda environment for the run: PyTorch and torchvision from the extra channels.
cd = CondaDependencies()
print("Python Version: ", cd.get_python_version())

for channel in ("conda-forge", "pytorch"):
    cd.add_channel(channel)
for conda_package in ("pytorch", "torchvision"):
    cd.add_conda_package(conda_package)

# Run configuration: GPU-enabled Docker based on the public pytorch/pytorch image.
amlcompute_run_config = RunConfiguration(conda_dependencies=cd)
docker_settings = amlcompute_run_config.environment.docker
docker_settings.enabled = True
docker_settings.gpu_support = True
docker_settings.base_image = "pytorch/pytorch"
amlcompute_run_config.environment.spark.precache_packages = False

# Command-line arguments handed to gen_art.py. Data references and the
# pipeline parameter are passed as objects for the pipeline to resolve.
step_arguments = [
    "--gpu", 0,
    "--data_dir", data_dir,
    "--model_path", coco_AttnGAN2,
    "--textencoder_path", text_encoder100,
    "--output_dir", output_dir,
    "--input_text", input_text,
]

# Extra pip packages requested for the step.
step_pip_packages = ["mpi4py", "torch", "torchvision", "numpy", "nltk",
                     "easydict", "pandas", "Pillow", "scikit-image"]

trainStep = MpiStep(
    name="text_to_image_step",
    source_directory=project_folder,
    script_name="gen_art.py",
    arguments=step_arguments,
    inputs=[data_dir, coco_AttnGAN2, text_encoder100],
    outputs=[output_dir],
    compute_target=gpu_compute,
    node_count=nodecount_param,
    process_count_per_node=1,
    pip_packages=step_pip_packages,
    runconfig=amlcompute_run_config,
    use_gpu=True,
    allow_reuse=True)

print("trainStep step created")

Python Version:  3.6.2
trainStep step created


In [44]:
from azureml.pipeline.core import Pipeline

# The pipeline consists of the single text-to-image step.
steps = [trainStep]

training_pipeline = Pipeline(workspace=ws, steps=steps)
print("Pipeline is built")

# validate() reports the problems it finds as its return value, so inspect
# the result instead of discarding it. A None return is treated as
# "no errors".
validation_errors = training_pipeline.validate()
if validation_errors:
    raise ValueError("Pipeline validation failed: {}".format(validation_errors))
print("Pipeline validation complete")

Pipeline is built
Step text_to_image_step is ready to be created [5a891898]
Pipeline validation complete


## Submit the pipeline
[Submitting](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipeline.pipeline?view=azure-ml-py#submit) the pipeline involves creating an [Experiment](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.experiment?view=azure-ml-py) object and providing the built pipeline for submission. 

In [45]:
from azureml.core import Experiment

# Submit the pipeline under its own experiment, overriding the default
# input_text parameter for this run.
text_to_image_experiment = Experiment(ws, 'Text_to_Image_Experiment')
submit_parameters = {"input_text": "I love my mother"}

training_pipeline_run = text_to_image_experiment.submit(
    training_pipeline,
    pipeline_params=submit_parameters,
    regenerate_outputs=False)
print("Pipeline is submitted for execution")

Created step text_to_image_step [5a891898][a55aee04-c0c9-4d52-b0fd-431aced44b39], (This step will run and generate new outputs)
Using data reference coco for StepId [e94a902c][3b008c6a-fd9f-4adb-9065-5d40e43a2cfd], (Consumers of this data are eligible to reuse prior runs.)
Using data reference coco_AttnGAN2 for StepId [09a1acd8][16966b0d-2190-4995-b0e1-4d113d20ff63], (Consumers of this data are eligible to reuse prior runs.)
Using data reference text_encoder100 for StepId [bb06aee2][20934fa7-7db5-477f-9e8a-59eb1a0376af], (Consumers of this data are eligible to reuse prior runs.)
Submitted pipeline run: d0e3df3c-a995-4e47-8b28-3afbbc63cd81
Pipeline is submitted for execution


## Publish the pipeline
Once you are satisfied with the results of your experiment, you may want to publish the pipeline to get a REST endpoint so the pipeline can be invoked later.

In [None]:
# Publish the pipeline so it can be invoked through a REST endpoint.
# The name and description describe this text-to-image pipeline; the previous
# values were left over from a model-comparison sample.
published_training_pipeline = training_pipeline.publish(
    name="Text_to_Image_Pipeline",
    description="Generates images from input text with the AttnGAN text_to_image_step")
print("The published pipeline ID is {}".format(published_training_pipeline.id))

## Run published pipeline using its REST endpoint
To invoke a run of the published pipeline, you need an Azure Active Directory authentication header token. The cell below obtains it with `InteractiveLoginAuthentication`; the [AzureCliAuthentication](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.authentication.azurecliauthentication?view=azure-ml-py) class can be used instead if you are already signed in through the Azure CLI.

In [None]:
from azureml.core.authentication import InteractiveLoginAuthentication
import requests

# Obtain the Azure Active Directory header used to authorize the REST call.
interactive_auth = InteractiveLoginAuthentication()
aad_token = interactive_auth.get_authentication_header()

training_pipeline_rest_ep = published_training_pipeline.endpoint

print("The published pipeline REST endpoint is {}".format(training_pipeline_rest_ep))

# Specify the parameters when running the pipeline. Assignments must use
# parameter names this pipeline defines ("input_text", "nodecount"); the
# previous "model_version" assignment matched neither of them.
# The experiment name is reused by the following cell to look up the run.
response = requests.post(training_pipeline_rest_ep,
                         headers=aad_token,
                         json={"ExperimentName": "Compare_v2_Models_Experiment",
                               "RunSource": "SDK",
                               "ParameterAssignments": {"input_text": "sky beach evening"}})
# Fail with the HTTP error itself rather than a KeyError on "Id" when the
# service rejects the request.
response.raise_for_status()
pipeline_run_id = response.json()["Id"]

print("The run ID is {}".format(pipeline_run_id))

## Examine the run
We can examine the run of the pipeline that we just invoked via the REST endpoint.

In [None]:
from azureml.pipeline.core import PipelineRun
# RunDetails was never imported anywhere in this notebook, so the call below
# raised NameError; it is provided by the azureml.widgets package enabled
# earlier.
from azureml.widgets import RunDetails

# Look up the run started through the REST endpoint (same experiment name as
# in the request) and display it in the monitoring widget.
pub_pipeline_run = PipelineRun(Experiment(ws, "Compare_v2_Models_Experiment"), pipeline_run_id)
RunDetails(pub_pipeline_run).show()

### Thank you!
Thank you! 