In [1]:
from azureml.core import Workspace, Experiment, Datastore
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.widgets import RunDetails
from azureml.train.estimator import Estimator

NameError: name 'VERSION' is not defined

# Register/Reference a Datastore

In [2]:
# workspace: load it via Workspace.from_config(), then show the datastores
# registered in it (the bare last expression is rendered as the cell output).
ws = Workspace.from_config()
ws.datastores

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


{'workspacefilestore': <azureml.data.azure_storage_datastore.AzureFileDatastore at 0x23b03aadd48>,
 'workspaceblobstore': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x23b2032e288>,
 'smtprodwestus21globaluploadedresources': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x23b20369408>,
 'seer': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x23b203698c8>}

In [3]:
# data: the 'seer' datastore, one of the entries listed by ws.datastores above.
# Every PipelineData output defined later in this notebook is placed on it.
datastore = ws.datastores['seer']

# compute target: look up the existing target named 'gandalf'; all pipeline
# steps below are configured to run on it.
compute = ws.compute_targets['gandalf']

# Define Pipeline!
The following will be created and then run:
1. Pipeline Parameters (path on datastore)
2. Data Prep Step
3. Training Step
4. Model Registration Step


## Pipeline Parameters
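We need to tell the Pipeline what it needs to learn to see! The `data` pipeline parameter holds the path on the datastore of the folder to train on (default: `burrito_tacos`).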

In [4]:
# Default location of the input data: the 'burrito_tacos' path on the datastore.
datapath = DataPath(datastore=datastore, path_on_datastore='burrito_tacos')

# The pipeline input is a 2-tuple: the "data" parameter (defaulting to the
# path above) paired with a compute binding that uses mount mode.
data_parameter = PipelineParameter(name="data", default_value=datapath)
mount_binding = DataPathComputeBinding(mode='mount')
data_path_pipeline_param = (data_parameter, mount_binding)

## Data Prep Step

In [5]:
# Directory-valued pipeline output named "tfrecords_set"; written by the prep
# step and consumed by the training step.
seer_tfrecords = PipelineData("tfrecords_set", datastore=datastore, is_directory=True)

# Estimator that runs prep.py from the notebook's directory on the compute
# target, with dependencies taken from requirements.txt.
prep = Estimator(
    source_directory='.',
    compute_target=compute,
    entry_script='prep.py',
    use_gpu=True,
    pip_requirements_file='requirements.txt',
)

# Command-line arguments handed to prep.py: where to read from and where to write.
prep_script_args = [
    "--source_path", data_path_pipeline_param,
    "--target_path", seer_tfrecords,
]

# Every object referenced in the script arguments is also declared as a step
# input or output.
prepStep = EstimatorStep(
    name='Data Preparation',
    estimator=prep,
    estimator_entry_script_arguments=prep_script_args,
    inputs=[data_path_pipeline_param],
    outputs=[seer_tfrecords],
    compute_target=compute,
)

## Training Step

In [7]:
# Directory-valued pipeline output named "train"; written by the training
# step and consumed by the registration step.
seer_training = PipelineData("train", datastore=datastore, is_directory=True)

# Estimator that runs train.py from the notebook's directory on the compute
# target, with dependencies taken from requirements.txt.
train = Estimator(
    source_directory='.',
    compute_target=compute,
    entry_script='train.py',
    use_gpu=True,
    pip_requirements_file='requirements.txt',
)

# Command-line arguments handed to train.py: input/output locations followed
# by the fixed values passed for --epochs, --batch and --lr.
train_script_args = [
    "--source_path", seer_tfrecords,
    "--target_path", seer_training,
    "--epochs", 5,
    "--batch", 10,
    "--lr", 0.001,
]

# Reads the prep step's output and produces the "train" output.
trainStep = EstimatorStep(
    name='Model Training',
    estimator=train,
    estimator_entry_script_arguments=train_script_args,
    inputs=[seer_tfrecords],
    outputs=[seer_training],
    compute_target=compute,
)

## Model Registration Step

In [8]:
# Directory-valued pipeline output named "model"; written by the registration step.
seer_model = PipelineData("model", datastore=datastore, is_directory=True)

# Estimator that runs register.py from the notebook's directory on the compute
# target. Unlike the prep and train estimators, no pip_requirements_file is given.
# NOTE(review): use_gpu=True is requested here just as for the other steps;
# confirm that register.py actually needs a GPU-enabled environment.
register = Estimator(
    source_directory='.',
    compute_target=compute,
    entry_script='register.py',
    use_gpu=True,
)

# Command-line arguments handed to register.py: where to read from and where to write.
register_script_args = [
    "--source_path", seer_training,
    "--target_path", seer_model,
]

# Reads the training step's output and produces the "model" output.
registerStep = EstimatorStep(
    name='Model Registration',
    estimator=register,
    estimator_entry_script_arguments=register_script_args,
    inputs=[seer_training],
    outputs=[seer_model],
    compute_target=compute,
)

## Test Run

In [9]:
# Assemble the three steps into a single pipeline in this workspace.
pipeline_steps = [prepStep, trainStep, registerStep]
pipeline1 = Pipeline(workspace=ws, steps=pipeline_steps)



In [10]:
# Submit the pipeline under the 'seer' experiment, then display the run in
# the RunDetails widget.
seer_experiment = Experiment(ws, 'seer')
pipeline_run1 = seer_experiment.submit(pipeline1)
run_widget = RunDetails(pipeline_run1)
run_widget.show()

Created step Data Preparation [d5c771b0][38d05de8-f403-433c-98d8-d3cd348c8305], (This step will run and generate new outputs)
Created step Model Training [8dc2ac8e][3e903d1d-4bd3-4132-a853-86b891cf7514], (This step will run and generate new outputs)
Created step Model Registration [fe3f1ef7][bace4d1b-0337-45a3-9025-d579dfb3b56e], (This step will run and generate new outputs)
Created data reference seer_1740449a for StepId [afded426][a3ea6644-c238-4064-b05b-e1cf3522c3ca], (Consumers of this data will generate new runs.)
Submitted PipelineRun d162971e-b6cb-4271-8188-d75b1d1c74ed
Link to Azure Machine Learning studio: https://ml.azure.com/experiments/seer/runs/d162971e-b6cb-4271-8188-d75b1d1c74ed?wsid=/subscriptions/91d27443-f037-45d9-bb0c-428256992df6/resourcegroups/robots/workspaces/hal


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [22]:
# Left disabled on purpose: uncomment to cancel the pipeline run submitted above.
#pipeline_run1.cancel()

In [11]:
# Publish the pipeline under a display name and description.
publish_settings = {
    "name": "Food Pipeline",
    "description": "Transfer learned image classifier. Uses folders as labels.",
}
published_pipeline1 = pipeline1.publish(**publish_settings)