In [None]:
import getpass
import os

import azureml
from azureml.core import Workspace, Experiment, Datastore, Environment
from azureml.core.runconfig import RunConfiguration
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.widgets import RunDetails
from azureml.train.estimator import Estimator

# Show which Azure ML SDK version this notebook is running against.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Error: Error: Jupyter server crashed. Unable to connect. 
Error code from jupyter: 1

# Setup Variables

In [None]:
# Never store the storage account key in the notebook: anything typed here is
# saved with the file and ends up in version control.
# Use the key already present in the environment when there is one; otherwise
# prompt for it without echoing, so it only lives in this kernel's process.
# NOTE(review): a key that was ever committed to this notebook must be treated
# as compromised and rotated in the storage account.
if not os.environ.get('STORAGE_ACCOUNT_KEY'):
    os.environ['STORAGE_ACCOUNT_KEY'] = getpass.getpass('Storage account key: ')

In [None]:
# Storage account that backs the datastore; the key is read from the
# environment (None when STORAGE_ACCOUNT_KEY is not set).
storageaccountname = 'mlopsws6982005979'
storageaccountkey = os.environ.get('STORAGE_ACCOUNT_KEY')
containername = 'seer-container'

# Name the datastore is registered under, and the path inside it that is used
# as the pipeline's data location.
datastorename = 'seerdata'
datastorepath = 'hardware'

# Name of the compute target the pipeline steps run on.
computetarget = 'mlopscluster'

# Register/Reference a Datastore

In [2]:
# Load the workspace described by the local config file and list the
# datastores currently registered in it.
workspace_config_path = './azureml-config.json'
ws = Workspace.from_config(path=workspace_config_path)
print(ws.datastores)

{'workspaceblobstore': <azureml.data.azure_storage_datastore.AzureBlobDatastore object at 0x7f926c220630>, 'workspacefilestore': <azureml.data.azure_storage_datastore.AzureFileDatastore object at 0x7f926c228048>, 'damoseerdata': <azureml.data.azure_storage_datastore.AzureBlobDatastore object at 0x7f926c228550>}


In [None]:
# Re-register the datastore so it always points at the container and
# credentials configured above.
#
# ws.datastores maps datastore names to datastore objects, so the name is
# looked up explicitly. A failed unregister() (or an interrupt) now surfaces
# instead of being swallowed and reported as "doesn't exist".
existing_datastore = ws.datastores.get(datastorename)
if existing_datastore is not None:
    print('Unregistering existing datastore')
    existing_datastore.unregister()
else:
    print('Data store doesn\'t exist, no need to remove')

# Register the datastore; this only runs once the cleanup above has succeeded.
datastore = Datastore.register_azure_blob_container(workspace=ws,
                                        datastore_name=datastorename,
                                        container_name=containername,
                                        account_name=storageaccountname,
                                        account_key=storageaccountkey,
                                        create_if_not_exists=True)

print('Datastore registered: ', datastore)

In [3]:
# data
# Look the datastore up by the configured name (datastorename) rather than a
# repeated literal, so this cell keeps working when the configuration changes.
datastore = ws.datastores[datastorename]

# Reference to the configured path inside that datastore.
datareference = DataReference(datastore=datastore,
                    data_reference_name="seerdata",
                    path_on_datastore=datastorepath)


<azureml.data.azure_storage_datastore.AzureBlobDatastore object at 0x7f926c220278>
AmlCompute(workspace=Workspace.create(name='damo-mlworkspace', subscription_id='bc202ec2-54ef-4576-b7fb-a961c983398e', resource_group='damo-aiml'), name=damoseercompute, id=/subscriptions/bc202ec2-54ef-4576-b7fb-a961c983398e/resourceGroups/damo-aiml/providers/Microsoft.MachineLearningServices/workspaces/damo-mlworkspace/computes/damoseercompute, type=AmlCompute, provisioning_state=Succeeded, location=australiaeast, tags=None)


# Create Compute Resources

In [None]:
# Reuse the compute target with the configured name when the workspace already
# has one; otherwise provision a new AmlCompute cluster under that name.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=computetarget)
except ComputeTargetException:
    cluster_settings = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC6',
        min_nodes=1,
        max_nodes=4,
    )
    cpu_cluster = ComputeTarget.create(ws, computetarget, cluster_settings)
else:
    print('Found existing cluster, use it.')

# Wait for the cluster, then fetch the compute target from the workspace by name.
cpu_cluster.wait_for_completion(show_output=True)
compute = ws.compute_targets[computetarget]

print('Compute registered: ', compute)

# Define Pipeline!

The following will be created and then run:

  1. Pipeline Parameters
  2. Data Process Step
  3. Training Step
  4. Model Registration Step
  5. Pipeline registration
  6. Submit the pipeline for execution


## Pipeline Parameters
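We need to tell the pipeline what it needs to learn to see: the `data` parameter points at the folder on the datastore that holds the input data, and the run configuration defines the environment for the data preparation step.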

In [4]:
# Pipeline input: a "data" parameter whose default is the configured path on
# the datastore, paired with a binding that mounts it on the compute target.
datapath = DataPath(datastore=datastore, path_on_datastore=datastorepath)
data_parameter = PipelineParameter(name="data", default_value=datapath)
mount_binding = DataPathComputeBinding(mode='mount')
data_path_pipeline_param = (data_parameter, mount_binding)
print(data_path_pipeline_param)

# Run configuration for the data preparation step; its environment is built
# from the requirements file shared by data prep and training.
dataprepRunConfig = RunConfiguration()
dataprepEnvironment = Environment.from_pip_requirements(
    'dataprepenv',
    'requirements-dataprepandtraining.txt',
)
dataprepRunConfig.environment = dataprepEnvironment

(<azureml.pipeline.core.graph.PipelineParameter object at 0x7f926c23e320>, <azureml.data.datapath.DataPathComputeBinding object at 0x7f926c23e358>)


## Data Process Step

In [5]:
# Directory output of the preparation step; the training step consumes it.
seer_tfrecords = PipelineData(name="tfrecords_set", datastore=datastore, is_directory=True)

# Command-line arguments handed to parse.py.
prep_arguments = [
    "--source_path", data_path_pipeline_param,
    "--target_path", seer_tfrecords,
]

# Step that runs parse.py from the current directory on the compute target.
prepStep = PythonScriptStep(
    script_name='parse.py',
    name='Data Preparation',
    source_directory='.',
    arguments=prep_arguments,
    inputs=[data_path_pipeline_param],
    outputs=[seer_tfrecords],
    compute_target=compute,
    runconfig=dataprepRunConfig,
)

print(prepStep)

<azureml.pipeline.steps.python_script_step.PythonScriptStep object at 0x7f926c2205f8>


## Training Step

In [6]:
# Directory output of the training step; the registration step consumes it.
seer_training = PipelineData(name="train", datastore=datastore, is_directory=True)

# Estimator that runs train.py from the current directory with the shared
# data-prep/training requirements installed.
train = Estimator(
    source_directory='.',
    entry_script='train.py',
    compute_target=compute,
    pip_requirements_file='requirements-dataprepandtraining.txt',
)

# Command-line arguments handed to train.py, including the hyperparameters.
training_arguments = [
    "--source_path", seer_tfrecords,
    "--target_path", seer_training,
    "--epochs", 5,
    "--batch", 10,
    "--lr", 0.001,
]

trainStep = EstimatorStep(
    name='Model Training',
    estimator=train,
    estimator_entry_script_arguments=training_arguments,
    inputs=[seer_tfrecords],
    outputs=[seer_training],
    compute_target=compute,
)

print(trainStep)



<azureml.pipeline.steps.estimator_step.EstimatorStep object at 0x7f926c228320>


## Register Model Step

In [7]:
# Run configuration for the registration step, with its own environment built
# from the registration requirements file.
registerRunConfig = RunConfiguration()
registerEnvironment = Environment.from_pip_requirements(
    'registerenv',
    'requirements-registration.txt',
)
registerRunConfig.environment = registerEnvironment

# Directory output of the registration step.
seer_model = PipelineData(name="model", datastore=datastore, is_directory=True)

# Command-line arguments handed to register.py.
register_arguments = [
    "--source_path", seer_training,
    "--target_path", seer_model,
]

# Step that runs register.py from the current directory on the compute target.
registerStep = PythonScriptStep(
    script_name='register.py',
    name='Model Registration',
    source_directory='.',
    arguments=register_arguments,
    inputs=[seer_training],
    outputs=[seer_model],
    compute_target=compute,
    runconfig=registerRunConfig,
)

print(registerStep)

<azureml.pipeline.steps.python_script_step.PythonScriptStep object at 0x7f926c23ebe0>


## Create and publish the Pipeline

In [8]:
# Assemble the three steps into a pipeline and publish it to the workspace.
pipeline_steps = [prepStep, trainStep, registerStep]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)

published_pipeline = pipeline.publish(
    name="Seer Pipeline",
    description="Transfer learned image classifier. Uses folders as labels.",
)



Created step Data Preparation [245a32d6][bb74677f-84f0-4646-a63c-55d831860987], (This step will run and generate new outputs)
Created step Model Training [ff56996a][884edd1b-e363-4a0d-b5e9-6d3a90fb0237], (This step will run and generate new outputs)
Created step Model Registration [df06b3ce][13374cdd-5994-426e-983f-4c0de03d2c1b], (This step will run and generate new outputs)
Created data reference damoseerdata_daf26998 for StepId [999c4b39][a1dc6fcc-1bc5-4644-859b-9cc91cdfb1d7], (Consumers of this data will generate new runs.)


In [9]:
# Submit the published pipeline under the 'seer' experiment and report the run id.
experiment = Experiment(ws, 'seer')
pipeline_run = experiment.submit(published_pipeline)
print('Run created with ID: ', pipeline_run.id)

# Show the run-details widget for the submitted run.
RunDetails(pipeline_run).show()

Run created with ID:  7103c1bc-807a-4e25-8874-e578511eb035


A Jupyter Widget