In [1]:
import json
import azureml.core
from azureml.core import Workspace, Datastore, Dataset, Environment, Experiment
from azureml.data import FileDataset
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter
from azureml.core.compute import ComputeTarget, AmlCompute


# Report the installed SDK version, then keep its first three dot-separated
# components (as strings) under the keys 'major', 'minor' and 'patch'.
sdk_version = azureml.core.VERSION
print(sdk_version)
version = {
    label: part
    for label, part in zip(('major', 'minor', 'patch'), sdk_version.split('.'))
}

# Load the workspace via Workspace.from_config().
ws = Workspace.from_config()

1.10.0


In [2]:
# Bring ParallelRunConfig / ParallelRunStep into scope.
#
# The previous version gate compared the minor version with `== 10`, so any
# SDK other than x.10 (e.g. 1.11+, 2.x) was sent to the contrib package, and a
# major version below 1 imported nothing at all, leaving both names undefined.
# Probing the packages directly avoids depending on exact version numbers:
# prefer azureml.pipeline.steps and fall back to the contrib package only when
# that import is not available.
try:
    from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep
except ImportError:
    from azureml.contrib.pipeline.steps import ParallelRunConfig, ParallelRunStep

In [3]:
# Resolve the AmlCompute cluster that will run the pipeline step.
compute_name = "aml-compute1"
vm_size = "STANDARD_DS1_v2"

# Read the workspace's compute targets once and look the name up in that
# mapping; .get() yields None when the name is not registered.
compute_target = ws.compute_targets.get(compute_name)

# Fail fast: later cells use compute_target unconditionally, so continuing
# without a usable target would only surface as a NameError further down.
if compute_target is None:
    raise LookupError('compute target not found, refer to 02_create_compute_cluster to create compute target...')
if not isinstance(compute_target, AmlCompute):
    raise TypeError('compute target ' + compute_name + ' exists but is not an AmlCompute cluster')

print('Found compute target: ' + compute_name)

Found compute target: aml-compute1


In [4]:
# Resolve the registered FileDataset that feeds the parallel run step.
dataset_name = 'anpr_images'

# Read the workspace's registered datasets once; .get() yields None when the
# name is not registered.
anpr_images = ws.datasets.get(dataset_name)

# Fail fast: later cells call anpr_images.as_named_input(...) unconditionally,
# so a missing or wrongly typed dataset must stop the notebook here with a
# clear message instead of a NameError/AttributeError further down.
if anpr_images is None:
    raise LookupError('file dataset not found, refer to 01_config_notebook to setup file datasets...')
if not isinstance(anpr_images, FileDataset):
    raise TypeError('dataset ' + dataset_name + ' is registered but is not a FileDataset')

print('Found FileDataset: ' + dataset_name)


Found FileDataset: anpr_images


In [5]:
# Pipeline output named "scores", backed by the workspace's default datastore.
scores_datastore = ws.get_default_datastore()
output_dir = PipelineData(
    name="scores",
    datastore=scores_datastore,
    output_path_on_compute="batchscoring/results",
)

In [6]:
# Display the PipelineData object as a quick check that it was created.
output_dir

$AZUREML_DATAREFERENCE_scores

# pipeline parameters

In [7]:
# Disabled Key Vault check, kept for reference:
# kv = ws.get_default_keyvault()
# print(len(kv.get_secret(pipeline_kv_readapi)))

# Pipeline parameters handed to the scoring step as script arguments.
# pipeline_inpart carries the input partition; the two pipeline_kv_* values
# are passed as --kv_customimage / --kv_readapi (presumably Key Vault secret
# names resolved by the entry script -- confirm against minibatch_process.py).
pipeline_inpart = PipelineParameter(
    default_value='2020/07/28',
    name="pipeline_inpart",
)
pipeline_kv_customimg = PipelineParameter(
    default_value='api-custom-vision',
    name="pipeline_kv_customimg",
)
pipeline_kv_readapi = PipelineParameter(
    default_value='api-readapi',
    name="pipeline_kv_readapi",
)



In [8]:
# Print the parameter's string form, which shows its name and default value.
print(pipeline_inpart)

PipelineParameter_Name:pipeline_inpart_Default:2020/07/28


# python environment configuration

In [9]:
# Resolve the registered Environment used by the parallel run step.
env_name = 'MAG-ParallelRunEnv'

# Read the workspace's registered environments once and reuse that mapping for
# both the membership diagnostic and the lookup, instead of re-reading
# ws.environments for each.
registered_environments = ws.environments
print(env_name in registered_environments)
env = registered_environments.get(env_name)

# Fail fast: ParallelRunConfig below needs env, so a missing or wrongly typed
# entry must stop the notebook here with a clear message.
if env is None:
    raise LookupError('environment not found, refer to 01_config_notebook to register envinroment...')
if not isinstance(env, Environment):
    raise TypeError('registered object ' + env_name + ' is not an azureml Environment')

print('Found environment: ' + env_name)


True
Found environment: MAG-ParallelRunEnv


# parallel run config and step

In [10]:
from azureml.data.datapath import DataPath, DataPathComputeBinding
# Exploratory prints of the objects involved in wiring the step's input.
print(pipeline_inpart.default_value)

# Datastore registered under the name "godzilla" in this workspace.
datastore = Datastore(workspace=ws, name="godzilla")
print(datastore)

# The registered dataset as a named, mounted input (same expression the
# ParallelRunStep uses for its inputs).
mounted_images = anpr_images.as_named_input('anpr_images').as_mount()
print(mounted_images)

# DataPath pointing at 'images' on that datastore.
datapath = DataPath(datastore=datastore, path_on_datastore='images')
print(datapath.path_on_datastore)

# Pair of (pipeline parameter, mount binding).
mount_binding = DataPathComputeBinding(mode='mount')
data_path2 = (pipeline_inpart, mount_binding)
print(data_path2)


2020/07/28
{
  "name": "godzilla",
  "container_name": "batchml",
  "account_name": "godzillasinastorage",
  "protocol": "https",
  "endpoint": "core.windows.net"
}
<azureml.data.dataset_consumption_config.DatasetConsumptionConfig object at 0x000002349A1EA460>
images
(<azureml.pipeline.core.graph.PipelineParameter object at 0x000002349A1F36D0>, <azureml.data.datapath.DataPathComputeBinding object at 0x000002349A1A69D0>)


In [11]:
# from azureml.core.datastore import Datastore
# from azureml.data.datapath import DataPath, DataPathComputeBinding
# from azureml.pipeline.steps import PythonScriptStep
# from azureml.pipeline.core import PipelineParameter

#    datastore = Datastore(workspace=workspace, name="godzilla")
#    datapath = DataPath(datastore=datastore, path_on_datastore='images')
#    data_path_pipeline_param = (PipelineParameter(name="input_data", default_value=datapath),
#                                DataPathComputeBinding(mode='mount'))

#    train_step = PythonScriptStep(script_name="train.py",
#                                  arguments=["--input", data_path_pipeline_param],
#                                  inputs=[data_path_pipeline_param],
#                                  target=compute_target,
#                                  source_directory=project_folder)

In [15]:
# Configuration shared by the parallel run step: what to run, where to run it,
# and how the work is split and reported.
parallelrunconfig = ParallelRunConfig(
    # What runs: entry script inside the 'scripts' folder, in the resolved environment.
    source_directory='scripts',
    entry_script='minibatch_process.py',
    environment=env,
    description='description of batch step config',
    # Where it runs.
    compute_target=compute_target,
    node_count=1,
    process_count_per_node=2,
    # How work is batched and collected.
    # NOTE(review): for a FileDataset input, '2' presumably means two files
    # per mini-batch -- confirm against the ParallelRunConfig documentation.
    mini_batch_size='2',
    output_action='append_row',
    error_threshold=1,
    logging_level='INFO',
)


#parallelrunconfig('script', 'other stuff')
# The single pipeline step: runs the entry script from parallelrunconfig over
# the mounted 'anpr_images' dataset and writes into output_dir.
parallelrunstep = ParallelRunStep(
    name='cv-detection-expandedoutput',
    parallel_run_config=parallelrunconfig,
    # Alternative input kept for reference:
    # inputs=[ Dataset.File.from_files((godzilla_datastore, 'images')).as_named_input('anpr_images').as_mount()],
    inputs=[anpr_images.as_named_input('anpr_images').as_mount()],
    side_inputs=[],
    output=output_dir,
    # Flag/value pairs forwarded to the entry script; the values are pipeline parameters.
    arguments=[
        '--input_partition', pipeline_inpart,
        '--kv_customimage', pipeline_kv_customimg,
        '--kv_readapi', pipeline_kv_readapi,
    ],
    allow_reuse=False,
)


# ('config', 'inputs as mount', 'arguments passing in pipeline args')

In [16]:
# Display the step object as a quick check that it was constructed.
parallelrunstep

<azureml.pipeline.steps.parallel_run_step.ParallelRunStep at 0x2349a1a1f10>

# prepare pipeline 

In [17]:
# Build a one-step pipeline, submit it under the 'MAG-batch-score1'
# experiment, and wait for the run to complete with output shown in the cell.
pipeline = Pipeline(workspace=ws, steps=[parallelrunstep])

experiment = Experiment(ws, 'MAG-batch-score1')
pipeline_run = experiment.submit(pipeline)

pipeline_run.wait_for_completion(show_output=True)

achine Learning Portal: https://ml.azure.com/experiments/MAG-batch-score1/runs/a44260b1-e820-4a0c-a931-a2770f0ad3dd?wsid=/subscriptions/907c8efc-c2c8-4c49-a4e1-aeb880e10c88/resourcegroups/aml/workspaces/magaml
StepRun( cv-detection-expandedoutput ) Status: NotStarted
StepRun( cv-detection-expandedoutput ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_9d3c8af28162c92e62f4d07f051c323a23531e20452c997f3e8444b1790c9c65_d.txt
2020-08-07T01:03:50Z Executing 'Copy ACR Details file' on 10.0.0.4
2020-08-07T01:03:51Z Starting output-watcher...
2020-08-07T01:03:51Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
2020-08-07T01:03:51Z Copy ACR Details file succeeded on 10.0.0.4. Output: 
>>>   
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_8c501e56007dc25550ab4b175a43b5f1
f7277927d38a: Pulling fs layer
8d3eac894db4: Pulling fs layer
edf72af6d627: Pulling fs layer
3e4f86211d23: Pulling fs layer
d6e9603ff777: Pulling fs layer
5cad4227

'Finished'