In [72]:
import json
import azureml.core
from azureml.core import Workspace, Datastore, Dataset, Environment, Experiment
from azureml.data import FileDataset
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter
from azureml.core.compute import ComputeTarget, AmlCompute


# Echo the installed SDK version, then keep its dotted components addressable
# by name ('major' / 'minor' / 'patch') for the version check further down.
sdk_version = azureml.core.VERSION
print(sdk_version)
version = {
    label: part
    for label, part in zip(['major', 'minor', 'patch'], sdk_version.split('.'))
}

# Workspace handle used by every later cell.
ws = Workspace.from_config()

1.10.0


In [73]:
# Choose where ParallelRunConfig / ParallelRunStep are imported from, based on
# the installed SDK version parsed into `version` earlier in the notebook.
#
# The (major, minor) pair is compared as a tuple against (1, 10) so that every
# release from 1.10 onwards (1.11, 1.12, 2.x, ...) uses azureml.pipeline.steps.
# An exact `minor == 10` match would send any newer release back to the
# contrib package, and a major version below 1 would import nothing at all.
sdk_major_minor = (int(version['major']), int(version['minor']))
if sdk_major_minor >= (1, 10):
    from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep
else:
    # Older SDKs: fall back to the contrib location.
    from azureml.contrib.pipeline.steps import ParallelRunConfig, ParallelRunStep

In [74]:
# Resolve the compute cluster the batch step will run on.
compute_name = "aml-compute1"
vm_size = "STANDARD_DS1_v2"

# Read the workspace's compute targets once and reuse the result for both the
# membership test and the lookup.
registered_targets = ws.compute_targets
if compute_name not in registered_targets:
    # Stop here with an actionable message; otherwise `compute_target` stays
    # undefined and a later cell fails with an unrelated-looking NameError.
    raise LookupError('compute target not found, refer to 02_create_compute_cluster to create compute target...')

compute_target = registered_targets[compute_name]
if not isinstance(compute_target, AmlCompute):
    # A target of another kind was previously accepted without any message.
    raise TypeError(
        "compute target '" + compute_name + "' is a "
        + type(compute_target).__name__ + ', expected AmlCompute'
    )

print('Found compute target: ' + compute_name)

Found compute target: aml-compute1


In [75]:
# Resolve the registered file dataset that feeds the batch step.
dataset_name = 'anpr_images'

# Read the workspace's registered datasets once and reuse the result for both
# the membership test and the lookup.
registered_datasets = ws.datasets
if dataset_name not in registered_datasets:
    # Stop here with an actionable message; otherwise `anpr_images` stays
    # undefined and a later cell fails with an unrelated-looking NameError.
    raise LookupError('file dataset not found, refer to 01_config_notebook to setup file datasets...')

anpr_images = registered_datasets.get(dataset_name)
if not isinstance(anpr_images, FileDataset):
    # The step input is built with .as_mount() later in this notebook, so a
    # dataset of another kind is rejected here instead of failing there.
    raise TypeError(
        "dataset '" + dataset_name + "' is a "
        + type(anpr_images).__name__ + ', expected FileDataset'
    )

print('Found FileDataset: ' + dataset_name)


Found FileDataset: anpr_images


In [76]:
# Named pipeline output ("scores") placed on the workspace's default datastore.
default_store = ws.get_default_datastore()
output_dir = PipelineData(
    name="scores",
    datastore=default_store,
    output_path_on_compute="batchscoring/results",
)

In [77]:
# Bare expression: lets the notebook display the PipelineData object's repr.
output_dir

$AZUREML_DATAREFERENCE_scores

# pipeline parameters

In [78]:
# Pipeline parameters with their default values; all three are passed to the
# entry script through the step's `arguments` list.
# NOTE(review): the two *_kv_* defaults look like Key Vault secret names
# (cf. ws.get_default_keyvault()) rather than the secrets themselves - confirm.
pipeline_inpart = PipelineParameter(
    name="pipeline_inpart",
    default_value='2020/07/28',
)
pipeline_kv_customimg = PipelineParameter(
    name="pipeline_kv_customimg",
    default_value='api-custom-vision',
)
pipeline_kv_readapi = PipelineParameter(
    name="pipeline_kv_readapi",
    default_value='api-readapi',
)



# python environment configuration

In [79]:
# Resolve the registered Environment the batch step will run in.
env_name = 'MAG-ParallelRunEnv'

# Read the workspace's environments once and reuse the result for the
# membership test, the printed flag and the lookup.
registered_envs = ws.environments
env_is_registered = env_name in registered_envs
print(env_is_registered)
if not env_is_registered:
    # Stop here with an actionable message; otherwise `env` stays undefined
    # and a later cell fails with an unrelated-looking NameError.
    raise LookupError('environment not found, refer to 01_config_notebook to register envinroment...')

env = registered_envs.get(env_name)
if not isinstance(env, Environment):
    # An entry of another kind was previously accepted without any message.
    raise TypeError(
        "environment '" + env_name + "' is a "
        + type(env).__name__ + ', expected Environment'
    )

print('Found environment: ' + env_name)


True
Found environment: MAG-ParallelRunEnv


# parallel run config and step

In [80]:
# Run configuration for the batch step: which script to run, where, and how
# the input is split into mini-batches.
parallelrunconfig = ParallelRunConfig(
    source_directory='scripts',
    entry_script='minibatch_process.py',
    environment=env,
    compute_target=compute_target,
    node_count=1,
    mini_batch_size='2',
    error_threshold=100,
    output_action='append_row',
    logging_level='DEBUG',
    description='description of batch step config',
)

# Step input (the file dataset, mounted) and the command-line arguments that
# carry the pipeline parameters into the entry script.
mounted_images = anpr_images.as_named_input('anpr_images').as_mount()
step_arguments = [
    '--input_partition', pipeline_inpart,
    '--kv_customimage', pipeline_kv_customimg,
    '--kv_readapi', pipeline_kv_readapi,
]

# The step itself; allow_reuse=False is passed so earlier results are not reused.
parallelrunstep = ParallelRunStep(
    name='cv-detection-batch-step',
    parallel_run_config=parallelrunconfig,
    inputs=[mounted_images],
    side_inputs=[],
    output=output_dir,
    arguments=step_arguments,
    allow_reuse=False,
)


# ('config', 'inputs as mount', 'arguments passing in pipeline args')

In [81]:
# Bare expression: lets the notebook display the ParallelRunStep object's repr.
parallelrunstep

<azureml.pipeline.steps.parallel_run_step.ParallelRunStep at 0x1694b355a60>

# prepare pipeline 

In [82]:
# Build a single-step pipeline and submit it under the 'MAG-batch-score1'
# experiment. The pipeline is only submitted here, not published.
pipeline = Pipeline(workspace=ws, steps=[parallelrunstep])
experiment = Experiment(ws, 'MAG-batch-score1')
pipeline_run = experiment.submit(pipeline)

# Wait for the run with output shown; kept as the cell's final expression so
# the value returned by wait_for_completion is displayed.
pipeline_run.wait_for_completion(show_output=True)

c0de-4e61-b2f5-3c98e304c089?wsid=/subscriptions/907c8efc-c2c8-4c49-a4e1-aeb880e10c88/resourcegroups/aml/workspaces/magaml
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: cc4a0a67-1f23-4bc4-a0ef-18acae5d592e
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/MAG-batch-score1/runs/cc4a0a67-1f23-4bc4-a0ef-18acae5d592e?wsid=/subscriptions/907c8efc-c2c8-4c49-a4e1-aeb880e10c88/resourcegroups/aml/workspaces/magaml
StepRun( cv-detection-batch-step ) Status: NotStarted
StepRun( cv-detection-batch-step ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_772f87f608467c7b669a9494312e17b4611ab96df3e782447d87bd966c43ee51_d.txt
2020-07-29T12:43:18Z Executing 'Copy ACR Details file' on 10.0.0.4
2020-07-29T12:43:18Z Copy ACR Details file succeeded on 10.0.0.4. Output: 
>>>   
>>>   
2020-07-29T12:43:18Z Starting output-watcher...
2020-07-29T12:43:18Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
Login Succeeded
Using default 

'Finished'