# Call a published pipeline


The process is to: 

1. Authenticate and get a token.
2. Create a dataset, using the dataset REST API, that points to the location of the files to be processed.
3. Call the pipeline REST API and pass in the dataset id in `DataSetDefinitionValueAssignments`, with the pipeline parameter name as the key.
4. Poll the pipeline run status API until the pipeline completes.

In [69]:
import requests
import json
from collections import namedtuple 
import configparser
import time

In [45]:
# Load the service-principal credentials and workspace settings from a local ini file.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it did parse, so check it to fail with a clear message instead of a KeyError.
if not config.read('.azureml/secrets.ini'):
    raise FileNotFoundError("Could not read '.azureml/secrets.ini'")

tenant_id = config['service_principal']['tenant_id']
client_id = config['service_principal']['client_id']
client_secret = config['service_principal']['svc_pr_password']
auth_request_url = f'https://login.microsoftonline.com/{tenant_id}/oauth2/token'
# Passed as a dict so that `requests` form-encodes each value. Building the body by
# hand with an f-string breaks as soon as the secret contains '&', '+', '=' or '%'.
auth_request_data = {
    'grant_type': 'client_credentials',
    'resource': 'https://management.azure.com/',
    'client_id': client_id,
    'client_secret': client_secret,
}


workspaceregion = 'australiaeast'
subscriptionid = config['service_principal']['subscriptionid']
resourcegroupname = config['aml']['resourcegroupname']
workspacename = config['aml']['workspacename']
datastorename = config['aml']['datastorename']


# Folder (relative to the datastore root) holding the files to be processed.
relativepath = 'images/2020/07/28'
# Request body for the "saved dataset from data path" call.
dataset_body = {
    'DatastoreName': datastorename,
    'RelativePath': relativepath}


create_dataset_rest = f'https://{workspaceregion}.experiments.azureml.net/dataset/v1.0/subscriptions/{subscriptionid}/resourceGroups/{resourcegroupname}/providers/Microsoft.MachineLearningServices/workspaces/{workspacename}/saveddatasets/from-data-path'

# Submit URL of the published pipeline.
pipeline_endpoint = config['aml']['pipeline_endpoint']

## authenticate and get a token

In [46]:

# Exchange the service-principal credentials for a bearer token.
auth_response = requests.post(auth_request_url, data=auth_request_data)
# Fail here on bad credentials / tenant instead of with KeyError: 'access_token' below.
auth_response.raise_for_status()
auth_resp_dict = auth_response.json()
# Header reused by every subsequent REST call in this notebook.
auth_header = {'Authorization': 'Bearer ' + auth_resp_dict['access_token']}


## create dataset using REST 

```
https://<your-workspace-region>.experiments.azureml.net/dataset/v1.0/subscriptions/<your-subscription-id>/resourceGroups/<your-resource-group-name>/providers/Microsoft.MachineLearningServices/workspaces/<your-workspace-name>/saveddatasets/from-data-path
```

In [47]:
# Create a saved dataset that points at `RelativePath` inside the datastore.
dataset_rest_response = requests.post(create_dataset_rest,
                                      headers=auth_header,
                                      json=dataset_body)
# Surface HTTP failures here rather than as KeyError: 'dataflowJson' further down.
dataset_rest_response.raise_for_status()
dataset_resp_dict = dataset_rest_response.json()

# 'dataflowJson' is a JSON document serialised as a string inside the response.
dataset_dataflow = json.loads(dataset_resp_dict['dataflowJson'])
# Attribute-style (namedtuple) view of the same document; kept because cells outside
# this view may use it. Note the hook raises if a key is not a valid identifier.
dataset_resp_o = json.loads(dataset_resp_dict['dataflowJson'], object_hook = lambda d: namedtuple('X', d.keys())(*d.values()))
# Read the id from the plain dict so it does not depend on the namedtuple conversion.
saved_dataset_id = dataset_dataflow['meta']['savedDatasetId']

dataset_dataflow['meta']

{'savedDatasetId': '69bfe260-7f14-4de7-a33b-7bf894858e4c',
 'datasetType': 'file',
 'subscriptionId': '907c8efc-c2c8-4c49-a4e1-aeb880e10c88',
 'workspaceId': 'c744e648-3d1a-4b53-9b48-f8dc50fd0d3f',
 'workspaceLocation': 'australiaeast'}

In [48]:
# Inspect the full response body returned by the dataset-creation call.
dataset_resp_dict

{'id': '69bfe260-7f14-4de7-a33b-7bf894858e4c',
 'datasetType': 'file',
 'properties': {},
 'dataflowJson': '{\n  "blocks": [\n    {\n      "id": "558e7bdf-7d66-455a-8bd4-6a8b0303f08c",\n      "type": "Microsoft.DPrep.GetDatastoreFilesBlock",\n      "arguments": {\n        "datastores": [\n          {\n            "datastoreName": "godzilla",\n            "path": "images/2020/07/28",\n            "resourceGroup": "aml",\n            "subscription": "907c8efc-c2c8-4c49-a4e1-aeb880e10c88",\n            "workspaceName": "magaml"\n          }\n        ]\n      },\n      "localData": {},\n      "isEnabled": true,\n      "name": null,\n      "annotation": null\n    }\n  ],\n  "inspectors": [],\n  "meta": {\n    "savedDatasetId": "69bfe260-7f14-4de7-a33b-7bf894858e4c",\n    "datasetType": "file",\n    "subscriptionId": "907c8efc-c2c8-4c49-a4e1-aeb880e10c88",\n    "workspaceId": "c744e648-3d1a-4b53-9b48-f8dc50fd0d3f",\n    "workspaceLocation": "australiaeast"\n  }\n}',
 'dataChanged': None,
 'd

In [49]:
# Show the id that is passed to the pipeline as its dataset input.
print(saved_dataset_id)

69bfe260-7f14-4de7-a33b-7bf894858e4c


## call pipeline with saved_dataset_id

You must use `DataSetDefinitionValueAssignments` to pass in a dataset as a pipeline parameter.


In [50]:
# Show the published pipeline's submit URL that was read from the config file.
pipeline_endpoint

'https://australiaeast.api.azureml.ms/pipelines/v1.0/subscriptions/907c8efc-c2c8-4c49-a4e1-aeb880e10c88/resourceGroups/aml/providers/Microsoft.MachineLearningServices/workspaces/magaml/PipelineRuns/PipelineSubmit/0cae7283-36d6-4b77-84d3-7a38f9c2a0a5'

In [51]:

# Submit a run of the published pipeline.
response = requests.post(
    pipeline_endpoint,
    headers=auth_header,
    json={
        # Taken from the config so it always matches the experiment name that the
        # run-status URL is built from.
        'ExperimentName': config['aml']['experiment_name'],
        # Plain (string) pipeline parameters.
        'ParameterAssignments': {
            'pipeline_inpart': 'partition1',
            'pipeline_kv_customimg': '123',
            'pipeline_kv_readapi': '342'},
        # Dataset parameters are keyed by the pipeline parameter name.
        'DataSetDefinitionValueAssignments': {
            'pipeline_dataset_id': {
                'SavedDataSetReference': {'Id': saved_dataset_id}}}})
# Stop here if the submission was rejected instead of carrying on without a run id.
response.raise_for_status()

In [52]:
# Display the 'Id' field of the submission response.
response.json().get('Id')

'9e7683b2-c0b1-4e36-a4f0-54544fa686bb'

In [53]:
# Id of the submitted run; it is substituted into the run-status URL.
pipeline_run_id = response.json().get('Id')
# Without this check a missing id would be formatted into the URL as the text 'None'.
if pipeline_run_id is None:
    raise ValueError(f'Pipeline submission response contains no run Id: {response.text}')

## poll for pipeline run status

Poll the run until it reaches a terminal state. The possible states are described under [get status](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelinerun?view=azure-ml-py#get-status--):


* NotStarted - A temporary state that client-side Run objects are in before cloud submission.
* Running - The job started to run in the compute target.
* Failed - The run failed. Usually the Error property on a run will provide details as to why.
* Finished - The run completed successfully.
* Canceled - Following cancellation request, the run is now successfully cancelled.


In [54]:
# List every setting visible in the [aml] section of the config file.
aml_settings = config['aml']
for option in aml_settings:
    print(f'{option}: {aml_settings[option]}')

resourcegroupname: aml
workspacename: magaml
datastorename: godzilla
experiment_name: BatchScoringPipelineExp-datasetinput
pipeline_endpoint: https://australiaeast.api.azureml.ms/pipelines/v1.0/subscriptions/907c8efc-c2c8-4c49-a4e1-aeb880e10c88/resourceGroups/aml/providers/Microsoft.MachineLearningServices/workspaces/magaml/PipelineRuns/PipelineSubmit/0cae7283-36d6-4b77-84d3-7a38f9c2a0a5
pipeline_run: https://australiaeast.experiments.azureml.net/history/v1.0/subscriptions/{subscriptionid}/resourceGroups/{resourcegroupname}/providers/Microsoft.MachineLearningServices/workspaces/{workspacename}/experiments/{experiment}/runs/{runid}
subscriptionid: 907c8efc-c2c8-4c49-a4e1-aeb880e10c88


In [66]:
# Run-status URL as stored in the config file.
pipeline_run_endpoint = config['aml']['pipeline_run']

In [67]:
# Build the run-status URL for the submitted run.
# Always format the template read from the config, not `pipeline_run_endpoint`
# itself: once the placeholders have been substituted, formatting the result again
# is a no-op, so re-running this cell would silently keep the previous run id.
pipeline_run_endpoint = config['aml']['pipeline_run'].format_map({
    'subscriptionid': config['aml']['subscriptionid'],
    'resourcegroupname': config['aml']['resourcegroupname'],
    'workspacename': config['aml']['workspacename'],
    'experiment': config['aml']['experiment_name'],
    'runid': pipeline_run_id
})

def run_get_status(pipeline_run_endpoint, auth_header, timeout=30):
    """Return the current status string of a pipeline run.

    Args:
        pipeline_run_endpoint: Fully formatted run URL to query.
        auth_header: Dict of HTTP headers carrying the bearer token.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block a polling loop forever.

    Returns:
        The value of the 'status' field of the JSON response, or None when the
        response has no such field.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status. Without
            this check an error response with a JSON body would yield None and a
            polling caller would never see a terminal state.
    """
    run_response = requests.get(pipeline_run_endpoint, headers=auth_header, timeout=timeout)
    run_response.raise_for_status()
    return run_response.json().get('status')

In [68]:
# States after which the run no longer changes. 'Canceled' must be included,
# otherwise a cancelled run keeps this loop polling forever.
TERMINAL_RUN_STATES = ('Completed', 'Finished', 'Failed', 'Canceled')
# Seconds to wait between two status requests.
POLL_INTERVAL_SECONDS = 10

run_status = run_get_status(pipeline_run_endpoint, auth_header)
while run_status not in TERMINAL_RUN_STATES:
    print(f'Run status {run_status}')
    time.sleep(POLL_INTERVAL_SECONDS)
    run_status = run_get_status(pipeline_run_endpoint, auth_header)
print(f'Run completed with status code : {run_status}')

Run status Running


NameError: name 'time' is not defined

In [62]:
# Show the run-status URL that is being polled.
pipeline_run_endpoint

'https://australiaeast.experiments.azureml.net/history/v1.0/subscriptions/907c8efc-c2c8-4c49-a4e1-aeb880e10c88/resourceGroups/aml/providers/Microsoft.MachineLearningServices/workspaces/magaml/experiments/BatchScoringPipelineExp-datasetinput/runs/32b5ce12-0344-4e5e-b644-1d6a81581efd'

In [61]:
# Fetch and display the whole run record, not only its 'status' field.
requests.get(pipeline_run_endpoint, headers=auth_header).json()

{'runNumber': 9,
 'rootRunId': '32b5ce12-0344-4e5e-b644-1d6a81581efd',
 'experimentId': '537f7c81-7878-4e1b-b5ad-09179b5e0055',
 'createdUtc': '2020-08-04T03:41:25.4124854+00:00',
 'createdBy': {'userObjectId': 'd82fa8db-df2b-4a62-a4d8-6f010546e85d',
  'userPuId': None,
  'userIdp': 'https://sts.windows.net/72f988bf-86f1-41af-91ab-2d7cd011db47/',
  'userAltSecId': None,
  'userIss': 'https://sts.windows.net/72f988bf-86f1-41af-91ab-2d7cd011db47/',
  'userTenantId': '72f988bf-86f1-41af-91ab-2d7cd011db47',
  'userName': '86a53f45-d011-4ba4-8679-ed3a32d4add1'},
 'userId': 'd82fa8db-df2b-4a62-a4d8-6f010546e85d',
 'token': None,
 'tokenExpiryTimeUtc': None,
 'error': None,
 'revision': 4,
 'runUuid': '58fa3166-12b5-48b1-8679-cb5fff8124d8',
 'parentRunUuid': None,
 'rootRunUuid': '58fa3166-12b5-48b1-8679-cb5fff8124d8',
 'runId': '32b5ce12-0344-4e5e-b644-1d6a81581efd',
 'parentRunId': None,
 'status': 'Completed',
 'startTimeUtc': '2020-08-04T03:41:27.4785361+00:00',
 'endTimeUtc': '2020-08-04