In [9]:
import configparser
import json
from collections import namedtuple
from urllib.parse import urlencode

import requests

In [10]:
# Load the service-principal credentials and workspace settings from a local
# INI file so that none of them are hard-coded in the notebook.
secrets_path = '.azureml/secrets.ini'
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so fail here with a clear message instead of with a
# KeyError on the first lookup below.
if not config.read(secrets_path):
    raise FileNotFoundError(f'Could not read configuration file: {secrets_path}')

service_principal = config['service_principal']
aml = config['aml']

tenant_id = service_principal['tenant_id']
client_id = service_principal['client_id']
client_secret = service_principal['svc_pr_password']
auth_request_url = f'https://login.microsoftonline.com/{tenant_id}/oauth2/token'
# Form body of the client-credentials token request. urlencode() percent-encodes
# every value, so a secret containing '&', '+', '=' or '%' is transmitted intact
# instead of corrupting the form.
auth_request_data = urlencode({
    'grant_type': 'client_credentials',
    'resource': 'https://management.azure.com/',
    'client_id': client_id,
    'client_secret': client_secret,
})


workspaceregion = 'australiaeast'
subscriptionid = service_principal['subscriptionid']
resourcegroupname = aml['resourcegroupname']
workspacename = aml['workspacename']
datastorename = aml['datastorename']


# Folder inside the datastore that the new dataset should point at.
relativepath = 'images/2020/07/28'
dataset_body = {
    'DatastoreName': datastorename,
    'RelativePath': relativepath}


# URL used to create a saved dataset from a datastore path.
create_dataset_rest = (
    f'https://{workspaceregion}.experiments.azureml.net/dataset/v1.0'
    f'/subscriptions/{subscriptionid}'
    f'/resourceGroups/{resourcegroupname}'
    f'/providers/Microsoft.MachineLearningServices/workspaces/{workspacename}'
    '/saveddatasets/from-data-path'
)

# URL that the pipeline submission request is posted to.
pipeline_endpoint = aml['pipeline_endpoint']

In [11]:

# Request an OAuth2 access token (client-credentials grant) from the token endpoint.
auth_response = requests.post(auth_request_url, data=auth_request_data)
# Raise on a 4xx/5xx answer so that a rejected credential is reported as an
# HTTP error rather than as a KeyError on 'access_token' below.
auth_response.raise_for_status()
auth_resp_dict = auth_response.json()
# Header reused by every subsequent REST call in this notebook.
auth_header = {'Authorization': 'Bearer ' + auth_resp_dict['access_token']}


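# Create a dataset using REST

POST a JSON body containing `DatastoreName` and `RelativePath` to the endpoint below; the response contains the ID of the saved dataset.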

```
https://<your-workspace-region>.experiments.azureml.net/dataset/v1.0/subscriptions/<your-subscription-id>/resourceGroups/<your-resource-group-name>/providers/Microsoft.MachineLearningServices/workspaces/<your-workspace-name>/saveddatasets/from-data-path
```

In [12]:
# Create a saved dataset that points at the configured datastore path.
dataset_rest_response = requests.post(create_dataset_rest,
                         headers=auth_header,
                         json=dataset_body)
# Raise on a 4xx/5xx answer so that a failed request is reported as an HTTP
# error rather than as a KeyError on 'dataflowJson' below.
dataset_rest_response.raise_for_status()
dataset_resp_dict = dataset_rest_response.json()

# 'dataflowJson' is itself a JSON document embedded as a string in the response.
dataflow_json = dataset_resp_dict['dataflowJson']
# Attribute-style view of the dataflow; kept so that any code relying on
# `dataset_resp_o` continues to work.
dataset_resp_o = json.loads(dataflow_json, object_hook = lambda d: namedtuple('X', d.keys())(*d.values()))
# Plain-dict view of the 'meta' section; the saved dataset ID is read from it.
dataflow_meta = json.loads(dataflow_json)['meta']
saved_dataset_id = dataflow_meta['savedDatasetId']

dataflow_meta

{'savedDatasetId': '69bfe260-7f14-4de7-a33b-7bf894858e4c',
 'datasetType': 'file',
 'subscriptionId': '907c8efc-c2c8-4c49-a4e1-aeb880e10c88',
 'workspaceId': 'c744e648-3d1a-4b53-9b48-f8dc50fd0d3f',
 'workspaceLocation': 'australiaeast'}

In [13]:
# Display the complete dataset-creation response for inspection.
dataset_resp_dict

{'id': '69bfe260-7f14-4de7-a33b-7bf894858e4c',
 'datasetType': 'file',
 'properties': {},
 'dataflowJson': '{\n  "blocks": [\n    {\n      "id": "a81cd1d3-517e-4501-87e6-c2d184d6d833",\n      "type": "Microsoft.DPrep.GetDatastoreFilesBlock",\n      "arguments": {\n        "datastores": [\n          {\n            "datastoreName": "godzilla",\n            "path": "images/2020/07/28",\n            "resourceGroup": "aml",\n            "subscription": "907c8efc-c2c8-4c49-a4e1-aeb880e10c88",\n            "workspaceName": "magaml"\n          }\n        ]\n      },\n      "localData": {},\n      "isEnabled": true,\n      "name": null,\n      "annotation": null\n    }\n  ],\n  "inspectors": [],\n  "meta": {\n    "savedDatasetId": "69bfe260-7f14-4de7-a33b-7bf894858e4c",\n    "datasetType": "file",\n    "subscriptionId": "907c8efc-c2c8-4c49-a4e1-aeb880e10c88",\n    "workspaceId": "c744e648-3d1a-4b53-9b48-f8dc50fd0d3f",\n    "workspaceLocation": "australiaeast"\n  }\n}',
 'dataChanged': None,
 'd

In [14]:
# Show the dataset ID that is passed to the pipeline submission below.
print(saved_dataset_id)

69bfe260-7f14-4de7-a33b-7bf894858e4c


# Call the pipeline with `saved_dataset_id`

To pass a dataset in as a pipeline parameter, it must be supplied under `DataSetDefinitionValueAssignments` in the request body.


In [15]:
# Display the pipeline submission URL that was read from the [aml] config section.
pipeline_endpoint

'https://australiaeast.api.azureml.ms/pipelines/v1.0/subscriptions/907c8efc-c2c8-4c49-a4e1-aeb880e10c88/resourceGroups/aml/providers/Microsoft.MachineLearningServices/workspaces/magaml/PipelineRuns/PipelineSubmit/0cae7283-36d6-4b77-84d3-7a38f9c2a0a5'

In [18]:

# JSON body of the pipeline submission request: plain string parameters go
# under 'ParameterAssignments', while the dataset is passed by its saved ID
# under 'DataSetDefinitionValueAssignments'.
pipeline_request_body = {
    'ExperimentName': 'BatchScoringPipelineExp-datasetinput',
    'ParameterAssignments': {
        'pipeline_inpart': 'partition1',
        'pipeline_kv_customimg': '123',
        'pipeline_kv_readapi': '342',
    },
    'DataSetDefinitionValueAssignments': {
        'pipeline_dataset_id': {
            'SavedDataSetReference': {'Id': saved_dataset_id},
        },
    },
}

response = requests.post(pipeline_endpoint,
                         headers=auth_header,
                         json=pipeline_request_body)
# Raise on a 4xx/5xx answer so a rejected submission is not mistaken for a
# started run; `response` stays assigned, so the error body can still be read.
response.raise_for_status()

In [19]:
# Decode the body of the pipeline submission response and display it.
pipeline_submit_result = json.loads(response.content)
pipeline_submit_result

{'Description': None,
 'Status': {'StatusCode': 0,
  'StatusDetail': None,
  'CreationTime': '2020-08-04T01:06:39.2244314Z',
  'EndTime': None},
 'GraphId': '1f47a4e3-30b0-46da-a39d-ed8dd5afaca4',
 'IsSubmitted': False,
 'HasErrors': False,
 'UploadState': 0,
 'ParameterAssignments': {'pipeline_inpart': 'partition1',
  'pipeline_kv_customimg': '123',
  'pipeline_kv_readapi': '342'},
 'DataPathAssignments': {},
 'DataSetDefinitionValueAssignments': {'pipeline_dataset_id': {'LiteralValue': None,
   'DataSetReference': None,
   'SavedDataSetReference': {'Id': '69bfe260-7f14-4de7-a33b-7bf894858e4c'}}},
 'RunHistoryExperimentName': 'BatchScoringPipelineExp-datasetinput',
 'PipelineId': '0cae7283-36d6-4b77-84d3-7a38f9c2a0a5',
 'RunSource': 'Unavailable',
 'RunType': 0,
 'TotalRunSteps': 1,
 'ScheduleId': None,
 'RunUrl': 'https://ml.azure.com/experiments/BatchScoringPipelineExp-datasetinput/runs/c56bf521-1595-46fd-9695-3994258d4ee3?tid=72f988bf-86f1-41af-91ab-2d7cd011db47&wsid=/subscriptions