**Bulk Assign Owners**

This notebook demonstrates how to assign a single user account (identified by its AAD object ID) as the owner (and, optionally, expert) of all existing assets. The approach uses the bulk update method and an ordered list of GUIDs to paginate through the search results, which can be filtered by FQDN prefix in the `queryForNextBatch` function.

In [7]:

import json
import os

import requests

# Connection settings. Each value is read from an environment variable so that
# secrets do not have to be saved in the notebook; the empty-string fallback
# keeps the original "fill in the blanks" behaviour when a variable is unset.
tenant_id = os.environ.get("AZURE_TENANT_ID", "")
client_id = os.environ.get("AZURE_CLIENT_ID", "")
client_secret = os.environ.get("AZURE_CLIENT_SECRET", "")
purview_account_name = os.environ.get("PURVIEW_ACCOUNT_NAME", "")
# AAD object ID of the account that will be written as the owner contact.
owner_oid = os.environ.get("PURVIEW_OWNER_OID", "")

# URLs derived from the settings above.
purview_endpoint = f"https://{purview_account_name}.purview.azure.com"
authority = f"https://login.microsoftonline.com/{tenant_id}"

# Placeholder; overwritten once a token has been acquired.
access_token = ""

StatementMeta(genSpark, 4, 8, Finished, Available)

In [8]:
from msal import ConfidentialClientApplication

# Build a confidential client from the app registration configured above.
app = ConfidentialClientApplication(client_id, authority=authority, client_credential=client_secret)
scope = "https://purview.azure.net/.default"

# MSAL documents `scopes` as a list of strings, so wrap the single scope.
result = app.acquire_token_for_client(scopes=[scope])

# A failed request returns a dict without 'access_token'; surface the error
# details from the response instead of failing with a bare KeyError.
if 'access_token' not in result:
    raise RuntimeError(
        f"Failed to acquire access token: {result.get('error')}: {result.get('error_description')}"
    )
access_token = result['access_token']

StatementMeta(genSpark, 4, 9, Finished, Available)

In [9]:
def getHeaders():
    """Return the HTTP headers used for the REST calls in this notebook.

    The bearer token is taken from the notebook-level ``access_token``
    variable each time the function is called.
    """
    return {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json',
    }
    
def queryForNextBatch(limit, fqdn, guid):
    """Fetch the next page of search results, ordered by entity id.

    Args:
        limit: Maximum number of results to request.
        fqdn: Prefix that each result's qualifiedName must start with.
        guid: Lower bound (exclusive) for the entity id; pass the last id
            already processed to continue from there.

    Returns:
        The parsed JSON body of the search response.

    Raises:
        requests.HTTPError: If the search endpoint answers with a 4xx/5xx status.
    """
    search_uri = f"{purview_endpoint}/catalog/api/search/query?api-version=2022-03-01-preview"
    payload = json.dumps({
        "keywords": None,
        "limit": limit,
        "filter": {
            "and": [
                {
                    # Only ids after the last one seen; combined with the
                    # ascending sort below this pages through the results.
                    "id": {
                        "operator": "gt",
                        "value": guid
                    }
                },
                {
                    "attributeName": "qualifiedName",
                    "operator": "startswith",
                    "attributeValue": fqdn
                }
            ]
        },
        "offset": 0,
        "orderby": [{
            "id": "asc"
        }]
    })

    response = requests.request("POST", search_uri, data=payload, headers=getHeaders())
    # Fail loudly on HTTP errors rather than returning an error body that the
    # caller would try to read as search results.
    response.raise_for_status()
    return response.json()

StatementMeta(genSpark, 4, 10, Finished, Available)

In [10]:
import math

uri = f'{purview_endpoint}/catalog/api/atlas/v2/entity/bulk?api-version=2022-03-01-preview'
# Add a prefix here to restrict the search results to a certain datasource name
fqdn_prefix = "https://"
batch_size = 100

# Get the first batch (search using the fqdn and the "0" guid, effectively starting from the first ordered guid)
results = queryForNextBatch(batch_size, fqdn_prefix, "0")

# Get the total number of results from the search (a missing count is treated as zero)
total_num_of_entities = int(results.get("@search.count") or 0)
print(f"Total number of entities to update: {total_num_of_entities}")

# Calculate the number of batches needed
number_of_batches = math.ceil(total_num_of_entities / batch_size)

print(f"number of batches needed with batch size of {batch_size}: {int(number_of_batches)}")

# Loop through each batch
for batch in range(int(number_of_batches)):
  print(f"Processing batch {batch+1} of {int(number_of_batches)}")

  # Stop if the search returned nothing for this batch: there would be no
  # "last guid" to continue from and nothing to send to the bulk API.
  entities = results.get('value') or []
  if not entities:
    print(f"No entities returned for batch {batch+1}; stopping early")
    break

  # Reset the list of guids
  batch_guids = []

  # Build one update entry per search result in the current page
  for guids in entities:
    # To assign both experts and owners change the contacts section to
    # "contacts": {"Expert":[{"id":"<expert oid>","info":""}],"Owner":[{"id":f"{owner_oid}","info":"ext 2553"}]}
    this_guid = {"typeName": f"{guids['entityType']}","guid": f"{guids['id']}",
                "attributes": {
                    "name": f"{guids['name']}",
                    "qualifiedName": f"{guids['qualifiedName']}"
                },
                "contacts":{"Owner":[{"id":f"{owner_oid}","info":"ext 2553"}]}}
    batch_guids.append(this_guid)

  # For this guid batch, call the bulk assign API
  payload = json.dumps({"referredEntities": {},"entities": batch_guids})
  response = requests.request("POST", uri, headers=getHeaders(), data=payload)

  # Any throttling or timeouts will show up in the response.
  # We still have the guids for this batch so could implement some retry logic,
  # but for now just print the response (plus the body when it is not a success).
  print(response)
  if not response.ok:
    print(f"Batch {batch+1} failed with status {response.status_code}: {response.text}")

  # Query for the next batch passing in the last guid processed; skipped after
  # the final batch because its result would never be used.
  if batch + 1 < number_of_batches:
    results = queryForNextBatch(batch_size, fqdn_prefix, this_guid['guid'])
print("Done")

StatementMeta(genSpark, 4, 11, Finished, Available)

Total number of entities to update: 75
number of batches needed with batch size of 100: 1
Processing batch 1 of 1
<Response [200]>
Done
