#### Bulk Delete
This notebook demonstrates an approach to bulk deleting entities: it deletes the entire contents of a collection using a given batch size.  This example also includes code to synthetically create a number of entities to allow testing of different volumes.

#### Configuration

In [None]:
import yaml, requests, json

# Read the client details from the shared YAML config file
with open("../config/config.yaml") as yamlfile:
    config = yaml.full_load(yamlfile)

# Every setting used by this notebook is read from the first entry of the config document
client_id, client_secret, tenant_id, purview_account_name = (
    config[0][setting]
    for setting in ('client_id', 'client_secret', 'tenant_id', 'purview_account_name')
)

# Values derived from the settings above
scope = f"{config[0]['resource']}/.default"
purview_endpoint = f"https://{purview_account_name}.purview.azure.com"
authority = f"https://login.microsoftonline.com/{tenant_id}"

# Placeholder; the OAuth Login cell overwrites this with the acquired bearer token
access_token = ""

#### Functions

In [None]:
# Build the HTTP headers sent with the REST calls made in this notebook
# Parameters:  None
# Returns: Dictionary with the bearer Authorization header and a JSON Content-Type
##
def getHeaders():
    # access_token is looked up in the notebook's global scope each time this is called
    return {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json'
    }

In [None]:
# Search the catalog for entities that belong to a given Collection
# Parameters:
#   limit:          Batch size, sent as the "limit" field of the search request
#   collection_id:  Collection ID, sent as the "collectionId" filter
# Returns:  Dict parsed from the JSON body of the search response (https://learn.microsoft.com/en-us/rest/api/purview/catalogdataplane/discovery/query?tabs=HTTP#searchresult)
##
def queryCollection(limit, collection_id):
    search_uri = f"{purview_endpoint}/catalog/api/search/query?api-version=2022-03-01-preview"

    # No keywords are supplied; the only filter sent is the collection ID
    search_request = {
        "keywords": None,
        "limit": limit,
        "filter": {
            "collectionId": collection_id
        }
    }

    http_response = requests.post(search_uri, data=json.dumps(search_request), headers=getHeaders())
    return json.loads(http_response.content)

#### OAuth Login
Perform Authentication using the Microsoft Authentication Library and get a bearer token for subsequent API calls

In [None]:
from msal import ConfidentialClientApplication

# Build an MSAL confidential client (application ID + secret) against the tenant authority
app = ConfidentialClientApplication(client_id, authority=authority, client_credential=client_secret)

# MSAL documents `scopes` as a list of strings, so wrap the single ".default" scope
result = app.acquire_token_for_client(scopes=[scope])

# A failed request yields a dict without 'access_token'; report the error details
# MSAL returned rather than failing with a bare KeyError
if 'access_token' not in result:
    raise RuntimeError(
        f"Token acquisition failed: {result.get('error')} - {result.get('error_description')}"
    )

# Stored globally so that getHeaders() picks it up for subsequent API calls
access_token = result['access_token']

#### Initial Setup
(Optional) Create a collection and populate it with some assets

In [None]:
# Initial setup:  identifiers of the collection that the following cells populate and empty
collection_id = "bulk-delete-collection"
collection_name = "Bulk Delete Collection"

# PUT the collection definition, using the account name as the parent collection reference
uri = f"{purview_endpoint}/account/collections/{collection_id}?api-version=2019-11-01-preview"
payload = json.dumps(dict(
    friendlyName=collection_name,
    parentCollection=dict(referenceName=purview_account_name),
))
response = json.loads(
    requests.put(uri, headers=getHeaders(), data=payload).content
)

In [None]:
num_entities_to_create = 1000         # Define the number of entities to create (creates Azure SQL tables)
entities = []                         # Initialise a list to store each entity JSON
fqdn_prefix = "mssql://foo-sqlsrvr.database.windows.net/foo-sqldb/foo-schema/"

# Build one azure_sql_table definition per entity, numbered from 1
for entity_number in range(1, num_entities_to_create + 1):

    # Zero-padded to five digits so names are fixed-width (e.g. 00042)
    entity_id = str(entity_number).zfill(5)
    entities.append({
      "typeName": "azure_sql_table",
      # Negative, unique per entity -- presumably a placeholder guid for a new entity; confirm against the API docs
      "guid": f"-{entity_number}",
      "attributes": {
        "name": f"Table {entity_id}",
        # fqdn_prefix already ends with "/", so no extra separator is added here
        "qualifiedName": f"{fqdn_prefix}foo-table-{entity_id}",
        "description": f"Bulk created asset Test-Table-{entity_id}"
      }
    })

# Define the (collection) bulk create or update URI and push the JSON payload to it
uri = f'{purview_endpoint}/catalog/api/collections/{collection_id}/entity/bulk?api-version=2022-03-01-preview'
payload = json.dumps({
  "referredEntities": {},
  "entities": entities
})
response = json.loads(requests.request("POST", uri, headers=getHeaders(), data=payload).content)


#### Delete the entities in the Collection

In [None]:
num_entities_for_deletion = 1   # Non-zero seed so the loop body runs at least once
delete_batch_size = 100     # Do not set this value too high as it will cause potential 414 errors (URI too long)

# Bulk delete endpoint; the guids are supplied as repeated "guid" query parameters
delete_uri = f"{purview_endpoint}/catalog/api/atlas/v2/entity/bulk"

while num_entities_for_deletion > 0:

  # Fetch the next batch of entities still present in the collection
  entities_for_deletion = queryCollection(delete_batch_size, collection_id)
  num_entities_for_deletion = entities_for_deletion['@search.count']
  print(f"{num_entities_for_deletion} entities remaining for deletion")

  guids = [entity['id'] for entity in entities_for_deletion['value']]

  # Nothing came back: stop here instead of sending a DELETE with no guids
  # (also prevents spinning forever if the count is non-zero but the page is empty)
  if not guids:
    break

  # A list value makes requests emit guid=<a>&guid=<b>&... in the query string
  response = requests.request("DELETE", delete_uri, params={"guid": guids}, headers=getHeaders())
  print(response)

In [None]:
# Delete the collection. The path must include the "/account" segment, matching the
# URI that was used to create the collection earlier in this notebook.
delete_uri = f"{purview_endpoint}/account/collections/{collection_id}?api-version=2019-11-01-preview"
headers = getHeaders()
# Left as the last expression so the notebook displays the HTTP response
requests.request("DELETE", delete_uri, headers=headers)