#### Glossary Terms to Entity Scale Test
This notebook was created in response to a colleague's question: how many Terms can be added to an entity?  This is not easily answered, as it depends on the size of the Terms, the size of the entity, the number of relationships, etc.  This notebook attempts to establish a rough baseline using a simple entity definition.

#### Configuration

In [1]:
import yaml, requests, json

# Load the client/account settings from the shared YAML config file
# NOTE(review): FullLoader is kept as found; if the file only holds plain scalars, yaml.safe_load may be enough - confirm
with open("../config/config.yaml", "r") as config_stream:
    config = yaml.load(config_stream, Loader=yaml.FullLoader)

# Every setting used below is read from the first entry of the loaded config
settings = config[0]
client_id, client_secret, tenant_id = (
    settings['client_id'],
    settings['client_secret'],
    settings['tenant_id'],
)
purview_account_name = settings['purview_account_name']

# Derived values: token scope, Purview base URL and login authority
scope = f"{settings['resource']}/.default"
purview_endpoint = f"https://{purview_account_name}.purview.azure.com"
authority = f"https://login.microsoftonline.com/{tenant_id}"

# Starts empty; getHeaders() reads this global each time it is called
access_token = ""

#### Functions

In [2]:
# Build the header dictionary sent with every HTTP request
# Parameters:  None
# Returns: A new dictionary holding the bearer Authorization header (taken from
#          the global access_token at call time) and a JSON Content-Type
##
def getHeaders():
    bearer_value = 'Bearer {}'.format(access_token)
    return {'Authorization': bearer_value, 'Content-Type': 'application/json'}

#### OAuth Login
Perform authentication using the Microsoft Authentication Library (MSAL) and get a bearer token for subsequent API calls.

In [65]:
from msal import ConfidentialClientApplication

# App-only (client credentials) sign-in using the values loaded in the configuration cell
app = ConfidentialClientApplication(client_id, authority=authority, client_credential=client_secret)

# MSAL documents `scopes` as a list of strings, so wrap the single ".default" scope
result = app.acquire_token_for_client(scopes=[scope])

# On failure MSAL returns a dict describing the error instead of raising; report it
# here rather than failing with a bare KeyError on 'access_token'
if 'access_token' not in result:
    raise RuntimeError(
        f"Token acquisition failed: {result.get('error')} - {result.get('error_description')}"
    )

# Stored in the global that getHeaders() reads when building the Authorization header
access_token = result['access_token']

#### Initial Glossary Setup
Let's start by creating some glossary terms to work with.  We'll create a new glossary and then create 1000 terms using the CSV import, each with a generated (numbered) name and definition.

In [None]:
# Initial setup:  Create the Glossary and get a reference to its guid
glossary_name = "Test Glossary"

glossary_uri = f"{purview_endpoint}/catalog/api/atlas/v2/glossary"
payload = json.dumps({
    "name": glossary_name,
    "longDescription": "Test Glossary",
    # The glossary attribute is lower-case "language"; the capitalised key did not match it
    "language": "en",
    "usage": "Used to test API calls"
})

create_reply = requests.request("POST", glossary_uri, headers=getHeaders(), data=payload)

# Stop with the service's own message instead of a KeyError on 'guid' further down
if not create_reply.ok:
    raise RuntimeError(
        f"Glossary creation failed with HTTP {create_reply.status_code}: {create_reply.text}"
    )

response = json.loads(create_reply.content)

# Keep the guid of the new glossary for the later cells
glossary_guid = response['guid']
print(response)

In [6]:
import csv
num_terms_to_create = 1000

filename = "glossary_template.csv"

# Column titles written as the first line of the file
# NOTE(review): the "Acronymn" / "Synonymns" spellings are reproduced as found - confirm against the Purview import template
file_headers = [
    "Name",
    "Nick Name",
    "Status",
    "Definition",
    "Acronymn",
    "Resources",
    "Related Terms",
    "Synonymns",
    "Stewards",
    "Experts",
    "Parent Term Name",
    "IsDefinitionRichText",
    "Term Template Names",
]

# One row per term; only the numbered fields differ from row to row
file_rows = [
    [
        f"Name {term_no}",
        f"Nick Name {term_no}",
        "Draft",
        f"Definition {term_no}",
        f"Acronymn {term_no}",
        "Microsoft Purview Project:https://web.purview.azure.com;Azure portal:https://portal.azure.com;",
        "",
        f"Synonymns {term_no}",
        "",
        "",
        "",
        "false",
        "System default",
    ]
    for term_no in range(num_terms_to_create)
]

# (Re)create the file: header line first, then every generated row
with open(filename, 'w', newline='') as template_file:
    template_writer = csv.writer(template_file)
    template_writer.writerow(file_headers)
    template_writer.writerows(file_rows)


#### Uploading the Glossary Term csv
Now that we have created the template, we can upload it using the Import Glossary Terms Via CSV API (https://learn.microsoft.com/en-us/rest/api/purview/catalogdataplane/glossary/import-glossary-terms-via-csv-by-glossary-name?tabs=HTTP)

In [None]:
# Define the URI for the import operation
import_uri = f"{purview_endpoint}/catalog/api/glossary/name/{requests.utils.quote(glossary_name)}/terms/import?includeTermHierarchy=true&api-version=2022-08-01-preview"

payload = {}

# Construct the header but remove the Content-Type, so that requests can set the
# multipart content type for the file upload itself
headers = getHeaders()
headers.pop('Content-Type')

# Open the CSV inside a context manager so the handle is closed once the upload
# has been sent (it was previously left open)
with open(filename, 'rb') as csv_file:
    # Tuple describing the file part: (form field, (file name, file object, MIME type))
    files = [
        ('file', (filename, csv_file, 'text/csv'))
    ]
    response = requests.request("POST", import_uri, headers=headers, data=payload, files=files)

# Show the raw reply before parsing it, so a rejected import is visible
print(response.text)
response.raise_for_status()

# Get the operation id so we can check the status in the next cell
operation_guid = json.loads(response.content)['id']

In [None]:
# Query the import operation by its id and print whatever the service reports
status_uri = (
    f"{purview_endpoint}/catalog/api/glossary/terms/import/"
    f"{operation_guid}?api-version=2022-08-01-preview"
)

headers = getHeaders()
import_status_reply = requests.request("GET", status_uri, headers=headers)
response = json.loads(import_status_reply.content)
print(response)

#### Create the entity and the glossary term relationships

In [66]:
# Start by getting a list of glossaries
glossaries_uri = f"{purview_endpoint}/catalog/api/atlas/v2/glossary?ignoreTermsAndCategories=true&api-version=2022-08-01-preview"

headers = getHeaders()
glossaries_reply = requests.request("GET", glossaries_uri, headers=headers)

# Fail on an HTTP error here; otherwise the error body would be iterated below
# as if it were the list of glossaries
glossaries_reply.raise_for_status()
response = json.loads(glossaries_reply.content)

# Get the guid for the glossary we created (first match by name)
glossary_guid = next(
    (glossary['guid'] for glossary in response if glossary['name'] == glossary_name),
    None,
)

# A missing glossary previously surfaced as a bare IndexError from `[...][0]`
if glossary_guid is None:
    raise LookupError(f"No glossary named {glossary_name!r} was returned by {glossaries_uri}")

In [67]:
# Request the term headers of the glossary; the parsed reply is kept in `response`
term_headers_uri = (
    f"{purview_endpoint}/catalog/api/atlas/v2/glossary/"
    f"{glossary_guid}/terms/headers?api-version=2022-08-01-preview"
)

headers = getHeaders()
term_headers_reply = requests.request("GET", term_headers_uri, headers=headers)
response = json.loads(term_headers_reply.content)

In [None]:
# Turn each term header in `response` into a {guid, typeName} reference
term_guids = [
    {"guid": term_header['termGuid'], "typeName": "AtlasGlossaryTerm"}
    for term_header in response
]

In [90]:
# Now create a basic entity and add the term relationships to it
entity_uri = f"{purview_endpoint}/catalog/api/atlas/v2/entity"

headers = getHeaders()
payload = json.dumps({
    "entity": {
        "typeName": "DataSet",
        # Placeholder guid; the reply's guidAssignments is looked up under this key below
        "guid": "-1",
        "attributes": {
            "name": "Test Entity",
            "qualifiedName": "Test Entity",
            "description": "Test Entity"
        },
        "relationshipAttributes": {
            # Every collected glossary term is attached through the "meanings" relationship
            "meanings": term_guids
        }
    }
})

entity_reply = requests.request("POST", entity_uri, headers=headers, data=payload)

# A rejected request is the interesting outcome of this scale test, so report the
# status and body instead of letting it surface as a KeyError on 'guidAssignments'
if not entity_reply.ok:
    raise RuntimeError(
        f"Entity creation failed with HTTP {entity_reply.status_code}: {entity_reply.text}"
    )

response = json.loads(entity_reply.content)

# Guid recorded for the placeholder "-1"; used by the cleanup cell
entity_guid = response['guidAssignments'].get('-1')

#### Cleanup
Delete the entity, the glossary terms, and the glossary that were created.

In [None]:
# Remove the test entity, addressed by the guid captured when it was created
delete_guids_uri = "{}/catalog/api/atlas/v2/entity/guid/{}".format(purview_endpoint, entity_guid)
requests.delete(delete_guids_uri, headers=getHeaders())

In [None]:
# Delete the Glossary Terms
# The bulk delete call takes the plain guid strings, not the {guid, typeName} references
term_guids_to_delete = [term_ref['guid'] for term_ref in term_guids]

delete_bulk_terms_uri = f"{purview_endpoint}/catalog/api/glossary/terms:delete?forceDeleteEntityAssignment=true&api-version=2022-08-01-preview"
payload = json.dumps(term_guids_to_delete)

headers = getHeaders()
delete_reply = requests.request("POST", delete_bulk_terms_uri, headers=headers, data=payload)

# Report a rejected request with the service's message rather than a KeyError on 'id'
if not delete_reply.ok:
    raise RuntimeError(
        f"Bulk term deletion failed with HTTP {delete_reply.status_code}: {delete_reply.text}"
    )

# Parse the body once. The earlier version parsed it into `response` and then called
# json.loads(response.content) on that parsed value, which raises AttributeError.
response = json.loads(delete_reply.content)

# Operation id used by the next cell to check the deletion status
operation_guid = response['id']

In [None]:
# Query the bulk deletion operation by its id and print whatever the service reports
status_uri = (
    f"{purview_endpoint}/catalog/api/glossary/terms/bulkDeletion/"
    f"{operation_guid}?api-version=2022-08-01-preview"
)

headers = getHeaders()
deletion_status_reply = requests.request("GET", status_uri, headers=headers)
response = json.loads(deletion_status_reply.content)
print(response)

In [None]:
# Remove the glossary itself, addressed by its guid
delete_uri = "{}/catalog/api/atlas/v2/glossary/{}".format(purview_endpoint, glossary_guid)
headers = getHeaders()
requests.delete(delete_uri, headers=headers)
