In [None]:
# RUN ONCE IF NEEDED. IF MULTIPLE VERSIONS OF PYTHON ARE INSTALLED, ADJUST THE COMMANDS SO pip TARGETS THE PYTHON USED BY THIS KERNEL (E.G. USE %pip INSTEAD OF !pip)

# !pip install azure-functions
# !pip install azure-core
# !pip install azure-identity
# !pip install azure-purview-catalog
# !pip install azure-purview-administration
# !pip install pandas

In [97]:
import os
import json
import datetime
import pandas as pd

from io import BytesIO
from azure.identity import DefaultAzureCredential
from azure.purview.catalog import PurviewCatalogClient

In [2]:
# Name of the Purview account; purview_client() uses it to build the endpoint
# https://<purview_account>.purview.azure.com
purview_account = 'purviewaccountname'
# Path of the CSV file loaded by load_file_to_df(); the processing below reads its
# 'column_guid' and 'glossaryTerms' columns.
file_path = 'C:\\temp\\purviewexport\\purview_export.csv'

In [3]:
def purview_client(purview_account):
    """Create a PurviewCatalogClient for the given Purview account.

    Args:
        purview_account: Account name, inserted into the endpoint URL
            ``https://<purview_account>.purview.azure.com``.

    Returns:
        A PurviewCatalogClient authenticated with DefaultAzureCredential and
        created with ``logging_enable=True``.
    """
    azure_credential = DefaultAzureCredential()
    endpoint_url = f'https://{purview_account}.purview.azure.com'
    return PurviewCatalogClient(
        endpoint=endpoint_url,
        credential=azure_credential,
        logging_enable=True,
    )

In [63]:
def create_filter():
    """Return a search filter that keeps only objects of type 'Glossary terms'.

    A new dict is built on every call, so callers may modify the result freely.
    """
    object_type_clause = {"objectType": "Glossary terms"}
    return {"and": [object_type_clause]}

def create_search_body(keywords, filter):
    """Assemble the request body for a search query.

    Args:
        keywords: Search keywords; any falsy value is sent as None.
        filter: Search filter; any falsy value is sent as None.

    Returns:
        A dict with the keys 'keywords', 'facets' (always None) and 'filter'.
    """
    return {
        'keywords': keywords or None,
        'facets': None,
        'filter': filter or None,
    }

def query_to_dataframe(purview_client, keywords, filter):
    """Run a discovery query and return its results as a DataFrame.

    Args:
        purview_client: Client object exposing ``discovery.query``.
        keywords: Search keywords passed to create_search_body().
        filter: Search filter passed to create_search_body().

    Returns:
        A DataFrame built from the 'value' entry of the query response.
    """
    response = purview_client.discovery.query(
        search_request=create_search_body(keywords, filter)
    )
    return pd.DataFrame.from_dict(response['value'])

In [4]:
def load_file_to_df(file_path):
    """Read the CSV file at ``file_path`` into a pandas DataFrame and return it."""
    return pd.read_csv(file_path)

In [80]:
def get_glossaryMappings_df(input_df):
    """Group column GUIDs by the glossary terms assigned to them.

    Each row of ``input_df`` carries a 'column_guid' and a comma-separated
    'glossaryTerms' string. The result has one row per distinct term, in order
    of first appearance, listing every distinct column GUID that uses it.

    Rows are skipped when 'glossaryTerms' is not a string (NaN, None, numbers)
    or when 'column_guid' is missing. Blank terms, e.g. from a trailing comma
    or ``"a,,b"``, are ignored.

    Args:
        input_df: DataFrame with 'column_guid' and 'glossaryTerms' columns.

    Returns:
        DataFrame with the columns 'glossaryTerm_guid' (left unfilled, NaN),
        'glossaryTerm' and 'column_guids' (an object ndarray of the distinct
        GUIDs for the term, in order of first appearance).

    Raises:
        KeyError: If ``input_df`` lacks one of the required columns.
    """
    output_columns = ['glossaryTerm_guid', 'glossaryTerm', 'column_guids']

    # term -> insertion-ordered set of column GUIDs (dict keys give ordered de-duplication).
    guids_by_term = {}
    for column_guid, raw_terms in zip(input_df['column_guid'], input_df['glossaryTerms']):
        # Empty CSV cells arrive as NaN; anything that is not text cannot be split.
        if not isinstance(raw_terms, str):
            continue
        # A row without a GUID cannot be assigned to any term.
        if pd.isna(column_guid):
            continue
        for piece in raw_terms.split(','):
            term = piece.strip()
            if term:
                guids_by_term.setdefault(term, {})[column_guid] = None

    # Build the frame once rather than concatenating row by row inside the loop.
    records = [
        {
            'glossaryTerm': term,
            'column_guids': pd.Series(list(guids), dtype=object).to_numpy(),
        }
        for term, guids in guids_by_term.items()
    ]
    return pd.DataFrame(records, columns=output_columns)

In [108]:
def pv_assignTerms_Entities(purview_client, term_guid, entities):
    """Assign one glossary term to a collection of entities.

    Args:
        purview_client: Client object exposing ``glossary.assign_term_to_entities``.
        term_guid: GUID of the glossary term to assign.
        entities: Iterable of entity GUIDs; each is wrapped as ``{"guid": <guid>}``.
    """
    related_object_ids = []
    for entity_guid in entities:
        related_object_ids.append({"guid": entity_guid})
    purview_client.glossary.assign_term_to_entities(term_guid, related_object_ids)

In [74]:
def getTermGUID(purview_client, term):
    """Look up the GUID of the glossary term whose display text equals ``term``.

    Searches with ``term`` as the keyword, restricted by create_filter(), then
    keeps only results whose 'displayText' matches ``term`` exactly.

    Args:
        purview_client: Client passed through to query_to_dataframe().
        term: Glossary term name to resolve.

    Returns:
        The 'id' of the last exactly matching search result, or an empty string
        when the search returns nothing or no result matches exactly. An empty
        string is used rather than None so that callers checking
        ``len(term_guid)`` keep working.
    """
    pv_term_search_df = query_to_dataframe(purview_client, term, create_filter())
    # An empty result set produces a frame without a 'displayText' column.
    if pv_term_search_df.empty:
        return ''

    exact_match_ids = pv_term_search_df.loc[
        pv_term_search_df['displayText'] == term, 'id'
    ]
    if exact_match_ids.empty:
        return ''
    # Several exact matches: keep the last one, as the previous row-by-row scan did.
    return exact_match_ids.iloc[-1]

In [None]:
# Load the export file, group column GUIDs by glossary term, and assign each
# resolvable term to its columns in Purview.
try:
    # Bind the client to its own name: assigning it to `purview_client` would
    # overwrite the factory function, so re-running this cell would fail.
    catalog_client = purview_client(purview_account)
    pv_updates_df = load_file_to_df(file_path)
    glossaryMappings_df = get_glossaryMappings_df(pv_updates_df)

    for _, term in glossaryMappings_df.iterrows():
        term_name = term['glossaryTerm']
        term_guid = getTermGUID(catalog_client, term_name)
        entities = term['column_guids']
        # A GUID in canonical hyphenated form is 36 characters long; anything
        # else means the term could not be resolved.
        if isinstance(term_guid, str) and len(term_guid) == 36:
            print(f'Assigning term {term_name} to {len(entities)} entities')
            pv_assignTerms_Entities(catalog_client, term_guid, entities)
            print('Done.')
        else:
            print(f'Skipping term {term_name}: no valid glossary term GUID found')
except Exception as e:
    # Best-effort run: report the failure, including its type, instead of raising.
    print(f'{type(e).__name__}: {e}')