In [None]:
# Run once if the packages below are not installed yet.
# `%pip` installs into the environment of the running kernel, which avoids
# installing into the wrong interpreter when several Python versions coexist.

# %pip install azure-functions
# %pip install azure-core
# %pip install azure-identity
# %pip install azure-purview-catalog
# %pip install azure-purview-administration
# %pip install pandas

In [None]:
import os
import datetime
import pandas as pd

from io import BytesIO
from azure.identity import DefaultAzureCredential
from azure.purview.catalog import PurviewCatalogClient

In [None]:
def create_filter(asset_type, object_type="Tables"):
    """Build the search filter that restricts a query to one kind of asset.

    Args:
        asset_type: Value for the ``assetType`` condition
            (for example ``'Azure SQL Database'``).
        object_type: Value for the ``objectType`` condition. Defaults to
            ``"Tables"``, which was previously hard-coded, so existing
            one-argument calls produce the same filter as before.

    Returns:
        dict: ``{"and": [{"objectType": object_type}, {"assetType": asset_type}]}``.
    """
    # Local is not called `filter` so the builtin of that name stays usable.
    search_filter = {
        "and": [
            {"objectType": object_type},
            {"assetType": asset_type},
        ]
    }
    return search_filter

def create_search_body(keywords, filter):
    """Assemble the request body for a catalog search.

    Args:
        keywords: Search keywords. Any falsy value (``''``, ``None``) is sent
            as ``None``.
        filter: Filter dictionary, e.g. the result of ``create_filter``. Any
            falsy value (``{}``, ``None``) is sent as ``None``.

    Returns:
        dict: Body with the keys ``keywords``, ``facets`` and ``filter``;
        ``facets`` is always ``None``.
    """
    return {
        'keywords': keywords or None,
        'facets': None,
        'filter': filter or None,
    }

In [None]:
def purview_client(purview_account):
    """Create a catalog client for the given Purview account.

    Args:
        purview_account: Account name; the endpoint is built as
            ``https://<purview_account>.purview.azure.com``.

    Returns:
        PurviewCatalogClient: Client constructed with a
        ``DefaultAzureCredential`` and ``logging_enable=True``.
    """
    return PurviewCatalogClient(
        credential=DefaultAzureCredential(),
        endpoint=f'https://{purview_account}.purview.azure.com',
        logging_enable=True,
    )

In [None]:
def query_to_dataframe(purview_client, keywords, filter):
    """Run a catalog search and return its hits as a DataFrame.

    Args:
        purview_client: Client exposing ``discovery.query``.
        keywords: Search keywords, forwarded to ``create_search_body``.
        filter: Search filter, forwarded to ``create_search_body``.

    Returns:
        pandas.DataFrame: Built from the ``'value'`` entry of the response.
    """
    # NOTE(review): only the entries contained in this one response are
    # returned; if the service pages its results, later pages are not
    # fetched here — confirm against the API documentation.
    response = purview_client.discovery.query(
        search_request=create_search_body(keywords, filter)
    )
    return pd.DataFrame.from_dict(response['value'])

def get_asset(purview_client, asset_id):
    """Fetch one catalog entity by its GUID.

    Args:
        purview_client: Client exposing ``entity.get_by_guid``.
        asset_id: GUID of the entity to fetch.

    Returns:
        The unmodified result of ``purview_client.entity.get_by_guid(asset_id)``.
    """
    return purview_client.entity.get_by_guid(asset_id)

def get_term_guid(purview_client, term):
    """Return the GUID of the glossary term with the given name.

    Args:
        purview_client: Client exposing ``glossary.get_term_by_name``.
        term: Name of the glossary term.

    Returns:
        The ``'guid'`` entry of the term returned by the client.
    """
    # NOTE(review): confirm that the installed SDK version actually provides
    # glossary.get_term_by_name; nothing in this notebook exercises it.
    term_record = purview_client.glossary.get_term_by_name(term)
    return term_record['guid']

def related_entities_to_dataframe(asset):
    """Convert an asset's ``referredEntities`` mapping into a DataFrame.

    Args:
        asset: Mapping with a ``'referredEntities'`` entry.

    Returns:
        pandas.DataFrame: Result of ``pd.DataFrame.from_dict`` with its default
        orientation, i.e. one column per key of ``referredEntities``.
    """
    referred = asset['referredEntities']
    return pd.DataFrame.from_dict(referred)


In [None]:
def export_to_csv(purview_client, search_df, file_path):
    """Export the columns of every asset in ``search_df`` to a timestamped CSV.

    For each row of ``search_df`` the full asset is fetched with ``get_asset``;
    every referred entity whose ``typeName`` contains ``'column'`` becomes one
    CSV row. Optional fields that are missing from the asset (schema, column
    name, qualified name, description) are written as empty values instead of
    aborting the export.

    Args:
        purview_client: Client handed to ``get_asset``.
        search_df: DataFrame of search hits. Each row must provide ``id``;
            ``name`` and ``assetType`` are used when present.
        file_path: Directory that receives the export; created if missing.

    Returns:
        str: Path of the written file,
        ``purview_export_<YYYYmmdd_HHMMSS>.csv`` inside ``file_path``.
    """
    csv_columns = [
        'table_guid',
        'column_guid',
        'qualifiedName',
        'assetType',
        'schemaName',
        'tableName',
        'columnName',
        'columnDescription',
    ]

    # Collect plain dicts and build the frame once at the end; growing a
    # DataFrame with pd.concat inside the loop copies it on every iteration.
    rows = []
    for _, asset in search_df.iterrows():
        pv_asset = get_asset(purview_client, asset['id'])

        # Table-level values are identical for every column of this asset,
        # so they are resolved once per asset rather than once per column.
        relationship_attrs = (pv_asset.get('entity') or {}).get('relationshipAttributes') or {}
        schema_name = (relationship_attrs.get('dbSchema') or {}).get('displayText')
        try:
            asset_type = asset['assetType'][0]
        except (KeyError, IndexError, TypeError):
            asset_type = None
        table_name = asset.get('name')

        # Iterate the referred entities directly; no intermediate DataFrame
        # is needed just to walk over them.
        for entity in (pv_asset.get('referredEntities') or {}).values():
            if 'column' not in (entity.get('typeName') or ''):
                continue
            attributes = entity.get('attributes') or {}
            rows.append({
                'table_guid': asset['id'],
                'column_guid': entity.get('guid'),
                'qualifiedName': attributes.get('qualifiedName'),
                'assetType': asset_type,
                'schemaName': schema_name,
                'tableName': table_name,
                'columnName': attributes.get('name'),
                # Not every column carries a description.
                'columnDescription': attributes.get('userDescription'),
            })

    # Passing `columns` keeps the header row even when nothing was collected.
    output_df = pd.DataFrame(rows, columns=csv_columns)

    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    # os.path.join picks the platform's separator and avoids the invalid
    # '\p' escape a hand-written backslash would put in the string literal.
    output_file = os.path.join(file_path, f'purview_export_{timestamp}.csv')

    # Create the target directory if it doesn't exist yet.
    if file_path:
        os.makedirs(file_path, exist_ok=True)

    output_df.to_csv(output_file, index=False)

    return output_file

In [None]:
# Notebook parameters: adjust these before running the export cell.

# Account name; purview_client() builds https://<account>.purview.azure.com from it.
purview_account = 'purviewaccountname'

# Directory that receives the CSV; export_to_csv() creates it when missing.
file_path = 'C:\\temp\\purviewexport'

# Search inputs: keyword string for the query and the asset type for the filter.
keywords = '*'
asset_type = 'Azure SQL Database'

In [None]:
# Service-principal settings, set as environment variables for this kernel
# session (presumably what DefaultAzureCredential() in purview_client() reads;
# confirm against the azure-identity documentation).
# The values below are placeholders. Do not save, share or commit this notebook
# with a real client secret in this cell.
%env AZURE_TENANT_ID 00000000-0000-0000-0000-000000000000
%env AZURE_CLIENT_ID 00000000-0000-0000-0000-000000000000
%env AZURE_CLIENT_SECRET yourclientsecret

In [None]:
# Run the export: build the filter, connect, search, and write the CSV.
# The client instance and the filter get their own names. Rebinding
# `purview_client` to the client object replaced the factory function, so
# running this cell a second time failed with "object is not callable";
# a variable called `filter` would also hide the builtin of that name.
try:
    search_filter = create_filter(asset_type)
    catalog_client = purview_client(purview_account)
    pv_search_df = query_to_dataframe(catalog_client, keywords, search_filter)
    filename = export_to_csv(catalog_client, pv_search_df, file_path)
    print(f'File exported successfully to {filename}')
except Exception as e:
    # Include the exception type: a bare message (for a KeyError, just the
    # missing key) is hard to interpret on its own.
    print(f'Export failed: {type(e).__name__}: {e}')