# Export Data Sources and Collections
This notebook shows how to export the data sources registered in a Purview account, together with the collection path of each source, to a CSV file.

In [1]:
# Name of the Purview account to query; it is interpolated into the `pv` CLI call
# and into the summary message printed after the sources are fetched.
purviewName = "purview-sandbox"

In [2]:
# Helper Methods
import json
def getJSON(raw_output):
    """Parse captured command output as JSON.

    Args:
        raw_output: Iterable of string fragments (for example the lines
            captured from a shell command). The fragments are concatenated
            with no separator before parsing.

    Returns:
        The Python object decoded from the concatenated text.

    Raises:
        json.JSONDecodeError: If the concatenated text is not valid JSON.
    """
    return json.loads(''.join(raw_output))

import csv
def export(table, path='output.csv'):
    """Write a table of rows to a CSV file.

    Args:
        table: Iterable of rows, each row an iterable of cell values.
        path: Destination file. Defaults to 'output.csv' in the current
            working directory, which is where earlier versions always wrote.

    The file is overwritten if it already exists.
    """
    # newline='' lets the csv module control line endings itself (otherwise
    # extra blank lines can appear on Windows).
    with open(path, 'w', newline='', encoding="utf-8") as handle:
        csv.writer(handle).writerows(table)

In [3]:
# Get list of all data sources
# IPython shell capture: runs the `pv` CLI with the configured account name and
# stores the command's output lines in `data`.
data = !pv scan readDataSources --purviewName {purviewName}
# getJSON concatenates the captured lines and parses them as one JSON document.
sources = getJSON(data)
# 'count' is read straight from the parsed response.
# NOTE(review): this total appears to include collection entries as well as
# actual sources (a later cell splits the two) - confirm the wording of the message.
count = sources['count']
print(f'We found {count} sources in account {purviewName}.')

We found 26 sources in account purview-sandbox.


In [4]:
# Get source attributes (name, kind, parent)
# mapCollectionParent: collection name -> parent collection name
#   (only collections that actually have a parent are recorded).
# sourcesList: one {'name', 'kind', 'parent'} dict per non-collection entry.
mapCollectionParent = {}
sourcesList = []
numberOfCollections = 0
for source in sources['value']:
    name = source['name']
    kind = source['kind']

    # Tolerate a missing or null 'properties' / 'parentCollection' instead of
    # raising KeyError; every such case is treated as "no parent".
    parentRef = (source.get('properties') or {}).get('parentCollection')
    parent = parentRef.get('referenceName') if parentRef else None

    if kind == 'Collection':
        # Count collections directly rather than deriving the number from the
        # response's self-reported 'count', so the tally always matches the
        # entries that were actually iterated.
        numberOfCollections += 1
        if parent is not None:
            mapCollectionParent[name] = parent
    else:
        sourcesList.append({
            'name': name,
            'kind': kind,
            'parent': parent
        })

numberOfSources = len(sourcesList)
print(f'There are {numberOfSources} sources and {numberOfCollections} collections.')
    

There are 20 sources and 6 collections.


In [5]:
# Build a map of collection name to collection path
# Each path lists the ancestors from the outermost one down to the collection
# itself, separated by " > ".
mapCollectionPath = {}
for collection, parentCollection in mapCollectionParent.items():
    path = collection
    while True:
        # Prefix the current ancestor onto the path built so far.
        path = " > ".join((parentCollection, path))
        # An ancestor with no recorded parent is the top of the chain.
        if parentCollection not in mapCollectionParent:
            break
        parentCollection = mapCollectionParent[parentCollection]
    mapCollectionPath[collection] = path
print('Constructed full path names for each collection.')

Constructed full path names for each collection.


In [6]:
# Export to CSV
# The first row is the header; every following row is one data source.
headers = ('name','kind','path')
table = [headers]

for source in sourcesList:
    parent = source['parent']
    # mapCollectionPath only has entries for collections that themselves have
    # a parent. A source that sits directly under a top-level collection, or
    # that has no parent at all, would raise KeyError on a plain lookup, so
    # fall back to the parent's own name (or an empty path when there is none).
    path = mapCollectionPath.get(parent, parent or '')
    table.append((source['name'], source['kind'], path))

export(table)
print('Sources exported to CSV.')

Sources exported to CSV.
