In [None]:
"""
********************************************************************************************************************************************************************************************************
Author: Richardson Nascimento
Creation Date: 10/06/2022
Modification Date: 26/09/2022
Purpose: This notebook demonstrates how to move assets between collections in Microsoft Purview.
Main parameters:
    - QUALIFIED: Qualified Name of the parent object that should be moved along with its hierarchy
    - SOURCE_COLLECTION: The id of the source collection where the assets are currently located
    - TARGET_COLLECTION: The id of the destination collection where the assets should be moved

Pending:
    - Remove sensitive authentication information and start using Key Vault
    - Use only PyApacheAtlas to perform the collection movement. However, this is not yet available, requiring the use of the REST API, which requires another type of authentication
    - Improve performance by using multi-threading/batch processing for larger collections
*******
"""

In [None]:
!pip install pyapacheatlas

In [1]:
from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient
import requests
from requests.exceptions import HTTPError
import os

In [2]:
### General parameters. Replace the values with your own and keep sensitive information
### in environment variables (never hard-code secrets in the notebook).

# Tenant ID for your Azure subscription.
# Read as-is: wrapping a missing variable in str() would silently produce the text "None".
TENANT_ID = os.getenv('TENANT_ID')

# Your Service Principal App ID
CLIENT = os.getenv('CLIENT_ID')

# Your Service Principal password
CLIENT_SECRET = os.getenv('CLIENT_SECRET')

# Name of the Purview account (name only, no URL, e.g. ContosoPurview)
ACCOUNT = os.getenv('PURVIEW_NAME')

# Report missing settings immediately instead of letting them surface later as
# confusing authentication or DNS errors. This is a warning only; nothing is raised here.
_missing_settings = [
    name
    for name, value in (
        ('TENANT_ID', TENANT_ID),
        ('CLIENT_ID', CLIENT),
        ('CLIENT_SECRET', CLIENT_SECRET),
        ('PURVIEW_NAME', ACCOUNT),
    )
    if not value
]
if _missing_settings:
    print(f"WARNING: missing environment variable(s): {', '.join(_missing_settings)}")

# Full endpoint of the Purview account
PURVIEW_ENDPOINT = f"https://{ACCOUNT}.purview.azure.com"

# Microsoft Purview resource identifier requested when acquiring the REST API token
RESOURCE = '73c2949e-da2d-457a-9607-fcc665198967'

# Authentication URL (tenant-specific authority) to be used
ms_auth_url = f"https://login.microsoftonline.com/{TENANT_ID}"

# Version of the API used to move assets - always use the latest version
MOVE_API_VERSION = "2022-03-01-preview"

# Version of the API used to list the collections
LIST_API_VERSION = "2019-11-01-preview"

In [None]:
# Helper that retrieves the collections of a Purview account
def List_Collections(endpoint, token):
    """
    Return the parsed JSON payload describing the collections of a Microsoft Purview account.

    endpoint: base URL of the Purview account; the collections path is appended to it directly.
    token: bearer token string sent in the Authorization header.

    A non-success HTTP status is propagated through ``raise_for_status``.
    The API version comes from the notebook-level LIST_API_VERSION setting.
    """
    collections_url = endpoint + "/account/collections?api-version={}".format(LIST_API_VERSION)

    request_headers = {}
    request_headers['Authorization'] = 'Bearer ' + token
    request_headers['Content-Type'] = 'application/json'

    reply = requests.get(collections_url, headers=request_headers)
    reply.raise_for_status()
    return reply.json()

In [None]:
# Create a Service Principal authentication object for PyApacheAtlas
auth = ServicePrincipalAuthentication(
    tenant_id = TENANT_ID,
    client_id = CLIENT,
    client_secret = CLIENT_SECRET
)

# Acquire a bearer token for the direct REST API calls through the OAuth 2.0
# client-credentials grant. The request is made with `requests` because the ADAL
# library is neither installed nor imported by this notebook.
token_response = requests.post(
    ms_auth_url + "/oauth2/token",
    data={
        "grant_type": "client_credentials",
        "client_id": CLIENT,
        "client_secret": CLIENT_SECRET,
        "resource": RESOURCE,
    },
    timeout=30,  # do not hang the notebook if the login endpoint is unreachable
)
# Fail here, with the HTTP status, when the credentials or tenant are rejected
token_response.raise_for_status()

# Parsed token response; the bearer token itself is under "access_token"
token = token_response.json()

auth_token = token["access_token"]

In [None]:
# Retrieve the available collections, so you can get the ID of the source and target collections.
# The API is called once, using the bearer token string (auth_token).
collections = List_Collections(PURVIEW_ENDPOINT, auth_token)

# The listing is expected to arrive wrapped in a "value" array; a bare list is accepted too.
collection_items = collections.get("value", []) if isinstance(collections, dict) else collections

for collection in collection_items:
    # Prefer an explicit "id" when present, otherwise fall back to the collection's "name";
    # for the display name prefer "friendlyName" when the service provides it.
    collection_id = collection.get("id", collection.get("name"))
    collection_name = collection.get("friendlyName", collection.get("name"))
    print(f"Collection ID: {collection_id}, Name: {collection_name}")

In [None]:
### Parameters describing the assets to be moved

# Qualified name used to select the assets to move. The search filter below matches on the
# qualifiedName attribute with this value, so be aware of the hierarchy of assets it selects.
QUALIFIED = "asazure://aspaaseastus2.asazure.windows.net/contosoaas/AAS - Contoso"

# ID of the collection where the assets selected by QUALIFIED are currently located
# (used as the collectionId criterion of the search filter).
SOURCE_COLLECTION = "Contoso_HR"

# ID of the target collection the assets should be moved to (used in the moveHere request URL).
TARGET_COLLECTION = "abcd"

In [4]:

# Use PyApacheAtlas to search for the assets to be moved
client = PurviewClient(
    account_name = ACCOUNT,
    authentication = auth
)

# Filter ensuring that only assets matching BOTH criteria are selected:
# the qualified name contains QUALIFIED and the asset lives in SOURCE_COLLECTION.
filter_setup = {
    "and": [
        {
            "attributeName": "qualifiedName",
            "attributeValue": QUALIFIED,
            "operator": "contains"
        },
        {"collectionId": SOURCE_COLLECTION}
    ]
}

# Alternative filter that additionally restricts the move to specific entity types.
# Any valid search filter can be used; uncomment and adapt if needed.
#
# filter_setup = {
#     "and": [
#         {
#             "attributeName": "qualifiedName",
#             "attributeValue": QUALIFIED,
#             "operator": "startswith"
#         },
#         {"collectionId": SOURCE_COLLECTION},
#         {
#             "or": [
#                 {"entityType": "aas_table"},
#                 {"entityType": "aas_column"},
#                 {"entityType": "aas_model"}
#             ]
#         }
#     ]
# }

# Search for the assets to be moved according to the filter.
# The results are materialized into a list so that cells consuming `search` can be re-run:
# a lazily evaluated result would be exhausted after the first pass and silently yield
# zero assets on the second one.
search = list(client.discovery.search_entities("*", search_filter=filter_setup))

In [None]:
# Gather the GUID ('id' field) of every asset returned by the search
# TO-DO: Evaluate better ways to improve the performance
guids = [entity['id'] for entity in search]

# Checkpoint: review how many assets were selected before proceeding with the move
print(f"{len(guids)} elements to be moved to the new collection.")

In [6]:
# Use the Purview REST collection API to move the assets to the target collection.

# Start from an empty result so the reporting step still has something to inspect when
# there is nothing to move or the request fails (and never reads a stale earlier result).
resultado = {}

if guids:
    endpoint_move = PURVIEW_ENDPOINT + f"/catalog/api/collections/{TARGET_COLLECTION}/entity/moveHere"
    params = {"api-version": MOVE_API_VERSION}
    hed = {'Authorization': 'Bearer ' + auth_token}

    # Move the assets to the target collection
    try:
        response = requests.post(
            endpoint_move,
            json={"entityGuids": guids},
            params=params,
            headers=hed,
        )
        response.raise_for_status()
        # Keep the parsed response so the outcome can be reported afterwards
        resultado = response.json()

    except HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except Exception as err:
        # Deliberately broad: report the problem and let the notebook continue
        print(f'Other error occurred: {err}')

In [None]:
# Report the outcome of the move operation.
# Every level of the response is read defensively: a response without "mutatedEntities"
# or without an "UPDATE" entry simply means that nothing was moved.
mutated_entities = resultado.get('mutatedEntities') if isinstance(resultado, dict) else None
updated_entities = (mutated_entities or {}).get('UPDATE') or []

# Qualified names of the assets reported as updated by the service
moved = [elemento['attributes']['qualifiedName'] for elemento in updated_entities]

if moved:
    print(f"Number of moved assets:  {len(moved)}. List: ")
    print(moved)
else:
    print("No asset has been moved.")