# Register & Manage Data Sources

You can use the API to register new data sources or update existing ones, such as Azure Data Lake, SQL databases, or other cloud or on-premises data sources.

1. Authentication: The `get_access_token()` function uses `azure.identity.ClientSecretCredential` to obtain an OAuth token that authenticates the API requests.

2. Register Data Source: The register_data_source() function uses the Purview REST API to create a new data source by specifying its name, type, and connection string.

3. API Request: The data source is registered by sending a POST request to the /catalog/api/atlas/v2/entity endpoint, with the appropriate headers and JSON body.


In [None]:
import requests
from azure.identity import ClientSecretCredential

In [None]:


# Azure AD service principal used to authenticate (passed to
# ClientSecretCredential in get_access_token). Replace the placeholders
# with your own values.
tenant_id = '<YOUR_TENANT_ID>'
client_id = '<YOUR_CLIENT_ID>'
client_secret = '<YOUR_CLIENT_SECRET>'

# Purview account name and the base URL derived from it; every API URL
# in this notebook is built on top of purview_endpoint.
purview_account_name = '<YOUR_PURVIEW_ACCOUNT_NAME>'
purview_endpoint = 'https://' + purview_account_name + '.purview.azure.com'

# Authenticate using Azure AD to get the access token
def get_access_token():
    """Return an OAuth access token string for the Purview API.

    Builds a ``ClientSecretCredential`` from the module-level ``tenant_id``,
    ``client_id`` and ``client_secret`` settings and requests a token for
    the Purview ``.default`` scope. A new credential object is created on
    every call.
    """
    purview_scope = 'https://purview.azure.net/.default'
    service_principal = ClientSecretCredential(tenant_id, client_id, client_secret)
    return service_principal.get_token(purview_scope).token

# Register a new data source in Purview
def register_data_source(data_source_name, data_source_type, connection_string,
                         *, timeout=30):
    """Register a data source in Purview by posting an entity to the Atlas v2 API.

    Sends a POST request to ``{purview_endpoint}/catalog/api/atlas/v2/entity``
    authenticated with a bearer token from ``get_access_token()``, then
    prints whether the registration succeeded.

    Args:
        data_source_name: Value for the entity's ``name`` attribute.
        data_source_type: Atlas type name of the entity,
            e.g. 'azure_datalake_gen2_account'.
        connection_string: Used for both the ``qualifiedName`` and the
            ``connectionString`` attributes of the entity.
        timeout: Seconds to wait for the server before giving up
            (forwarded to ``requests.post``). Keyword-only.

    Returns:
        None. The outcome is reported via ``print``.

    Raises:
        requests.exceptions.RequestException: Network-level failures
            (including timeouts) are not caught and propagate to the caller.
    """
    url = f"{purview_endpoint}/catalog/api/atlas/v2/entity"
    headers = {
        "Authorization": f"Bearer {get_access_token()}",
        "Content-Type": "application/json"
    }

    # The single-entity endpoint expects the entity under the "entity" key.
    # The {"entities": [...]} list form is the payload of /entity/bulk and
    # is not what this URL accepts.
    # NOTE(review): confirm that the chosen type actually defines a
    # "connectionString" attribute in your Purview type system.
    body = {
        "entity": {
            "typeName": data_source_type,  # e.g., 'azure_datalake_gen2_account'
            "attributes": {
                "name": data_source_name,
                "qualifiedName": connection_string,
                "connectionString": connection_string
            }
        }
    }

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    response = requests.post(url, json=body, headers=headers, timeout=timeout)
    if response.status_code == 200:
        print(f"Data source '{data_source_name}' registered successfully.")
    else:
        print(f"Failed to register data source: {response.text}")

# Example: register a Data Lake account under the name 'my-datalake'.
data_source_name = 'my-datalake'
# The type name depends on the kind of data source being registered.
# NOTE(review): confirm this type name exists in your Purview account's
# type definitions before running.
data_source_type = 'azure_datalake_gen2_account'
connection_string = 'https://mydatalake.dfs.core.windows.net'

register_data_source(
    data_source_name=data_source_name,
    data_source_type=data_source_type,
    connection_string=connection_string,
)
