# 1. Authenticate to Microsoft Azure

In [36]:
from azure.identity import DefaultAzureCredential
from azure.mgmt.datafactory import DataFactoryManagementClient
import os
from dotenv import load_dotenv
import json

In [2]:
# Load the .env file; the os.getenv() lookups in the later cells are
# expected to read the settings it defines (subscription, resource group,
# factory name).
load_dotenv()

True

In [3]:
# Build the Data Factory management client from the default Azure
# credential and the subscription ID taken from the environment.
credential = DefaultAzureCredential()
subscription_id = os.environ.get("AZURE_SUBSCRIPTION_ID")

client = DataFactoryManagementClient(credential, subscription_id)

# 2. Get data from Azure Data Factory

In [43]:
# Resource group and factory name used by the listing calls below;
# both come from the environment and are None when the variable is unset.
resource_group = os.environ.get("RESOURCE_GROUP")
data_factory_name = os.environ.get("DF_NAME")

In [44]:
# --- Gather pipeline metadata and their activities ---
pipeline_list = []
activities_list = []

pipelines = client.pipelines.list_by_factory(resource_group, data_factory_name)

for pipeline in pipelines:
    # One metadata record per pipeline, including the link produced by
    # build_pipeline_url for this subscription / resource group / factory.
    pipeline_list.append(
        {
            "name": pipeline.name,
            "id": pipeline.id,
            "type": pipeline.type,
            "description": pipeline.description,
            "url": build_pipeline_url(
                sub_id=subscription_id,
                resource_group=resource_group,
                factory_name=data_factory_name,
                pipeline_name=pipeline.name,
            ),
        }
    )

    # One record per activity; a pipeline whose activities attribute is
    # None or empty contributes nothing.
    activities_list.extend(
        {
            "pipeline_name": pipeline.name,
            "activity_name": act.name,
            "type": act.type,
            "description": act.description,
        }
        for act in pipeline.activities or []
    )

# --- Write both collections out as JSON ---
for out_path, records in (
    ("pipeline.json", pipeline_list),
    ("activities.json", activities_list),
):
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2)

print("✅ Export complete: pipeline.json and activities.json")

✅ Export complete: pipeline.json and activities.json


In [None]:
# --- Collect Pipelines ---
# Start from empty lists in this cell. Previously only pipeline_run_list was
# initialised here while the loop appended to pipeline_list, so the cell
# either re-appended every pipeline onto the list left by an earlier cell
# (duplicating the entries written to pipeline.json) or raised NameError
# on a fresh kernel.
pipeline_list = []
activities_list = []
# NOTE(review): pipeline_run_list is never filled in this cell; it is kept
# only because the original cell defined it. Confirm nothing later relies
# on it, then remove.
pipeline_run_list = []

pipelines = client.pipelines.list_by_factory(resource_group, data_factory_name)

for pipeline in pipelines:
    # Pipeline metadata, plus the link produced by build_pipeline_url
    pipeline_entry = {
        "name": pipeline.name,
        "id": pipeline.id,
        "type": pipeline.type,
        "description": pipeline.description,
        "url": build_pipeline_url(
            sub_id=subscription_id,
            resource_group=resource_group,
            factory_name=data_factory_name,
            pipeline_name=pipeline.name,
        ),
    }
    pipeline_list.append(pipeline_entry)

    # Activities are read from the listed pipeline object; pipelines whose
    # activities attribute is None or empty are skipped.
    for act in pipeline.activities or []:
        activity_entry = {
            "pipeline_name": pipeline.name,
            "activity_name": act.name,
            "type": act.type,
            "description": act.description,
        }
        activities_list.append(activity_entry)

# --- Export to JSON files ---
with open("pipeline.json", "w", encoding="utf-8") as f:
    json.dump(pipeline_list, f, indent=2)

with open("activities.json", "w", encoding="utf-8") as f:
    json.dump(activities_list, f, indent=2)

print("✅ Export complete: pipeline.json and activities.json")