In [1]:
# DO NOT DELETE THIS CELL

# API Name: Gateways - List Gateway Role Assignments
# Command:  GET https://api.fabric.microsoft.com/v1/gateways/{gatewayId}/roleAssignments
# Doc:      https://learn.microsoft.com/en-us/rest/api/fabric/core/gateways/list-gateway-role-assignments

# Loads table: fabric_onprem_gateway_role_assignments

# Note: this queries the fabric_onprem_gateways table to get a list of gatewayId values for the API calls. See get_gateway_ids_from_delta() in CELL 7 of the next notebook cell.

StatementMeta(, 11ec3ce4-fd3e-4b9d-84ac-f70fa01ab54a, 3, Finished, Available, Finished)

In [2]:
# CELL 1 - Title and Introduction
# ==================================
# Microsoft Fabric Gateway Role Assignments to Delta Lake - PySpark Notebook
# This notebook retrieves Microsoft Fabric on-premises gateway role assignments and loads them into a Delta Lake table
# with optimization for analytics workloads
# ==================================


# CELL 2 - Import Libraries
# ==================================
# Import required libraries
import requests
import json
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, current_timestamp, lit, from_json, explode
from pyspark.sql.types import StructType, StructField, StringType, TimestampType, BooleanType, IntegerType, ArrayType
import logging
from typing import Dict, List, Optional
from delta.tables import DeltaTable
import random
import time
# ==================================


# CELL 3 - Configure Logging and Initialize Spark
# ==================================
# Logging: timestamped, INFO-level messages so each step of the run can be traced.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Spark session with the Delta Lake SQL extension and catalog registered.
# getOrCreate() hands back the already-running session when one exists
# (Fabric notebooks are expected to provide one with Delta support).
spark = (
    SparkSession.builder
    .appName("FabricGatewayRoleAssignmentsToDelta")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
)

# Session-level Delta write tuning: optimized writes and automatic compaction.
spark.conf.set("spark.databricks.delta.optimizeWrite.enabled", "true")
spark.conf.set("spark.databricks.delta.autoCompact.enabled", "true")
# ==================================


# CELL 4 - Configuration Parameters
# ==================================
# Notebook-wide settings. Every function below reads its tunables from this dict.
CONFIG = {
    # --- Fabric REST API ---
    # Prefix prepended to every endpoint path
    "API_BASE_URL": "https://api.fabric.microsoft.com/v1",

    # --- Retry / backoff policy used by call_fabric_api ---
    # Number of attempts made per request
    "MAX_RETRIES": 5,
    # First wait (seconds) between attempts
    "INITIAL_BACKOFF_SEC": 1,
    # Ceiling (seconds) the wait is capped at
    "MAX_BACKOFF_SEC": 60,
    # Multiplier applied to the wait after each retry
    "BACKOFF_FACTOR": 2,
    # Upper bound of the random jitter, as a fraction of the current wait
    "JITTER_FACTOR": 0.1,

    # --- Request shaping ---
    # Sent as the "top" query parameter on each page request
    "PAGE_SIZE": 50,
    # Per-request timeout in seconds
    "TIMEOUT": 30,

    # --- Lakehouse tables ---
    # Source table holding the gateway IDs
    "GATEWAY_TABLE_NAME": "fabric_onprem_gateways",
    # Target table receiving the role assignments
    "ROLE_ASSIGNMENTS_TABLE_NAME": "fabric_onprem_gateway_role_assignments",
    # Default Tables folder in a Fabric Lakehouse (not referenced by the code in this notebook)
    "LAKEHOUSE_PATH": "Tables"
}
# ==================================


# CELL 5 - Authentication Function
# ==================================
def get_access_token():
    """
    Fetch a bearer token for the Fabric REST API.

    The token is requested from mssparkutils.credentials for the
    "https://api.fabric.microsoft.com" audience. notebookutils is imported
    lazily inside the function, so the import itself is covered by the
    error handling below.

    Returns:
        The value returned by mssparkutils.credentials.getToken
        (the access token).

    Raises:
        Exception: Any failure while importing notebookutils or requesting
        the token is logged and then re-raised unchanged.
    """
    try:
        # Only available inside the Fabric notebook runtime
        from notebookutils import mssparkutils
        return mssparkutils.credentials.getToken("https://api.fabric.microsoft.com")
    except Exception as token_error:
        logger.error(f"Failed to get access token: {str(token_error)}")
        raise
# ==================================


# CELL 6 - API Call Function
# ==================================
def call_fabric_api(endpoint: str, access_token: str, params: Optional[Dict] = None) -> Dict:
    """
    Issue a GET request against the Microsoft Fabric REST API, with retries.

    Behaviour:
    - Sends the bearer token in the Authorization header.
    - On HTTP 429, waits for the number of seconds given by the Retry-After
      header when it is a plain integer; otherwise uses exponential backoff
      with random jitter. Then retries.
    - On transport errors and retryable HTTP errors (5xx, 408), retries with
      the same exponential backoff.
    - On other 4xx responses (e.g. 401/403/404) fails immediately: repeating
      the identical request cannot succeed, so no retries are spent on it.
    - Never returns None: once the attempts are used up the last error is
      raised, including when every attempt was answered with 429.

    Args:
        endpoint: The API endpoint path (e.g., "/gateways/{gatewayId}/roleAssignments")
        access_token: The Azure AD access token
        params: Optional query parameters for the API call

    Returns:
        dict: The JSON response from the API

    Raises:
        requests.exceptions.RequestException: If the API call fails with a
        non-retryable status, or still fails after all attempts.
    """
    url = f"{CONFIG['API_BASE_URL']}{endpoint}"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }

    # Always make at least one attempt, even if MAX_RETRIES is configured as 0
    max_attempts = max(1, CONFIG['MAX_RETRIES'])
    backoff_time = CONFIG['INITIAL_BACKOFF_SEC']

    for attempt in range(max_attempts):
        last_attempt = attempt == max_attempts - 1
        try:
            logger.info(f"Making API call to: {url} (Attempt {attempt + 1})")

            response = requests.get(
                url,
                headers=headers,
                params=params,
                timeout=CONFIG['TIMEOUT']
            )

            # Rate limited and attempts remain: wait, then try again.
            # A 429 on the final attempt falls through to raise_for_status()
            # so the caller gets an HTTPError instead of a silent None.
            if response.status_code == 429 and not last_attempt:
                retry_after = response.headers.get('Retry-After')

                if retry_after and retry_after.strip().isdigit():
                    # Server told us how long to wait
                    wait_time = int(retry_after)
                else:
                    # No usable hint (missing or HTTP-date form): exponential backoff with jitter
                    jitter = random.uniform(0, CONFIG['JITTER_FACTOR'] * backoff_time)
                    wait_time = backoff_time + jitter
                    backoff_time = min(backoff_time * CONFIG['BACKOFF_FACTOR'], CONFIG['MAX_BACKOFF_SEC'])

                logger.warning(f"Rate limit exceeded (429). Waiting {wait_time:.2f} seconds before retry.")
                time.sleep(wait_time)
                continue

            # Raises HTTPError for any 4xx/5xx status
            response.raise_for_status()
            return response.json()

        except requests.exceptions.RequestException as e:
            logger.warning(f"API call failed (Attempt {attempt + 1}): {str(e)}")

            # Transport-level errors carry no response, hence the double getattr
            status_code = getattr(getattr(e, 'response', None), 'status_code', None)
            non_retryable = (
                status_code is not None
                and 400 <= status_code < 500
                and status_code not in (408, 429)
            )

            if non_retryable:
                logger.error(f"Non-retryable HTTP {status_code} for endpoint: {endpoint}")
                raise

            if last_attempt:
                logger.error(f"All retry attempts failed for endpoint: {endpoint}")
                logger.error(f"Final error: {str(e)}")
                raise

            # Exponential backoff with jitter before the next attempt
            jitter = random.uniform(0, CONFIG['JITTER_FACTOR'] * backoff_time)
            wait_time = backoff_time + jitter
            backoff_time = min(backoff_time * CONFIG['BACKOFF_FACTOR'], CONFIG['MAX_BACKOFF_SEC'])

            logger.info(f"Waiting {wait_time:.2f} seconds before retry.")
            time.sleep(wait_time)

    # Defensive guard: every path through the final attempt returns or raises,
    # so this is not expected to run; it guarantees the function never returns None.
    raise requests.exceptions.RetryError(f"All retry attempts failed for endpoint: {endpoint}")
# ==================================


# CELL 7 - Read Gateway IDs from Delta Table
# ==================================
def get_gateway_ids_from_delta():
    """
    Retrieve the gateway IDs from the existing gateway Delta table.

    Reads the `id` column of the table named by CONFIG['GATEWAY_TABLE_NAME'].
    Null ids are skipped and duplicates are collapsed, so each gateway is
    queried exactly once and no request is built around a missing id.

    This is deliberately best-effort: any failure (missing table, missing
    column, Spark error) is logged and an empty list is returned so the
    caller can stop cleanly.

    Returns:
        list: Distinct, non-null gateway ID values (empty on any failure)
    """
    try:
        table_name = CONFIG['GATEWAY_TABLE_NAME']

        # Fails fast (and lands in the except below) when the table is missing
        spark.sql(f"DESCRIBE TABLE {table_name}")

        # DISTINCT + IS NOT NULL: one API call per real gateway
        gateway_ids_df = spark.sql(
            f"SELECT DISTINCT id FROM {table_name} WHERE id IS NOT NULL"
        )

        # Convert to a Python list
        gateway_ids = [row.id for row in gateway_ids_df.collect()]

        logger.info(f"Retrieved {len(gateway_ids)} gateway IDs from Delta table")
        return gateway_ids

    except Exception as e:
        logger.error(f"Failed to get gateway IDs from Delta table: {str(e)}")
        logger.warning("No existing gateways found in Delta table. Please run the gateway extraction first.")
        return []
# ==================================


# CELL 8 - Get Gateway Role Assignments Function
# ==================================
def get_gateway_role_assignments(gateway_id: str, access_token: str) -> List[Dict]:
    """
    Retrieve all role assignments for one gateway, following pagination.

    Pages are requested from "/gateways/{gateway_id}/roleAssignments" until
    the response no longer carries a "continuationToken".

    Failures never propagate as request errors: when a page cannot be
    fetched (a RequestException, or a response that is not a JSON object),
    the error is logged and whatever was collected so far is returned. That
    may be a partial list or an empty one.

    Args:
        gateway_id: The ID of the gateway to get role assignments for
        access_token: The Azure AD access token

    Returns:
        list: The role assignment objects collected for the gateway
    """
    all_role_assignments = []
    continuation_token = None

    # The path does not change between pages
    endpoint = f"/gateways/{gateway_id}/roleAssignments"

    while True:
        # Set up parameters for the API call
        params = {"top": CONFIG['PAGE_SIZE']}
        if continuation_token:
            params["continuationToken"] = continuation_token

        try:
            response = call_fabric_api(endpoint, access_token, params)

            # A missing or non-object body would otherwise surface as an
            # AttributeError on .get() and abort the whole run; route it
            # through the same handling as any other request failure.
            if not isinstance(response, dict):
                raise requests.exceptions.RequestException(
                    f"No usable response body (got {type(response).__name__})"
                )

            # "value" may be absent or null on an empty page
            role_assignments = response.get("value") or []
            all_role_assignments.extend(role_assignments)

            logger.info(f"Retrieved {len(role_assignments)} role assignments for gateway {gateway_id}. Running total: {len(all_role_assignments)}")

            # Check if there are more pages
            continuation_token = response.get("continuationToken")
            if not continuation_token:
                break

        except requests.exceptions.RequestException as e:
            # Log the error but don't fail the entire process
            logger.error(f"Failed to get role assignments for gateway {gateway_id}: {str(e)}")

            # If we already have some role assignments, return those rather than an empty list
            if all_role_assignments:
                logger.warning(f"Returning partial results ({len(all_role_assignments)} role assignments) for gateway {gateway_id}")
                return all_role_assignments

            # Otherwise, return empty list
            logger.warning(f"Returning empty list for gateway {gateway_id} due to API error")
            return []

    logger.info(f"Finished retrieving gateway role assignments for gateway {gateway_id}. Total count: {len(all_role_assignments)}")
    return all_role_assignments
# ==================================


# CELL 9 - Create Enhanced DataFrame Function
# ==================================
def create_enhanced_gateway_role_assignments_dataframe(role_assignments_data: List[Dict], gateway_id: str) -> "DataFrame":
    """
    Convert gateway role assignment dicts into a flat PySpark DataFrame.

    Each input dict becomes one row with the columns
    id, principalId, principalType, role, gatewayId, extraction_timestamp:
    - principalId / principalType come from the nested "principal" object;
      a missing or null "principal" yields nulls for both.
    - gatewayId is the `gateway_id` argument, repeated on every row.
    - extraction_timestamp is Spark's current_timestamp().

    The data columns are built with an explicit string schema instead of
    type inference, so a page where some column is entirely null still
    converts. Non-null values are coerced with str().

    Args:
        role_assignments_data: List of gateway role assignment dictionaries from the API
        gateway_id: The ID of the parent gateway

    Returns:
        DataFrame: One row per input dict; an empty DataFrame with the same
        column names and types when the input list is empty.
    """
    data_columns = ["id", "principalId", "principalType", "role", "gatewayId"]

    rows = []
    for role_assignment in role_assignments_data:
        # `or {}` also covers an explicit "principal": null in the payload
        principal = role_assignment.get("principal") or {}

        values = (
            role_assignment.get("id"),
            principal.get("id"),
            principal.get("type"),
            role_assignment.get("role"),
            gateway_id,  # Parent gateway ID for relationship tracking
        )
        # Keep nulls as nulls; coerce everything else to the declared string type
        rows.append(tuple(None if value is None else str(value) for value in values))

    if not rows:
        logger.warning(f"No role assignments found for gateway {gateway_id}. Creating empty DataFrame.")
        empty_schema = StructType([
            StructField("id", StringType(), False),  # False = not nullable
            StructField("principalId", StringType(), True),
            StructField("principalType", StringType(), True),
            StructField("role", StringType(), True),
            StructField("gatewayId", StringType(), False),  # Parent gateway ID
            StructField("extraction_timestamp", TimestampType(), False)
        ])
        return spark.createDataFrame([], empty_schema)

    # Data columns are declared nullable so a record lacking a field is
    # loaded with a null rather than rejected at DataFrame creation.
    data_schema = StructType([
        StructField(column_name, StringType(), True) for column_name in data_columns
    ])
    spark_df = spark.createDataFrame(rows, schema=data_schema)

    # Add metadata column for tracking when this data was extracted
    return spark_df.withColumn("extraction_timestamp", current_timestamp())
# ==================================


# CELL 10 - Delta Lake Operations Functions
# ==================================
def ensure_delta_table_exists(table_name: str, df_schema):
    """
    Ensure the Delta table exists, creating an empty one if necessary.

    Existence is checked through the Spark catalog. Only a definite
    "table does not exist" answer leads to creation; any error raised by
    the check itself propagates to the caller. This matters because the
    creation step writes an empty DataFrame in overwrite mode: treating an
    unrelated failure as "missing" would replace an existing table's data.

    Args:
        table_name: Name of the Delta table
        df_schema: Schema (StructType) the new table is created with
    """
    if spark.catalog.tableExists(table_name):
        logger.info(f"Delta table '{table_name}' already exists")
        return

    logger.info(f"Creating Delta table '{table_name}'")

    # Create an empty DataFrame with the schema
    empty_df = spark.createDataFrame([], df_schema)

    # Create the Delta table (without partitioning)
    empty_df.write \
        .format("delta") \
        .mode("overwrite") \
        .option("overwriteSchema", "true") \
        .saveAsTable(table_name)

    logger.info(f"Delta table '{table_name}' created successfully")


def merge_data_to_delta(source_df, table_name: str):
    """
    Upsert gateway role assignment rows into the Delta table.

    Rows are keyed on (id, gatewayId):
    - a matching target row has principalId, principalType, role and
      extraction_timestamp overwritten from the source;
    - a source row with no match is inserted.
    Target rows absent from the source are left untouched.

    The source is de-duplicated on the key first. MERGE fails when several
    source rows match the same target row, and the empty-table shortcut
    would otherwise append the duplicates as-is.

    Side effect: registers/replaces the temp view
    "gateway_role_assignment_updates" holding the de-duplicated source.

    Args:
        source_df: DataFrame with new data
        table_name: Name of the target Delta table
    """
    logger.info(f"Starting merge operation for {table_name}")

    # One source row per merge key
    deduped_df = source_df.dropDuplicates(["id", "gatewayId"])

    # Create a temporary view for the merge operation
    deduped_df.createOrReplaceTempView("gateway_role_assignment_updates")

    # If the table is empty, just insert all records.
    # take(1) answers "any rows?" without counting the whole table.
    if not spark.table(table_name).take(1):
        logger.info(f"Table {table_name} is empty. Inserting all records.")
        deduped_df.write.mode("append").saveAsTable(table_name)
        return

    # Perform the merge operation
    # Note: We match on both id and gatewayId to handle the case where the same role assignment ID
    # could appear in multiple gateways
    merge_query = f"""
    MERGE INTO {table_name} AS target
    USING gateway_role_assignment_updates AS source
    ON target.id = source.id AND target.gatewayId = source.gatewayId
    WHEN MATCHED THEN
        UPDATE SET 
            target.principalId = source.principalId,
            target.principalType = source.principalType,
            target.role = source.role,
            target.extraction_timestamp = source.extraction_timestamp
    WHEN NOT MATCHED THEN
        INSERT *
    """

    spark.sql(merge_query)
    logger.info("Merge operation completed successfully")


def optimize_delta_table(table_name: str):
    """
    Refresh statistics on the Delta table; never fails the caller.

    Steps performed, in order:
    1. ANALYZE TABLE ... COMPUTE STATISTICS on the table.
    2. Open the table through DeltaTable.forName and run DESCRIBE DETAIL.
       The results of both are discarded; no OPTIMIZE or ZORDER command
       is issued here.

    Any exception raised along the way is downgraded to a warning, since
    this step is treated as optional.

    Args:
        table_name: Name of the Delta table to optimize
    """
    logger.info(f"Optimizing Delta table '{table_name}'")

    try:
        # Step 1: recompute statistics used by the query planner
        spark.sql(f"ANALYZE TABLE {table_name} COMPUTE STATISTICS")
        logger.info("Table statistics updated successfully")

        # Step 2: touch the table via the Delta API and fetch its detail row.
        # The standard OPTIMIZE / ZORDER commands are intentionally not used here;
        # Fabric may handle that kind of optimization on its own.
        DeltaTable.forName(spark, table_name)
        spark.sql(f"DESCRIBE DETAIL {table_name}")

        logger.info("Delta table optimization completed via statistics computation")
        logger.info("Note: Microsoft Fabric may automatically optimize Delta tables")
    except Exception as optimize_error:
        logger.warning(f"Table optimization step encountered an issue: {str(optimize_error)}")
        logger.info("Continuing with process - optimization is not critical for functionality")
# ==================================


# CELL 11 - Main Execution Function
# ==================================
def main():
    """
    Orchestrate the full extract-and-load run.

    Steps:
    1. Get the authentication token
    2. Read the gateway IDs from the source Delta table
    3. For each gateway ID, fetch its role assignments and build a DataFrame
    4. Union the per-gateway DataFrames into one
    5. Make sure the target Delta table exists
    6. Merge the data into the target table (skipped when nothing was fetched)
    7. Refresh table statistics
    8. Log and display summary information about the target table

    Returns:
        DataFrame: The combined role assignments DataFrame for this run, or
        None when the source table yielded no gateway IDs.

    Raises:
        Exception: Any error not handled by the helper functions is logged
        and re-raised.
    """
    try:
        logger.info("Starting Fabric Gateway Role Assignments to Delta Lake process")

        # Step 1: Get authentication token
        logger.info("Getting access token...")
        access_token = get_access_token()
        logger.info("Successfully obtained access token")

        # Step 2: Retrieve gateway IDs from the Delta table
        logger.info("Retrieving gateway IDs from Delta table...")
        gateway_ids = get_gateway_ids_from_delta()

        if not gateway_ids:
            logger.warning("No gateway IDs found in the source table. Please run the gateway extraction first.")
            return None

        logger.info(f"Retrieved {len(gateway_ids)} gateway IDs")

        # Step 3: For each gateway ID, retrieve its role assignments and create DataFrames
        all_role_assignments_dfs = []
        total_gateways = len(gateway_ids)

        for idx, gateway_id in enumerate(gateway_ids):
            logger.info(f"Processing gateway ID: {gateway_id} ({idx+1}/{total_gateways})")

            # Get role assignments for this gateway
            role_assignments_data = get_gateway_role_assignments(gateway_id, access_token)

            # Create DataFrame for this gateway's role assignments
            role_assignments_df = create_enhanced_gateway_role_assignments_dataframe(role_assignments_data, gateway_id)

            # The DataFrame holds one row per fetched assignment, so the list
            # length is the row count; this avoids running Spark count() jobs
            # for every gateway just to log a number.
            assignment_count = len(role_assignments_data)
            if assignment_count > 0:
                all_role_assignments_dfs.append(role_assignments_df)
                logger.info(f"Added {assignment_count} role assignments for gateway {gateway_id}")

            # Don't add delay after the last gateway
            if idx < total_gateways - 1:
                # Add a small random delay between gateway processing to avoid hitting rate limits
                delay = random.uniform(0.5, 2.0)
                logger.info(f"Pausing for {delay:.2f} seconds before processing next gateway...")
                time.sleep(delay)

        # Step 4: Merge all role assignment DataFrames into a single DataFrame
        if not all_role_assignments_dfs:
            logger.warning("No gateway role assignments found across all gateways")
            # Create empty dataframe with schema for consistent table structure
            empty_schema = StructType([
                StructField("id", StringType(), False),
                StructField("principalId", StringType(), True),
                StructField("principalType", StringType(), True),
                StructField("role", StringType(), True),
                StructField("gatewayId", StringType(), False),
                StructField("extraction_timestamp", TimestampType(), False)
            ])
            combined_role_assignments_df = spark.createDataFrame([], empty_schema)
        else:
            # Union all DataFrames
            combined_role_assignments_df = all_role_assignments_dfs[0]
            for df in all_role_assignments_dfs[1:]:
                combined_role_assignments_df = combined_role_assignments_df.unionByName(df)

        # Show sample data
        logger.info("Sample of enhanced gateway role assignments data:")
        combined_role_assignments_df.show(5, truncate=False)

        # Step 5: Prepare Delta table
        table_name = CONFIG["ROLE_ASSIGNMENTS_TABLE_NAME"]
        ensure_delta_table_exists(table_name, combined_role_assignments_df.schema)

        # Step 6: Merge data into Delta table (if we have data)
        if all_role_assignments_dfs:
            merge_data_to_delta(combined_role_assignments_df, table_name)

            # Step 7: Optimize the Delta table
            optimize_delta_table(table_name)

        # Step 8: Display final statistics
        logger.info("Loading completed successfully!")

        # Show table information
        spark.sql(f"DESCRIBE DETAIL {table_name}").show(truncate=False)

        # Show row count
        row_count = spark.table(table_name).count()
        logger.info(f"Total rows in {table_name}: {row_count}")

        # Show summary statistics for role assignments
        summary_stats = spark.sql(f"""
            SELECT 
                COUNT(DISTINCT id) as unique_role_assignments,
                COUNT(DISTINCT gatewayId) as unique_gateways,
                COUNT(DISTINCT principalId) as unique_principals,
                COUNT(DISTINCT role) as unique_roles,
                COUNT(DISTINCT principalType) as principal_types,
                MAX(extraction_timestamp) as last_updated
            FROM {table_name}
        """)

        logger.info("Summary statistics:")
        summary_stats.show(truncate=False)

        # Show role distribution
        role_distribution = spark.sql(f"""
            SELECT 
                role,
                COUNT(*) as assignment_count,
                COUNT(DISTINCT principalId) as unique_principals
            FROM {table_name}
            GROUP BY role
            ORDER BY assignment_count DESC
        """)

        logger.info("Role distribution:")
        role_distribution.show(truncate=False)

        return combined_role_assignments_df

    except Exception as e:
        logger.error(f"Error in main execution: {str(e)}")
        raise
# ==================================


# CELL 12 - Execute Main Function
# ==================================
# Run the full extract-and-load process when this code is executed directly.
# role_assignments_df holds whatever main() returns: the combined DataFrame
# for this run, or None when no gateway IDs were found in the source table.
if __name__ == "__main__":
    role_assignments_df = main()
# ==================================


# CELL 13 - Maintenance and Best Practices
# ==================================
"""
MAINTENANCE AND BEST PRACTICES:

1. SCHEDULED UPDATES:
   - Schedule this notebook to run after the gateway extraction notebook
   - Configure dependencies in Fabric pipelines to ensure proper sequence
   - Consider daily/weekly runs to track role assignment changes over time

2. DELTA LAKE MAINTENANCE:
   - Run VACUUM periodically to clean old files (if supported in your Fabric environment):
     spark.sql(f"VACUUM {CONFIG['ROLE_ASSIGNMENTS_TABLE_NAME']} RETAIN 168 HOURS")
   - Monitor history retention and storage usage
   - Review table properties and statistics

3. MONITORING AND ALERTING:
   - Set up alerts for role assignment changes
   - Monitor for unusual permission patterns
   - Track role distribution for security audit purposes

4. POWER BI INTEGRATION:
   - Create dashboards showing gateway permission structures
   - Visualize principal to gateway relationships
   - Create security reports for compliance purposes

5. DATA SECURITY:
   - Implement appropriate access controls on the Delta table
   - Consider sensitive information in role assignment data
   - Document security implications of role assignments

6. PERFORMANCE OPTIMIZATION:
   - Consider partitioning strategies if data grows significantly
   - Create joined views with the gateways table for common analytics
   - Use caching for frequently accessed data

Example analytics query - Find principals with elevated permissions across multiple gateways:
```sql
SELECT 
  principalId,
  principalType,
  role,
  COUNT(DISTINCT gatewayId) as gateway_count,
  COLLECT_LIST(gatewayId) as gateway_list
FROM fabric_onprem_gateway_role_assignments
WHERE role IN ('Admin', 'ConnectionCreatorWithResharing')
GROUP BY principalId, principalType, role
HAVING COUNT(DISTINCT gatewayId) > 1
ORDER BY gateway_count DESC
```
"""

StatementMeta(, 11ec3ce4-fd3e-4b9d-84ac-f70fa01ab54a, 4, Finished, Available, Finished)

2025-07-16 16:46:32,595 - INFO - Starting Fabric Gateway Role Assignments to Delta Lake process
2025-07-16 16:46:32,596 - INFO - Getting access token...
2025-07-16 16:46:33,366 - INFO - Successfully obtained access token
2025-07-16 16:46:33,367 - INFO - Retrieving gateway IDs from Delta table...
2025-07-16 16:46:49,467 - INFO - Retrieved 14 gateway IDs from Delta table
2025-07-16 16:46:49,469 - INFO - Retrieved 14 gateway IDs
2025-07-16 16:46:49,469 - INFO - Processing gateway ID: 2d42f13d-1ae8-4d16-9726-82385a865ddf (1/14)
2025-07-16 16:46:49,470 - INFO - Making API call to: https://api.fabric.microsoft.com/v1/gateways/2d42f13d-1ae8-4d16-9726-82385a865ddf/roleAssignments (Attempt 1)
2025-07-16 16:46:49,868 - INFO - Retrieved 3 role assignments for gateway 2d42f13d-1ae8-4d16-9726-82385a865ddf. Running total: 3
2025-07-16 16:46:49,869 - INFO - Finished retrieving gateway role assignments for gateway 2d42f13d-1ae8-4d16-9726-82385a865ddf. Total count: 3
2025-07-16 16:46:51,489 - INFO - Ad

+------------------------------+----------------+-----------------+
|role                          |assignment_count|unique_principals|
+------------------------------+----------------+-----------------+
|Admin                         |91              |27               |
|ConnectionCreator             |23              |15               |
|ConnectionCreatorWithResharing|1               |1                |
+------------------------------+----------------+-----------------+



'\nMAINTENANCE AND BEST PRACTICES:\n\n1. SCHEDULED UPDATES:\n   - Schedule this notebook to run after the gateway extraction notebook\n   - Configure dependencies in Fabric pipelines to ensure proper sequence\n   - Consider daily/weekly runs to track role assignment changes over time\n\n2. DELTA LAKE MAINTENANCE:\n   - Run VACUUM periodically to clean old files (if supported in your Fabric environment):\n     spark.sql(f"VACUUM {CONFIG[\'ROLE_ASSIGNMENTS_TABLE_NAME\']} RETAIN 168 HOURS")\n   - Monitor history retention and storage usage\n   - Review table properties and statistics\n\n3. MONITORING AND ALERTING:\n   - Set up alerts for role assignment changes\n   - Monitor for unusual permission patterns\n   - Track role distribution for security audit purposes\n\n4. POWER BI INTEGRATION:\n   - Create dashboards showing gateway permission structures\n   - Visualize principal to gateway relationships\n   - Create security reports for compliance purposes\n\n5. DATA SECURITY:\n   - Impleme

In [3]:
from pyspark.sql import SparkSession

# Obtain the Spark session (getOrCreate returns the running one if present)
spark = (
    SparkSession.builder
    .appName("Refresh SQL Endpoint Metadata")
    .getOrCreate()
)

# Refresh Spark's metadata for the role assignments table.
# NOTE(review): whether this also refreshes the SQL analytics endpoint is not
# visible from this notebook - confirm before relying on it.
spark.sql("REFRESH TABLE fabric_onprem_gateway_role_assignments")
print("Metadata refresh triggered successfully.")


StatementMeta(, 11ec3ce4-fd3e-4b9d-84ac-f70fa01ab54a, 5, Finished, Available, Finished)

Metadata refresh triggered successfully.
