In [1]:
# Simple PySpark Script to Get Workspace Access Details from Microsoft Fabric API
# ------------------------------------------------------------------------------
# This script retrieves the list of users, groups, and service principals that 
# have access to a specific workspace in Microsoft Fabric.
# 
# INSTRUCTIONS FOR USE:
# 1. Set the workspace_id variable below to your target workspace ID
# 2. Run each cell in sequence in your Fabric Notebook
# 3. The script will authenticate automatically using Fabric's built-in authentication

# =============================================================================
# CELL 1: Import Libraries and Initialize Spark Session
# =============================================================================

# Import required libraries
import requests
import json
from pyspark.sql.types import StructType, StructField, StringType, ArrayType
from pyspark.sql import SparkSession
from notebookutils.mssparkutils.credentials import getToken

# Obtain the Spark session. Fabric Notebooks already provide one, so
# getOrCreate() reuses it instead of starting a new session.
spark = (
    SparkSession.builder
    .appName("FabricWorkspaceAccessDetails")
    .getOrCreate()
)

print("Libraries imported and Spark session initialized successfully!")

# =============================================================================
# CELL 2: Set Workspace ID Parameter
# =============================================================================

# PARAMETER: ID of the workspace whose access list should be retrieved.
# Replace the value below with the workspace you want to analyze. Workspace IDs
# can be found via the workspace list API or in the Fabric portal URL.
workspace_id = "7a21dc44-c8b8-446e-9e80-59458a88ece8"  # Example: "f089354e-8366-4e18-aea3-4cb4a3a50b48"

# Confirm the ID when it is non-empty and is no longer the template placeholder;
# otherwise remind the user to fill it in. This check only prints, it does not
# stop execution.
if workspace_id and workspace_id != "YOUR_WORKSPACE_ID_HERE":
    print(f"✅ Workspace ID set to: {workspace_id}")
else:
    print("⚠️  WARNING: Please set the workspace_id variable to your actual workspace ID before running the next cell.")
    print("   Example: workspace_id = 'f089354e-8366-4e18-aea3-4cb4a3a50b48'")

# =============================================================================
# CELL 3: Define Function to Get Workspace Access Details
# =============================================================================

def get_workspace_access_details(workspace_id, timeout=30):
    """
    Retrieve the access list of a workspace from the Fabric Admin API.

    Requests ``/v1/admin/workspaces/{workspace_id}/users`` using a token from
    the notebook's built-in authentication and turns the ``accessDetails``
    array of the JSON response into a Spark DataFrame. Progress, the first
    3000 characters of the raw response, and any errors are printed.

    NOTE(review): the raw response dump can contain names and e-mail addresses;
    clear the cell output before sharing the notebook.

    Args:
        workspace_id (str): The workspace ID to query for access details.
        timeout (float | tuple): Seconds to wait for the HTTP request before
            giving up (passed straight to ``requests.get``). Defaults to 30.

    Returns:
        DataFrame | None: A Spark DataFrame with one row per entry in
        ``accessDetails`` (nested JSON objects become nested struct columns),
        an empty DataFrame with an empty schema when the workspace has no
        access entries, or ``None`` when the request fails or any exception
        is raised (the error is printed, not re-raised).
    """

    # Human-readable hints for the failure codes users hit most often.
    status_hints = {
        401: "❌ Authentication failed. Check if you have proper permissions.",
        403: "❌ Access forbidden. You need Fabric Administrator permissions to use this API.",
        404: "❌ Workspace not found. Check if the workspace ID is correct.",
        429: "❌ Rate limit exceeded. Wait before making more requests (max 200 per hour).",
    }

    try:
        # Token for the Microsoft Fabric resource, issued by the notebook runtime
        print("Getting access token using built-in authentication...")
        access_token = getToken("https://api.fabric.microsoft.com")

        # The workspace ID is a path parameter of the endpoint
        api_url = f"https://api.fabric.microsoft.com/v1/admin/workspaces/{workspace_id}/users"

        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json"
        }

        print(f"Sending API request to get access details for workspace: {workspace_id}")
        # Without a timeout, requests waits forever on a stalled connection
        # and the notebook cell would never finish.
        response = requests.get(api_url, headers=headers, timeout=timeout)

        if response.status_code != 200:
            print(f"Error: API request failed with status code {response.status_code}")
            print(f"Response content: {response.text}")

            hint = status_hints.get(response.status_code)
            if hint:
                print(hint)

            return None

        print("API request successful!")

        data = response.json()

        # Pretty-printed preview of the raw payload for inspection
        print("\nRaw API Response (first 3000 characters):")
        print(json.dumps(data, indent=2)[:3000] + "...\n")

        # The response carries the principals in an "accessDetails" array
        access_details = data.get("accessDetails", [])

        if not access_details:
            print("No access details found for this workspace.")
            return spark.createDataFrame([], StructType([]))

        # Let Spark's JSON reader infer the schema. Inferring from plain Python
        # dicts would type nested objects as maps rather than structs, so
        # nested fields such as principal.userDetails.userPrincipalName could
        # not be selected by path.
        json_rows = spark.sparkContext.parallelize(
            [json.dumps(entry) for entry in access_details]
        )
        access_details_df = spark.read.json(json_rows)

        # One row per entry, so the list length avoids an extra Spark job
        print(f"Successfully retrieved access details for {len(access_details)} principals.")
        return access_details_df

    except Exception as e:
        # Deliberate best-effort: report the problem and let the notebook continue
        print(f"An error occurred: {str(e)}")
        return None

print("Function defined successfully!")

# =============================================================================
# CELL 4: Execute API Call and Display Results
# =============================================================================

# Announce the call, then fetch the access details for the configured workspace
print("Retrieving workspace access details from Microsoft Fabric...", "=" * 60, sep="\n")

access_details_df = get_workspace_access_details(workspace_id)

# None signals that the lookup failed; anything else is a DataFrame to preview
if access_details_df is None:
    print("❌ Failed to retrieve workspace access details. Check the error messages above.")
else:
    print("\n" + "=" * 60, "WORKSPACE ACCESS DETAILS PREVIEW:", "=" * 60, sep="\n")

    # Show up to 100 rows with untruncated column values
    # (access lists are usually not too large)
    access_details_df.show(100, truncate=False)

    print("\n" + "=" * 60, "DATAFRAME SCHEMA:", "=" * 60, sep="\n")
    access_details_df.printSchema()

    # Row count summary
    total_count = access_details_df.count()
    print(f"\n📊 SUMMARY: Found {total_count} principals with access to this workspace")

# =============================================================================
# CELL 5: Data Analysis Examples
# =============================================================================

# Skip the analysis when the lookup failed or returned no rows
if access_details_df is not None and access_details_df.count() > 0:

    print("\n" + "=" * 60, "SUGGESTED DATA ANALYSIS EXAMPLES:", "=" * 60, sep="\n")

    # Copy-paste snippets for follow-up analysis (printed only, not executed)
    print(
        "\n1. COUNT USERS BY PRINCIPAL TYPE:",
        "   access_details_df.groupBy('principal.type').count().show()",
        "\n2. COUNT USERS BY WORKSPACE ROLE:",
        "   access_details_df.groupBy('workspaceAccessDetails.workspaceRole').count().show()",
        "\n3. FILTER FOR ADMIN USERS ONLY:",
        "   access_details_df.filter(access_details_df['workspaceAccessDetails.workspaceRole'] == 'Admin').show()",
        "\n4. SHOW ONLY USER PRINCIPALS (EXCLUDE GROUPS/SERVICE PRINCIPALS):",
        "   access_details_df.filter(access_details_df['principal.type'] == 'User').show()",
        "\n5. GET USER EMAIL ADDRESSES:",
        "   access_details_df.select('principal.displayName', 'principal.userDetails.userPrincipalName').show()",
        sep="\n",
    )

    print("\n" + "=" * 60, "QUICK ANALYSIS - RUNNING SOME EXAMPLES:", "=" * 60, sep="\n")

    # Example 1: number of entries per principal type
    print("\n🔍 Principal Types in this workspace:")
    access_details_df.groupBy('principal.type').count().show()

    # Example 2: number of entries per workspace role
    print("\n🔍 Workspace Roles distribution:")
    access_details_df.groupBy('workspaceAccessDetails.workspaceRole').count().show()

    # Example 3: list the principals holding the Admin role
    admin_users = access_details_df.where(
        access_details_df['workspaceAccessDetails.workspaceRole'] == 'Admin'
    )
    admin_count = admin_users.count()

    if admin_count == 0:
        print("\n🔍 No workspace administrators found.")
    else:
        print(f"\n🔍 Workspace Administrators ({admin_count} found):")
        admin_users.select('principal.displayName', 'principal.type').show()

# Closing banner, printed whether or not the analysis ran
print(
    "\n" + "=" * 60,
    "SCRIPT EXECUTION COMPLETED!",
    "=" * 60,
    "💡 TIP: You can now use 'access_details_df' variable for further analysis in subsequent cells.",
    sep="\n",
)


StatementMeta(, 076c9524-b31e-4d95-9976-a391a14fee77, 3, Finished, Available, Finished)

Libraries imported and Spark session initialized successfully!
✅ Workspace ID set to: 7a21dc44-c8b8-446e-9e80-59458a88ece8
Function defined successfully!
Retrieving workspace access details from Microsoft Fabric...
Getting access token using built-in authentication...
Sending API request to get access details for workspace: 7a21dc44-c8b8-446e-9e80-59458a88ece8
API request successful!

Raw API Response (first 3000 characters):
{
  "accessDetails": [
    {
      "principal": {
        "id": "123b890b-86fe-4bd3-91b0-54ae0e368745",
        "displayName": "Gatilao,Rey Allen T Gatilao",
        "type": "User",
        "userDetails": {
          "userPrincipalName": "RTGatilao-mda@mdanderson.org"
        }
      },
      "workspaceAccessDetails": {
        "type": "Workspace",
        "workspaceRole": "Admin"
      }
    },
    {
      "principal": {
        "id": "f12096e4-4ee8-4adc-8f64-1b25cc463fc4",
        "displayName": "Brent Hand",
        "type": "User",
        "userDetails": {
    