In [1]:
# Simple PySpark Script to Get Workspaces from Microsoft Fabric API
# ------------------------------------------------------------------------------

# Import required libraries
import requests
import json
from pyspark.sql.types import StructType, StructField, StringType, ArrayType
from pyspark.sql import SparkSession
from notebookutils.mssparkutils.credentials import getToken

# Obtain the Spark session. getOrCreate() returns the session that already
# exists (Fabric notebooks provide one) and only builds a new one, named
# "FabricWorkspacesList", when none is active.
spark = (
    SparkSession.builder
    .appName("FabricWorkspacesList")
    .getOrCreate()
)

def _extract_workspaces(payload):
    """Return the workspace records contained in one decoded API response page.

    The admin endpoint wraps its records in a "workspaces" array (this is the
    key visible in the raw response printed by this notebook). "value" is
    accepted as a fallback envelope key. Any other shape yields an empty list.
    """
    if not isinstance(payload, dict):
        return []
    for key in ("workspaces", "value"):
        records = payload.get(key)
        if isinstance(records, list) and records:
            return records
    return []


def _next_page_request(base_url, payload):
    """Return (url, params) for the page following `payload`, or (None, None) when done."""
    if not isinstance(payload, dict):
        return None, None
    # Prefer the ready-made URL when the service supplies one.
    continuation_uri = payload.get("continuationUri")
    if continuation_uri:
        return continuation_uri, None
    # Otherwise re-query the base endpoint with the opaque token.
    continuation_token = payload.get("continuationToken")
    if continuation_token:
        return base_url, {"continuationToken": continuation_token}
    return None, None


def get_fabric_workspaces():
    """
    Get the list of workspaces from the Fabric admin API using built-in authentication.

    Requests a token via getToken(), calls the admin workspaces endpoint, follows
    continuation links until the service reports no further page, and converts the
    collected records to a Spark DataFrame (schema inferred from the records).
    The first page of the raw response is printed (truncated to 5000 characters)
    for inspection.

    Returns:
    DataFrame: A Spark DataFrame containing the workspaces information. An empty
        DataFrame with an empty schema is returned when the API reports no workspaces.
    None: When a request returns a non-200 status or any exception is raised;
        the failure is printed rather than propagated.
    """
    # Upper bound (seconds) for each HTTP call so a stalled connection cannot
    # block the notebook forever.
    request_timeout_seconds = 60

    try:
        # This gets a token for the Microsoft Fabric resource
        print("Getting access token using built-in authentication...")
        access_token = getToken("https://api.fabric.microsoft.com")

        # Set up API endpoint URL - no tenant ID is passed; only the token is sent
        api_url = "https://api.fabric.microsoft.com/v1/admin/workspaces"

        # Set up headers with authentication token
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json"
        }

        print("Sending API request to get workspaces...")
        workspaces = []
        visited_cursors = set()
        page_url, page_params = api_url, None

        while page_url is not None:
            # Stop if the service hands back a cursor we already followed;
            # otherwise a repeated continuation link would loop forever.
            cursor = (page_url, (page_params or {}).get("continuationToken"))
            if cursor in visited_cursors:
                break
            visited_cursors.add(cursor)

            response = requests.get(
                page_url,
                headers=headers,
                params=page_params,
                timeout=request_timeout_seconds,
            )

            # Any failing page aborts the whole retrieval: a partial list would
            # silently look like a complete one.
            if response.status_code != 200:
                print(f"Error: API request failed with status code {response.status_code}")
                print(f"Response content: {response.text}")
                return None

            data = response.json()

            if len(visited_cursors) == 1:
                print("API request successful!")
                # Print the raw JSON of the first page for inspection (pretty-printed)
                print("\nRaw API Response (first 5000 characters):")
                print(json.dumps(data, indent=2)[:5000] + "...\n")

            workspaces.extend(_extract_workspaces(data))
            page_url, page_params = _next_page_request(api_url, data)

        if not workspaces:
            print("No workspaces found.")
            return spark.createDataFrame([], StructType([]))

        # Create a Spark DataFrame from the list of record dicts
        workspaces_df = spark.createDataFrame(workspaces)

        # len() of the Python list avoids launching a Spark job just to count rows.
        print(f"Successfully retrieved {len(workspaces)} workspaces.")
        return workspaces_df

    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller test for None.
        print(f"An error occurred: {str(e)}")
        return None

# Run the function to get workspaces
print("Retrieving workspaces from Microsoft Fabric...")
workspaces_df = get_fabric_workspaces()

# Display the results. get_fabric_workspaces() returns None after printing its
# own error message, so nothing further is shown in that case.
if workspaces_df is not None:
    print("\nWorkspaces Preview:")
    workspaces_df.show(100, truncate=False)  # Show up to 100 rows, don't truncate columns

    print("\nDataFrame Schema:")
    workspaces_df.printSchema()  # Display the schema of the DataFrame

    # Optional: Suggest further analysis based on schema.
    # The records returned by the admin endpoint carry a "name" field (not
    # "displayName"), so the filter example refers to that column. Bracket
    # access is used so the column name cannot be confused with an attribute.
    print("\nSuggested analysis examples:")
    print("1. Count workspaces by type:")
    print("   workspaces_df.groupBy('type').count().show()")
    print("2. Find workspaces by name (example):")
    print("   workspaces_df.filter(workspaces_df['name'].contains('Finance')).show()")


StatementMeta(, e349d9d9-0ecb-4dca-aa45-14a7d03958b2, 3, Finished, Available, Finished)

Retrieving workspaces from Microsoft Fabric...
Getting access token using built-in authentication...
Sending API request to get workspaces...
API request successful!

Raw API Response (first 5000 characters):
{
  "workspaces": [
    {
      "id": "47298625-cc8c-4967-82bf-2c97da0254af",
      "name": "IAI - BI Delivery Team",
      "state": "Active",
      "type": "Workspace",
      "capacityId": "665D4010-E0FD-4821-B5B0-6C2760C1D498"
    },
    {
      "id": "2022d60a-0ea0-460a-b794-8ac87ef3a916",
      "name": "IAI Management",
      "state": "Active",
      "type": "Workspace",
      "capacityId": "6BA0A957-48B9-4AD6-B422-6D779CC7DFF3"
    },
    {
      "id": "49ec4821-c19f-4a69-ab00-53ed07fdc830",
      "name": "EDI Analytics",
      "state": "Active",
      "type": "Workspace",
      "capacityId": "646712A9-B160-4E8C-922D-A32ACB6EA7AA"
    },
    {
      "id": "aad173a4-46ac-4b68-8718-e1ebc008f238",
      "name": "Github 8/26/2024 1:59:22 PM",
      "state": "Active",
      "type"