In [9]:
# Welcome to your new notebook
# Type here in the cell editor to add code!
# CELL 1 - Title and Introduction
# ==================================
# Microsoft Fabric Capacities List - PySpark Notebook
# This notebook retrieves a list of all Microsoft Fabric capacities and displays selected properties
# ==================================


# CELL 2 - Import Libraries
# ==================================
# Import required libraries
import requests
import json
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, explode, when
from pyspark.sql.types import StructType, StructField, StringType, ArrayType
import logging
from typing import Dict, List, Optional
# ==================================


# CELL 3 - Configure Logging and Initialize Spark
# ==================================
# Logging setup: INFO and above, each record stamped with time and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Spark entry point. getOrCreate() hands back the session that is already
# running if there is one, otherwise it starts one under this app name.
spark = (
    SparkSession.builder
    .appName("FabricCapacitiesList")
    .getOrCreate()
)
# ==================================


# CELL 4 - Configuration Parameters
# ==================================
# Configuration Parameters
# These are the settings we'll use throughout the notebook
# You can modify these as needed for your environment
CONFIG = dict(
    API_BASE_URL="https://api.fabric.microsoft.com/v1",  # prefix for every endpoint path
    MAX_RETRIES=3,   # attempts made per API request
    PAGE_SIZE=100,   # sent as the "top" query parameter
    TIMEOUT=30,      # per-request timeout, in seconds
)
# ==================================


# CELL 5 - Authentication Function
# ==================================
def get_access_token():
    """
    Ask the notebook runtime for a token scoped to the Fabric REST API.

    ``notebookutils`` is imported lazily inside the function, so an
    environment without it fails here (logged, then re-raised) instead of at
    notebook import time. The token is requested for the audience
    ``https://api.fabric.microsoft.com``.

    Returns:
        Whatever ``mssparkutils.credentials.getToken`` returns for that
        audience (used by callers as the bearer token string).

    Raises:
        Exception: Any error from the import or the token request is logged
            and re-raised unchanged.
    """
    fabric_audience = "https://api.fabric.microsoft.com"
    try:
        from notebookutils import mssparkutils
        return mssparkutils.credentials.getToken(fabric_audience)
    except Exception as e:
        logger.error(f"Failed to get access token: {str(e)}")
        raise
# ==================================


# CELL 6 - API Call Function
# ==================================
def call_fabric_api(endpoint: str, access_token: str, params: Optional[Dict] = None) -> Dict:
    """
    Send a GET request to the Fabric REST API and return the decoded JSON body.

    The request is retried only when a retry can plausibly help:
    - transport-level failures (no HTTP response: connection error, timeout),
    - HTTP 429 (throttling),
    - HTTP 5xx (server-side errors).
    Other HTTP errors (for example 401, 403, 404) are raised immediately,
    because repeating the same request cannot change the outcome.

    Between attempts the function sleeps ``2 ** attempt`` seconds, or longer
    if the failed response carries a numeric ``Retry-After`` header.

    Args:
        endpoint: The API endpoint path appended to ``CONFIG['API_BASE_URL']``
            (e.g., "/capacities").
        access_token: Bearer token sent in the ``Authorization`` header.
        params: Optional query parameters for the request.

    Returns:
        dict: The JSON response from the API.

    Raises:
        requests.exceptions.RequestException: If the request fails with a
            non-retryable error, or still fails after the last attempt.
    """
    import time  # function-scope import: the notebook's import cell does not provide `time`

    url = f"{CONFIG['API_BASE_URL']}{endpoint}"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }

    # Always make at least one attempt. With MAX_RETRIES <= 0 the loop would
    # otherwise be skipped and the function would silently return None.
    max_attempts = max(1, CONFIG['MAX_RETRIES'])

    for attempt in range(max_attempts):
        try:
            logger.info(f"Making API call to: {url} (Attempt {attempt + 1})")

            response = requests.get(
                url,
                headers=headers,
                params=params,
                timeout=CONFIG['TIMEOUT']
            )

            # Turn 4xx/5xx status codes into exceptions
            response.raise_for_status()

            return response.json()

        except requests.exceptions.RequestException as e:
            logger.warning(f"API call failed (Attempt {attempt + 1}): {str(e)}")

            # The exception carries the HTTP response when there was one;
            # connection errors and timeouts have none.
            failed_response = getattr(e, "response", None)
            status_code = getattr(failed_response, "status_code", None)
            is_transient = status_code is None or status_code == 429 or status_code >= 500

            if not is_transient:
                logger.error(f"Non-retryable HTTP {status_code} response for endpoint: {endpoint}")
                raise

            if attempt == max_attempts - 1:
                logger.error(f"All retry attempts failed for endpoint: {endpoint}")
                raise

            # Exponential backoff, stretched to honor a numeric Retry-After header
            delay = 2 ** attempt
            response_headers = getattr(failed_response, "headers", None) or {}
            retry_after = response_headers.get("Retry-After")
            if retry_after is not None and str(retry_after).isdigit():
                delay = max(delay, int(retry_after))
            time.sleep(delay)
# ==================================


# CELL 7 - Get All Capacities Function
# ==================================
def get_all_capacities(access_token: str) -> List[Dict]:
    """
    Collect every capacity by following the API's continuation tokens.

    Each request to "/capacities" asks for ``CONFIG['PAGE_SIZE']`` items via
    the ``top`` parameter. When a response contains a non-empty
    ``continuationToken`` it is sent back on the next request; the loop ends
    with the first response that has none.

    Args:
        access_token: Token forwarded to call_fabric_api for every page.

    Returns:
        list: The ``value`` entries of all pages, in the order received.
    """
    collected = []
    next_token = None
    more_pages = True

    while more_pages:
        # Build the query for this page; the token is only sent once we have one
        query = {"top": CONFIG['PAGE_SIZE']}
        if next_token:
            query["continuationToken"] = next_token

        page = call_fabric_api("/capacities", access_token, query)

        # A page without "value" contributes nothing
        page_items = page.get("value", [])
        collected.extend(page_items)

        logger.info(f"Retrieved {len(page_items)} capacities. Total so far: {len(collected)}")

        # Keep going only while the API hands out another token
        next_token = page.get("continuationToken")
        more_pages = bool(next_token)

    logger.info(f"Finished retrieving capacities. Total count: {len(collected)}")
    return collected
# ==================================


# CELL 8 - Create DataFrame Function
# ==================================
def create_capacities_dataframe(capacities_data: List[Dict]) -> "DataFrame":
    """
    Build a PySpark DataFrame holding the five reported capacity fields.

    Rows are handed to Spark directly together with an explicit schema. There
    is no intermediate pandas DataFrame, so the result has the same five
    nullable string columns even when ``capacities_data`` is empty or None
    (a pandas frame built from an empty list has no columns at all).

    Args:
        capacities_data: List of capacity dictionaries from the API. Keys other
            than the five reported ones are ignored; a missing key becomes null.

    Returns:
        DataFrame: Columns ``id``, ``displayName``, ``sku``, ``region`` and
        ``state``, all nullable strings, one row per input dictionary.
    """
    # Single source of truth for column order, shared by schema and rows
    column_names = ("id", "displayName", "sku", "region", "state")

    # Every column is a nullable string (third argument True = nullable)
    schema = StructType([StructField(name, StringType(), True) for name in column_names])

    # One tuple per capacity, values in schema order; absent keys yield None
    rows = [
        tuple(capacity.get(name) for name in column_names)
        for capacity in (capacities_data or [])
    ]

    return spark.createDataFrame(rows, schema=schema)
# ==================================


# CELL 9 - Main Execution Function
# ==================================
def main():
    """
    Run the capacities report end to end.

    Steps, in order:
    1. Obtain an access token via get_access_token().
    2. Fetch all capacities via get_all_capacities().
    3. Build a PySpark DataFrame via create_capacities_dataframe().
    4. Print the schema, the rows (untruncated) and the row count.

    Returns:
        The capacities DataFrame, or None when no capacities were returned.

    Raises:
        Exception: Any error raised by the steps above is logged and re-raised.
    """
    try:
        logger.info("Starting Fabric Capacities retrieval process")

        # Step 1: Get authentication token
        logger.info("Getting access token...")
        access_token = get_access_token()
        logger.info("Successfully obtained access token")

        # Step 2: Retrieve all capacities
        logger.info("Retrieving capacities from Fabric API...")
        capacities_data = get_all_capacities(access_token)

        if not capacities_data:
            logger.warning("No capacities found")
            return None

        # Step 3: Create PySpark DataFrame
        logger.info("Creating PySpark DataFrame...")
        capacities_df = create_capacities_dataframe(capacities_data)

        # Step 4: Display results
        logger.info("Displaying capacities...")

        # Show the schema (structure) of our DataFrame
        print("\nDataFrame Schema:")
        capacities_df.printSchema()

        # Show the data
        print("\nFabric Capacities:")
        capacities_df.show(truncate=False)  # truncate=False shows full values

        # Show row count
        row_count = capacities_df.count()
        print(f"\nTotal number of capacities: {row_count}")

        # Optional follow-ups, left disabled. They are kept as real comments:
        # a bare triple-quoted string is an executable no-op statement, not a
        # comment, and is easy to mistake for live code.
        #
        #   # Save to a temporary view for SQL queries
        #   capacities_df.createOrReplaceTempView("fabric_capacities")
        #   logger.info("Created temporary view 'fabric_capacities' for SQL queries")
        #
        #   # Example SQL query
        #   print("\nExample: Capacities by Region (using SQL):")
        #   spark.sql("""
        #       SELECT region, COUNT(*) as capacity_count
        #       FROM fabric_capacities
        #       GROUP BY region
        #       ORDER BY capacity_count DESC
        #   """).show()

        # Return the DataFrame for further use
        return capacities_df

    except Exception as e:
        logger.error(f"Error in main execution: {str(e)}")
        raise
# ==================================


# CELL 10 - Execute Main Function
# ==================================
# Execute the main function when this code runs as the top-level module, and
# keep its result for later code: main() returns the capacities DataFrame, or
# nothing when no capacities were found.
if __name__ == "__main__":
    capacities_df = main()
# ==================================


# CELL 11 - Analysis Helper Functions
# ==================================
# Additional helper functions for data analysis

def analyze_capacities(capacities_df):
    """
    Print simple breakdowns of the capacities DataFrame.

    Shows, in order: the number of capacities per SKU, per state and per
    region (each sorted by descending count), then the display name, SKU and
    region of every capacity whose state is "Active".

    Args:
        capacities_df: PySpark DataFrame with at least the columns
            ``displayName``, ``sku``, ``region`` and ``state``.
    """
    print("\n=== Capacity Analysis ===")

    # Same count-and-sort report for each grouping column
    grouped_reports = (
        ("sku", "\nCapacities by SKU:"),
        ("state", "\nCapacities by State:"),
        ("region", "\nCapacities by Region:"),
    )
    for column_name, heading in grouped_reports:
        print(heading)
        capacities_df.groupBy(column_name).count().orderBy("count", ascending=False).show()

    # Find capacities with specific states
    print("\nActive Capacities:")
    active_capacities = capacities_df.filter(col("state") == "Active")
    active_capacities.select("displayName", "sku", "region").show(truncate=False)
# ==================================


# CELL 12 - Run Analysis (Optional)
# ==================================
# Run the analysis on the DataFrame produced by main().
# This call must start at column 0. Comment lines do not end a function body,
# so an indented call here would become the last statement of
# analyze_capacities() and make the function call itself recursively.
# main() returns None when no capacities were found, hence the guard.
if capacities_df is not None:
    analyze_capacities(capacities_df)
# ==================================


# CELL 13 - Usage Notes for Beginners
# ==================================
"""
USAGE NOTES FOR BEGINNERS:

1. AUTHENTICATION:
   - The notebook automatically handles authentication using your Fabric credentials
   - No need to manually provide tokens or credentials

2. RUNNING THE CODE:
   - Simply run the cells in order
   - The main() function will execute the entire process

3. UNDERSTANDING THE OUTPUT:
   - Schema: Shows the structure of your data (column names and types)
   - Data: Shows the actual capacity information
   - Analysis: Shows summary statistics about your capacities

4. CUSTOMIZATION:
   - To filter results: Use DataFrame.filter() method
   - To sort results: Use DataFrame.orderBy() method
   - To select specific columns: Use DataFrame.select() method

5. ERROR HANDLING:
   - The code includes comprehensive error handling
   - Check the logs for detailed error messages
   - Common issues: network connectivity, permissions

6. SAVING RESULTS:
   - To save to a table: df.write.saveAsTable("table_name")
   - To save to CSV: df.write.csv("path/to/file.csv")
   - To save to Parquet: df.write.parquet("path/to/file.parquet")

Example of filtering and saving:
```python
# Filter only active capacities in a specific region
filtered_df = capacities_df.filter(
    (col("state") == "Active") & 
    (col("region") == "West US")
)

# Save to a table
filtered_df.write.mode("overwrite").saveAsTable("active_west_us_capacities")
```
"""
# ==================================

StatementMeta(, 70bcc750-b585-4132-bc60-f21285f06451, 11, Finished, Available, Finished)

2025-05-14 18:21:33,039 - INFO - Starting Fabric Capacities retrieval process
2025-05-14 18:21:33,039 - INFO - Getting access token...
2025-05-14 18:21:33,042 - INFO - Successfully obtained access token
2025-05-14 18:21:33,042 - INFO - Retrieving capacities from Fabric API...
2025-05-14 18:21:33,043 - INFO - Making API call to: https://api.fabric.microsoft.com/v1/capacities (Attempt 1)



DataFrame Schema:
root
 |-- id: string (nullable = true)
 |-- displayName: string (nullable = true)
 |-- sku: string (nullable = true)
 |-- region: string (nullable = true)
 |-- state: string (nullable = true)


Fabric Capacities:
+------------------------------------+---------------------------------+---+----------------+--------+
|id                                  |displayName                      |sku|region          |state   |
+------------------------------------+---------------------------------+---+----------------+--------+
|ab3b62c5-cff1-4341-a584-4ef86a529e8a|f64nonprodsouthcentral001        |F64|South Central US|Inactive|
|250aef2d-b24b-43a8-8564-8fefc5152522|f8nonprodsouthcentral001         |F8 |South Central US|Active  |
|d94bc350-4bb9-4f24-9d89-fd633996eb28|f64x002                          |F64|South Central US|Active  |
|56125c55-2f69-4fa3-bac0-e9407fc17374|f64x001                          |F64|South Central US|Active  |
|8e0020ba-3162-4e4d-9d3f-83b6ce695c5d|f32x001  

'\nUSAGE NOTES FOR BEGINNERS:\n\n1. AUTHENTICATION:\n   - The notebook automatically handles authentication using your Fabric credentials\n   - No need to manually provide tokens or credentials\n\n2. RUNNING THE CODE:\n   - Simply run the cells in order\n   - The main() function will execute the entire process\n\n3. UNDERSTANDING THE OUTPUT:\n   - Schema: Shows the structure of your data (column names and types)\n   - Data: Shows the actual capacity information\n   - Analysis: Shows summary statistics about your capacities\n\n4. CUSTOMIZATION:\n   - To filter results: Use DataFrame.filter() method\n   - To sort results: Use DataFrame.orderBy() method\n   - To select specific columns: Use DataFrame.select() method\n\n5. ERROR HANDLING:\n   - The code includes comprehensive error handling\n   - Check the logs for detailed error messages\n   - Common issues: network connectivity, permissions\n\n6. SAVING RESULTS:\n   - To save to a table: df.write.saveAsTable("table_name")\n   - To save t

In [2]:
# CELL 1 - Title and Introduction
# ==================================
# Microsoft Fabric Capacities List - PySpark Notebook with Service Principal Authentication
# This notebook retrieves a list of all Microsoft Fabric capacities using Service Principal (SPN) authentication
# and displays selected properties
# 
# PREREQUISITES:
# 1. Create a Service Principal in Azure AD
# 2. Grant the SPN appropriate permissions in Fabric (Fabric Administrator or Workspace Admin)
# 3. Configure the SPN credentials in the configuration section
# ==================================


# CELL 2 - Import Libraries
# ==================================
# Import required libraries
import requests
import json
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, explode, when
from pyspark.sql.types import StructType, StructField, StringType, ArrayType
import logging
from typing import Dict, List, Optional
import os
from datetime import datetime, timedelta

# Azure authentication libraries
try:
    from azure.identity import ClientSecretCredential
    AZURE_IDENTITY_AVAILABLE = True
except ImportError:
    AZURE_IDENTITY_AVAILABLE = False
    # If azure-identity is not available, we'll use direct OAuth2 flow
    pass
# ==================================


# CELL 3 - Configure Logging and Initialize Spark
# ==================================
# Logging setup: INFO and above, each record stamped with time and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Spark entry point. getOrCreate() hands back the session that is already
# running if there is one, otherwise it starts one under this app name.
spark = (
    SparkSession.builder
    .appName("FabricCapacitiesList_SPN")
    .getOrCreate()
)
# ==================================


# CELL 4 - Configuration Parameters
# ==================================
# Configuration Parameters
# These are the settings we'll use throughout the notebook

# API Configuration
API_CONFIG = {
    "API_BASE_URL": "https://api.fabric.microsoft.com/v1",
    "MAX_RETRIES": 3,
    "PAGE_SIZE": 100,  # Number of items per page for API calls
    "TIMEOUT": 30,  # API request timeout in seconds
    "FABRIC_SCOPE": "https://api.fabric.microsoft.com/.default"  # Scope for Fabric API
}

# Authentication Method Selection
# Options: "service_principal", "user_auth" (fallback to original method)
AUTH_METHOD = "service_principal"

# Service Principal Configuration
# SECURITY: credentials are read from environment variables and must never be
# written into the notebook. Set FABRIC_SPN_TENANT_ID, FABRIC_SPN_CLIENT_ID and
# FABRIC_SPN_CLIENT_SECRET before running, or enable Key Vault below.
# Any secret that was ever stored in this cell should be treated as exposed
# and rotated in Azure AD.
# Unset variables resolve to "" so that validate_spn_config() can report them.
SPN_CONFIG = {
    "TENANT_ID": os.environ.get("FABRIC_SPN_TENANT_ID", ""),
    "CLIENT_ID": os.environ.get("FABRIC_SPN_CLIENT_ID", ""),
    "CLIENT_SECRET": os.environ.get("FABRIC_SPN_CLIENT_SECRET", ""),

    # OAuth2 endpoints; the token URL is AUTHORITY_URL + tenant id + TOKEN_ENDPOINT
    "AUTHORITY_URL": "https://login.microsoftonline.com/",
    "TOKEN_ENDPOINT": "/oauth2/v2.0/token"
}

# Key Vault Configuration (Optional)
# If you want to retrieve secrets from Azure Key Vault
KEY_VAULT_CONFIG = {
    "USE_KEY_VAULT": False,  # Set to True to use Key Vault
    "VAULT_NAME": "your-key-vault-name",
    "SECRET_NAME_CLIENT_ID": "fabric-spn-client-id",
    "SECRET_NAME_CLIENT_SECRET": "fabric-spn-client-secret"
}
# ==================================


# CELL 5 - Validation Functions
# ==================================
def validate_spn_config() -> bool:
    """
    Check that SPN_CONFIG has a non-empty value for each required field.

    The required fields are TENANT_ID, CLIENT_ID and CLIENT_SECRET. Missing or
    empty fields are reported through the logger, listed in that order.

    Returns:
        bool: True when all three fields are set, False otherwise.
    """
    absent = [
        key
        for key in ("TENANT_ID", "CLIENT_ID", "CLIENT_SECRET")
        if not SPN_CONFIG.get(key)
    ]

    if not absent:
        return True

    logger.error(f"Missing required SPN configuration: {', '.join(absent)}")
    logger.info("Please set these as environment variables or in the SPN_CONFIG section")
    return False


def get_secret_from_key_vault(secret_name: str) -> str:
    """
    Read one secret from the Key Vault named in KEY_VAULT_CONFIG.

    When ``USE_KEY_VAULT`` is falsy nothing is contacted and an empty string
    is returned. Otherwise the vault at
    ``https://<VAULT_NAME>.vault.azure.net`` is queried using
    ``DefaultAzureCredential``. The Azure packages are imported lazily, so
    they are only needed when Key Vault is enabled.

    Args:
        secret_name: Name of the secret in Key Vault.

    Returns:
        str: The secret value, or "" when Key Vault use is disabled.

    Raises:
        Exception: Any import or lookup failure is logged and re-raised.
    """
    if KEY_VAULT_CONFIG["USE_KEY_VAULT"]:
        try:
            from azure.keyvault.secrets import SecretClient
            from azure.identity import DefaultAzureCredential

            vault_client = SecretClient(
                vault_url=f"https://{KEY_VAULT_CONFIG['VAULT_NAME']}.vault.azure.net",
                credential=DefaultAzureCredential(),
            )
            return vault_client.get_secret(secret_name).value
        except Exception as e:
            logger.error(f"Failed to retrieve secret from Key Vault: {str(e)}")
            raise

    # Key Vault disabled: signal "no secret" with an empty string
    return ""
# ==================================


# CELL 6 - Service Principal Authentication Functions
# ==================================
def get_token_using_azure_identity(tenant_id: str, client_id: str, client_secret: str) -> str:
    """
    Obtain a Fabric API token through ``ClientSecretCredential``.

    Only usable when the azure-identity import at the top of the notebook
    succeeded (``AZURE_IDENTITY_AVAILABLE``). The token is requested for the
    scope in ``API_CONFIG["FABRIC_SCOPE"]``.

    Args:
        tenant_id: Azure AD tenant ID
        client_id: Service Principal client ID
        client_secret: Service Principal client secret

    Returns:
        str: The ``token`` attribute of the credential's token result.

    Raises:
        ImportError: If azure-identity is not available.
        Exception: Any failure while building the credential or requesting
            the token is logged and re-raised.
    """
    if not AZURE_IDENTITY_AVAILABLE:
        raise ImportError("azure-identity package is not available. Please install it or use direct OAuth2 method.")

    try:
        spn_credential = ClientSecretCredential(
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret
        )
        # Request a token for the Fabric scope and hand back just the string
        return spn_credential.get_token(API_CONFIG["FABRIC_SCOPE"]).token
    except Exception as e:
        logger.error(f"Failed to get token using azure-identity: {str(e)}")
        raise


def get_token_using_direct_oauth(tenant_id: str, client_id: str, client_secret: str) -> str:
    """
    Get an access token using the direct OAuth2 client credentials flow.

    Posts a form-encoded token request to
    ``AUTHORITY_URL + tenant_id + TOKEN_ENDPOINT`` (from SPN_CONFIG) using only
    ``requests``; no additional Azure libraries are required.

    Args:
        tenant_id: Azure AD tenant ID
        client_id: Service Principal client ID
        client_secret: Service Principal client secret

    Returns:
        str: The ``access_token`` field of the token response.

    Raises:
        requests.exceptions.RequestException: If the request cannot be sent or
            the endpoint answers with an HTTP error. The error, and the
            response body when one exists, are logged before re-raising.
        ValueError: If the response contains no ``access_token`` field.
    """
    token_url = f"{SPN_CONFIG['AUTHORITY_URL']}{tenant_id}{SPN_CONFIG['TOKEN_ENDPOINT']}"

    # Prepare the token request
    token_data = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
        "scope": API_CONFIG["FABRIC_SCOPE"]
    }

    # Bound before the try block so the except handler can refer to it safely
    # even when requests.post() raises before returning a response.
    response = None

    try:
        logger.info("Requesting access token from Azure AD...")
        response = requests.post(
            token_url,
            data=token_data,
            timeout=API_CONFIG["TIMEOUT"]
        )

        response.raise_for_status()
        token_response = response.json()

        if "access_token" not in token_response:
            raise ValueError("No access token in response")

        logger.info("Successfully obtained access token")
        return token_response["access_token"]

    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to get token via direct OAuth: {str(e)}")

        # Prefer the response attached to the exception, else the one received.
        # Compare against None explicitly rather than by truthiness, so that a
        # response object that evaluates as false is still logged.
        error_response = getattr(e, "response", None)
        if error_response is None:
            error_response = response
        if error_response is not None and error_response.text:
            logger.error(f"Response: {error_response.text}")
        raise


def get_access_token_spn() -> str:
    """
    Authenticate as the configured Service Principal and return a token.

    Client ID and secret come from Key Vault when ``USE_KEY_VAULT`` is set,
    otherwise from SPN_CONFIG; the tenant ID always comes from SPN_CONFIG.
    The token is then requested through azure-identity when that package was
    importable, and through the direct OAuth2 flow otherwise.

    Returns:
        str: Access token for Fabric API

    Raises:
        ValueError: If the tenant ID, client ID or client secret is empty.
    """
    if not KEY_VAULT_CONFIG["USE_KEY_VAULT"]:
        client_id = SPN_CONFIG["CLIENT_ID"]
        client_secret = SPN_CONFIG["CLIENT_SECRET"]
    else:
        logger.info("Retrieving SPN credentials from Key Vault...")
        client_id = get_secret_from_key_vault(KEY_VAULT_CONFIG["SECRET_NAME_CLIENT_ID"])
        client_secret = get_secret_from_key_vault(KEY_VAULT_CONFIG["SECRET_NAME_CLIENT_SECRET"])
    # The tenant ID is read from configuration in both modes
    tenant_id = SPN_CONFIG["TENANT_ID"]

    # All three values must be non-empty before any token request is made
    if not (tenant_id and client_id and client_secret):
        raise ValueError("Missing required SPN credentials. Please check configuration.")

    # Fall back to the plain OAuth2 request when azure-identity is absent
    if not AZURE_IDENTITY_AVAILABLE:
        logger.info("Using direct OAuth2 flow for authentication...")
        return get_token_using_direct_oauth(tenant_id, client_id, client_secret)

    logger.info("Using azure-identity for authentication...")
    return get_token_using_azure_identity(tenant_id, client_id, client_secret)


def get_access_token_user() -> str:
    """
    Obtain a Fabric API token from the notebook runtime (non-SPN path).

    ``notebookutils`` is imported lazily inside the function and the token is
    requested for the audience ``https://api.fabric.microsoft.com``.

    Returns:
        str: Whatever ``mssparkutils.credentials.getToken`` returns for that
        audience.

    Raises:
        Exception: Any error from the import or the token request is logged
            and re-raised unchanged.
    """
    fabric_audience = "https://api.fabric.microsoft.com"
    try:
        from notebookutils import mssparkutils
        return mssparkutils.credentials.getToken(fabric_audience)
    except Exception as e:
        logger.error(f"Failed to get user access token: {str(e)}")
        raise


def get_access_token() -> str:
    """
    Dispatch to the token provider selected by AUTH_METHOD.

    ``"service_principal"`` selects get_access_token_spn(); every other value
    selects get_access_token_user().

    Returns:
        str: Access token for Fabric API
    """
    use_spn = AUTH_METHOD == "service_principal"

    if use_spn:
        logger.info("Using Service Principal authentication...")
    else:
        logger.info("Using user authentication...")

    # Only the selected provider is looked up and called
    token_provider = get_access_token_spn if use_spn else get_access_token_user
    return token_provider()
# ==================================


# CELL 7 - API Call Function (Unchanged)
# ==================================
def call_fabric_api(endpoint: str, access_token: str, params: Optional[Dict] = None) -> Dict:
    """
    Send a GET request to the Fabric REST API and return the decoded JSON body.

    The request is retried only when a retry can plausibly help:
    - transport-level failures (no HTTP response: connection error, timeout),
    - HTTP 429 (throttling),
    - HTTP 5xx (server-side errors).
    Other HTTP errors (for example 401, 403, 404) are raised immediately,
    because repeating the same request cannot change the outcome.

    Between attempts the function sleeps ``2 ** attempt`` seconds, or longer
    if the failed response carries a numeric ``Retry-After`` header.

    Args:
        endpoint: The API endpoint path appended to
            ``API_CONFIG['API_BASE_URL']`` (e.g., "/capacities").
        access_token: Bearer token sent in the ``Authorization`` header.
        params: Optional query parameters for the request.

    Returns:
        dict: The JSON response from the API.

    Raises:
        requests.exceptions.RequestException: If the request fails with a
            non-retryable error, or still fails after the last attempt.
    """
    import time  # function-scope import: the notebook's import cell does not provide `time`

    url = f"{API_CONFIG['API_BASE_URL']}{endpoint}"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }

    # Always make at least one attempt. With MAX_RETRIES <= 0 the loop would
    # otherwise be skipped and the function would silently return None.
    max_attempts = max(1, API_CONFIG['MAX_RETRIES'])

    for attempt in range(max_attempts):
        try:
            logger.info(f"Making API call to: {url} (Attempt {attempt + 1})")

            response = requests.get(
                url,
                headers=headers,
                params=params,
                timeout=API_CONFIG['TIMEOUT']
            )

            # Turn 4xx/5xx status codes into exceptions
            response.raise_for_status()

            return response.json()

        except requests.exceptions.RequestException as e:
            logger.warning(f"API call failed (Attempt {attempt + 1}): {str(e)}")

            # The exception carries the HTTP response when there was one;
            # connection errors and timeouts have none.
            failed_response = getattr(e, "response", None)
            status_code = getattr(failed_response, "status_code", None)
            is_transient = status_code is None or status_code == 429 or status_code >= 500

            if not is_transient:
                logger.error(f"Non-retryable HTTP {status_code} response for endpoint: {endpoint}")
                raise

            if attempt == max_attempts - 1:
                logger.error(f"All retry attempts failed for endpoint: {endpoint}")
                raise

            # Exponential backoff, stretched to honor a numeric Retry-After header
            delay = 2 ** attempt
            response_headers = getattr(failed_response, "headers", None) or {}
            retry_after = response_headers.get("Retry-After")
            if retry_after is not None and str(retry_after).isdigit():
                delay = max(delay, int(retry_after))
            time.sleep(delay)
# ==================================


# CELL 8 - Get All Capacities Function (Minor Update)
# ==================================
def get_all_capacities(access_token: str) -> List[Dict]:
    """
    Collect every capacity by following the API's continuation tokens.

    Each request to "/capacities" asks for ``API_CONFIG['PAGE_SIZE']`` items
    via the ``top`` parameter. When a response contains a non-empty
    ``continuationToken`` it is sent back on the next request; the loop ends
    with the first response that has none.

    Args:
        access_token: Token forwarded to call_fabric_api for every page.

    Returns:
        list: The ``value`` entries of all pages, in the order received.
    """
    collected = []
    next_token = None
    more_pages = True

    while more_pages:
        # Build the query for this page; the token is only sent once we have one
        query = {"top": API_CONFIG['PAGE_SIZE']}
        if next_token:
            query["continuationToken"] = next_token

        page = call_fabric_api("/capacities", access_token, query)

        # A page without "value" contributes nothing
        page_items = page.get("value", [])
        collected.extend(page_items)

        logger.info(f"Retrieved {len(page_items)} capacities. Total so far: {len(collected)}")

        # Keep going only while the API hands out another token
        next_token = page.get("continuationToken")
        more_pages = bool(next_token)

    logger.info(f"Finished retrieving capacities. Total count: {len(collected)}")
    return collected
# ==================================


# CELL 9 - Create DataFrame Function (Unchanged)
# ==================================
def create_capacities_dataframe(capacities_data: List[Dict]) -> "DataFrame":
    """
    Build a PySpark DataFrame holding the five reported capacity fields.

    Rows are handed to Spark directly together with an explicit schema. There
    is no intermediate pandas DataFrame, so the result has the same five
    nullable string columns even when ``capacities_data`` is empty or None
    (a pandas frame built from an empty list has no columns at all).

    Args:
        capacities_data: List of capacity dictionaries from the API. Keys other
            than the five reported ones are ignored; a missing key becomes null.

    Returns:
        DataFrame: Columns ``id``, ``displayName``, ``sku``, ``region`` and
        ``state``, all nullable strings, one row per input dictionary.
    """
    # Single source of truth for column order, shared by schema and rows
    column_names = ("id", "displayName", "sku", "region", "state")

    # Every column is a nullable string (third argument True = nullable)
    schema = StructType([StructField(name, StringType(), True) for name in column_names])

    # One tuple per capacity, values in schema order; absent keys yield None
    rows = [
        tuple(capacity.get(name) for name in column_names)
        for capacity in (capacities_data or [])
    ]

    return spark.createDataFrame(rows, schema=schema)
# ==================================


# CELL 10 - Test Authentication Function
# ==================================
def test_authentication():
    """
    Test the authentication configuration before running the main process.
    
    This function helps diagnose authentication issues by:
    - Validating configuration
    - Attempting to get a token
    - Making a simple API call
    """
    print("\n=== Testing Authentication Configuration ===")
    
    # Check authentication method
    print(f"Authentication Method: {AUTH_METHOD}")
    
    if AUTH_METHOD == "service_principal":
        # Validate SPN configuration
        print("\nValidating Service Principal configuration...")
        if not validate_spn_config():
            print("❌ Invalid SPN configuration. Please check your settings.")
            return False
        print("✅ SPN configuration is valid")
        
        # Check if azure-identity is available
        print(f"\nazure-identity available: {'✅ Yes' if AZURE_IDENTITY_AVAILABLE else '❌ No'}")
    
    # Try to get a token
    print("\nAttempting to get access token...")
    try:
        token = get_access_token()
        print("✅ Successfully obtained access token")
        
        # Try a simple API call
        print("\nTesting API access...")
        response = call_fabric_api("/capacities", token, {"top": 1})
        print("✅ API call successful")
        
        return True
        
    except Exception as e:
        print(f"❌ Authentication failed: {str(e)}")
        return False
# ==================================


# CELL 11 - Main Execution Function (Updated)
# ==================================
def main():
    """
    Run the whole capacities workflow from authentication to display.

    The steps, in order:
    1. Validate the Service Principal configuration (service_principal mode only)
    2. Obtain an access token
    3. Fetch every capacity from the API
    4. Turn the result into a PySpark DataFrame
    5. Print schema, rows and row count, and register a temp view

    Returns:
        The DataFrame built in step 4, or None when the API returned no
        capacities.

    Raises:
        ValueError: If the Service Principal configuration is invalid.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    try:
        logger.info("Starting Fabric Capacities retrieval process")

        # Step 1: only service principal auth has configuration to validate
        if AUTH_METHOD == "service_principal":
            logger.info("Validating Service Principal configuration...")
            spn_config_ok = validate_spn_config()
            if not spn_config_ok:
                raise ValueError("Invalid Service Principal configuration")

        # Step 2: token
        logger.info("Getting access token...")
        token = get_access_token()
        logger.info("Successfully obtained access token")

        # Step 3: raw capacity records
        logger.info("Retrieving capacities from Fabric API...")
        raw_capacities = get_all_capacities(token)
        if not raw_capacities:
            # Nothing to show; callers receive None
            logger.warning("No capacities found")
            return None

        # Step 4: DataFrame
        logger.info("Creating PySpark DataFrame...")
        result_df = create_capacities_dataframe(raw_capacities)

        # Step 5: display
        logger.info("Displaying capacities...")

        print("\nDataFrame Schema:")
        result_df.printSchema()

        print("\nFabric Capacities:")
        result_df.show(truncate=False)  # full cell values, no ellipsis

        print(f"\nTotal number of capacities: {result_df.count()}")

        # Expose the data to SQL cells under a fixed view name
        result_df.createOrReplaceTempView("fabric_capacities")
        logger.info("Created temporary view 'fabric_capacities' for SQL queries")

        return result_df

    except Exception as exc:
        # Log for the notebook reader, then let the original error propagate
        logger.error(f"Error in main execution: {exc!s}")
        raise
# ==================================


# CELL 12 - Execute Main Function
# ==================================
# Test authentication before running main process.
# capacities_df is bound up front so that later cells always see a defined
# name: None after a failed test (or an empty result) instead of a NameError
# on a fresh kernel or a stale DataFrame left over from an earlier run.
capacities_df = None
if test_authentication():
    print("\n✅ Authentication test passed. Proceeding with main execution...\n")
    capacities_df = main()
else:
    print("\n❌ Authentication test failed. Please fix configuration before proceeding.")
# ==================================


# CELL 13 - Analysis Helper Functions (Unchanged)
# ==================================
# Additional helper functions for data analysis

def analyze_capacities(capacities_df):
    """
    Print a few summary views of the capacities DataFrame.

    For each of the sku, state and region columns a frequency table is shown,
    ordered by descending count. A final table lists displayName, sku and
    region for the rows whose state equals "Active". Nothing is returned.

    Args:
        capacities_df: DataFrame with at least the columns displayName, sku,
            region and state
    """
    print("\n=== Capacity Analysis ===")

    # One frequency table per dimension, most common value first
    breakdowns = (
        ("\nCapacities by SKU:", "sku"),
        ("\nCapacities by State:", "state"),
        ("\nCapacities by Region:", "region"),
    )
    for heading, column in breakdowns:
        print(heading)
        capacities_df.groupBy(column).count().orderBy("count", ascending=False).show()

    # Rows whose state is exactly "Active", with only the identifying columns
    print("\nActive Capacities:")
    is_active = col("state") == "Active"
    capacities_df.filter(is_active).select("displayName", "sku", "region").show(truncate=False)
# ==================================


# CELL 14 - Run Analysis (Optional)
# ==================================
# Usage example:
# To run the analysis after getting the data, uncomment the following line:
# analyze_capacities(capacities_df)
# ==================================


# CELL 15 - Service Principal Setup Guide
# ==================================
"""
SERVICE PRINCIPAL SETUP GUIDE FOR BEGINNERS:

1. CREATE SERVICE PRINCIPAL IN AZURE:
   a. Go to Azure Portal (portal.azure.com)
   b. Navigate to Microsoft Entra ID (formerly Azure Active Directory) > App registrations
   c. Click "New registration"
   d. Give it a name (e.g., "Fabric-API-Access-SPN")
   e. Select "Accounts in this organizational directory only"
   f. Click "Register"
   g. Note down the "Application (client) ID" and "Directory (tenant) ID"

2. CREATE CLIENT SECRET:
   a. In your app registration, go to "Certificates & secrets"
   b. Click "New client secret"
   c. Add a description and select expiration
   d. Click "Add"
   e. IMPORTANT: Copy the secret value immediately (it won't be shown again)

3. GRANT PERMISSIONS IN FABRIC:
   a. Go to the Fabric portal
   b. Navigate to Admin portal > Tenant settings
   c. Find "Service principals can use Fabric APIs"
   d. Enable this setting
   e. Add your service principal to the allowed list
   f. OR grant workspace/capacity admin permissions as needed

4. CONFIGURE IN THIS NOTEBOOK:
   Method 1 - Environment Variables (Recommended):
   ```python
   # Set these in your environment or notebook
   import os
   os.environ["FABRIC_SPN_TENANT_ID"] = "your-tenant-id"
   os.environ["FABRIC_SPN_CLIENT_ID"] = "your-client-id"
   os.environ["FABRIC_SPN_CLIENT_SECRET"] = "your-secret"
   ```
   
   Method 2 - Direct Configuration (Testing only):
   Update the SPN_CONFIG dictionary in Cell 4
   
   Method 3 - Azure Key Vault (Most secure):
   a. Store secrets in Azure Key Vault
   b. Update KEY_VAULT_CONFIG in Cell 4
   c. Grant your notebook identity access to Key Vault

5. TROUBLESHOOTING COMMON ISSUES:
   - "Invalid client" error: Check client ID and tenant ID
   - "Invalid client secret" error: Check the secret hasn't expired
   - "Unauthorized" error: Check SPN has Fabric API permissions
   - "Access denied" error: Check SPN has permissions on specific resources

6. SECURITY BEST PRACTICES:
   - Never hardcode secrets in production code
   - Use Key Vault or environment variables
   - Rotate secrets regularly
   - Use certificate-based auth for production
   - Apply principle of least privilege
"""
# ==================================


# CELL 16 - Usage Notes for Beginners (Updated)
# ==================================
"""
USAGE NOTES FOR BEGINNERS:

1. AUTHENTICATION OPTIONS:
   - Service Principal: For automated/scheduled jobs
   - User Authentication: For interactive notebook sessions
   - Switch between methods using AUTH_METHOD variable

2. RUNNING THE CODE:
   - First run the authentication test to verify setup
   - Then run the main() function for the full process
   - Check logs for detailed information

3. UNDERSTANDING THE OUTPUT:
   - Schema: Shows the structure of your data
   - Data: Shows the actual capacity information
   - Analysis: Shows summary statistics

4. ERROR HANDLING:
   - Authentication errors: Check SPN configuration
   - API errors: Check permissions and network
   - Look at log messages for details

5. SAVING RESULTS:
   ```python
   # Save to Delta table
   capacities_df.write.mode("overwrite").saveAsTable("capacities_table")
   
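   # Save to CSV (Spark writes a directory of part files at this path, not a single file)
   capacities_df.write.mode("overwrite").csv("/path/to/csv_output")
   
   # Save to Parquet (also written as a directory of part files)
   capacities_df.write.mode("overwrite").parquet("/path/to/parquet_output")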
   ```

6. SWITCHING AUTHENTICATION METHODS:
   ```python
   # To use Service Principal
   AUTH_METHOD = "service_principal"
   
   # To use user authentication (original method)
   AUTH_METHOD = "user_auth"
   ```

7. USING WITH SCHEDULED JOBS:
   - Service Principal auth is required for automation
   - Store credentials securely (Key Vault recommended)
   - Monitor token expiration and renewal
"""
# ==================================

StatementMeta(, 32251843-d63f-4da0-b33c-bff8ec0dc264, 4, Finished, Available, Finished)

2025-05-14 20:19:17,486 - INFO - Using user authentication...



=== Testing Authentication Configuration ===
Authentication Method: user_auth

Attempting to get access token...
✅ Successfully obtained access token

Testing API access...
✅ API call successful

✅ Authentication test passed. Proceeding with main execution...


DataFrame Schema:
root
 |-- id: string (nullable = true)
 |-- displayName: string (nullable = true)
 |-- sku: string (nullable = true)
 |-- region: string (nullable = true)
 |-- state: string (nullable = true)


Fabric Capacities:
+------------------------------------+---------------------------------+---+----------------+--------+
|id                                  |displayName                      |sku|region          |state   |
+------------------------------------+---------------------------------+---+----------------+--------+
|ab3b62c5-cff1-4341-a584-4ef86a529e8a|f64nonprodsouthcentral001        |F64|South Central US|Inactive|
|250aef2d-b24b-43a8-8564-8fefc5152522|f8nonprodsouthcentral001         |F8 |South Central US|A

'\nUSAGE NOTES FOR BEGINNERS:\n\n1. AUTHENTICATION OPTIONS:\n   - Service Principal: For automated/scheduled jobs\n   - User Authentication: For interactive notebook sessions\n   - Switch between methods using AUTH_METHOD variable\n\n2. RUNNING THE CODE:\n   - First run the authentication test to verify setup\n   - Then run the main() function for the full process\n   - Check logs for detailed information\n\n3. UNDERSTANDING THE OUTPUT:\n   - Schema: Shows the structure of your data\n   - Data: Shows the actual capacity information\n   - Analysis: Shows summary statistics\n\n4. ERROR HANDLING:\n   - Authentication errors: Check SPN configuration\n   - API errors: Check permissions and network\n   - Look at log messages for details\n\n5. SAVING RESULTS:\n   ```python\n   # Save to Delta table\n   capacities_df.write.mode("overwrite").saveAsTable("capacities_table")\n   \n   # Save to CSV\n   capacities_df.write.mode("overwrite").csv("/path/to/file.csv")\n   \n   # Save to Parquet\n   ca