In [7]:
# This gets details about capacities and writes them to a table in the Lakehouse

# Import necessary libraries
import requests
import pandas as pd
from pyspark.sql import SparkSession
import json

# Note: Microsoft Fabric notebooks automatically authenticate your requests
# The authentication token is available through the Fabric runtime

def get_fabric_capacities(timeout=30):
    """
    Retrieve the Microsoft Fabric capacities returned by the REST API.

    Obtains a bearer token through ``mssparkutils``, calls
    ``GET https://api.fabric.microsoft.com/v1/capacities`` repeatedly while the
    response carries a ``continuationToken``, and converts the accumulated
    records into a PySpark DataFrame.

    Args:
        timeout: Seconds to wait on each HTTP request. Without a timeout
            ``requests`` waits indefinitely, which would hang the notebook
            cell on a stalled connection.

    Returns:
        DataFrame or None: A PySpark DataFrame with one row per capacity, or
        ``None`` when the token cannot be obtained, any request fails or
        returns a non-200 status, or no capacities are returned.
    """

    # Step 1: Get the authentication token.
    # `mssparkutils` is not imported in this cell; it is expected to be
    # provided by the Fabric notebook runtime.
    try:
        token = mssparkutils.credentials.getToken("https://api.fabric.microsoft.com")
    except Exception as e:
        print(f"Error getting authentication token: {e}")
        print("Make sure you're running this in a Microsoft Fabric notebook")
        return None

    # Step 2: Build the endpoint URL and the request headers.
    base_url = "https://api.fabric.microsoft.com/v1"
    endpoint = "/capacities"
    url = base_url + endpoint

    headers = {
        "Authorization": f"Bearer {token}",  # Authentication token
        "Content-Type": "application/json"   # Media type declared for the request
    }

    # Step 3: Pagination state.
    all_capacities = []        # Records accumulated across all pages
    continuation_token = None  # Token for requesting the next page

    # Step 4: Fetch pages until the response no longer carries a continuation token.
    while True:
        try:
            params = {}
            if continuation_token:
                params['continuationToken'] = continuation_token

            # The timeout bounds each individual request so a dead connection
            # surfaces as an exception instead of blocking forever.
            response = requests.get(url, headers=headers, params=params, timeout=timeout)

            if response.status_code == 200:
                data = response.json()

                capacities = data.get('value', [])
                all_capacities.extend(capacities)

                continuation_token = data.get('continuationToken')

                # No continuation token means this was the last page.
                if not continuation_token:
                    break

                print(f"Retrieved {len(capacities)} capacities. Fetching more...")

            else:
                # Any non-200 status aborts the whole retrieval.
                print(f"Error: HTTP {response.status_code}")
                print(f"Response: {response.text}")
                return None

        except Exception as e:
            # Covers network errors, timeouts and JSON decoding failures.
            print(f"Error making API request: {e}")
            return None

    # Step 5: Convert the records to a PySpark DataFrame.
    if all_capacities:
        print(f"Total capacities retrieved: {len(all_capacities)}")

        # Get or create a SparkSession
        spark = SparkSession.builder.appName("FabricCapacities").getOrCreate()

        # The schema is inferred from the list of dictionaries.
        df = spark.createDataFrame(all_capacities)

        return df
    else:
        print("No capacities found")
        return None

# Main execution
if __name__ == "__main__":
    # Fetch the capacities, show them, persist them to the Lakehouse table
    # "BHCap" (and as parquet under Files/BHCap), then run a sample query.
    print("Retrieving Microsoft Fabric capacities...")
    capacities_df = get_fabric_capacities()

    # get_fabric_capacities() returns None on failure; compare against None
    # explicitly instead of relying on the truthiness of a DataFrame object.
    if capacities_df is not None:
        # Show the schema of the DataFrame
        print("\nDataFrame Schema:")
        capacities_df.printSchema()

        # Display the first few records
        print("\nFirst 10 capacities:")
        capacities_df.show(10, truncate=False)

        # Show total count
        total_count = capacities_df.count()
        print(f"\nTotal number of capacities: {total_count}")

        # Step 6: Write the data to the Lakehouse table named BHCap
        table_name = "BHCap"

        # Tracks whether saveAsTable completed, so the example query below is
        # only attempted against a table that was actually written.
        table_written = False

        try:
            # Write the DataFrame as a managed table; requires a default
            # Lakehouse attached to the notebook (see the error hint below).
            print(f"\nWriting data to Lakehouse table: {table_name}")

            capacities_df.write \
                .mode("overwrite") \
                .option("overwriteSchema", "true") \
                .saveAsTable(table_name)

            table_written = True
            print(f"Successfully wrote {total_count} records to table: {table_name}")

            # Verify the data was written by reading it back.
            # `spark` is not defined in this cell; it is expected to be the
            # session object supplied by the notebook runtime.
            print(f"\nVerifying data in table: {table_name}")
            verification_df = spark.read.table(table_name)
            print(f"Records in table: {verification_df.count()}")

            # Also write a parquet copy to the relative path Files/<table_name>
            # for direct file access.
            file_path = f"Files/{table_name}"

            print(f"\nAlso writing as parquet file to: {file_path}")
            capacities_df.write \
                .mode("overwrite") \
                .parquet(file_path)

            print(f"Successfully wrote parquet file to: {file_path}")

        except Exception as e:
            print(f"Error writing to Lakehouse: {e}")
            print("Make sure you have a default Lakehouse attached to this notebook")

        # Example: query the newly created table. Skipped when the table write
        # failed, because querying a missing table would raise a second,
        # uncaught error right after the one reported above.
        if table_written:
            print("\nExample query from the new table:")
            spark.sql(f"SELECT id, displayName, sku, state FROM {table_name} LIMIT 5").show()

    else:
        print("Failed to retrieve capacities")

StatementMeta(, 3291f23c-f15a-4074-9db2-7dc45c97b96d, 9, Finished, Available, Finished)

Retrieving Microsoft Fabric capacities...
Total capacities retrieved: 8

DataFrame Schema:
root
 |-- displayName: string (nullable = true)
 |-- id: string (nullable = true)
 |-- region: string (nullable = true)
 |-- sku: string (nullable = true)
 |-- state: string (nullable = true)


First 10 capacities:
+---------------------------------+------------------------------------+----------------+---+--------+
|displayName                      |id                                  |region          |sku|state   |
+---------------------------------+------------------------------------+----------------+---+--------+
|f64nonprodsouthcentral001        |ab3b62c5-cff1-4341-a584-4ef86a529e8a|South Central US|F64|Inactive|
|f8nonprodsouthcentral001         |250aef2d-b24b-43a8-8564-8fefc5152522|South Central US|F8 |Active  |
|f64x002                          |d94bc350-4bb9-4f24-9d89-fd633996eb28|South Central US|F64|Active  |
|f64x001                          |56125c55-2f69-4fa3-bac0-e9407fc17374|Sout

In [7]:
# This simply gets details about Gateway servers.

# Import required libraries
import requests
import json
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, ArrayType, BooleanType, IntegerType
import pandas as pd

# Initialize Spark session (already available in Fabric Notebooks as 'spark')
# spark = SparkSession.builder.appName("FabricGatewayAPI").getOrCreate()

# Configuration
# The base URL for Fabric API endpoints
BASE_URL = "https://api.fabric.microsoft.com/v1"
GATEWAYS_ENDPOINT = "/gateways"
# Filter for OnPremises gateways only
GATEWAY_TYPE_FILTER = "OnPremises"

# Authentication setup
# The access token is requested through mssparkutils from notebookutils.
from notebookutils import mssparkutils

try:
    access_token = mssparkutils.credentials.getToken("https://api.fabric.microsoft.com")
    # Only the token length is printed so the token itself never reaches the cell output.
    print(f"Successfully obtained access token (length: {len(access_token)})")
except Exception as e:
    print(f"Failed to get access token: {str(e)}")
    access_token = None

# Set up headers with authentication.
# The Authorization header is added only when a token was obtained; otherwise
# the request would carry the literal string "Bearer None" and the check
# printed below would report True even though authentication failed.
headers = {"Content-Type": "application/json"}
if access_token:
    headers["Authorization"] = f"Bearer {access_token}"

# Debug: report whether an Authorization header is present (token not shown)
print(f"Headers prepared. Authorization header present: {'Authorization' in headers}")

# Function to make API call and handle pagination
def get_onpremises_gateways(base_url, endpoint, headers, max_rows=100,
                            gateway_type="OnPremises", timeout=30):
    """
    Retrieve gateways of one type from the Fabric API, following pagination.

    Parameters:
    - base_url: The base URL for the API
    - endpoint: The specific endpoint for gateways (appended to base_url)
    - headers: HTTP headers sent with every request (authentication included)
    - max_rows: Maximum number of matching gateways to return (default: 100)
    - gateway_type: Value of each record's 'type' field to keep
      (default: "OnPremises", matching the previous hard-coded filter)
    - timeout: Seconds to wait on each HTTP request (default: 30); without it
      a stalled connection would block the cell indefinitely

    Returns:
    - List of gateway dictionaries whose 'type' equals gateway_type. On a
      non-200 response or an exception, the gateways collected so far are
      returned (possibly an empty list) after the error has been printed.
    """
    import traceback

    all_gateways = []
    next_link = f"{base_url}{endpoint}"
    rows_retrieved = 0

    while next_link and rows_retrieved < max_rows:
        try:
            # Make the API request
            print(f"Requesting: {next_link}")
            response = requests.get(next_link, headers=headers, timeout=timeout)

            # Stop at the first failed request; keep what was already collected.
            if response.status_code != 200:
                print(f"Error: API request failed with status code {response.status_code}")
                print(f"Response: {response.text}")
                break

            data = response.json()
            gateways = data.get('value', [])
            print(f"Response received with {len(gateways)} items")

            # Keep only the requested gateway type
            matching_gateways = [g for g in gateways if g.get('type') == gateway_type]
            print(f"Found {len(matching_gateways)} {gateway_type} gateways in this batch")

            # Take at most the number of rows still allowed by max_rows
            remaining_rows = max_rows - rows_retrieved
            batch = matching_gateways[:remaining_rows]
            all_gateways.extend(batch)
            rows_retrieved += len(batch)

            # Next page URL: Fabric list responses expose it as
            # 'continuationUri'. The OData-style key used previously is kept
            # as a fallback so responses in that shape still paginate.
            next_link = data.get('continuationUri') or data.get('@odata.nextLink')

        except Exception as e:
            print(f"Exception occurred: {str(e)}")
            print(traceback.format_exc())
            break

    return all_gateways

# Call the API to get gateway data
print("Fetching OnPremises gateway data from Fabric API...")
gateways_data = get_onpremises_gateways(BASE_URL, GATEWAYS_ENDPOINT, headers, max_rows=100)

# Check if we got any data
if not gateways_data:
    print("No OnPremises gateway data retrieved. Please check your permissions and API connectivity.")
else:
    print(f"Successfully retrieved {len(gateways_data)} OnPremises gateway records")

    # gateways_data is non-empty in this branch, so the first record can be
    # printed directly to show the raw structure returned by the API.
    print("\nSample OnPremises gateway record structure:")
    print(json.dumps(gateways_data[0], indent=2))

    # Create a Spark DataFrame from the data.
    # The schema lists only the core gateway fields; other keys present in the
    # raw records (e.g. "publicKey" in the sample output) are not listed here.
    schema = StructType([
        StructField("allowCloudConnectionRefresh", BooleanType(), True),
        StructField("allowCustomConnectors", BooleanType(), True),
        StructField("displayName", StringType(), True),
        StructField("id", StringType(), True),  # uuid format
        StructField("loadBalancingSetting", StringType(), True),  # It's just a string value
        # NOTE(review): the sample response shows this as a JSON number; it is
        # declared as a string here and cast to integer for the statistics below.
        StructField("numberOfMemberGateways", StringType(), True),
        StructField("type", StringType(), True),
        StructField("version", StringType(), True)
    ])

    # `spark` is not defined in this cell; it is expected to be the session
    # supplied by the notebook runtime.
    df = spark.createDataFrame(gateways_data, schema=schema)

    # Display ALL columns with no truncation
    print("\nShowing all OnPremises gateway data (core properties only):")
    df.show(truncate=False)

    # Display all columns and their values in a more readable format
    print("\nDetailed view of OnPremises gateways:")
    for row in df.collect():
        print("\n" + "="*50)
        print(f"Gateway Display Name: {row.displayName}")
        print(f"Gateway ID: {row.id}")
        print("="*50)

        # Core OnPremises properties
        print(f"Type: {row.type}")
        print(f"Version: {row.version}")
        print(f"Allow Cloud Connection Refresh: {row.allowCloudConnectionRefresh}")
        print(f"Allow Custom Connectors: {row.allowCustomConnectors}")
        print(f"Number of Member Gateways: {row.numberOfMemberGateways}")

        # Load Balancing Setting
        print(f"Load Balancing Setting: {row.loadBalancingSetting}")

    # Summary statistics for OnPremises gateways
    print("\n" + "="*50)
    print("SUMMARY STATISTICS FOR ONPREMISES GATEWAYS")
    print("="*50)

    # Total count
    print(f"\nTotal OnPremises Gateways: {df.count()}")

    # Cloud connection refresh enabled
    cloud_refresh_count = df.filter(df.allowCloudConnectionRefresh == True).count()
    print(f"\nGateways with Cloud Connection Refresh Enabled: {cloud_refresh_count}")

    # Custom connectors enabled
    custom_connectors_count = df.filter(df.allowCustomConnectors == True).count()
    print(f"Gateways with Custom Connectors Enabled: {custom_connectors_count}")

    # Version distribution
    print("\nGateways by Version:")
    df.groupBy("version").count().orderBy("count", ascending=False).show()

    # Member gateway statistics
    print("\nGateway Member Count Statistics:")
    from pyspark.sql.functions import col, avg, max as spark_max, min as spark_min

    member_stats = df.agg(
        avg(col("numberOfMemberGateways").cast("integer")).alias("avg_members"),
        spark_max(col("numberOfMemberGateways").cast("integer")).alias("max_members"),
        spark_min(col("numberOfMemberGateways").cast("integer")).alias("min_members")
    ).collect()[0]

    # The average is None when no row has a numeric member count; formatting
    # None with ".2f" would raise a TypeError, so fall back to "N/A".
    avg_members = member_stats.avg_members
    avg_members_text = f"{avg_members:.2f}" if avg_members is not None else "N/A"
    print(f"Average Member Gateways: {avg_members_text}")
    print(f"Maximum Member Gateways: {member_stats.max_members}")
    print(f"Minimum Member Gateways: {member_stats.min_members}")

    # Display only the exact properties in a clean table format
    print("\nOnPremises Gateways - Core Properties Only:")
    selected_columns = [
        "id",
        "displayName", 
        "type",
        "version",
        "allowCloudConnectionRefresh",
        "allowCustomConnectors",
        "numberOfMemberGateways"
    ]
    df.select(selected_columns).show(truncate=False)

    # Export to Pandas for detailed analysis
    pandas_df = df.toPandas()
    print("\n\nData exported to Pandas DataFrame for further analysis")

    # Create a simple summary DataFrame with just the documented fields
    summary_df = df.select(selected_columns)

    # Save to Fabric table if needed
    # summary_df.write.mode("overwrite").saveAsTable("onpremises_gateway_metadata")

# NOTE: this message is printed on both branches, including when no gateway
# data was retrieved.
print("\nScript execution completed successfully!")

# Final plain-text listing of the documented OnPremises properties,
# limited to the first 10 gateways for brevity.
if gateways_data:
    banner = "=" * 50
    print("\n" + banner)
    print("ONPREMISES GATEWAYS - DOCUMENTED PROPERTIES ONLY")
    print(banner)

    # Properties printed for each gateway, in display order
    documented_fields = (
        "allowCloudConnectionRefresh",
        "allowCustomConnectors",
        "displayName",
        "id",
        "loadBalancingSetting",
        "numberOfMemberGateways",
        "type",
        "version",
    )
    display_limit = 10

    for position, gateway in enumerate(gateways_data[:display_limit], start=1):
        print(f"\nGateway {position}:")
        for field in documented_fields:
            # Missing keys are shown as N/A
            print(f"  {field}: {gateway.get(field, 'N/A')}")

    # Report how many gateways were left out of the listing, if any
    remaining = len(gateways_data) - display_limit
    if remaining > 0:
        print(f"\n... and {remaining} more gateways")

StatementMeta(, 2871b78c-e614-4223-a674-3ad3c4c4e8cc, 9, Finished, Available, Finished)

Successfully obtained access token (length: 2412)
Headers prepared. Authorization header present: True
Fetching OnPremises gateway data from Fabric API...
Requesting: https://api.fabric.microsoft.com/v1/gateways
Response received with 2 items
Found 2 OnPremises gateways in this batch
Successfully retrieved 2 OnPremises gateway records

Sample OnPremises gateway record structure:
{
  "displayName": "FABDGWAY1-POC",
  "numberOfMemberGateways": 3,
  "loadBalancingSetting": "DistributeEvenly",
  "allowCloudConnectionRefresh": false,
  "allowCustomConnectors": false,
  "publicKey": {
    "exponent": "AQAB",
    "modulus": "tX3pIxSCNQky3usQp8ufCXXiNO5Nrsx9AJPq/lMoqPJmEAH/Qu3UEJlfsuHjw64cWXr2zs+D4z/CKntoYknKpTkf23EhMFR2pa+IOgNEUYfo8adebbEXlMlp6aiJ5ZYG+2zRsC/GkgfBFXeDeIhmnXgOgHKAr4DkjxmvJ5DCIMELlutEvrU+VsoemT1zE4dYFOVvKjDn88Q1yClZ4CayblxQh6jT4W6ziAghCdqBXV0Psp44nZwrMJw4ysWTptiVtD36ZkH6hF1rOOFaFL+zfB+vGzFy4AuSg2fuEyWPOg+u6wCiqFi/N77/+LFsX77Zk6J8iYII89hywgQM2sZmSQ=="
  },
  "version": "3000.266.