# Budget Policy Setup Notebook

This notebook programmatically creates budget policies for each operational category in the Stellar Analytics Division. It implements:

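- Resource tagging helpers via the Databricks REST API (defined here; not called by the setup run below)
- Unity Catalog object tagging helpers via SQL (defined here; not called by the setup run below)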
- Budget policy creation with category-specific limits
- Alert threshold configuration
- Cluster policy enforcement

**Author**: SAD Analytics Team  
**Version**: 1.0  
**Last Updated**: April 2025

In [0]:
# Parameters
import os

# Databricks instance configuration
# Workspace host name without a scheme; API_BASE_URL prepends "https://".
DATABRICKS_INSTANCE = "your-instance.cloud.databricks.com"
# Read the access token from the DATABRICKS_TOKEN environment variable so the
# secret does not have to be saved inside the notebook. The original
# placeholder is kept as the fallback when the variable is not set.
TOKEN = os.environ.get("DATABRICKS_TOKEN", "your-token")

# Division-wide configuration
DIVISION = "SAD"  # Stellar Analytics Division
COST_CENTER = "ND-Analytics"

# Operational categories and their monthly budgets (in USD)
OPERATIONAL_CATEGORIES = {
    "DEEP_SPACE_TELEMETRY": 10000,
    "PROPULSION_ANALYTICS": 12000,
    "ORBITAL_MECHANICS": 8000,
    "MATERIALS_SCIENCE": 9000,
    "EXPLORATORY_MISSIONS": 15000,
    "NAVIGATION_SYSTEMS": 7000,
    "EXOPLANET_RESEARCH": 11000,
}

# Alert thresholds, as percentages of the monthly budget; one alert is
# created per threshold for every budget policy.
ALERT_THRESHOLDS = [75, 90, 100]

# Email notification list (recipients of every budget alert)
ADMIN_EMAILS = [
    "budget-admin@nova-dynamics.com",
    "sad-leadership@nova-dynamics.com",
]

In [0]:
# Imports
import requests
import json
import logging
from datetime import datetime
from pyspark.sql import SparkSession

# Logging: emit INFO and above, and give the notebook one named logger that
# the functions below write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("BudgetPolicySetup")

# Spark session used by the Unity Catalog tagging helpers (spark.sql)
spark = (
    SparkSession.builder
    .appName("BudgetPolicySetup")
    .getOrCreate()
)

# Root of the workspace REST API (version 2.0); request URLs are built on it
API_BASE_URL = "https://" + DATABRICKS_INSTANCE + "/api/2.0"

In [0]:
# Resource tagging functions (REST API)

def create_databricks_headers(token):
    """Build the HTTP headers sent with a Databricks REST API request.

    Args:
        token: Access token, sent as a bearer credential.

    Returns:
        A new dict holding the ``Authorization`` and ``Content-Type`` headers.
    """
    bearer = "Bearer {}".format(token)
    return dict(
        [
            ("Authorization", bearer),
            ("Content-Type", "application/json"),
        ]
    )

def tag_cluster(cluster_id, custom_tags, databricks_instance, token):
    """Set the custom tags of a Databricks cluster.

    Fetches the cluster's current configuration through ``clusters/get``,
    replaces its ``custom_tags`` entry with ``custom_tags`` and posts the
    whole configuration back through ``clusters/edit``.

    Args:
        cluster_id: ID of the cluster to tag.
        custom_tags: Tags stored as the cluster's ``custom_tags``. They
            replace whatever tags the fetched configuration carried.
        databricks_instance: Workspace host name, with or without a scheme.
            When empty or ``None`` the module-level ``API_BASE_URL`` is used.
        token: Databricks access token.

    Returns:
        True when both calls answer HTTP 200. False otherwise, including when
        a request fails at the network level or the configuration response
        is not valid JSON.
    """
    # Honour the instance the caller asked for instead of silently using the
    # global one; fall back to the global when no instance is given.
    if databricks_instance:
        host = str(databricks_instance).rstrip("/")
        if "://" not in host:
            host = f"https://{host}"
        base_url = f"{host}/api/2.0"
    else:
        base_url = API_BASE_URL

    headers = create_databricks_headers(token)

    try:
        # Get existing cluster configuration. A timeout keeps an unreachable
        # workspace from hanging the notebook indefinitely.
        response = requests.get(
            f"{base_url}/clusters/get",
            headers=headers,
            params={"cluster_id": cluster_id},
            timeout=30,
        )

        if response.status_code != 200:
            logger.error(f"Failed to get cluster config: {response.text}")
            return False

        cluster_config = response.json()

        # Update with new tags
        cluster_config["custom_tags"] = custom_tags

        # Apply the updated configuration
        response = requests.post(
            f"{base_url}/clusters/edit",
            headers=headers,
            json=cluster_config,
            timeout=30,
        )
    except (requests.RequestException, ValueError) as exc:
        # Connection problems, timeouts and undecodable JSON are reported the
        # same way as an HTTP failure: log and return False.
        logger.error(f"Failed to tag cluster: {exc}")
        return False

    if response.status_code == 200:
        logger.info(f"Successfully tagged cluster {cluster_id}")
        return True

    logger.error(f"Failed to tag cluster: {response.text}")
    return False

def tag_sql_warehouse(warehouse_id, custom_tags, databricks_instance, token):
    """Set the custom tags of a SQL warehouse.

    Fetches the warehouse's current configuration, replaces
    ``tags.custom_tags`` with ``custom_tags`` and posts the whole
    configuration back through the warehouse ``edit`` endpoint.

    Args:
        warehouse_id: ID of the SQL warehouse to tag.
        custom_tags: Either a list of ``{"key": ..., "value": ...}`` pairs,
            which is sent as given, or a plain mapping of tag key to tag
            value, which is converted to that list form.
        databricks_instance: Workspace host name, with or without a scheme.
            When empty or ``None`` the module-level ``API_BASE_URL`` is used.
        token: Databricks access token.

    Returns:
        True when both calls answer HTTP 200. False otherwise, including when
        a request fails at the network level or the configuration response
        is not valid JSON.
    """
    # Honour the instance the caller asked for instead of silently using the
    # global one; fall back to the global when no instance is given.
    if databricks_instance:
        host = str(databricks_instance).rstrip("/")
        if "://" not in host:
            host = f"https://{host}"
        base_url = f"{host}/api/2.0"
    else:
        base_url = API_BASE_URL

    headers = create_databricks_headers(token)
    warehouse_url = f"{base_url}/sql/warehouses/{warehouse_id}"

    # The warehouse API models custom tags as a list of key/value pairs, so a
    # plain dict is converted; a list is assumed to be in that form already.
    if isinstance(custom_tags, dict):
        tag_pairs = [{"key": key, "value": value} for key, value in custom_tags.items()]
    else:
        tag_pairs = custom_tags

    try:
        # Get existing warehouse configuration. A timeout keeps an
        # unreachable workspace from hanging the notebook indefinitely.
        response = requests.get(warehouse_url, headers=headers, timeout=30)

        if response.status_code != 200:
            logger.error(f"Failed to get warehouse config: {response.text}")
            return False

        warehouse_config = response.json()

        # Update with new tags. The "tags" section is created when the fetched
        # configuration has none, instead of failing with a KeyError.
        tags_section = warehouse_config.get("tags") or {}
        tags_section["custom_tags"] = tag_pairs
        warehouse_config["tags"] = tags_section

        # Apply the updated configuration
        response = requests.post(
            f"{warehouse_url}/edit",
            headers=headers,
            json=warehouse_config,
            timeout=30,
        )
    except (requests.RequestException, ValueError) as exc:
        # Connection problems, timeouts and undecodable JSON are reported the
        # same way as an HTTP failure: log and return False.
        logger.error(f"Failed to tag warehouse: {exc}")
        return False

    if response.status_code == 200:
        logger.info(f"Successfully tagged SQL warehouse {warehouse_id}")
        return True

    logger.error(f"Failed to tag warehouse: {response.text}")
    return False

In [0]:
# Unity Catalog tagging functions (SQL)

def tag_catalog(catalog_name, tags_dict):
    """Apply tags to a Unity Catalog catalog via ``ALTER CATALOG ... SET TAGS``.

    Args:
        catalog_name: Catalog identifier, interpolated into the statement
            exactly as given.
        tags_dict: Mapping of tag key to tag value. Both are rendered as SQL
            string literals.

    Returns:
        True when the statement runs; False when ``spark.sql`` raises (the
        error is logged).
    """
    def _sql_literal(value):
        # Escape backslashes and single quotes so a tag such as "O'Brien"
        # cannot end the literal early and change the statement.
        # Assumes Spark's default backslash-escape parsing of string literals.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    tag_string = ", ".join(
        f"{_sql_literal(key)} = {_sql_literal(value)}"
        for key, value in tags_dict.items()
    )
    sql = f"ALTER CATALOG {catalog_name} SET TAGS ({tag_string})"

    try:
        spark.sql(sql)
        logger.info(f"Successfully tagged catalog {catalog_name}")
        return True
    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide.
        logger.error(f"Failed to tag catalog: {e}")
        return False

def tag_schema(catalog_name, schema_name, tags_dict):
    """Apply tags to a Unity Catalog schema via ``ALTER SCHEMA ... SET TAGS``.

    Args:
        catalog_name: Catalog identifier, interpolated exactly as given.
        schema_name: Schema identifier, interpolated exactly as given.
        tags_dict: Mapping of tag key to tag value. Both are rendered as SQL
            string literals.

    Returns:
        True when the statement runs; False when ``spark.sql`` raises (the
        error is logged).
    """
    def _sql_literal(value):
        # Escape backslashes and single quotes so a tag such as "O'Brien"
        # cannot end the literal early and change the statement.
        # Assumes Spark's default backslash-escape parsing of string literals.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    tag_string = ", ".join(
        f"{_sql_literal(key)} = {_sql_literal(value)}"
        for key, value in tags_dict.items()
    )
    sql = f"ALTER SCHEMA {catalog_name}.{schema_name} SET TAGS ({tag_string})"

    try:
        spark.sql(sql)
        logger.info(f"Successfully tagged schema {catalog_name}.{schema_name}")
        return True
    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide.
        logger.error(f"Failed to tag schema: {e}")
        return False

def tag_table(catalog_name, schema_name, table_name, tags_dict):
    """Apply tags to a Unity Catalog table via ``ALTER TABLE ... SET TAGS``.

    Args:
        catalog_name: Catalog identifier, interpolated exactly as given.
        schema_name: Schema identifier, interpolated exactly as given.
        table_name: Table identifier, interpolated exactly as given.
        tags_dict: Mapping of tag key to tag value. Both are rendered as SQL
            string literals.

    Returns:
        True when the statement runs; False when ``spark.sql`` raises (the
        error is logged).
    """
    def _sql_literal(value):
        # Escape backslashes and single quotes so a tag such as "O'Brien"
        # cannot end the literal early and change the statement.
        # Assumes Spark's default backslash-escape parsing of string literals.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    tag_string = ", ".join(
        f"{_sql_literal(key)} = {_sql_literal(value)}"
        for key, value in tags_dict.items()
    )
    sql = f"ALTER TABLE {catalog_name}.{schema_name}.{table_name} SET TAGS ({tag_string})"

    try:
        spark.sql(sql)
        logger.info(f"Successfully tagged table {catalog_name}.{schema_name}.{table_name}")
        return True
    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide.
        logger.error(f"Failed to tag table: {e}")
        return False

In [0]:
# Budget policy functions

def create_budget_policy(category_name, monthly_limit, tags, token):
    """Create a budget policy for an operational category.

    Posts a policy named ``<category_name lowercased>_budget_policy`` that
    filters on the ``OperationalCategory`` tag and carries a monthly USD
    limit.

    Args:
        category_name: Operational category key, e.g. ``"ORBITAL_MECHANICS"``.
        monthly_limit: Monthly budget amount, sent as the limit in USD.
        tags: Accepted for interface compatibility; it is not included in the
            request. NOTE(review): confirm whether it should be sent.
        token: Databricks access token.

    Returns:
        The ``policy_id`` from the response, or None when the request fails,
        the status is not 200, or the response carries no ``policy_id``.
    """
    url = f"{API_BASE_URL}/budgets/policies"
    headers = create_databricks_headers(token)

    policy_config = {
        "name": f"{category_name.lower()}_budget_policy",
        "description": f"Budget policy for {category_name.replace('_', ' ').title()}",
        "filter": {
            "tag": {
                "key": "OperationalCategory",
                "value": category_name
            }
        },
        "limit": {
            "amount": monthly_limit,
            "currency": "USD",
            "period": "MONTH"
        },
        "enabled": True
    }

    try:
        # A timeout keeps an unreachable workspace from hanging the run.
        response = requests.post(url, headers=headers, json=policy_config, timeout=30)
    except requests.RequestException as exc:
        # Network failures follow the same contract as HTTP failures, so one
        # bad call does not abort the caller's loop over categories.
        logger.error(f"Failed to create budget policy: {exc}")
        return None

    if response.status_code != 200:
        logger.error(f"Failed to create budget policy: {response.text}")
        return None

    try:
        policy_id = response.json()["policy_id"]
    except (ValueError, KeyError, TypeError):
        # A 200 whose body is not JSON or lacks "policy_id" is still a failure.
        logger.error(f"Failed to create budget policy: {response.text}")
        return None

    logger.info(f"Successfully created budget policy for {category_name}: {policy_id}")
    return policy_id

def add_budget_alert(policy_id, email_list, threshold_percent, token):
    """Add an email alert to a budget policy.

    Args:
        policy_id: ID of the budget policy the alert is attached to.
        email_list: Email addresses sent as the alert's recipients.
        threshold_percent: Budget percentage sent as the alert threshold.
        token: Databricks access token.

    Returns:
        The ``alert_id`` from the response, or None when the request fails,
        the status is not 200, or the response carries no ``alert_id``.
    """
    url = f"{API_BASE_URL}/budgets/alerts"
    headers = create_databricks_headers(token)

    alert_config = {
        "policy_id": policy_id,
        "threshold_percent": threshold_percent,
        "email_addresses": email_list,
        "description": f"Alert at {threshold_percent}% budget threshold"
    }

    try:
        # A timeout keeps an unreachable workspace from hanging the run.
        response = requests.post(url, headers=headers, json=alert_config, timeout=30)
    except requests.RequestException as exc:
        # Network failures follow the same contract as HTTP failures, so one
        # bad call does not abort the caller's loop over thresholds.
        logger.error(f"Failed to create budget alert: {exc}")
        return None

    if response.status_code != 200:
        logger.error(f"Failed to create budget alert: {response.text}")
        return None

    try:
        alert_id = response.json()["alert_id"]
    except (ValueError, KeyError, TypeError):
        # A 200 whose body is not JSON or lacks "alert_id" is still a failure.
        logger.error(f"Failed to create budget alert: {response.text}")
        return None

    logger.info(f"Successfully created budget alert for policy {policy_id} at {threshold_percent}%")
    return alert_id

In [0]:
# Cluster policy enforcement

def create_cluster_policy(category_name, instance_type_limits, max_workers, token):
    """Create a cluster policy for an operational category.

    The policy fixes auto-termination at 30 minutes, pins the
    ``OperationalCategory``, ``Team`` and ``CostCenter`` custom tags, limits
    node types to an allowlist and caps the worker count.

    Args:
        category_name: Operational category key; also used to build the
            policy name ``<category_name lowercased>_cluster_policy``.
        instance_type_limits: Node type IDs allowed by the policy.
        max_workers: Upper bound for ``num_workers``.
        token: Databricks access token.

    Returns:
        The ``policy_id`` from the response, or None when the request fails,
        the status is not 200, or the response carries no ``policy_id``.
    """
    url = f"{API_BASE_URL}/policies/clusters/create"
    headers = create_databricks_headers(token)

    policy_config = {
        "name": f"{category_name.lower()}_cluster_policy",
        "description": f"Cluster policy for {category_name.replace('_', ' ').title()}",
        # The definition is sent as a JSON-encoded string, not a nested object.
        "definition": json.dumps({
            "autotermination_minutes": {
                "type": "fixed",
                "value": 30
            },
            "custom_tags.OperationalCategory": {
                "type": "fixed",
                "value": category_name
            },
            "custom_tags.Team": {
                "type": "fixed",
                "value": DIVISION
            },
            "custom_tags.CostCenter": {
                "type": "fixed",
                "value": COST_CENTER
            },
            "node_type_id": {
                "type": "allowlist",
                "values": instance_type_limits
            },
            "num_workers": {
                "type": "range",
                "maxValue": max_workers
            }
        })
    }

    try:
        # A timeout keeps an unreachable workspace from hanging the run.
        response = requests.post(url, headers=headers, json=policy_config, timeout=30)
    except requests.RequestException as exc:
        # Network failures follow the same contract as HTTP failures, so one
        # bad call does not abort the caller's loop over categories.
        logger.error(f"Failed to create cluster policy: {exc}")
        return None

    if response.status_code != 200:
        logger.error(f"Failed to create cluster policy: {response.text}")
        return None

    try:
        policy_id = response.json()["policy_id"]
    except (ValueError, KeyError, TypeError):
        # A 200 whose body is not JSON or lacks "policy_id" is still a failure.
        logger.error(f"Failed to create cluster policy: {response.text}")
        return None

    logger.info(f"Successfully created cluster policy for {category_name}: {policy_id}")
    return policy_id

In [0]:
# Per-category cluster constraints: the node types a cluster may use
# ("instances") and the largest worker count allowed ("max_workers").

CATEGORY_LIMITS = {
    "DEEP_SPACE_TELEMETRY": dict(
        instances=["i3.xlarge", "i3.2xlarge", "m5.xlarge", "m5.2xlarge"],
        max_workers=10,
    ),
    "PROPULSION_ANALYTICS": dict(
        instances=["r5.xlarge", "r5.2xlarge", "r5.4xlarge"],
        max_workers=12,
    ),
    "ORBITAL_MECHANICS": dict(
        instances=["m5.xlarge", "m5.2xlarge", "c5.2xlarge"],
        max_workers=8,
    ),
    "MATERIALS_SCIENCE": dict(
        instances=["c5.xlarge", "c5.2xlarge", "c5.4xlarge"],
        max_workers=8,
    ),
    "EXPLORATORY_MISSIONS": dict(
        instances=["m5.xlarge", "m5.2xlarge", "m5.4xlarge", "r5.2xlarge"],
        max_workers=16,
    ),
    "NAVIGATION_SYSTEMS": dict(
        instances=["m5.xlarge", "c5.xlarge", "c5.2xlarge"],
        max_workers=6,
    ),
    "EXOPLANET_RESEARCH": dict(
        instances=["r5.xlarge", "r5.2xlarge", "i3.2xlarge"],
        max_workers=10,
    ),
}

In [0]:
# Execution: Create budget policies for all operational categories

def setup_all_budget_policies():
    """Create the budget policy, alerts and cluster policy for every category.

    For each entry of ``OPERATIONAL_CATEGORIES`` a budget policy is created
    first. If that succeeds, one alert per ``ALERT_THRESHOLDS`` value is added
    and then a cluster policy is created from ``CATEGORY_LIMITS``.

    Returns:
        dict keyed by category name. A successful entry holds ``policy_id``,
        ``alert_ids`` (only the alerts that were created) and
        ``cluster_policy_id``. An entry whose budget policy could not be
        created holds a single ``error`` message instead.
    """
    outcome = {}
    banner = "=" * 20

    for category_name in OPERATIONAL_CATEGORIES:
        budget = OPERATIONAL_CATEGORIES[category_name]
        logger.info(f"\n{banner} Setting up {category_name} {banner}")

        # Tag set describing the category, handed to the policy creator
        category_tags = dict(
            OperationalCategory=category_name,
            Team=DIVISION,
            CostCenter=COST_CENTER,
            Environment="PROD",
        )

        budget_policy_id = create_budget_policy(
            category_name, budget, category_tags, TOKEN
        )

        # Without a budget policy there is nothing to attach alerts to.
        if not budget_policy_id:
            outcome[category_name] = {"error": "Failed to create budget policy"}
            continue

        # One alert per threshold; alerts that failed to create are dropped.
        attempted = (
            add_budget_alert(budget_policy_id, ADMIN_EMAILS, percent, TOKEN)
            for percent in ALERT_THRESHOLDS
        )
        created_alert_ids = [alert_id for alert_id in attempted if alert_id]

        # Cluster policy built from the category's instance and worker limits
        category_limits = CATEGORY_LIMITS[category_name]
        cluster_policy_id = create_cluster_policy(
            category_name,
            category_limits["instances"],
            category_limits["max_workers"],
            TOKEN,
        )

        outcome[category_name] = {
            "policy_id": budget_policy_id,
            "alert_ids": created_alert_ids,
            "cluster_policy_id": cluster_policy_id,
        }

    return outcome

# Execute the setup
setup_results = setup_all_budget_policies()

# Display results: one section per category. A category whose budget policy
# could not be created carries an "error" entry, which is shown explicitly.
print("\n" + "="*50)
print("Budget Policy Setup Results")
print("="*50)
for category, result in setup_results.items():
    print(f"\n{category}:")
    print(f"  Budget: ${OPERATIONAL_CATEGORIES[category]:,}")
    if "error" in result:
        print(f"  Error: {result['error']}")
    # "or" rather than a .get() default: a stored None (e.g. a cluster policy
    # that failed to create) is reported as N/A instead of the text "None".
    print(f"  Policy ID: {result.get('policy_id') or 'N/A'}")
    print(f"  Alert IDs: {result.get('alert_ids') or []}")
    print(f"  Cluster Policy ID: {result.get('cluster_policy_id') or 'N/A'}")

In [0]:
# Validate deployment

def validate_policies():
    """Print a report of which categories have a budget policy deployed.

    Lists the budget policies through the REST API and, for every key of
    ``OPERATIONAL_CATEGORIES``, prints whether a policy named
    ``<category lowercased>_budget_policy`` is among them. If the listing
    cannot be retrieved or decoded, an error line is printed instead.

    Returns:
        None. The report is written to standard output.
    """
    print("\n" + "="*50)
    print("Policy Validation Report")
    print("="*50)

    # Check for existing policies
    url = f"{API_BASE_URL}/budgets/policies"
    headers = create_databricks_headers(TOKEN)

    try:
        # A timeout keeps an unreachable workspace from hanging the notebook.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Error validating policies: {exc}")
        return

    if response.status_code != 200:
        print(f"Error validating policies: {response.text}")
        return

    try:
        policies = response.json().get("policies") or []
    except (ValueError, AttributeError):
        # Body is not JSON, or not a JSON object.
        print(f"Error validating policies: {response.text}")
        return

    # Collect the names once so each category check is a set lookup. Entries
    # without a "name" are skipped rather than raising.
    deployed_names = {
        policy.get("name") for policy in policies if isinstance(policy, dict)
    }

    for category in OPERATIONAL_CATEGORIES:
        policy_exists = f"{category.lower()}_budget_policy" in deployed_names

        status = "✓" if policy_exists else "✗"
        print(f"{status} {category}: {'Policy deployed' if policy_exists else 'Policy missing'}")

# Run validation: list the deployed budget policies and print the
# per-category report
validate_policies()

## Next Steps

Once the validation report above shows a ✓ for every operational category, the budget policies are in place. Next steps:

1. Deploy the category-specific budget monitoring notebooks
2. Configure the master budget dashboard
3. Schedule regular budget reports
4. Train team members on budget compliance

Remember to regularly review and adjust budget allocations based on actual usage patterns.