In [0]:
!pip install dotenv

In [0]:
# Consolidated Budget Monitoring Dashboard

# Import necessary libraries (cell 1)
import os
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, HTML, Markdown
from dotenv import load_dotenv

# Merge settings from a .env file (when one exists) into the process environment
load_dotenv()

# Connection and credential settings, each read from the environment.
# All fall back to the given default except TOKEN, which is None when unset.
ACCOUNT_HOST = os.environ.get("ACCOUNT_HOST", "https://accounts.azuredatabricks.net")
ACCOUNT_ID = os.environ.get("ACCOUNT_ID", "")
CLIENT_ID = os.environ.get("CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("CLIENT_SECRET", "")
TOKEN = os.environ.get("TOKEN")  # personal access token (PAT)
DATABRICKS_INSTANCE = os.environ.get("DATABRICKS_INSTANCE", "")

# Authentication setup (cell 2)
def get_oauth_token():
    """Request an OAuth access token for the account-level APIs.

    Performs a client-credentials grant against the account's OIDC token
    endpoint, sending ``CLIENT_ID``/``CLIENT_SECRET`` as HTTP basic auth.

    Returns:
        The ``access_token`` string on success, or ``None`` when the request
        cannot be completed, the server answers with a non-200 status, or the
        response carries no token. Failures are printed instead of raised
        because this function is called at notebook top level.
    """
    token_url = f"{ACCOUNT_HOST}/oidc/accounts/{ACCOUNT_ID}/v1/token"

    try:
        response = requests.post(
            token_url,
            auth=(CLIENT_ID, CLIENT_SECRET),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data="grant_type=client_credentials&scope=all-apis",
            # requests waits indefinitely unless a timeout is given
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"Error getting token: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error getting token: {response.status_code}")
        print(response.text)
        return None

    try:
        token = response.json().get("access_token")
    except (ValueError, AttributeError):
        # Body was not JSON, or was JSON but not an object
        token = None

    if not token:
        print("Error getting token: response did not contain an access_token")
        return None
    return token

# Authenticate once when the cell runs; ACCOUNT_TOKEN is None if that failed
ACCOUNT_TOKEN = get_oauth_token()
print(
    "✅ Account authentication successful!"
    if ACCOUNT_TOKEN
    else "❌ Account authentication failed"
)

# Budget API functions (cell 3)
def get_budget_policies():
    """Fetch the account's budget policies as dropdown options.

    Reads every page of the budget-policies listing by following
    ``next_page_token`` until the service stops returning one.

    Returns:
        A list of ``(label, value)`` tuples that always starts with
        ``("All Policies", "all")``, followed by one
        ``(policy_name, policy_id)`` entry per policy retrieved. On an HTTP
        error or exception the problem is printed and whatever was collected
        so far is returned, so the caller always gets a usable option list.
    """
    url = f"{ACCOUNT_HOST}/api/2.1/accounts/{ACCOUNT_ID}/budget-policies"

    headers = {
        "Authorization": f"Bearer {ACCOUNT_TOKEN}",
        "Content-Type": "application/json"
    }

    options = [("All Policies", "all")]
    params = {}

    # Deliberately broad: this feeds a widget at import time, so any failure
    # should degrade to the "All Policies" entry rather than abort the notebook.
    try:
        while True:
            response = requests.get(url, headers=headers, params=params, timeout=30)

            if response.status_code != 200:
                print(f"Error fetching budget policies: {response.status_code}")
                break

            result = response.json()
            for policy in result.get("policies") or []:
                policy_id = policy.get("policy_id")
                # Fall back to the id so the dropdown never shows an empty label
                options.append((policy.get("policy_name") or policy_id, policy_id))

            next_token = result.get("next_page_token")
            # Stop on the last page, or if the service repeats the same token
            if not next_token or next_token == params.get("page_token"):
                break
            params = {"page_token": next_token}
    except Exception as e:
        print(f"Exception fetching budget policies: {e}")

    return options

def get_policy_tags(policy_id):
    """Return the custom tags attached to one budget policy.

    Args:
        policy_id: Identifier of the policy to look up. The sentinel ``"all"``
            means "no specific policy" and yields an empty list without
            making a request.

    Returns:
        The policy's ``custom_tags`` value (a list), or ``[]`` when the policy
        has none, the request fails, or the server answers with a non-200
        status. Failures are printed rather than raised.
    """
    if policy_id == "all":
        return []

    url = f"{ACCOUNT_HOST}/api/2.1/accounts/{ACCOUNT_ID}/budget-policies/{policy_id}"

    headers = {
        "Authorization": f"Bearer {ACCOUNT_TOKEN}",
        "Content-Type": "application/json"
    }

    try:
        # requests waits indefinitely unless a timeout is given
        response = requests.get(url, headers=headers, timeout=30)

        if response.status_code == 200:
            policy = response.json()
            # `or []` also covers an explicit null for custom_tags
            return policy.get("custom_tags") or []
        else:
            print(f"Error fetching policy details: {response.status_code}")
            return []
    except Exception as e:
        print(f"Exception fetching policy details: {e}")
        return []

# Monitoring query functions (cell 4)
def _sql_string_literal(value):
    """Return *value* as a single-quoted SQL string literal.

    Backslashes and single quotes are backslash-escaped so the value cannot
    terminate the literal early.
    NOTE(review): assumes the default Spark SQL parser setting, where backslash
    is the escape character inside string literals - confirm for this workspace.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def _tag_filter_clause(column, tag_key, tag_value):
    """Return an ``AND <column>[key] = value`` clause, or "" if either part is empty."""
    if not (tag_key and tag_value):
        return ""
    return (
        f"\n  AND {column}[{_sql_string_literal(tag_key)}]"
        f" = {_sql_string_literal(tag_value)}"
    )


def get_overall_spending_query(days, tag_key=None, tag_value=None):
    """Build the SQL for DBU usage per product per day.

    Args:
        days: Size of the look-back window in days; coerced with ``int()``.
        tag_key: Optional custom-tag key to filter on.
        tag_value: Optional custom-tag value; the filter is applied only when
            both ``tag_key`` and ``tag_value`` are non-empty.

    Returns:
        A SQL string selecting ``billing_origin_product``, ``total_dbu`` and
        ``usage_date`` from ``system.billing.usage``.
    """
    # The window is interpolated first and the tag text is appended afterwards,
    # so braces inside a tag are never treated as format placeholders.
    select_part = f"""
    SELECT
      billing_origin_product,
      SUM(usage_quantity) as total_dbu,
      usage_date
    FROM
      system.billing.usage
    WHERE
      usage_date >= CURRENT_DATE() - INTERVAL {int(days)} DAY
    """

    grouping_part = """
    GROUP BY
      billing_origin_product, usage_date
    ORDER BY
      usage_date DESC, total_dbu DESC
    """

    return select_part + _tag_filter_clause("custom_tags", tag_key, tag_value) + grouping_part


def get_tag_based_spending_query(days, tag_key=None):
    """Build the SQL for DBU usage grouped by the values of one tag.

    Args:
        days: Size of the look-back window in days; coerced with ``int()``.
        tag_key: Optional custom-tag key. When given, usage is grouped by that
            tag's value (column ``tag_value``); otherwise a single
            ``'Various Tags'`` row (column ``category``) totals all rows whose
            ``custom_tags`` is not null.

    Returns:
        A SQL string against ``system.billing.usage``.
    """
    window_days = int(days)

    if tag_key:
        key_literal = _sql_string_literal(tag_key)
        query = f"""
        SELECT
          custom_tags[{key_literal}] as tag_value,
          SUM(usage_quantity) as total_dbu
        FROM
          system.billing.usage
        WHERE
          usage_date >= CURRENT_DATE() - INTERVAL {window_days} DAY
          AND custom_tags[{key_literal}] IS NOT NULL
        GROUP BY
          custom_tags[{key_literal}]
        ORDER BY
          total_dbu DESC
        """
    else:
        query = f"""
        SELECT
          'Various Tags' as category,
          SUM(usage_quantity) as total_dbu
        FROM
          system.billing.usage
        WHERE
          usage_date >= CURRENT_DATE() - INTERVAL {window_days} DAY
          AND custom_tags IS NOT NULL
        """

    return query


def get_cost_estimation_query(days, tag_key=None, tag_value=None):
    """Build the SQL that estimates cost per product from list prices.

    Usage rows are joined to ``system.billing.list_prices`` on SKU name and on
    the price's validity window, and ``usage_quantity * pricing.default`` is
    summed per ``billing_origin_product``.

    Args:
        days: Size of the look-back window in days; coerced with ``int()``.
        tag_key: Optional custom-tag key to filter on.
        tag_value: Optional custom-tag value; the filter is applied only when
            both ``tag_key`` and ``tag_value`` are non-empty.

    Returns:
        A SQL string selecting ``billing_origin_product`` and
        ``estimated_cost``, ordered by cost descending.
    """
    # NOTE(review): Databricks' sample cost queries also join on `cloud`;
    # confirm whether that condition is needed for this account.
    select_part = f"""
    SELECT
      billing_origin_product,
      SUM(usage.usage_quantity * list_prices.pricing.default) as estimated_cost
    FROM
      system.billing.usage usage
    JOIN
      system.billing.list_prices list_prices ON
        usage.sku_name = list_prices.sku_name AND
        usage.usage_start_time >= list_prices.price_start_time AND
        (usage.usage_end_time <= list_prices.price_end_time OR list_prices.price_end_time IS NULL)
    WHERE
      usage.usage_date >= CURRENT_DATE() - INTERVAL {int(days)} DAY
    """

    grouping_part = """
    GROUP BY
      billing_origin_product
    ORDER BY
      estimated_cost DESC
    """

    return select_part + _tag_filter_clause("usage.custom_tags", tag_key, tag_value) + grouping_part

# Visualization functions (cell 5)
def visualize_spending_by_product(days, tag_key=None, tag_value=None):
    """Chart DBU usage per product and per day, then show the product table.

    Runs the overall-spending query through ``spark.sql``, converts the result
    to pandas, and renders a bar chart of DBUs per product, a line chart of
    DBUs per day, and an HTML table of the per-product totals. Prints a message
    when the query returns no rows or when any step raises.

    Args:
        days: Look-back window in days (also used in the chart titles).
        tag_key: Optional tag key passed through to the query builder.
        tag_value: Optional tag value passed through to the query builder.
    """
    sql_text = get_overall_spending_query(days, tag_key, tag_value)

    try:
        usage = spark.sql(sql_text).toPandas()

        if usage.empty:
            print("No data available for visualization.")
            return

        # Total DBUs per product, largest first
        by_product = (
            usage.groupby('billing_origin_product')['total_dbu']
            .sum()
            .reset_index()
            .sort_values('total_dbu', ascending=False)
        )

        plt.figure(figsize=(10, 6))
        plt.bar(by_product['billing_origin_product'], by_product['total_dbu'])
        plt.title(f'DBU Usage by Product (Last {days} Days)')
        plt.xlabel('Product')
        plt.ylabel('DBUs')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        display(plt.gcf())
        plt.close()

        # Total DBUs per day, in chronological order
        by_day = (
            usage.groupby('usage_date')['total_dbu']
            .sum()
            .reset_index()
            .sort_values('usage_date')
        )

        plt.figure(figsize=(10, 6))
        plt.plot(by_day['usage_date'], by_day['total_dbu'], marker='o')
        plt.title(f'Daily DBU Usage Trend (Last {days} Days)')
        plt.xlabel('Date')
        plt.ylabel('DBUs')
        plt.grid(True)
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        display(plt.gcf())
        plt.close()

        display(HTML(by_product.to_html(index=False)))

    except Exception as err:
        print(f"Error visualizing data: {err}")

def analyze_spending_by_tag(days, tag_key=None):
    """Chart and tabulate DBU usage broken down by tag.

    Runs the tag-based spending query through ``spark.sql`` and draws one bar
    chart followed by an HTML table of the same rows. With ``tag_key`` the bars
    are the values of that tag; without it they come from the ``category``
    column. Prints a message when there are no rows or when any step raises.

    Args:
        days: Look-back window in days (also used in the chart title).
        tag_key: Optional tag key to break usage down by.
    """
    sql_text = get_tag_based_spending_query(days, tag_key)

    try:
        rows = spark.sql(sql_text).toPandas()

        if rows.empty:
            print("No tag data available for analysis.")
            return

        plt.figure(figsize=(10, 6))

        # The x-axis column and the labels depend on whether a key was chosen
        if tag_key:
            x_column = 'tag_value'
            chart_title = f'DBU Usage by Tag Value for "{tag_key}" (Last {days} Days)'
            x_label = f'Tag Value for {tag_key}'
        else:
            x_column = 'category'
            chart_title = f'DBU Usage for Tagged Resources (Last {days} Days)'
            x_label = 'Category'

        plt.bar(rows[x_column], rows['total_dbu'])
        plt.title(chart_title)
        plt.xlabel(x_label)
        plt.ylabel('DBUs')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        display(plt.gcf())
        plt.close()

        display(HTML(rows.to_html(index=False)))

    except Exception as err:
        print(f"Error analyzing tag data: {err}")

def estimate_costs(days, tag_key=None, tag_value=None):
    """Show estimated cost per product as a table and a bar chart.

    Runs the cost-estimation query through ``spark.sql``, displays an HTML
    table of per-product costs with a trailing ``TOTAL`` row (formatted as
    currency), then charts the per-product costs. Prints a message when the
    query returns no rows or when any step raises.

    Args:
        days: Look-back window in days (also used in the chart title).
        tag_key: Optional tag key passed through to the query builder.
        tag_value: Optional tag value passed through to the query builder.
    """
    query = get_cost_estimation_query(days, tag_key, tag_value)

    try:
        pdf = spark.sql(query).toPandas()

        if pdf.empty:
            print("No cost data available for analysis.")
            return

        # Build the display table on a copy so `pdf` keeps its numeric column
        total_row = pd.DataFrame([{
            'billing_origin_product': 'TOTAL',
            'estimated_cost': pdf['estimated_cost'].sum(),
        }])
        table = pd.concat([pdf, total_row], ignore_index=True)
        table['estimated_cost'] = table['estimated_cost'].map('${:,.2f}'.format)
        display(HTML(table.to_html(index=False)))

        # Chart the numeric values directly (no TOTAL row) instead of parsing
        # the currency strings back into numbers
        plt.figure(figsize=(10, 6))
        plt.bar(pdf['billing_origin_product'], pdf['estimated_cost'].astype(float))
        plt.title(f'Estimated Cost by Product (Last {days} Days)')
        plt.xlabel('Product')
        plt.ylabel('Estimated Cost ($)')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        display(plt.gcf())
        plt.close()

    except Exception as e:
        print(f"Error estimating costs: {e}")

# UI Components (cell 6)
# Budget policy selector. Its options are fetched from the account API when this
# cell runs; the "all" value means no specific policy is selected.
budget_policy_dropdown = widgets.Dropdown(
    options=get_budget_policies(),
    value="all",
    description='Budget Policy:',
    style={'description_width': 'initial'}
)

# Output area where on_policy_change renders the selected policy's tags
tag_output = widgets.Output()

# Tag filter dropdowns. Both start with only a placeholder whose value is the
# empty string; the change handlers replace their options later.
tag_key_dropdown = widgets.Dropdown(
    options=[("-- Select --", "")],
    value="",
    description='Tag Key:',
    style={'description_width': 'initial'}
)

tag_value_dropdown = widgets.Dropdown(
    options=[("-- Select --", "")],
    value="",
    description='Tag Value:',
    style={'description_width': 'initial'}
)

# Look-back window for the queries: 1 to 180 days, defaulting to 30
days_input = widgets.IntSlider(
    value=30,
    min=1,
    max=180,
    step=1,
    description='Days to analyze:',
    style={'description_width': 'initial'}
)

# Which analysis to run; run_analysis compares against these exact strings
analysis_type = widgets.Dropdown(
    options=[
        'Overall Spending Analysis',
        'Tag-Based Analysis',
        'Cost Estimation'
    ],
    value='Overall Spending Analysis',
    description='Analysis type:',
    style={'description_width': 'initial'}
)

# Button that triggers the selected analysis
run_button = widgets.Button(
    description='Run Analysis',
    button_style='success',
    icon='chart-line'
)

# Output area that run_analysis clears and writes its results into
output_area = widgets.Output()

# UI Event handlers (cell 7)
def on_policy_change(change):
    """Refresh the tag display and tag dropdowns for the newly selected policy.

    Args:
        change: Change notification from ``budget_policy_dropdown``;
            ``change['new']`` is the selected policy id, or ``"all"``.

    The tag key/value dropdowns are always reset first, so tags from a
    previously selected policy cannot linger as filters. When the policy has
    tags, they are shown as a table in ``tag_output`` and their keys become
    the options of ``tag_key_dropdown``.
    """
    with tag_output:
        tag_output.clear_output()

        policy_id = change['new']

        # Reset on every path, including "all" and policies without tags
        tag_key_dropdown.options = [("-- Select --", "")]
        tag_key_dropdown.value = ""
        tag_value_dropdown.options = [("-- Select --", "")]

        if policy_id == "all":
            display(Markdown("No specific policy selected. Please enter any tag filters manually below."))
            return

        tags = get_policy_tags(policy_id)
        if not tags:
            display(Markdown("No tags found for this policy."))
            return

        # Display tags table
        tag_df = pd.DataFrame(tags)

        # Options are (label, value) pairs, so invert them to look a name up by id
        names_by_id = {value: label for label, value in budget_policy_dropdown.options}
        policy_name = names_by_id.get(policy_id, "Selected Policy")

        display(Markdown(f"### Tags for {policy_name}"))
        display(HTML(tag_df.to_html(index=False)))

        # Offer this policy's tag keys
        tag_key_dropdown.options = [("-- Select --", "")] + [(tag['key'], tag['key']) for tag in tags]
        tag_key_dropdown.value = ""

def on_tag_key_change(change):
    """Refresh the tag value dropdown for the newly selected tag key.

    Args:
        change: Change notification from ``tag_key_dropdown``;
            ``change['new']`` is the selected key, or ``""`` for the
            placeholder.

    The value dropdown is reset to its placeholder first, so values belonging
    to a previous key never remain selectable. It is then filled with the
    value(s) that the selected policy defines for the chosen key.
    """
    selected_key = change['new']

    # Reset unconditionally; every early return below leaves a clean dropdown
    tag_value_dropdown.options = [("-- Select --", "")]

    if selected_key == "":
        return

    # Tag values come from the selected policy, so "all" has none to offer
    policy_id = budget_policy_dropdown.value
    if policy_id == 'all':
        return

    # Collect the values for the selected key, skipping tags without a value
    values = []
    for tag in get_policy_tags(policy_id):
        value = tag.get('value')
        if tag.get('key') == selected_key and value is not None and value not in values:
            values.append(value)

    if values:
        tag_value_dropdown.options = [("-- Select --", "")] + [(v, v) for v in values]

def run_analysis(b):
    """Run the analysis chosen in the UI and render it into ``output_area``.

    Args:
        b: The button instance passed by ``run_button.on_click``; unused.

    Reads the current widget values, prints a short summary of the selection,
    then calls the function matching the chosen analysis type.
    """
    with output_area:
        output_area.clear_output()

        # Get widget values
        days = days_input.value
        selected_analysis = analysis_type.value
        tag_key = tag_key_dropdown.value
        tag_value = tag_value_dropdown.value
        policy_id = budget_policy_dropdown.value

        print(f"Running analysis for the last {days} days")

        # NOTE(review): the selected policy is only reported here; the queries
        # are filtered by the tag dropdowns, not by the policy id.
        if policy_id != "all":
            # Options are (label, value) pairs, so invert them to look a name up by id
            names_by_id = {value: label for label, value in budget_policy_dropdown.options}
            policy_name = names_by_id.get(policy_id, "Selected Policy")
            print(f"Budget policy: {policy_name}")

        if tag_key and tag_value:
            print(f"Filtering by tag: {tag_key} = {tag_value}")

        # Run the selected analysis
        if selected_analysis == 'Overall Spending Analysis':
            display(Markdown("## Overall Spending Analysis"))
            visualize_spending_by_product(days, tag_key, tag_value)

        elif selected_analysis == 'Tag-Based Analysis':
            display(Markdown("## Tag-Based Analysis"))
            analyze_spending_by_tag(days, tag_key)

        elif selected_analysis == 'Cost Estimation':
            display(Markdown("## Cost Estimation"))
            estimate_costs(days, tag_key, tag_value)

# Wire each widget to its handler
budget_policy_dropdown.observe(on_policy_change, names='value')
tag_key_dropdown.observe(on_tag_key_change, names='value')
run_button.on_click(run_analysis)

# Render the dashboard top to bottom (cell 8)
# Section 1: policy picker and the tag table it drives
display(
    Markdown("# Budget Monitoring Dashboard"),
    Markdown("## 1. Select Budget Policy"),
    budget_policy_dropdown,
    tag_output,
)

# Section 2: filters, time window, analysis type and the run button
display(
    Markdown("## 2. Configure Analysis"),
    tag_key_dropdown,
    tag_value_dropdown,
    days_input,
    analysis_type,
    run_button,
)

# Section 3: where run_analysis writes its output
display(
    Markdown("## 3. Results"),
    output_area,
)