# Cost Monitoring and Scaling Analysis

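This notebook demonstrates cost monitoring and scaling analysis for the healthcare ML demo. All pod metrics and KEDA scaling events shown here are simulated with random values; in a real environment they would be read from the OpenShift API.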

## Features
- Monitor OpenShift resource usage
- Track KEDA scaling events
- Analyze cost attribution
- Visualize scaling patterns
- Generate cost reports

In [None]:
# Import libraries
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import time
import json
import os
from IPython.display import clear_output, display, HTML
import warnings
warnings.filterwarnings('ignore')

# Set up plotting.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*";
# older releases only know the legacy "seaborn" name. Pick whichever one this
# installation actually ships so the cell runs on either side of the rename.
PLOT_STYLE = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(PLOT_STYLE)
sns.set_palette("viridis")

print("📊 Cost Monitoring and Scaling Analysis")
print("🔍 Monitoring OpenShift resources and KEDA scaling")
print("💰 Tracking cost attribution and resource usage")

## 1. Resource Monitoring Functions

In [None]:
def get_pod_metrics(pods=None, rng=None):
    """Simulate a point-in-time resource snapshot for the demo pods.

    No cluster is queried: CPU and memory figures are drawn at random (in a
    real environment these would come from the OpenShift API).

    Args:
        pods: Optional iterable of pod names to report on. Defaults to the
            five demo workloads.
        rng: Optional random source exposing ``uniform(low, high)``, e.g.
            ``np.random.default_rng(seed)`` for a reproducible snapshot.
            Defaults to NumPy's global ``np.random`` state.

    Returns:
        A list with one dict per pod containing ``pod_name``, ``timestamp``
        (shared by every pod in the snapshot), ``cpu_cores`` (rounded to
        3 decimals), ``memory_mb`` (rounded to 1 decimal) and ``status``.
    """
    if pods is None:
        pods = [
            'quarkus-websocket-service',
            'vep-service',
            'genetic-analysis-workbench',
            'genetic-data-cluster-kafka',
            'genetic-data-cluster-zookeeper'
        ]
    if rng is None:
        # The np.random module itself offers uniform(), backed by global state.
        rng = np.random

    metrics = []
    current_time = datetime.now()

    for pod in pods:
        # Simulate varying resource usage
        cpu_usage = rng.uniform(0.1, 2.0)  # CPU cores
        memory_usage = rng.uniform(100, 1000)  # MB

        # VEP service might have higher usage during processing
        if pod == 'vep-service':
            cpu_usage *= rng.uniform(1.5, 3.0)
            memory_usage *= rng.uniform(1.2, 2.0)

        metrics.append({
            'pod_name': pod,
            'timestamp': current_time,
            'cpu_cores': round(cpu_usage, 3),
            'memory_mb': round(memory_usage, 1),
            'status': 'Running'
        })

    return metrics

def get_scaling_events():
    """Simulate KEDA scaling events for the VEP Kafka scaler.

    Produces five events starting two hours ago, spaced 20 minutes apart with
    0-9 minutes of random jitter, alternating ``scale_up`` / ``scale_down``.
    The replica counts always agree with the action: ``to_replicas`` is
    strictly greater than ``from_replicas`` for a scale-up and strictly
    smaller for a scale-down.

    Returns:
        A list of dicts with ``timestamp``, ``scaler``, ``action``,
        ``from_replicas``, ``to_replicas``, ``trigger_metric`` and
        ``metric_value``.
    """
    events = []
    base_time = datetime.now() - timedelta(hours=2)

    # Simulate some scaling events
    for i in range(5):
        # Cast to a built-in int: timedelta does not accept NumPy integer scalars.
        jitter_minutes = int(np.random.randint(0, 10))
        event_time = base_time + timedelta(minutes=i * 20 + jitter_minutes)
        action = 'scale_up' if i % 2 == 0 else 'scale_down'

        # Draw a low and a high replica count, then force high > low so that
        # every event represents a real change in the stated direction.
        low_replicas = int(np.random.randint(1, 3))
        high_replicas = int(np.random.randint(2, 5))
        if high_replicas <= low_replicas:
            high_replicas = low_replicas + 1

        if action == 'scale_up':
            from_replicas, to_replicas = low_replicas, high_replicas
        else:
            from_replicas, to_replicas = high_replicas, low_replicas

        events.append({
            'timestamp': event_time,
            'scaler': 'kafka-scaler-vep',
            'action': action,
            'from_replicas': from_replicas,
            'to_replicas': to_replicas,
            'trigger_metric': 'kafka_consumer_lag',
            'metric_value': int(np.random.randint(10, 100))
        })

    return events

def calculate_cost_attribution(metrics):
    """Price each pod's resource usage at fixed example hourly rates.

    Args:
        metrics: Iterable of dicts carrying ``pod_name``, ``timestamp``,
            ``cpu_cores`` and ``memory_mb`` (memory is converted from MB to
            GB by dividing by 1024).

    Returns:
        A list with one dict per input entry holding ``pod_name``,
        ``timestamp`` and the CPU, memory and combined hourly cost, each
        rounded to 4 decimals.
    """
    # Example rates in USD per hour
    cpu_rate_per_core = 0.05
    memory_rate_per_gb = 0.01

    def _price(sample):
        # The total is rounded from the unrounded parts, not from their rounded values.
        cpu_part = sample['cpu_cores'] * cpu_rate_per_core
        memory_part = (sample['memory_mb'] / 1024) * memory_rate_per_gb
        return {
            'pod_name': sample['pod_name'],
            'timestamp': sample['timestamp'],
            'cpu_cost_per_hour': round(cpu_part, 4),
            'memory_cost_per_hour': round(memory_part, 4),
            'total_cost_per_hour': round(cpu_part + memory_part, 4)
        }

    return [_price(sample) for sample in metrics]

# Take one simulated snapshot, price it, and fetch the simulated scaling history.
# These three names are reused by the dashboard and scaling-analysis cells below.
current_metrics = get_pod_metrics()
cost_data = calculate_cost_attribution(current_metrics)
scaling_events = get_scaling_events()

print(
    "✅ Resource monitoring functions ready",
    f"📊 Current pods monitored: {len(current_metrics)}",
    f"📈 Scaling events: {len(scaling_events)}",
    sep="\n",
)

## 2. Real-time Resource Dashboard

In [None]:
# Build the 2x2 resource/cost dashboard from the snapshot taken earlier
df_metrics = pd.DataFrame(current_metrics)
df_costs = pd.DataFrame(cost_data)

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# The three bar panels differ only in their data, colour and labels:
# (axis, frame, value column, bar colour, title, y-axis label)
bar_panels = [
    (axes[0, 0], df_metrics, 'cpu_cores', 'skyblue',
     'Current CPU Usage by Pod', 'CPU Cores'),
    (axes[0, 1], df_metrics, 'memory_mb', 'lightgreen',
     'Current Memory Usage by Pod', 'Memory (MB)'),
    (axes[1, 0], df_costs, 'total_cost_per_hour', 'salmon',
     'Cost per Hour by Pod', 'Cost (USD/hour)'),
]
for panel_ax, frame, value_column, bar_color, panel_title, y_label in bar_panels:
    panel_ax.bar(frame['pod_name'], frame[value_column], color=bar_color)
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel(y_label)
    panel_ax.tick_params(axis='x', rotation=45)

# Fourth panel: share of the total cost coming from CPU vs. memory
total_cpu_cost = df_costs['cpu_cost_per_hour'].sum()
total_memory_cost = df_costs['memory_cost_per_hour'].sum()
axes[1, 1].pie(
    [total_cpu_cost, total_memory_cost],
    labels=['CPU Cost', 'Memory Cost'],
    autopct='%1.1f%%',
    colors=['orange', 'lightblue'],
)
axes[1, 1].set_title('Cost Breakdown')

plt.tight_layout()
plt.show()

# Summary statistics: daily = hourly * 24, monthly = daily * 30
hourly_cost_total = df_costs['total_cost_per_hour'].sum()
print("\n💰 Cost Summary:")
print(f"   Total Cost per Hour: ${hourly_cost_total:.4f}")
print(f"   Daily Cost Estimate: ${hourly_cost_total * 24:.2f}")
print(f"   Monthly Cost Estimate: ${hourly_cost_total * 24 * 30:.2f}")

cpu_series = df_metrics['cpu_cores']
memory_series = df_metrics['memory_mb']
print("\n📊 Resource Summary:")
print(f"   Total CPU Usage: {cpu_series.sum():.2f} cores")
print(f"   Total Memory Usage: {memory_series.sum():.1f} MB")
print(f"   Average CPU per Pod: {cpu_series.mean():.2f} cores")
print(f"   Average Memory per Pod: {memory_series.mean():.1f} MB")

## 3. Scaling Events Analysis

In [None]:
# Analyze scaling events
df_scaling = pd.DataFrame(scaling_events)

if not df_scaling.empty:
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # Scaling timeline: green markers are scale-ups, red markers are scale-downs
    colors = ['green' if action == 'scale_up' else 'red' for action in df_scaling['action']]
    axes[0].scatter(df_scaling['timestamp'], df_scaling['to_replicas'],
                   c=colors, s=100, alpha=0.7)
    axes[0].set_title('Scaling Events Timeline')
    axes[0].set_xlabel('Time')
    axes[0].set_ylabel('Replica Count')
    axes[0].tick_params(axis='x', rotation=45)

    # Scaling actions distribution.
    # value_counts() orders its result by frequency, so a positional colour
    # list could paint scale_down green. Look the colour up per label instead
    # so the pie always agrees with the timeline (up = green, down = red).
    action_counts = df_scaling['action'].value_counts()
    pie_palette = {'scale_up': 'lightgreen', 'scale_down': 'lightcoral'}
    pie_colors = [pie_palette.get(action, 'lightgray') for action in action_counts.index]
    axes[1].pie(action_counts.values, labels=action_counts.index,
               autopct='%1.1f%%', colors=pie_colors)
    axes[1].set_title('Scaling Actions Distribution')

    plt.tight_layout()
    plt.show()

    print("\n📈 Scaling Analysis:")
    print(f"   Total Scaling Events: {len(df_scaling)}")
    print(f"   Scale Up Events: {action_counts.get('scale_up', 0)}")
    print(f"   Scale Down Events: {action_counts.get('scale_down', 0)}")
    print(f"   Average Metric Value: {df_scaling['metric_value'].mean():.1f}")
    print(f"   Max Replicas: {df_scaling['to_replicas'].max()}")
    print(f"   Min Replicas: {df_scaling['to_replicas'].min()}")
else:
    print("📝 No scaling events to display")

## 4. Live Monitoring Function

In [None]:
def live_monitoring(duration_minutes=5, update_interval_seconds=30):
    """Poll the simulated metrics repeatedly and print a refreshing status panel.

    Each cycle takes a pod snapshot, prices it, records the totals, and
    redraws the cell output. The loop ends when ``duration_minutes`` have
    elapsed or when the user interrupts the kernel (Ctrl+C).

    Args:
        duration_minutes: Total monitoring time in minutes.
        update_interval_seconds: Pause between two snapshots, in seconds. The
            final pause is shortened so the run never outlasts
            ``duration_minutes``.

    Returns:
        A list with one dict per snapshot holding ``timestamp``,
        ``total_cpu``, ``total_memory``, ``total_cost_per_hour`` and
        ``pod_count``.
    """
    print(f"🔴 Starting live monitoring for {duration_minutes} minutes...")
    print(f"🔄 Updates every {update_interval_seconds} seconds")
    print("⏹️ Press Ctrl+C to stop\n")

    # Use a monotonic clock for the deadline so wall-clock adjustments cannot
    # shorten or extend the run.
    start_time = time.monotonic()
    end_time = start_time + (duration_minutes * 60)

    monitoring_data = []

    try:
        while time.monotonic() < end_time:
            # Get current metrics
            current_metrics = get_pod_metrics()
            current_costs = calculate_cost_attribution(current_metrics)

            # Calculate totals
            total_cpu = sum(m['cpu_cores'] for m in current_metrics)
            total_memory = sum(m['memory_mb'] for m in current_metrics)
            total_cost = sum(c['total_cost_per_hour'] for c in current_costs)

            # Store data point
            data_point = {
                'timestamp': datetime.now(),
                'total_cpu': total_cpu,
                'total_memory': total_memory,
                'total_cost_per_hour': total_cost,
                'pod_count': len(current_metrics)
            }
            monitoring_data.append(data_point)

            # Clear output and display current status
            clear_output(wait=True)

            print(f"🔴 Live Monitoring - {datetime.now().strftime('%H:%M:%S')}")
            print(f"⏱️ Elapsed: {(time.monotonic() - start_time)/60:.1f} minutes")
            print(f"📊 Total CPU: {total_cpu:.2f} cores")
            print(f"💾 Total Memory: {total_memory:.1f} MB")
            print(f"💰 Cost per Hour: ${total_cost:.4f}")
            print(f"🏃 Active Pods: {len(current_metrics)}")

            # Show trend if we have multiple data points
            if len(monitoring_data) > 1:
                prev_cost = monitoring_data[-2]['total_cost_per_hour']
                cost_change = total_cost - prev_cost
                trend = "📈" if cost_change > 0 else "📉" if cost_change < 0 else "➡️"
                print(f"📊 Cost Trend: {trend} {cost_change:+.4f}")

            print(f"\n⏹️ Press Ctrl+C to stop monitoring")

            # Wait for the next update, but never sleep past the deadline:
            # a full-length final sleep would overrun duration_minutes.
            remaining_seconds = end_time - time.monotonic()
            if remaining_seconds <= 0:
                break
            time.sleep(min(update_interval_seconds, remaining_seconds))

    except KeyboardInterrupt:
        print("\n⏹️ Monitoring stopped by user")

    print(f"\n✅ Monitoring completed. Collected {len(monitoring_data)} data points")
    return monitoring_data

# Live monitoring blocks the kernel for its whole duration, so the call is
# left disabled; uncomment the next line to run it.
# monitoring_data = live_monitoring(duration_minutes=2, update_interval_seconds=10)

print(
    "📊 Live monitoring function ready",
    "💡 Uncomment the line above to start live monitoring",
    "🔍 This will show real-time resource usage and cost changes",
    sep="\n",
)

## 5. Cost Report Generation

In [None]:
def generate_cost_report(metrics=None, scaling=None):
    """Print a cost report and return its headline figures.

    Args:
        metrics: Optional list of pod metric dicts in the shape produced by
            ``get_pod_metrics()``. When omitted, a fresh simulated snapshot is
            taken. Pass an existing snapshot to report on exactly the data
            shown elsewhere in the notebook.
        scaling: Optional list of scaling event dicts in the shape produced by
            ``get_scaling_events()``. When omitted, simulated events are used.

    Returns:
        A dict with ``total_hourly_cost``, ``daily_estimate`` (hourly * 24),
        ``monthly_estimate`` (hourly * 24 * 30), ``component_costs`` (one
        record per pod) and ``scaling_events`` (number of events).
    """
    print("📋 Generating Healthcare ML Demo Cost Report")
    print("=" * 50)

    # Get current data (simulated unless the caller supplied it)
    if metrics is None:
        metrics = get_pod_metrics()
    costs = calculate_cost_attribution(metrics)
    if scaling is None:
        scaling = get_scaling_events()

    df_costs = pd.DataFrame(costs)
    # An empty cost list yields a frame without columns; column lookups on it
    # would raise KeyError, so every per-pod section checks this flag first.
    has_costs = not df_costs.empty

    # Report sections
    print("\n💰 COST SUMMARY")
    print("-" * 20)
    total_hourly = df_costs['total_cost_per_hour'].sum() if has_costs else 0.0
    print(f"Current Hourly Cost: ${total_hourly:.4f}")
    print(f"Daily Estimate: ${total_hourly * 24:.2f}")
    print(f"Weekly Estimate: ${total_hourly * 24 * 7:.2f}")
    print(f"Monthly Estimate: ${total_hourly * 24 * 30:.2f}")

    print("\n📊 COST BY COMPONENT")
    print("-" * 25)
    if has_costs:
        for _, row in df_costs.iterrows():
            print(f"{row['pod_name']:<30} ${row['total_cost_per_hour']:.4f}/hour")
    else:
        print("No pod metrics available")

    print("\n📈 SCALING IMPACT")
    print("-" * 20)
    if scaling:
        df_scaling = pd.DataFrame(scaling)
        avg_replicas = df_scaling['to_replicas'].mean()
        max_replicas = df_scaling['to_replicas'].max()
        print(f"Average Replicas: {avg_replicas:.1f}")
        print(f"Peak Replicas: {max_replicas}")
        print(f"Scaling Events: {len(scaling)}")

        # Estimate scaling cost impact by scaling the current hourly cost by
        # the peak-to-average replica ratio. With an average of zero replicas
        # (everything scaled to zero) that ratio is undefined, so say so
        # instead of printing nan/inf.
        if avg_replicas > 0:
            peak_cost = total_hourly * (max_replicas / avg_replicas)
            print(f"Peak Cost Estimate: ${peak_cost:.4f}/hour")
        else:
            print("Peak Cost Estimate: n/a (average replica count is 0)")
    else:
        print("No scaling events recorded")

    print("\n🎯 COST OPTIMIZATION RECOMMENDATIONS")
    print("-" * 40)

    # Find highest cost pod
    if has_costs:
        highest_cost_pod = df_costs.loc[df_costs['total_cost_per_hour'].idxmax()]
        print(f"• Highest cost component: {highest_cost_pod['pod_name']} (${highest_cost_pod['total_cost_per_hour']:.4f}/hour)")

    # Resource efficiency
    df_metrics = pd.DataFrame(metrics)
    if not df_metrics.empty and df_metrics['cpu_cores'].mean() < 0.5:
        print("• Consider reducing CPU requests for underutilized pods")

    print("• Implement scale-to-zero for development workloads")
    print("• Use spot instances for non-critical batch processing")
    print("• Monitor and adjust KEDA scaling thresholds")

    print("\n" + "=" * 50)
    print(f"Report generated at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    return {
        'total_hourly_cost': total_hourly,
        'daily_estimate': total_hourly * 24,
        'monthly_estimate': total_hourly * 24 * 30,
        'component_costs': df_costs.to_dict('records'),
        'scaling_events': len(scaling)
    }

# Generate the cost report: the report text is printed as a side effect and the
# returned summary dict (totals, per-pod costs, event count) is kept in
# `cost_report`.
cost_report = generate_cost_report()