# Advanced Statistical Analysis for Carbon-Efficient Scheduling

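This notebook provides statistical analysis methods for evaluating carbon-efficient Kubernetes scheduling algorithms. Of the methods listed below, the code cells in this notebook currently implement two-sample hypothesis tests (independent t-test and Mann-Whitney U), Cohen's d effect size, and percentile bootstrap confidence intervals.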

## Statistical Methods Covered

1. **Descriptive Statistics**: Central tendency, variability, and distribution analysis
2. **Hypothesis Testing**: t-tests, ANOVA, non-parametric tests
3. **Effect Size Analysis**: Cohen's d, eta-squared, confidence intervals
4. **Bootstrap Methods**: Confidence intervals and significance testing
5. **Regression Analysis**: Linear, polynomial, and robust regression
6. **Time Series Analysis**: Trend analysis, seasonality, forecasting

## Analysis Objectives

- **Validate Performance Claims**: Statistically verify scheduler improvements
- **Quantify Uncertainty**: Provide confidence intervals for estimates
- **Compare Multiple Schedulers**: Handle multiple comparison problems
- **Model Relationships**: Understand factors affecting performance

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import yaml
import json
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Statistical libraries
from scipy import stats
from scipy.stats import ttest_ind, mannwhitneyu, kruskal, f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import statsmodels.api as sm

# Machine learning libraries
from sklearn.linear_model import LinearRegression, HuberRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Set up plotting
# Select the 'seaborn-v0_8' matplotlib style sheet and seaborn's "Set2" palette
# for every figure created after this cell.
# NOTE(review): the 'seaborn-v0_8' style name is only shipped with newer
# matplotlib releases -- confirm the pinned matplotlib version provides it.
plt.style.use('seaborn-v0_8')
sns.set_palette("Set2")
# IPython/Jupyter magic (not Python syntax): this line only works when the
# cell is executed by an IPython kernel, not when run as a plain .py script.
%matplotlib inline

print("✅ Libraries imported successfully")

## 1. Load and Prepare Data

In [None]:
# Load datasets
#
# Populates `datasets` with up to two entries:
#   'main'     - the main synthetic dataset, if its CSV exists
#   'baseline' - every baseline CSV that exists, stacked into one frame with
#                an added 'scheduler' column identifying the source file
# Missing files are reported and skipped rather than aborting the notebook.
data_path = Path('../data')
synthetic_dir = data_path / 'synthetic'
datasets = {}

# Load main dataset (only the read itself is guarded, so unrelated errors
# are not mistaken for a missing file)
try:
    main_data = pd.read_csv(synthetic_dir / 'main_dataset.csv')
except FileNotFoundError:
    print("❌ Main dataset not found")
else:
    datasets['main'] = main_data
    print(f"✅ Loaded main dataset: {len(main_data)} samples")

# Load baseline comparison data
baseline_files = ['baseline_kubernetes_default.csv', 'baseline_carbon_aware_v1.csv']
baseline_frames = []

for file in baseline_files:
    try:
        df = pd.read_csv(synthetic_dir / file)
    except FileNotFoundError:
        print(f"❌ Could not load {file}")
        continue

    # Derive the scheduler name by stripping only the leading 'baseline_'
    # and the file extension, so names that contain those substrings
    # elsewhere are left intact.
    scheduler_name = Path(file).stem
    if scheduler_name.startswith('baseline_'):
        scheduler_name = scheduler_name[len('baseline_'):]

    df['scheduler'] = scheduler_name
    baseline_frames.append(df)
    print(f"✅ Loaded {scheduler_name}: {len(df)} samples")

# Concatenate once at the end: growing a frame inside the loop re-copies the
# accumulated rows each time, and concatenating onto an empty DataFrame is
# deprecated behaviour in recent pandas releases.
if baseline_frames:
    baseline_data = pd.concat(baseline_frames, ignore_index=True)
else:
    baseline_data = pd.DataFrame()

if not baseline_data.empty:
    datasets['baseline'] = baseline_data
    print(f"📊 Combined baseline data: {len(baseline_data)} samples")

print(f"\n📈 Total datasets loaded: {len(datasets)}")

## 2. Comprehensive Hypothesis Testing

In [None]:
def comprehensive_hypothesis_testing(group1, group2, group1_name, group2_name, metric_name,
                                     *, alpha=0.05, equal_var=True):
    """Compare two independent samples with parametric and non-parametric tests.

    Runs an independent two-sample t-test and a two-sided Mann-Whitney U test,
    and computes Cohen's d (group2 minus group1, pooled sample standard
    deviation) as the effect size.

    Args:
        group1: 1-D numeric sample (pandas Series, ndarray, or sequence).
            NaN values are removed before any statistic is computed.
        group2: Second sample, same requirements as ``group1``.
        group1_name: Label stored under ``'group1'`` in the result.
        group2_name: Label stored under ``'group2'`` in the result.
        metric_name: Label stored under ``'metric'`` in the result.
        alpha: Significance threshold applied to both p-values.
        equal_var: Forwarded to ``scipy.stats.ttest_ind``; ``True`` is
            Student's t-test, ``False`` is Welch's t-test.

    Returns:
        dict with keys ``metric``, ``group1``, ``group2``, ``group1_n``,
        ``group2_n``, ``group1_mean``, ``group2_mean``, ``t_test``,
        ``mann_whitney`` (each with ``statistic``, ``p_value``,
        ``significant``) and ``effect_size`` (``cohens_d``, ``magnitude``).
        All numbers are plain Python ``int``/``float``/``bool`` so the dict
        can be passed to ``json.dump`` directly.

    Raises:
        ValueError: If either group has no non-missing observations.
    """
    # Work on float ndarrays so the result does not depend on the container:
    # Series.var() is the sample variance (ddof=1) while ndarray.var() is the
    # population variance (ddof=0).
    sample1 = np.asarray(group1, dtype=float).ravel()
    sample2 = np.asarray(group2, dtype=float).ravel()
    sample1 = sample1[~np.isnan(sample1)]
    sample2 = sample2[~np.isnan(sample2)]

    n1, n2 = int(sample1.size), int(sample2.size)
    if n1 == 0 or n2 == 0:
        raise ValueError(
            f"Cannot compare {group1_name!r} and {group2_name!r} on {metric_name!r}: "
            f"both groups need at least one non-missing observation (got {n1} and {n2})"
        )

    mean1 = float(sample1.mean())
    mean2 = float(sample2.mean())

    results = {
        'metric': metric_name,
        'group1': group1_name,
        'group2': group2_name,
        'group1_n': n1,
        'group2_n': n2,
        'group1_mean': mean1,
        'group2_mean': mean2
    }

    # Independent t-test
    t_stat, t_p = ttest_ind(sample1, sample2, equal_var=equal_var)
    results['t_test'] = {
        'statistic': float(t_stat),
        'p_value': float(t_p),
        'significant': bool(t_p < alpha)
    }

    # Mann-Whitney U test (non-parametric)
    u_stat, u_p = mannwhitneyu(sample1, sample2, alternative='two-sided')
    results['mann_whitney'] = {
        'statistic': float(u_stat),
        'p_value': float(u_p),
        'significant': bool(u_p < alpha)
    }

    # Effect size (Cohen's d). Summing squared deviations directly equals
    # (n1-1)*var1 + (n2-1)*var2 but stays finite when a group has a single
    # observation (whose ddof=1 variance would be NaN).
    dof = n1 + n2 - 2
    if dof > 0:
        squared_dev = ((sample1 - mean1) ** 2).sum() + ((sample2 - mean2) ** 2).sum()
        pooled_std = float(np.sqrt(squared_dev / dof))
    else:
        pooled_std = float('nan')

    mean_diff = mean2 - mean1
    if pooled_std > 0:
        cohens_d = mean_diff / pooled_std
    elif pooled_std == 0 and mean_diff != 0:
        # No within-group spread but different means: unbounded effect.
        cohens_d = float(np.copysign(np.inf, mean_diff))
    else:
        # 0/0 or too few observations: the effect size is not defined.
        cohens_d = float('nan')

    # A NaN effect size fails every '<' comparison, so it must be labelled
    # explicitly instead of falling through to 'Large'.
    if np.isnan(cohens_d):
        magnitude = 'Undefined'
    elif abs(cohens_d) < 0.5:
        magnitude = 'Small'
    elif abs(cohens_d) < 0.8:
        magnitude = 'Medium'
    else:
        magnitude = 'Large'

    results['effect_size'] = {
        'cohens_d': float(cohens_d),
        'magnitude': magnitude
    }

    return results

# Perform hypothesis testing on baseline data
#
# Compares every scheduler in the combined baseline frame against the
# reference scheduler on the 'carbon_efficiency' metric and prints one
# summary per comparison. Results are collected in `hypothesis_results`.
if 'baseline' in datasets:
    baseline_df = datasets['baseline']
    schedulers = baseline_df['scheduler'].unique()
    reference_scheduler = 'kubernetes_default'
    metric = 'carbon_efficiency'

    if reference_scheduler in schedulers:
        ref_data = baseline_df[baseline_df['scheduler'] == reference_scheduler]

        hypothesis_results = []

        # The column check and the reference sample do not change between
        # comparisons, so they are evaluated once outside the loop.
        if metric in ref_data.columns:
            ref_values = ref_data[metric].dropna()

            for scheduler in schedulers:
                if scheduler == reference_scheduler:
                    continue

                comp_data = baseline_df[baseline_df['scheduler'] == scheduler]
                comp_values = comp_data[metric].dropna()

                # Same guard the bootstrap cell applies: a scheduler whose
                # metric is entirely missing cannot be tested (the
                # Mann-Whitney test raises on an empty sample).
                if ref_values.empty or comp_values.empty:
                    print(f"⚠️ Skipping {scheduler}: no non-missing {metric} values to compare")
                    continue

                # Test carbon efficiency
                result = comprehensive_hypothesis_testing(
                    ref_values,
                    comp_values,
                    reference_scheduler,
                    scheduler,
                    metric
                )
                hypothesis_results.append(result)

        # Display results
        print("🧪 Comprehensive Hypothesis Testing Results:")
        print("=" * 80)

        for result in hypothesis_results:
            print(f"\n📊 {result['group2']} vs {result['group1']} ({result['metric']})")
            print(f"   Sample sizes: {result['group1_n']} vs {result['group2_n']}")
            print(f"   Means: {result['group1_mean']:.4f} vs {result['group2_mean']:.4f}")
            print(f"   T-test p-value: {result['t_test']['p_value']:.4f} {'✅' if result['t_test']['significant'] else '❌'}")
            print(f"   Mann-Whitney p-value: {result['mann_whitney']['p_value']:.4f} {'✅' if result['mann_whitney']['significant'] else '❌'}")
            print(f"   Effect size: {result['effect_size']['cohens_d']:.4f} ({result['effect_size']['magnitude']})")

## 3. Bootstrap Analysis

In [None]:
def bootstrap_confidence_interval(data, statistic_func, n_bootstrap=1000, confidence_level=0.95,
                                  random_state=None):
    """Estimate a percentile bootstrap confidence interval for any statistic.

    Draws ``n_bootstrap`` resamples of ``data`` (same size, with replacement),
    applies ``statistic_func`` to each, and takes the central
    ``confidence_level`` share of the resulting distribution as the interval.

    Args:
        data: 1-D sample (pandas Series, ndarray, or sequence).
        statistic_func: Callable taking a 1-D ndarray and returning a scalar.
        n_bootstrap: Number of bootstrap resamples; must be at least 1.
        confidence_level: Coverage of the interval, within [0, 1].
        random_state: ``None`` keeps the previous behaviour of drawing from
            NumPy's global random state (so ``np.random.seed`` still applies).
            Any other value is passed to ``numpy.random.default_rng`` to get
            an independent, reproducible stream.

    Returns:
        dict with ``statistic`` (the statistic of the observed data),
        ``ci_lower``, ``ci_upper`` and ``bootstrap_distribution`` (ndarray of
        the ``n_bootstrap`` resampled statistics).

    Raises:
        ValueError: If ``confidence_level`` is outside [0, 1] or
            ``n_bootstrap`` is smaller than 1.
    """
    if not 0 <= confidence_level <= 1:
        raise ValueError(
            f"confidence_level must be within [0, 1], got {confidence_level!r}"
        )
    if n_bootstrap < 1:
        raise ValueError(f"n_bootstrap must be at least 1, got {n_bootstrap!r}")

    # Convert once so the point estimate and every replicate are computed on
    # the same container type.
    values = np.asarray(data)
    n = len(values)

    # np.random (module) and Generator both expose .choice with this signature.
    rng = np.random if random_state is None else np.random.default_rng(random_state)

    bootstrap_stats = np.array([
        statistic_func(rng.choice(values, size=n, replace=True))
        for _ in range(n_bootstrap)
    ])

    # Calculate confidence interval: cut alpha/2 from each tail.
    alpha = 1 - confidence_level
    lower_percentile = (alpha / 2) * 100
    upper_percentile = (1 - alpha / 2) * 100
    ci_lower, ci_upper = np.percentile(bootstrap_stats, [lower_percentile, upper_percentile])

    return {
        'statistic': statistic_func(values),
        'ci_lower': ci_lower,
        'ci_upper': ci_upper,
        'bootstrap_distribution': bootstrap_stats
    }

# Perform bootstrap analysis
#
# Prints the bootstrap mean and 95% confidence interval of
# 'carbon_efficiency' for the reference scheduler first, then for every
# other scheduler that has at least one non-missing value.
if 'baseline' in datasets:
    baseline_df = datasets['baseline']
    schedulers = baseline_df['scheduler'].unique()
    reference_scheduler = 'kubernetes_default'

    if reference_scheduler in schedulers and 'carbon_efficiency' in baseline_df.columns:
        is_reference = baseline_df['scheduler'] == reference_scheduler
        ref_data = baseline_df.loc[is_reference, 'carbon_efficiency'].dropna()

        print("🔄 Bootstrap Analysis Results:")
        print("=" * 60)

        # Bootstrap confidence interval for reference scheduler
        ref_bootstrap = bootstrap_confidence_interval(ref_data, np.mean)
        ref_low, ref_high = ref_bootstrap['ci_lower'], ref_bootstrap['ci_upper']
        print(f"\n📊 {reference_scheduler} Carbon Efficiency:")
        print(f"   Mean: {ref_bootstrap['statistic']:.4f}")
        print(f"   95% CI: [{ref_low:.4f}, {ref_high:.4f}]")

        # Compare with other schedulers
        for scheduler in schedulers:
            if scheduler == reference_scheduler:
                continue

            is_current = baseline_df['scheduler'] == scheduler
            comp_data = baseline_df.loc[is_current, 'carbon_efficiency'].dropna()

            # Nothing to resample for a scheduler with no usable values.
            if len(comp_data) == 0:
                continue

            comp_bootstrap = bootstrap_confidence_interval(comp_data, np.mean)
            comp_low, comp_high = comp_bootstrap['ci_lower'], comp_bootstrap['ci_upper']

            print(f"\n📊 {scheduler}:")
            print(f"   Mean: {comp_bootstrap['statistic']:.4f}")
            print(f"   95% CI: [{comp_low:.4f}, {comp_high:.4f}]")

## 4. Summary and Export Results

In [None]:
# Export statistical analysis results
#
# Writes a small JSON summary (timestamp, analysis type, names of the loaded
# datasets and the methods applied) to ../results and prints a closing note.
methods_used = [
    'hypothesis_testing',
    'bootstrap_methods',
    'effect_size_analysis',
]

analysis_summary = {}
analysis_summary['timestamp'] = datetime.now().isoformat()
analysis_summary['analysis_type'] = 'comprehensive_statistical_analysis'
analysis_summary['datasets_analyzed'] = list(datasets)
analysis_summary['methods_used'] = methods_used

# Create results directory if it doesn't exist
results_dir = Path('../results')
results_dir.mkdir(exist_ok=True)

# Save results
summary_file = results_dir / 'statistical_analysis_summary.json'
with summary_file.open('w') as handle:
    json.dump(analysis_summary, handle, indent=2)

closing_lines = (
    "\n✅ Statistical analysis complete!",
    "📊 Results saved to: ../results/statistical_analysis_summary.json",
    "\n🔍 Key Insights:",
    "   • Use normality tests to choose appropriate statistical methods",
    "   • Bootstrap methods provide robust confidence intervals",
    "   • Effect size quantifies practical significance",
    "   • Multiple comparison corrections prevent false discoveries",
)
for line in closing_lines:
    print(line)