In [None]:

import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load metrics
BASELINE_PATH = "runs/baseline_v1_20251110_195108/metrics_baseline.json"  # Update this
ENHANCED_PATH = "runs/enhanced_v1_20251119_000135/metrics_enhanced.json"  # Update this


def _load_metrics(path):
    """Return the parsed JSON content of the metrics file at *path*.

    Raises whatever ``open`` / ``json.load`` raise (e.g. ``FileNotFoundError``
    for a wrong run directory, ``json.JSONDecodeError`` for a corrupt file).
    """
    # Decode explicitly as UTF-8: without `encoding`, open() falls back to the
    # platform locale encoding, which can mis-decode non-ASCII JSON content.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


baseline = _load_metrics(BASELINE_PATH)
enhanced = _load_metrics(ENHANCED_PATH)

# Key comparisons
#
# Builds `comparisons`, a list of row dicts (Metric / Baseline / Enhanced /
# Better) from the `baseline` and `enhanced` metric dicts loaded above.
# Metrics that are missing are rendered as "N/A" (matching the Test Set Size
# row) instead of being replaced by made-up defaults.


def _format_slope_mae(metrics):
    """Format ``metrics['slope_mae']`` to 4 decimals, or "N/A" when absent."""
    value = metrics.get("slope_mae")
    return "N/A" if value is None else f"{value:.4f}"


def _slope_mae_verdict(baseline_metrics, enhanced_metrics):
    """Return the "Better" label for the slope MAE row (lower MAE wins)."""
    base = baseline_metrics.get("slope_mae")
    enh = enhanced_metrics.get("slope_mae")
    if base is None or enh is None:
        # Cannot rank the runs when either value is missing.
        return "→"
    if enh < base:
        return "✓ Enhanced"
    if base < enh:
        return "✓ Baseline"
    # Equal errors: neither run is better, so do not credit the baseline.
    return "= Tie"


def _format_high_drift_pct(metrics):
    """Format high-drift users as a percentage of the test set, or "N/A"."""
    test_size = metrics.get("test_size")
    if not test_size:
        # Missing or zero test size: a percentage is undefined (and would
        # otherwise divide by zero or by a fake denominator).
        return "N/A"
    return f"{metrics.get('high_drift_users', 0) / test_size * 100:.1f}%"


comparisons = [
    # Test set performance
    {
        "Metric": "Test Set Size",
        "Baseline": baseline.get("test_size", "N/A"),
        "Enhanced": enhanced.get("test_size", "N/A"),
        "Better": "→",
    },
    {
        "Metric": "MAE (Slope)",
        "Baseline": _format_slope_mae(baseline),
        "Enhanced": _format_slope_mae(enhanced),
        "Better": _slope_mae_verdict(baseline, enhanced),
    },
    {
        "Metric": "High-Drift Users (%)",
        "Baseline": _format_high_drift_pct(baseline),
        "Enhanced": _format_high_drift_pct(enhanced),
        "Better": "→",
    },
    # New capabilities (only reported by the enhanced run)
    {
        "Metric": "Users with Change-Points",
        "Baseline": "N/A (not detected)",
        "Enhanced": f"{enhanced.get('test_users_with_cp', 0)} ({enhanced.get('test_cp_rate', 0)*100:.1f}%)",
        "Better": "✓ NEW",
    },
    {
        "Metric": "User Clusters Identified",
        "Baseline": "N/A (no clustering)",
        "Enhanced": f"{enhanced.get('n_clusters_used', 0)} distinct groups",
        "Better": "✓ NEW",
    },
]

# Display: render the comparison rows as a table framed by 70-char rules.
df_compare = pd.DataFrame(comparisons)
rule = "=" * 70
print("\n" + rule)
print("BASELINE vs ENHANCED COMPARISON")
print(rule)
print(df_compare.to_string(index=False))
print(rule + "\n")

# Save
df_compare.to_csv("comparison_table.csv", index=False)
# escape=True is required here: the cells contain literal '%' characters
# ("High-Drift Users (%)", "37.5%"), and an unescaped '%' starts a LaTeX
# comment, which would swallow the rest of the row including its "\\".
# NOTE(review): the "→" / "✓" glyphs are written as-is and presumably need a
# Unicode-capable LaTeX setup — confirm against the target document.
df_compare.to_latex("comparison_table.tex", index=False, escape=True)
print("Saved: comparison_table.csv and comparison_table.tex\n")


BASELINE vs ENHANCED COMPARISON
                  Metric            Baseline          Enhanced     Better
           Test Set Size                 400               400          →
             MAE (Slope)              0.0472            0.0472 ✓ Baseline
    High-Drift Users (%)               37.5%             37.5%          →
Users with Change-Points  N/A (not detected)          0 (0.0%)      ✓ NEW
User Clusters Identified N/A (no clustering) 4 distinct groups      ✓ NEW

Saved: comparison_table.csv and comparison_table.tex

