In [None]:
"""
quantify_improvement.py
Measure concrete improvements from clustering
"""

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Number of rows held out (from the end of the table) for scoring.
N_TEST_USERS = 400


def split_train_test(frame, n_test=N_TEST_USERS):
    """Split *frame* into (train, test), test being the last *n_test* rows.

    The hold-out is positional (same rows the script always scored: the
    tail of the table); train is everything before it, so no test row can
    influence the fitted means.

    Raises:
        ValueError: if *n_test* is not positive or the frame is too small
            to leave at least one training row.
    """
    if n_test <= 0 or len(frame) <= n_test:
        raise ValueError(
            f"need more than {n_test} rows to hold out {n_test} test users, "
            f"got {len(frame)}"
        )
    return frame.iloc[:-n_test], frame.iloc[-n_test:]


def fit_slope_means(train):
    """Return (global mean, per-cluster mean Series) of ``drift_slope``.

    Both statistics are computed from *train* only.
    """
    global_mean = train["drift_slope"].mean()
    per_cluster = train.groupby("cluster")["drift_slope"].mean()
    return global_mean, per_cluster


def predict_drift_slopes(test, global_mean, cluster_means):
    """Return (baseline, enhanced) ``drift_slope`` predictions for *test*.

    baseline: the global mean repeated for every row (numpy array).
    enhanced: each row's cluster mean (Series aligned with *test*); a cluster
        absent from *cluster_means* maps to NaN, which is replaced by the
        global mean so the metrics never receive NaN predictions.
    """
    baseline = np.full(len(test), global_mean)
    enhanced = test["cluster"].map(cluster_means).fillna(global_mean)
    return baseline, enhanced


def percent_reduction(baseline_error, enhanced_error):
    """Return the relative error reduction in percent.

    A zero baseline error cannot be improved on, so 0.0 is returned instead
    of dividing by zero.
    """
    if baseline_error == 0:
        return 0.0
    return (baseline_error - enhanced_error) / baseline_error * 100


# The run itself stays at module level (not wrapped in a function) so the
# result names remain visible to any code executed after this cell/script.
if __name__ == "__main__":
    # Load data
    traj = pd.read_parquet("runs/enhanced_v1_20251119_000135/data/user_trajectories.parquet")
    # Keep only rows with a non-negative cluster id
    # (presumably negative ids mark unassigned/noise users -- confirm).
    traj_valid = traj[traj["cluster"] >= 0].copy()

    print("\n" + "="*70)
    print("QUANTIFYING IMPROVEMENT: Baseline vs Enhanced")
    print("="*70)

    # Fit on everything except the hold-out; score on the hold-out only.
    train_users, test_users = split_train_test(traj_valid)
    global_mean_slope, cluster_means = fit_slope_means(train_users)
    baseline_predictions, enhanced_predictions = predict_drift_slopes(
        test_users, global_mean_slope, cluster_means
    )

    baseline_mae = mean_absolute_error(test_users["drift_slope"], baseline_predictions)
    baseline_rmse = np.sqrt(mean_squared_error(test_users["drift_slope"], baseline_predictions))

    print("\n1. BASELINE (Global Mean Predictor):")
    print(f"   MAE:  {baseline_mae:.4f}")
    print(f"   RMSE: {baseline_rmse:.4f}")

    enhanced_mae = mean_absolute_error(test_users["drift_slope"], enhanced_predictions)
    enhanced_rmse = np.sqrt(mean_squared_error(test_users["drift_slope"], enhanced_predictions))

    print("\n2. ENHANCED (Cluster-Specific Predictor):")
    print(f"   MAE:  {enhanced_mae:.4f}")
    print(f"   RMSE: {enhanced_rmse:.4f}")

    mae_improvement = percent_reduction(baseline_mae, enhanced_mae)
    rmse_improvement = percent_reduction(baseline_rmse, enhanced_rmse)

    print("\n3. IMPROVEMENT:")
    print(f"   MAE reduced by:  {mae_improvement:.2f}%")
    print(f"   RMSE reduced by: {rmse_improvement:.2f}%")

    # Verdict is based on MAE only: >5% counts as meaningful, >0% as modest.
    if mae_improvement > 5:
        print("   ✓ Clustering provides MEANINGFUL improvement!")
    elif mae_improvement > 0:
        print("   ≈ Clustering provides MODEST improvement")
    else:
        print("   ✗ Clustering does NOT improve prediction")

    print("\n4. INTERPRETABILITY GAIN:")
    print("   Baseline: Single global behavior")
    print(f"   Enhanced: {traj_valid['cluster'].nunique()} distinct user archetypes discovered")
    print("   → Can now personalize interventions per cluster!")

    print("\n" + "="*70)


QUANTIFYING IMPROVEMENT: Baseline vs Enhanced

1. BASELINE (Global Mean Predictor):
   MAE:  0.0217
   RMSE: 0.0612

2. ENHANCED (Cluster-Specific Predictor):
   MAE:  0.0195
   RMSE: 0.0534

3. IMPROVEMENT:
   MAE reduced by:  10.27%
   RMSE reduced by: 12.74%
   ✓ Clustering provides MEANINGFUL improvement!

4. INTERPRETABILITY GAIN:
   Baseline: Single global behavior
   Enhanced: 4 distinct user archetypes discovered
   → Can now personalize interventions per cluster!

