# Table 2 & Table 3: Aggregation

Per plan Part E: Notebooks ONLY aggregate. No training, no evaluation, no branching.

- Table 2 (Exp I): RMSE = sqrt(mean(error_r^2)) where error_r = estimate_r - truth_r
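- Table 3 (Exp II): mean +/- SE, where SE = std / sqrt(N), std is the sample standard deviation (ddof = 1) of the metric values, and N is the number of replicates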

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

## Load Results

Update the paths below to point to your experiment results (replace the `YYYYMMDD` placeholder in each path with the name of your actual results directory).

In [None]:
# Update these paths before running the notebook.
# Both are placeholder locations of the per-experiment results CSVs that the
# load cells pass to pd.read_csv; a missing file is reported there, not here.
EXP1_PATH = "../experiments/exp1_YYYYMMDD/exp1_results.csv"
EXP2_PATH = "../experiments/exp2_YYYYMMDD/exp2_results.csv"

In [None]:
# Read the Experiment I results CSV.
# exp1_df is left as None when the file does not exist, so the cells that
# build Table 2 and the summary can skip this experiment instead of failing.
try:
    exp1_df = pd.read_csv(EXP1_PATH)
except FileNotFoundError:
    exp1_df = None
    print(f"File not found: {EXP1_PATH}")
else:
    # Quick sanity check: row count plus a preview of the first rows.
    print(f"Exp I: {len(exp1_df)} rows loaded")
    display(exp1_df.head())

In [None]:
# Read the Experiment II results CSV.
# exp2_df is left as None when the file does not exist, so the cells that
# build Table 3 and the summary can skip this experiment instead of failing.
try:
    exp2_df = pd.read_csv(EXP2_PATH)
except FileNotFoundError:
    exp2_df = None
    print(f"File not found: {EXP2_PATH}")
else:
    # Quick sanity check: row count plus a preview of the first rows.
    print(f"Exp II: {len(exp2_df)} rows loaded")
    display(exp2_df.head())

## Table 2: Evaluation Accuracy (Exp I)

For each (method, metric): RMSE = sqrt(mean((estimate - truth)^2))

In [None]:
def compute_table2(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate Experiment I rows into per-(method, metric) accuracy figures.

    Reads the columns ``method``, ``metric``, ``estimate``, ``truth`` and
    ``replicate_id``. The input frame is not modified.

    Returns:
        One row per (method, metric) pair with the columns
        ``method``, ``metric``, ``rmse`` (square root of the mean squared
        error), ``bias`` (mean signed error) and ``n_replicates`` (count of
        non-null ``replicate_id`` values in the group).
    """

    def _root_mean(values: pd.Series) -> float:
        # Input is already squared, so this yields the RMSE of the group.
        return np.sqrt(values.mean())

    residual = df["estimate"] - df["truth"]
    # assign() works on a copy, leaving the caller's frame untouched.
    scored = df.assign(error=residual, sq_error=residual ** 2)

    per_group = scored.groupby(["method", "metric"]).agg(
        rmse=("sq_error", _root_mean),
        bias=("error", "mean"),
        n_replicates=("replicate_id", "count"),
    )
    return per_group.reset_index()

In [None]:
# Build Table 2 only when the Experiment I results were loaded
# (exp1_df is None when the CSV was not found).
if exp1_df is not None:
    table2 = compute_table2(exp1_df)
    print("\n=== TABLE 2: Evaluation Accuracy ===")
    # Wide layout: one row per method, one column per metric, cells hold the RMSE.
    display(table2.pivot(index="method", columns="metric", values="rmse"))

## Table 3: Training Comparison (Exp II)

For each (train_method, metric): mean +/- SE

In [None]:
def compute_table3(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate Experiment II rows into per-(train_method, metric) mean +/- SE.

    Reads the columns ``train_method``, ``metric``, ``value`` and
    ``replicate_id``. The input frame is not modified.

    The standard error is ``std / sqrt(N)`` where ``std`` is the sample
    standard deviation (pandas default, ddof=1) of ``value`` and ``N`` is the
    number of non-null ``value`` entries in the group, i.e. the same
    observations the mean and std were computed from.

    Returns:
        One row per (train_method, metric) pair with the columns
        ``train_method``, ``metric``, ``mean``, ``std``, ``n_replicates``
        (count of non-null ``replicate_id`` values), ``se`` and ``formatted``
        (``"<mean> +/- <se>"`` with four decimals). Groups with fewer than
        two values have a NaN ``std`` and therefore a NaN ``se``.
    """
    grouped = df.groupby(["train_method", "metric"]).agg(
        mean=("value", "mean"),
        std=("value", "std"),
        n_replicates=("replicate_id", "count"),
        n_values=("value", "count"),
    ).reset_index()

    # mean/std skip missing values, so the SE denominator must be the count of
    # non-null values rather than the count of replicate ids; the two differ
    # whenever a replicate row has a missing value. The helper column is
    # removed again so the returned columns stay as before.
    n_values = grouped.pop("n_values")
    grouped["se"] = grouped["std"] / np.sqrt(n_values)

    # Build the display strings column-wise instead of a row-wise apply:
    # same text, and it is well-defined when there are no groups at all.
    grouped["formatted"] = [
        f"{mean:.4f} +/- {se:.4f}"
        for mean, se in zip(grouped["mean"], grouped["se"])
    ]

    return grouped

In [None]:
# Build Table 3 only when the Experiment II results were loaded
# (exp2_df is None when the CSV was not found).
if exp2_df is not None:
    table3 = compute_table3(exp2_df)
    print("\n=== TABLE 3: Training Comparison ===")
    # Wide layout: one row per training method, one column per metric,
    # cells hold the pre-formatted "mean +/- SE" strings.
    display(table3.pivot(index="train_method", columns="metric", values="formatted"))

## Summary Statistics

In [None]:
# Print, for each experiment that was loaded, the distinct methods and metrics
# present in the raw results and the number of distinct replicate ids.
if exp1_df is not None:
    print("Exp I Summary:")
    print(f"  Methods: {exp1_df['method'].unique().tolist()}")
    print(f"  Metrics: {exp1_df['metric'].unique().tolist()}")
    print(f"  Replicates: {exp1_df['replicate_id'].nunique()}")

# Experiment II labels its methods in the 'train_method' column instead of 'method'.
if exp2_df is not None:
    print("\nExp II Summary:")
    print(f"  Methods: {exp2_df['train_method'].unique().tolist()}")
    print(f"  Metrics: {exp2_df['metric'].unique().tolist()}")
    print(f"  Replicates: {exp2_df['replicate_id'].nunique()}")