# Task-level mean ± SD for filtered joints and wheels (across trials)

This notebook computes **task-level** statistics across multiple trials **using the trial-as-unit approach**:

1. For each trial and each signal (e.g., `joint_pos_1_f5`), compute the **trial mean** over time.
2. Across trials, compute **mean ± SD of the trial means** (sample SD by default).

**Outputs (saved to the notebook's current working directory):**
- `per_trial_means.csv` — trial means for each signal
- `task_level_mean_sd.csv` — task-level mean, SD, and formatted mean±sd for each signal


In [None]:
import os
import numpy as np
import pandas as pd

# ---- Inputs: one CSV per trial (nine trials, named 1.csv .. 9.csv) ----
# Swap in an explicit list of paths if the files are not numbered sequentially.
TRIAL_FILES = [f"{trial_no}.csv" for trial_no in range(1, 10)]

# ---- Column naming (filtered signals, "_f5" suffix) ----
JOINT_IDS = list(range(1, 7))  # joints 1..6

JOINT_POS_COLS = ["joint_pos_%d_f5" % joint for joint in JOINT_IDS]
JOINT_VEL_COLS = ["joint_vel_%d_f5" % joint for joint in JOINT_IDS]

# Left wheel first, then right; angle before speed for each side.
WHEEL_COLS = [
    f"wheel_{side}_{quantity}_f5"
    for side in ("left", "right")
    for quantity in ("angle", "speed")
]

# Order matters downstream: joint positions, joint velocities, then wheels.
SIGNALS = [*JOINT_POS_COLS, *JOINT_VEL_COLS, *WHEEL_COLS]

# ---- Stats settings ----
# ddof=1 -> sample SD across trials (trials are treated as samples of the task).
DDOF = 1

# Echo the configuration so the run is self-describing.
print("Trials:")
for trial_path in TRIAL_FILES:
    print(" -", trial_path)

print("\nSignals (filtered):")
for signal_name in SIGNALS:
    print(" -", signal_name)


Trials:
 - 1.csv
 - 2.csv
 - 3.csv
 - 4.csv
 - 5.csv
 - 6.csv
 - 7.csv
 - 8.csv
 - 9.csv

Signals (filtered):
 - joint_pos_1_f5
 - joint_pos_2_f5
 - joint_pos_3_f5
 - joint_pos_4_f5
 - joint_pos_5_f5
 - joint_pos_6_f5
 - joint_vel_1_f5
 - joint_vel_2_f5
 - joint_vel_3_f5
 - joint_vel_4_f5
 - joint_vel_5_f5
 - joint_vel_6_f5
 - wheel_left_angle_f5
 - wheel_left_speed_f5
 - wheel_right_angle_f5
 - wheel_right_speed_f5


In [None]:
def _to_numeric_clean(series: pd.Series) -> np.ndarray:
    """Convert to float array and drop NaNs."""
    x = pd.to_numeric(series, errors="coerce").to_numpy(dtype=float)
    return x[~np.isnan(x)]

def compute_trial_means(trial_files, signals):
    """Average every signal over time, one row per trial.

    Parameters
    ----------
    trial_files : sequence of paths, each read with ``pd.read_csv``.
        Trials are numbered from 1 in the order given.
    signals : iterable of column names to average.

    Returns
    -------
    (table, missing_by_file)
        ``table`` is indexed by ``trial`` and holds a ``file`` column (the
        file's base name) plus one column per signal with its mean over the
        file's rows. A signal is NaN when its column is absent or has no
        numeric samples.
        ``missing_by_file`` maps a file's base name to the list of signal
        columns that were absent from it; files with nothing missing are
        not listed.
    """
    records = []
    absent_by_file = {}

    for trial_no, csv_path in enumerate(trial_files, start=1):
        frame = pd.read_csv(csv_path)
        record = {"trial": trial_no, "file": os.path.basename(csv_path)}
        not_found = []

        for name in signals:
            if name in frame.columns:
                samples = _to_numeric_clean(frame[name])
                # An all-NaN / empty column has no mean; report NaN instead.
                record[name] = float(np.mean(samples)) if samples.size else np.nan
            else:
                record[name] = np.nan
                not_found.append(name)

        if not_found:
            absent_by_file[record["file"]] = not_found
        records.append(record)

    table = pd.DataFrame(records).set_index("trial")
    return table, absent_by_file

# Aggregate every configured trial file into one row of per-signal time means.
per_trial_means, missing = compute_trial_means(TRIAL_FILES, SIGNALS)

print("Per-trial means shape:", per_trial_means.shape)
display(per_trial_means.head())

# Surface any signal columns a file lacked; at most 8 names are shown per file.
if len(missing) > 0:
    print("\nMissing columns detected (these will be NaN):")
    for fname, absent in missing.items():
        tail = " ..." if len(absent) > 8 else ""
        print(f"  {fname}: {absent[:8]}{tail}")


Per-trial means shape: (9, 17)


Unnamed: 0_level_0,file,joint_pos_1_f5,joint_pos_2_f5,joint_pos_3_f5,joint_pos_4_f5,joint_pos_5_f5,joint_pos_6_f5,joint_vel_1_f5,joint_vel_2_f5,joint_vel_3_f5,joint_vel_4_f5,joint_vel_5_f5,joint_vel_6_f5,wheel_left_angle_f5,wheel_left_speed_f5,wheel_right_angle_f5,wheel_right_speed_f5
trial,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,1.csv,0.172717,0.248357,-1.638887,1.178263,-1.757056,-0.020502,0.0,-0.001418,-0.005494,-0.001806,0.002841,0.004093,1.165,0.0,-0.605,0.0
2,2.csv,0.166281,0.257696,-0.65093,1.141917,-1.734059,-0.088844,0.0,-0.000873,-0.003846,-0.001629,0.002711,0.001935,1.165,0.0,-0.605,0.0
3,3.csv,0.12133,0.254995,-1.219629,1.172872,-1.785227,-0.732526,0.0,-0.001348,-0.004456,-0.000945,0.00201,0.002969,1.165,0.0,-0.605,0.0
4,4.csv,0.128745,0.240951,-0.149284,1.274611,-1.576049,-0.736579,0.0,-0.000548,-0.001745,-1.4e-05,0.000661,0.00034,2.193,0.0,3.096,0.0
5,5.csv,0.220904,0.225494,-1.529356,1.324385,-1.879685,1.094348,0.0,-0.000503,-0.000673,-0.000521,0.000517,0.000617,2.193,0.0,3.096,0.0


In [None]:
def task_level_mean_sd(per_trial_means: pd.DataFrame, ddof: int = 1) -> pd.DataFrame:
    """Compute task-level mean and SD across trials for each signal (trial-as-unit).

    Parameters
    ----------
    per_trial_means : DataFrame with one row per trial and one numeric column
        per signal (each cell is that trial's mean over time).
    ddof : delta degrees of freedom passed to ``DataFrame.std``
        (1 = sample SD, 0 = population SD).

    Returns
    -------
    DataFrame indexed by ``signal`` with columns ``mean``, ``sd`` and
    ``mean±sd``. NaN trial values are skipped. ``mean±sd`` is formatted with
    6 significant digits, or is the empty string when either statistic is
    NaN (e.g. a single trial with ``ddof=1``).

    The input frame is not modified.
    """

    def _format_row(row: pd.Series) -> str:
        mu, sigma = row["mean"], row["sd"]
        if pd.isna(mu) or pd.isna(sigma):
            return ""
        return f"{mu:.6g} ± {sigma:.6g}"

    mean = per_trial_means.mean(axis=0, skipna=True)
    sd = per_trial_means.std(axis=0, ddof=ddof, skipna=True)

    # The index of the reduction results can be the very same Index object as
    # the caller's ``columns``. Assigning ``summary.index.name`` would then
    # rename the caller's column axis as a side effect, so attach the name via
    # ``rename_axis``, which works on a renamed copy of the index.
    summary = pd.DataFrame({"mean": mean, "sd": sd}).rename_axis("signal")
    summary["mean±sd"] = summary.apply(_format_row, axis=1)
    return summary

# Summarise only the signal columns: per_trial_means also carries the text
# "file" column (the CSV base name), which has no meaningful mean or SD.
task_summary = task_level_mean_sd(per_trial_means[SIGNALS], ddof=DDOF)
display(task_summary)


Unnamed: 0_level_0,mean,sd,mean±sd
signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
joint_pos_1_f5,0.162426,0.03586,0.162426 ± 0.0358602
joint_pos_2_f5,0.190393,0.069505,0.190393 ± 0.0695047
joint_pos_3_f5,-1.008295,0.592763,-1.00829 ± 0.592763
joint_pos_4_f5,1.191689,0.06666,1.19169 ± 0.0666595
joint_pos_5_f5,-1.843743,0.140611,-1.84374 ± 0.140611
joint_pos_6_f5,0.418759,0.907375,0.418759 ± 0.907375
joint_vel_1_f5,0.0,0.0,0 ± 0
joint_vel_2_f5,-0.00152,0.001078,-0.00152014 ± 0.00107784
joint_vel_3_f5,-0.004125,0.002698,-0.00412539 ± 0.0026976
joint_vel_4_f5,-0.001627,0.001537,-0.00162702 ± 0.00153665


In [None]:
# ---- Save outputs ----
# Relative paths: both files land in the notebook's working directory.
OUT_TRIAL = "per_trial_means.csv"
OUT_TASK = "task_level_mean_sd.csv"

outputs = {OUT_TRIAL: per_trial_means, OUT_TASK: task_summary}

# Write everything first, then report, so "Saved:" lines only appear once
# both files have been written.
for out_path, frame in outputs.items():
    frame.to_csv(out_path, index=True)

for out_path in outputs:
    print("Saved:", out_path)


Saved: per_trial_means.csv
Saved: task_level_mean_sd.csv


## Optional: grouped “mean ± sd” view

A compact table with J1–J6 position/velocity plus wheel position/velocity.


In [None]:
def grouped_view(task_summary: pd.DataFrame) -> pd.DataFrame:
    """Arrange the formatted ``mean±sd`` strings into a compact, labelled table.

    Rows appear in this order: ``J<j>_pos`` for every joint in the
    module-level ``JOINT_IDS``, then ``J<j>_vel`` for the same joints, then
    ``WL_pos``, ``WL_vel``, ``WR_pos``, ``WR_vel``.

    Parameters
    ----------
    task_summary : DataFrame indexed by signal column name with a
        ``mean±sd`` column.

    Returns
    -------
    DataFrame indexed by ``signal`` (the short label) with one column,
    ``task_mean±sd``. A signal missing from ``task_summary`` yields ``""``.
    """

    def _cell(column: str):
        # Signals absent from the summary render as an empty cell.
        if column not in task_summary.index:
            return ""
        return task_summary.loc[column, "mean±sd"]

    # (short label, source column) pairs, in display order.
    labelled = [(f"J{j}_pos", f"joint_pos_{j}_f5") for j in JOINT_IDS]
    labelled += [(f"J{j}_vel", f"joint_vel_{j}_f5") for j in JOINT_IDS]
    labelled += [
        ("WL_pos", "wheel_left_angle_f5"),
        ("WL_vel", "wheel_left_speed_f5"),
        ("WR_pos", "wheel_right_angle_f5"),
        ("WR_vel", "wheel_right_speed_f5"),
    ]

    table = pd.DataFrame(
        [(label, _cell(column)) for label, column in labelled],
        columns=["signal", "task_mean±sd"],
    )
    return table.set_index("signal")

# Build the compact joint/wheel "mean ± sd" table from the task-level summary
# and render it.
grouped = grouped_view(task_summary)
display(grouped)


Unnamed: 0_level_0,task_mean±sd
signal,Unnamed: 1_level_1
J1_pos,0.162426 ± 0.0358602
J2_pos,0.190393 ± 0.0695047
J3_pos,-1.00829 ± 0.592763
J4_pos,1.19169 ± 0.0666595
J5_pos,-1.84374 ± 0.140611
J6_pos,0.418759 ± 0.907375
J1_vel,0 ± 0
J2_vel,-0.00152014 ± 0.00107784
J3_vel,-0.00412539 ± 0.0026976
J4_vel,-0.00162702 ± 0.00153665
