CELL 1 — Imports

In [None]:
import os
from pathlib import Path
import json
import numpy as np
import pandas as pd


CELL 2 — Konfigurasi path trial yang mau dibandingkan

In [None]:
# Subject folder labels: one letter per subject, "A" through "J".
SUBJECTS = [*"ABCDEFGHIJ"]
# Recording indices 1 through 72 inclusive, as used in the `Jalan{trial}` file names.
TRIALS = [*range(1, 73)]


# Output root directory of every GMM trial being compared, keyed by the label
# that ends up in the result tables. Insertion order is the evaluation order.
# NOTE(review): "Trial8" has no underscore, unlike the other keys — confirm
# whether that is intentional before relying on the label elsewhere.
TRIAL_ROOTS = dict(
    Trial_1=Path(r"E:\0.TA_Teguh\GMM Trial 1"),
    Trial_2=Path(r"E:\0.TA_Teguh\GMM Trial 2"),
    Trial_3=Path(r"E:\0.TA_Teguh\GMM Trial 3"),
    Trial_4=Path(r"E:\0.TA_Teguh\GMM Trial 4"),
    Trial8=Path(r"E:\0.TA_Teguh\GMM Trial 8"),
)

# Nama file yang diharapkan
def head2_path(root: Path, subject: str, trial: int) -> Path:
    """Return the expected Head-2 CSV location for one subject/trial pair.

    The layout is ``<root>/Head 2/<subject>/Jalan<trial>.csv``. The path is
    only constructed here; nothing checks that the file exists.
    """
    filename = f"Jalan{trial}.csv"
    return root.joinpath("Head 2", subject, filename)

def head3_path(root: Path, subject: str, trial: int) -> Path:
    """Return the expected Head-3 JSONL location for one subject/trial pair.

    The layout is ``<root>/Head 3/<subject>/Jalan<trial>.jsonl``. The path is
    only constructed here; nothing checks that the file exists.
    """
    filename = f"Jalan{trial}.jsonl"
    return root.joinpath("Head 3", subject, filename)


CELL 3 — Loader Head-2 (frame-level)

In [None]:
def load_head2_one_file(path: Path) -> pd.DataFrame:
    """Load one Head-2 (frame-level) CSV into a normalized DataFrame.

    Column names are matched case-insensitively after stripping whitespace,
    and a few alternative spellings are accepted for each canonical name.

    Required columns (after normalization):
    - frame
    - N_roi
    - valid_minpts
    - N_inlier

    Optional column:
    - conf (filled with NaN when the file does not provide it)

    Args:
        path: CSV file to read (anything ``pandas.read_csv`` accepts).

    Returns:
        The rows sorted by ``frame`` with a fresh index. ``N_roi``,
        ``N_inlier`` and ``valid_minpts`` are integers (unparseable cells
        become 0), ``conf`` is numeric (unparseable cells become NaN), and
        ``inlier_ratio`` is ``N_inlier / N_roi`` or NaN where ``N_roi`` is 0.

    Raises:
        ValueError: if any required column is missing.
    """
    # Canonical column name -> accepted spellings (compared after strip + lower).
    aliases = {
        "frame": {"frame"},
        "N_roi": {"n_roi", "nroi", "points_roi", "roi_points"},
        "valid_minpts": {"valid_minpts", "valid", "valid_frame"},
        "N_inlier": {"n_inlier", "ninlier", "points_inlier", "inlier_points"},
        "conf": {"conf", "confidence"},
    }

    df = pd.read_csv(path)

    # Rename at most ONE source column per canonical name. Renaming every
    # match would create duplicate column names when a file carries both the
    # canonical column and an alias (e.g. "valid" next to "valid_minpts"),
    # which breaks the numeric conversions below.
    rename = {}
    for canonical, spellings in aliases.items():
        if canonical in df.columns:
            # The exact canonical name always wins; aliases stay untouched.
            continue
        for col in df.columns:
            if str(col).strip().lower() in spellings:
                rename[col] = canonical
                break
    df = df.rename(columns=rename)

    required = ["frame", "N_roi", "valid_minpts", "N_inlier"]
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"Missing columns {missing} in {path}")

    df["frame"] = df["frame"].astype(int)
    for c in ["N_roi", "N_inlier", "valid_minpts"]:
        df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0).astype(int)

    if "conf" in df.columns:
        df["conf"] = pd.to_numeric(df["conf"], errors="coerce")
    else:
        df["conf"] = np.nan

    # Guard the ratio so frames with an empty ROI yield NaN instead of inf.
    df["inlier_ratio"] = np.where(df["N_roi"] > 0, df["N_inlier"] / df["N_roi"], np.nan)

    return df.sort_values("frame").reset_index(drop=True)


In [None]:
def sanity_check_trial_roots(max_show=5):
    """Print whether each configured trial root and a sample file pair exist.

    For every entry of ``TRIAL_ROOTS`` this reports the existence of the root
    folder and of its ``Head 2`` / ``Head 3`` subfolders, then probes the
    files for subject "A", trial 1 as a quick spot check.

    ``max_show`` is accepted but not used by the current implementation.
    """
    print("Sanity check TRIAL_ROOTS ...")
    for label, base in TRIAL_ROOTS.items():
        has_root = base.exists()
        has_h2 = (base / "Head 2").exists()
        has_h3 = (base / "Head 3").exists()
        all_ok = has_root and has_h2 and has_h3
        print(f"- {label:12s} | exists={has_root} | Head2={has_h2} | Head3={has_h3} | OK={all_ok}")

        # Spot check a single known file pair: A/Jalan1.
        probe_h2 = head2_path(base, "A", 1)
        probe_h3 = head3_path(base, "A", 1)
        print(f"    sample H2: {probe_h2.exists()}  ({probe_h2})")
        print(f"    sample H3: {probe_h3.exists()}  ({probe_h3})")

# Run the path check right away so missing folders show up before the batch run.
sanity_check_trial_roots()


CELL 4 — Loader Head-3 (μ per frame) + centroid jitter

In [None]:
def load_head3_one_file(path: Path) -> pd.DataFrame:
    """Load one Head-3 JSONL file (per-frame centroid) and compute jitter.

    Each non-blank line is parsed as a JSON object that should carry at least:
    - frame
    - mu : [x, y, z]
    Any other keys are ignored.

    Records that cannot be used are skipped instead of aborting the load:
    lines that are not JSON objects, records without ``frame``, records whose
    ``mu`` is missing or is not a 3-element list, and records whose values
    cannot be converted to numbers. Malformed JSON still raises, because it
    indicates a corrupt file rather than a frame without a centroid.

    Args:
        path: JSONL file to read (UTF-8).

    Returns:
        DataFrame with columns ``frame, mu_x, mu_y, mu_z, dmu`` sorted by
        ``frame``. ``dmu`` is the Euclidean distance between the centroid of
        a row and that of the previous kept row (NaN on the first row); rows
        are consecutive in the frame, not necessarily consecutive frame
        numbers. When no usable record exists the frame is empty but still
        has these columns.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    columns = ["frame", "mu_x", "mu_y", "mu_z", "dmu"]

    rows = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            if not isinstance(obj, dict):
                continue
            frame = obj.get("frame")
            mu = obj.get("mu")
            if frame is None or not isinstance(mu, (list, tuple)) or len(mu) != 3:
                continue
            try:
                record = {
                    "frame": int(frame),
                    "mu_x": float(mu[0]),
                    "mu_y": float(mu[1]),
                    "mu_z": float(mu[2]),
                }
            except (TypeError, ValueError):
                # e.g. a null coordinate or a non-numeric frame id
                continue
            rows.append(record)

    if not rows:
        # Keep the schema stable so callers can rely on the column names.
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(rows).sort_values("frame").reset_index(drop=True)

    # Centroid jitter: displacement of mu between consecutive kept rows.
    dx = df["mu_x"].diff()
    dy = df["mu_y"].diff()
    dz = df["mu_z"].diff()
    df["dmu"] = np.sqrt(dx * dx + dy * dy + dz * dz)

    return df[columns]


CELL 5 — Hitung metrik per file (Trial x Subject x Jalan)

In [None]:
def count_mid_gaps(valid_series: pd.Series) -> int:
    """Count invalid frames that lie between the first and last valid frame.

    A frame is "valid" when its flag equals 1 and "invalid" when it equals 0.
    Leading and trailing invalid frames are ignored; only zeros enclosed by
    valid frames count as gaps. If there is no valid frame at all, the whole
    series length is returned (everything is treated as one big gap).
    """
    flags = valid_series.values
    valid_positions = np.flatnonzero(flags == 1)
    if valid_positions.size == 0:
        return int(len(valid_series))

    start = valid_positions[0]
    stop = valid_positions[-1] + 1
    inside = flags[start:stop]
    return int(np.count_nonzero(inside == 0))

def _median_and_iqr(values: pd.Series) -> tuple:
    """Return (median, Q3 - Q1) of ``values`` as floats, or (nan, nan) if empty."""
    if len(values) == 0:
        return np.nan, np.nan
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    return float(values.median()), float(q3 - q1)


def evaluate_one_file(trial_name: str, root: Path, subject: str, trial: int) -> dict:
    """Compute the quality metrics of one (trial root, subject, trial) recording.

    Head-2 (frame-level CSV) is mandatory: when it is missing, a short record
    with ``status == "missing_head2"`` is returned. Head-3 (centroid JSONL) is
    optional: without it the jitter metrics stay NaN.

    Returns a dict with ``status == "ok"`` and:
    - frames_total / frames_valid / valid_rate
    - mid_gaps / mid_gap_ratio (invalid frames enclosed by valid ones)
    - frames_empty_after (valid frames with zero inliers)
    - inlier_ratio_med / inlier_ratio_iqr (valid frames with N_roi > 0)
    - conf_med / conf_iqr (valid frames)
    - dmu_med / dmu_p95 (centroid displacement between consecutive rows)
    - head2_path / head3_path (empty string when Head-3 is absent)
    """
    p2 = head2_path(root, subject, trial)
    p3 = head3_path(root, subject, trial)

    if not p2.exists():
        return {
            "trial_name": trial_name,
            "subject": subject,
            "trial": trial,
            "status": "missing_head2",
        }

    h2 = load_head2_one_file(p2)
    is_valid = h2["valid_minpts"] == 1

    # Frame coverage.
    frames_total = int(h2["frame"].nunique())
    frames_valid = int(is_valid.sum())
    if frames_total > 0:
        valid_rate = frames_valid / frames_total
        mid_gaps = count_mid_gaps(h2["valid_minpts"])
        mid_gap_ratio = mid_gaps / frames_total
    else:
        valid_rate = 0.0
        mid_gaps = 0
        mid_gap_ratio = 0.0

    # Valid frames that ended up with no inlier at all.
    frames_empty_after = int((is_valid & (h2["N_inlier"] == 0)).sum())

    # Inlier-ratio spread, restricted to valid frames with a non-empty ROI.
    ratio_rows = is_valid & (h2["N_roi"] > 0)
    inlier_ratio_med, inlier_ratio_iqr = _median_and_iqr(h2.loc[ratio_rows, "inlier_ratio"])

    # Confidence spread over valid frames (only if the column is available).
    if "conf" in h2.columns:
        conf_med, conf_iqr = _median_and_iqr(h2.loc[is_valid, "conf"])
    else:
        conf_med = conf_iqr = np.nan

    # Centroid jitter from Head-3.
    dmu_med = dmu_p95 = np.nan
    has_head3 = p3.exists()
    if has_head3:
        h3 = load_head3_one_file(p3)
        if not h3.empty:
            # The first row has no predecessor, so its diff is NaN.
            steps = h3["dmu"].dropna()
            if len(steps) > 0:
                dmu_med = float(steps.median())
                dmu_p95 = float(steps.quantile(0.95))

    return {
        "trial_name": trial_name,
        "subject": subject,
        "trial": trial,
        "status": "ok",
        "frames_total": frames_total,
        "frames_valid": frames_valid,
        "valid_rate": valid_rate,
        "mid_gaps": mid_gaps,
        "mid_gap_ratio": mid_gap_ratio,
        "frames_empty_after": frames_empty_after,
        "inlier_ratio_med": inlier_ratio_med,
        "inlier_ratio_iqr": inlier_ratio_iqr,
        "conf_med": conf_med,
        "conf_iqr": conf_iqr,
        "dmu_med": dmu_med,
        "dmu_p95": dmu_p95,
        "head2_path": str(p2),
        "head3_path": str(p3) if has_head3 else "",
    }


CELL 6 — Jalankan evaluasi untuk SEMUA file (langsung full batch)

In [None]:
def run_evaluation_all() -> pd.DataFrame:
    """Evaluate every (trial root, subject, trial) combination in one batch.

    Iterates ``TRIAL_ROOTS`` x ``SUBJECTS`` x ``TRIALS`` in that nesting
    order, prints a progress line every 50 files, and returns one row per
    combination (including the ones whose Head-2 file is missing).
    """
    jobs = [
        (trial_name, root, subject, trial)
        for trial_name, root in TRIAL_ROOTS.items()
        for subject in SUBJECTS
        for trial in TRIALS
    ]
    total = len(jobs)

    records = []
    for done, job in enumerate(jobs, start=1):
        if done % 50 == 0:
            print(f"Progress {done}/{total} ...")
        records.append(evaluate_one_file(*job))

    return pd.DataFrame(records)

# Run the full batch once; the later summary cells reuse `eval_df`.
eval_df = run_evaluation_all()
print("Done. Rows:", len(eval_df))
# Last expression of the cell: preview of the first rows.
eval_df.head()


CELL 7 — Buat tabel ringkasan per trial

In [None]:
def summarize_by_trial(eval_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the per-file metrics into one row per trial.

    Only rows with ``status == "ok"`` are used. ``files`` is the number of
    evaluated files; every ``*_mean`` column is the mean of the matching
    per-file metric, returned as float. Rows are sorted by ``trial_name``.
    """
    usable = eval_df.loc[eval_df["status"].eq("ok")]

    # Output column -> per-file metric that is averaged across the trial.
    averaged = {
        "valid_rate_mean": "valid_rate",
        "mid_gap_ratio_mean": "mid_gap_ratio",
        "empty_after_mean": "frames_empty_after",
        "inlier_ratio_med_mean": "inlier_ratio_med",
        "inlier_ratio_iqr_mean": "inlier_ratio_iqr",
        "dmu_med_mean": "dmu_med",
        "dmu_p95_mean": "dmu_p95",
        "conf_med_mean": "conf_med",
        "conf_iqr_mean": "conf_iqr",
    }
    spec = {"files": ("trial", "count")}
    spec.update({out: (src, "mean") for out, src in averaged.items()})

    table = usable.groupby("trial_name", as_index=False).agg(**spec)

    # Tidy up: force every mean column to a plain float dtype.
    mean_columns = [name for name in table.columns if name.endswith("_mean")]
    table = table.astype({name: float for name in mean_columns})

    return table.sort_values("trial_name").reset_index(drop=True)

# Per-trial summary table; it is also the input of the ranking step.
trial_summary = summarize_by_trial(eval_df)
trial_summary


CELL 8 — Tabel per subjek 

In [None]:
def summarize_by_subject(eval_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the per-file metrics into one row per (trial, subject) pair.

    Only rows with ``status == "ok"`` are used. ``files`` is the number of
    evaluated files; every ``*_mean`` column is the mean of the matching
    per-file metric. Rows are sorted by ``trial_name`` and then ``subject``.
    """
    keys = ["trial_name", "subject"]
    usable = eval_df.loc[eval_df["status"].eq("ok")]

    # Output column -> per-file metric that is averaged within the group.
    averaged = {
        "valid_rate_mean": "valid_rate",
        "mid_gap_ratio_mean": "mid_gap_ratio",
        "empty_after_mean": "frames_empty_after",
        "inlier_ratio_med_mean": "inlier_ratio_med",
        "dmu_med_mean": "dmu_med",
        "dmu_p95_mean": "dmu_p95",
    }
    spec = {"files": ("trial", "count")}
    spec.update({out: (src, "mean") for out, src in averaged.items()})

    table = usable.groupby(keys, as_index=False).agg(**spec)
    return table.sort_values(keys).reset_index(drop=True)

# Per-(trial, subject) breakdown of the same batch results.
subject_summary = summarize_by_subject(eval_df)
subject_summary


CELL 9 — Keputusan sederhana otomatis (ranking)

In [None]:
def rank_trials(trial_summary: pd.DataFrame) -> pd.DataFrame:
    """Rank trials by a weighted sum of min-max normalized summary metrics.

    The score combines three columns of ``trial_summary``:
    - ``mid_gap_ratio_mean`` (weight 0.45, lower is better)
    - ``dmu_p95_mean``       (weight 0.35, lower is better)
    - ``valid_rate_mean``    (weight 0.20, higher is better)

    Each metric is rescaled to [0, 1] across the trials before weighting. A
    metric that cannot discriminate between trials — constant, or NaN for
    every trial (e.g. no Head-3 data at all) — contributes 0 to every score
    instead of turning the whole ranking into NaN. A trial that is missing a
    metric other trials do have still gets a NaN score and sorts last.

    Args:
        trial_summary: one row per trial with the three columns above.

    Returns:
        A copy of ``trial_summary`` with a ``score`` column, sorted by score
        in descending order. An empty input yields an empty result.
    """
    ranked = trial_summary.copy()

    def minmax(values, invert=False):
        """Rescale to [0, 1]; ``invert`` flips it so small raw values score high."""
        values = values.astype(float)
        known = values.dropna()
        # No data at all (empty table or all-NaN column): neutral contribution.
        if known.empty:
            return pd.Series(0.0, index=values.index)
        lo, hi = known.min(), known.max()
        # Constant column: avoid dividing by (almost) zero.
        if hi - lo < 1e-12:
            return pd.Series(0.0, index=values.index)
        scaled = (values - lo) / (hi - lo)
        return (1 - scaled) if invert else scaled

    # mid_gap and dmu should be small => invert=True
    ranked["score"] = (
        0.45 * minmax(ranked["mid_gap_ratio_mean"], invert=True)
        + 0.35 * minmax(ranked["dmu_p95_mean"], invert=True)
        + 0.20 * minmax(ranked["valid_rate_mean"], invert=False)
    )
    return ranked.sort_values("score", ascending=False).reset_index(drop=True)

# Final ranking table: best-scoring trial first.
ranking = rank_trials(trial_summary)
ranking
