 GMM Trial 1 (Baseline): Hard SNR Gating
 Dataset: TI IWR6843AOP point cloud (x,y,z,doppler,SNR)
 Goal:
   - Head 1: point-level p_torso + component_id (CSV)
   - Head 2: frame-level conf + stats (CSV)
   - Head 3: window-level GMM params (JSONL)



CELL 2 — Imports

In [None]:
import os
import json
import math
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from scipy.stats import chi2

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse

# Optional interactivity (VSCode Jupyter)
try:
    import ipywidgets as widgets
    from IPython.display import display
    HAS_WIDGETS = True
except Exception:
    HAS_WIDGETS = False


CELL 3 — Config (ALL parameters in one place)

In [None]:
# -------------------------
# Paths
# -------------------------
# Input root: one sub-folder per subject, each holding Jalan<trial>.csv files.
DATASET_ROOT = Path(r"E:\0.TA_Teguh\dataset3")   # contains A..J
# Output root: the "Head 1" / "Head 2" / "Head 3" trees are created below it.
OUT_ROOT     = Path(r"E:\0.TA_Teguh\GMM")        # output root

SUBJECTS = list("ABCDEFGHIJ")  # A..J
TRIALS = list(range(1, 73))    # Jalan1..Jalan72

# ===============================
# Trial-2 Configuration (LOCKED)
# ===============================
# NOTE(review): the notebook title reads "Trial 1 (Baseline): Hard SNR Gating",
# while this section is labelled Trial-2 and configures soft SNR weighting —
# confirm which label is current.

# ROI (NO Z GATING): inclusive (min, max) bounds applied to x and y only.
# Units are presumably metres — TODO confirm against the sensor export.
ROI_X = (-2.5, 2.5)
ROI_Y = (0.0, 5.0)

# Frame validity: minimum point count required both after the ROI crop and
# after outlier removal for a frame to be marked valid.
MIN_POINTS = 5   # baseline (later try 10)

# Gaussian model
# NOTE(review): GMM_K is not read by any function in this part of the
# notebook; the fit routine is single-Gaussian by construction.
GMM_K = 1        # SINGLE Gaussian (K=1)

# Mahalanobis outlier removal: points whose squared distance exceeds the
# chi-square quantile CHI2_LEVEL (with CHI2_DF degrees of freedom) are dropped.
CHI2_LEVEL = 0.99   # baseline
CHI2_DF = 3         # x,y,z

# Refit: when True, the Gaussian is re-estimated on the inlier points only.
USE_REFIT_OFFLINE = True

# Soft SNR weighting (baseline global): weight = clip(SNR / SNR_REF, MIN, MAX).
SNR_REF = 10.0      # reference SNR for scaling
SNR_WEIGHT_MIN = 0.1   # lower clamp on the per-point weight
SNR_WEIGHT_MAX = 1.0   # upper clamp on the per-point weight


CELL 4 — Output folder preparation

In [None]:
def ensure_dirs():
    """Create the output folder tree under OUT_ROOT.

    For each of "Head 1", "Head 2" and "Head 3" this makes the head folder,
    its "_summary" subfolder, and one subfolder per entry of SUBJECTS.
    Folders that already exist are left as they are.
    """
    for head_dir in (OUT_ROOT / name for name in ("Head 1", "Head 2", "Head 3")):
        head_dir.mkdir(parents=True, exist_ok=True)
        # "_summary" first, then the per-subject folders.
        for leaf in ["_summary", *SUBJECTS]:
            (head_dir / leaf).mkdir(parents=True, exist_ok=True)

# Build the output folder tree when this cell runs and report its root.
ensure_dirs()
print("Output folders ready at:", OUT_ROOT)


CELL 5 — File indexing (build a table of 720 paths)

In [None]:
def build_file_index() -> pd.DataFrame:
    """Tabulate every (subject, trial) pair with its input and output paths.

    Returns a DataFrame with one row per combination of SUBJECTS x TRIALS
    (subjects outermost) and the columns ``subject``, ``trial``,
    ``input_path``, ``head1_out``, ``head2_out`` and ``head3_out``. All paths
    are stored as strings; no file-system access happens here.
    """

    def _entry(subject, trial):
        # Head 1 and Head 2 are CSV outputs, Head 3 is JSONL.
        csv_name = f"Jalan{trial}.csv"
        return {
            "subject": subject,
            "trial": trial,
            "input_path": str(DATASET_ROOT / subject / csv_name),
            "head1_out": str(OUT_ROOT / "Head 1" / subject / csv_name),
            "head2_out": str(OUT_ROOT / "Head 2" / subject / csv_name),
            "head3_out": str(OUT_ROOT / "Head 3" / subject / f"Jalan{trial}.jsonl"),
        }

    return pd.DataFrame([_entry(s, t) for s in SUBJECTS for t in TRIALS])

index_df = build_file_index()

# Check which indexed input CSVs are absent on disk and show a sample of them.
input_exists = index_df["input_path"].apply(lambda p: Path(p).exists())
missing = index_df[~input_exists]
print("Total files:", len(index_df))
print("Missing files:", len(missing))
if not missing.empty:
    display(missing.head(10))


CELL 6 — Load one CSV (strict schema)

In [None]:
REQUIRED_COLS = ["timestamp", "frame", "x", "y", "z", "doppler", "SNR"]


def load_trial_csv(path: str) -> pd.DataFrame:
    """Read one trial CSV, validate its schema, and return it ordered by frame.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The file's rows with ``timestamp`` cast to ``str``, ``frame`` cast to
        ``int`` and ``x``, ``y``, ``z``, ``doppler``, ``SNR`` cast to
        ``float``. Rows are sorted by ``frame``; rows that share a frame keep
        the order they had in the file. The index is reset to 0..N-1.

    Raises:
        ValueError: If one or more columns of REQUIRED_COLS are absent; the
            message names every missing column. Errors raised by pandas while
            casting propagate unchanged.
    """
    df = pd.read_csv(path)

    # Validate schema: report all missing columns at once, not just the first.
    missing = [c for c in REQUIRED_COLS if c not in df.columns]
    if missing:
        raise ValueError(f"Missing column(s) {missing} in {path}")

    # Type casting
    df["timestamp"] = df["timestamp"].astype(str)
    df["frame"] = df["frame"].astype(int)
    for c in ["x", "y", "z", "doppler", "SNR"]:
        df[c] = df[c].astype(float)

    # Mergesort is stable, so points inside a frame stay in file order; the
    # default quicksort gives no such guarantee.
    df = df.sort_values("frame", kind="mergesort").reset_index(drop=True)
    return df


CELL 7 — Soft SNR weighting, single-Gaussian fit, and Mahalanobis filter

In [None]:
def compute_snr_weight(snr_values):
    """
    Turn SNR values into soft weights: divide by SNR_REF, then clamp the
    result to the range [SNR_WEIGHT_MIN, SNR_WEIGHT_MAX].
    """
    scaled = snr_values / SNR_REF
    return np.clip(scaled, SNR_WEIGHT_MIN, SNR_WEIGHT_MAX)

def fit_gaussian_k1(X, weights=None):
    """Fit a single Gaussian (mean and covariance) to the rows of ``X``.

    Args:
        X: Array-like of shape (N, D), one point per row. D may be any
            positive dimension (the pipeline uses D = 3).
        weights: Optional array-like of N per-point weights. They are
            normalised internally so that they sum to one.

    Returns:
        ``(mu, Sigma)`` where ``mu`` has shape (D,) and ``Sigma`` has shape
        (D, D).

        * Without weights: the sample mean and ``np.cov`` of the points.
        * With weights: the weighted mean and
          ``sum_i w_i (x_i - mu)(x_i - mu)^T`` using the normalised weights.

        In both cases ``1e-6 * I`` is added to ``Sigma`` for numerical safety.

    Raises:
        ValueError: If ``X`` is not two-dimensional, or ``weights`` does not
            hold exactly one value per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (N, D), got shape {X.shape}")
    n_points, n_dims = X.shape

    if weights is None:
        mu = X.mean(axis=0)
        # np.cov collapses to a 0-d array when D == 1; keep it a matrix.
        Sigma = np.atleast_2d(np.cov(X.T))
    else:
        w = np.asarray(weights, dtype=float)
        if w.shape != (n_points,):
            raise ValueError(
                f"weights must have shape ({n_points},), got {w.shape}"
            )
        # The epsilon guards against division by zero when all weights are 0.
        w = w / (np.sum(w) + 1e-12)

        mu = np.sum(X * w[:, None], axis=0)
        Xc = X - mu
        Sigma = (Xc.T * w) @ Xc

    # Numerical safety: regularise with an identity sized to the data
    # dimension instead of a hard-coded 3x3.
    Sigma = Sigma + 1e-6 * np.eye(n_dims)
    return mu, Sigma


def mahalanobis_filter(X, mu, Sigma, chi2_level):
    """
    Squared Mahalanobis distance of every row of X from (mu, Sigma), together
    with a boolean mask of the rows at or below the chi-square cutoff.

    The cutoff is the chi2_level quantile of a chi-square distribution with
    CHI2_DF degrees of freedom. Returns (d2, inlier_mask, threshold).
    """
    cutoff = chi2.ppf(chi2_level, df=CHI2_DF)

    centered = X - mu
    precision = np.linalg.inv(Sigma)
    # Row-wise quadratic form: centered_i^T @ precision @ centered_i.
    dist_sq = np.einsum("ij,jk,ik->i", centered, precision, centered)

    return dist_sq, dist_sq <= cutoff, cutoff

CELL 8 — Process one file

In [None]:
def process_one_file(input_path, head1_out, head2_out, head3_out):
    """Run the per-frame single-Gaussian pipeline on one trial and save all heads.

    For every frame of the input CSV the steps are: crop to the XY ROI, fit an
    SNR-weighted Gaussian to (x, y, z), keep the points whose squared
    Mahalanobis distance is within the chi-square threshold, and (when
    USE_REFIT_OFFLINE is set) refit the Gaussian on those inliers. A frame is
    valid only if it has at least MIN_POINTS points both after the ROI crop
    and after outlier removal.

    Args:
        input_path: Trial CSV to read (see ``load_trial_csv``).
        head1_out: CSV destination for the inlier points of all valid frames,
            with the extra columns ``md2`` (squared Mahalanobis distance) and
            ``w_snr`` (SNR weight). The ``timestamp`` of every point is set to
            the first timestamp of its frame.
        head2_out: CSV destination for one summary row per frame, valid or not.
        head3_out: JSONL destination for one record per valid frame holding
            ``mu``, ``Sigma``, the chi-square threshold and point counts.

    Returns:
        A dict with ``input_path``, ``points_before``, ``points_after``,
        ``frames_total``, ``frames_invalid``, ``mean_conf`` and ``min_conf``.
        The two confidence statistics are NaN when the file has no frames.
    """
    head2_columns = [
        "timestamp", "frame", "N_raw", "N_roi", "minpts_ok", "valid_frame",
        "N_inlier", "conf", "centroid_x", "centroid_y", "centroid_z",
    ]

    df = load_trial_csv(input_path)

    head1_rows = []  # inlier point tables, one per valid frame
    head2_rows = []  # one summary dict per frame
    head3_rows = []  # Gaussian parameters, one dict per valid frame

    # groupby slices each frame once and yields them in ascending frame order,
    # instead of rescanning the whole table for every frame.
    for frame_key, df_f in df.groupby("frame", sort=True):
        # Plain Python int: json.dumps cannot serialise numpy integers.
        frame_id = int(frame_key)
        ts = df_f["timestamp"].iloc[0]

        # ROI XY only
        in_roi = df_f["x"].between(*ROI_X) & df_f["y"].between(*ROI_Y)
        df_roi = df_f[in_roi]
        N_roi = len(df_roi)

        summary = {
            "timestamp": ts,
            "frame": frame_id,
            "N_raw": len(df_f),
            "N_roi": N_roi,
            "minpts_ok": int(N_roi >= MIN_POINTS),
            "valid_frame": 0,
            "N_inlier": 0,
            "conf": 0.0,
            "centroid_x": np.nan,
            "centroid_y": np.nan,
            "centroid_z": np.nan,
        }
        # Every frame gets exactly one Head 2 row; it is filled in below (in
        # place) only if the frame turns out to be valid.
        head2_rows.append(summary)

        if N_roi < MIN_POINTS:
            continue

        X = df_roi[["x", "y", "z"]].values
        w = compute_snr_weight(df_roi["SNR"].values)

        mu, Sigma = fit_gaussian_k1(X, w)
        d2, inlier_mask, chi2_thr = mahalanobis_filter(X, mu, Sigma, CHI2_LEVEL)

        df_in = df_roi[inlier_mask].copy()
        df_in["md2"] = d2[inlier_mask]
        df_in["w_snr"] = w[inlier_mask]
        N_inlier = len(df_in)

        if N_inlier < MIN_POINTS:
            continue

        # Optional refit on the inliers only
        if USE_REFIT_OFFLINE:
            mu, Sigma = fit_gaussian_k1(
                df_in[["x", "y", "z"]].values,
                df_in["w_snr"].values,
            )

        summary.update({
            "valid_frame": 1,
            "N_inlier": N_inlier,
            "conf": N_inlier / max(N_roi, 1),
            "centroid_x": mu[0],
            "centroid_y": mu[1],
            "centroid_z": mu[2],
        })

        # Head 1 (Option A)
        head1_rows.append(df_in.assign(frame=frame_id, timestamp=ts))

        # Head 3
        head3_rows.append({
            "timestamp": ts,
            "frame": frame_id,
            "mu": mu.tolist(),
            "Sigma": Sigma.tolist(),
            "chi2_threshold": float(chi2_thr),
            "N_roi": int(N_roi),
            "N_inlier": int(N_inlier),
        })

    # SAVE
    if head1_rows:
        head1_df = pd.concat(head1_rows)
    else:
        # Header-only CSV so the file can still be read back by pd.read_csv.
        extra = [c for c in ("md2", "w_snr") if c not in df.columns]
        head1_df = pd.DataFrame(columns=list(df.columns) + extra)
    head1_df.to_csv(head1_out, index=False)

    # Explicit columns keep the header even when there are no frames at all.
    pd.DataFrame(head2_rows, columns=head2_columns).to_csv(head2_out, index=False)

    with open(head3_out, "w", encoding="utf-8") as jsonl:
        jsonl.writelines(json.dumps(r) + "\n" for r in head3_rows)

    confs = [r["conf"] for r in head2_rows]
    return {
        "input_path": input_path,
        "points_before": len(df),
        "points_after": sum(r["N_inlier"] for r in head2_rows),
        "frames_total": len(head2_rows),
        "frames_invalid": sum(1 for r in head2_rows if r["valid_frame"] == 0),
        # np.min raises on an empty list; report NaN for a frame-less file.
        "mean_conf": np.mean(confs) if confs else np.nan,
        "min_conf": np.min(confs) if confs else np.nan,
    }


CELL 9 — Batch runner (A..J, Jalan1..72) + per-subject summary


In [None]:
def run_batch(index_df: pd.DataFrame, subjects: Optional[List[str]] = None, trials: Optional[List[int]] = None) -> pd.DataFrame:
    """Process every indexed file that matches the optional filters.

    Args:
        index_df: File index with the columns ``subject``, ``trial``,
            ``input_path``, ``head1_out``, ``head2_out`` and ``head3_out``.
        subjects: Keep only these subjects; ``None`` keeps all.
        trials: Keep only these trial numbers; ``None`` keeps all.

    Returns:
        One row per selected file: the summary returned by
        ``process_one_file`` plus ``subject`` and ``trial``. If processing a
        file raises, the row instead carries ``subject``, ``trial``,
        ``input_path`` and the exception text under ``error``; the batch then
        continues with the next file.
    """
    selected = index_df
    if subjects is not None:
        selected = selected[selected["subject"].isin(subjects)]
    if trials is not None:
        selected = selected[selected["trial"].isin(trials)]

    jobs = selected.to_dict("records")
    total = len(jobs)

    summaries = []
    for position, job in enumerate(jobs, start=1):
        s = job["subject"]
        t = int(job["trial"])
        print(f"[{position}/{total}] Processing {s}/Jalan{t} ...")

        try:
            summ = process_one_file(
                input_path=job["input_path"],
                head1_out=job["head1_out"],
                head2_out=job["head2_out"],
                head3_out=job["head3_out"]
            )
            summ["subject"] = s
            summ["trial"] = t
        except Exception as e:
            # Record the failure and keep going so one bad file does not
            # abort the whole batch.
            print("  ERROR:", e)
            summ = {
                "subject": s, "trial": t,
                "input_path": job["input_path"],
                "error": str(e)
            }
        summaries.append(summ)

    return pd.DataFrame(summaries)

In [None]:
# Safety: start with 1 subject, 1 trial to verify the pipeline end to end:
# summaries_df = run_batch(index_df, subjects=["A"], trials=[1])
# Full run: take the subject and trial lists from the config cell so this call
# cannot drift from the values the file index was built with.
summaries_df = run_batch(index_df, subjects=SUBJECTS, trials=TRIALS)
display(summaries_df.head())

In [None]:
def save_subject_summaries(summaries_df: pd.DataFrame):
    """Write per-subject summary CSVs into every head plus one global CSV.

    For each subject in SUBJECTS that has rows in ``summaries_df``, the same
    table (restricted to the common summary columns that are present) is
    saved as ``summary_<subject>.csv`` in the "_summary" folder of Head 1,
    Head 2 and Head 3. The unfiltered ``summaries_df`` is then saved as
    ``OUT_ROOT/_summary/global_summary.csv``.
    """
    wanted_cols = [
        "subject", "trial",
        "points_before", "points_after",
        "frames_total", "frames_invalid",
        "mean_conf", "min_conf",
        "input_path"
    ]
    summary_dirs = [
        OUT_ROOT / head / "_summary"
        for head in ("Head 1", "Head 2", "Head 3")
    ]

    for s in SUBJECTS:
        per_subject = summaries_df[summaries_df["subject"] == s].copy()
        if per_subject.empty:
            continue

        # Minimal summary CSVs: keep only the common columns that exist.
        keep = [c for c in wanted_cols if c in per_subject.columns]
        table = per_subject[keep].copy()

        for folder in summary_dirs:
            table.to_csv(folder / f"summary_{s}.csv", index=False)

    # Optional global summary
    global_dir = OUT_ROOT / "_summary"
    global_dir.mkdir(exist_ok=True)
    summaries_df.to_csv(global_dir / "global_summary.csv", index=False)

