🧩 Cell A – Konfigurasi ekstra untuk torso temporal

In [None]:
# Cell A: Konfigurasi tambahan untuk torso versi TEMPORAL
# Cell 1: Import library & konfigurasi dasar

import os
import glob
import numpy as np
import pandas as pd

# Subjects to process; each name is also the sub-folder name under the base directories
SUBJECTS = ["Afi", "Tsamara", "Tsania"]

# Input folder: clustering results (DBSCAN), one sub-folder per subject
CLUSTER_BASE_DIR = r"E:\1.Clustering_TA\dataset2\hasil_clustering"

# Output folder for the temporal version of the torso extraction
TORSO_TEMP_BASE_DIR = r"E:\0.TA_Teguh\dataset2\hasil_torso_temporal"

# Simple hyperparameters for the cluster score: W_SIZE * size_norm - W_DIST * dist_norm
W_SIZE      = 0.7   # weight of the (normalised) cluster size
W_DIST      = 0.3   # weight of the (normalised) distance to the previous torso centroid
DIST_NORM   = 1.0   # distance normalisation (metres); assumes the torso moves < 1 m per frame

# Temporal state is reset (largest cluster is picked again) when the frame gap to the
# previously selected torso frame is >= GAP_RESET_FRAMES, i.e. a gap of 3 frames or more
GAP_RESET_FRAMES = 3
print("SUBJECTS        :", SUBJECTS)
print("CLUSTER_BASE_DIR:", CLUSTER_BASE_DIR)

print("TORSO_TEMP_BASE_DIR:", TORSO_TEMP_BASE_DIR)
print("W_SIZE, W_DIST, DIST_NORM, GAP_RESET_FRAMES =", W_SIZE, W_DIST, DIST_NORM, GAP_RESET_FRAMES)


In [None]:
# Cell 1b: Columns that every clustering CSV must contain
REQUIRED_COLUMNS = [
    "timestamp", "frame",   # time / frame index
    "x", "y", "z",          # point coordinates
    "doppler", "SNR",       # per-point measurements
    "cluster_id",           # cluster label
]
print("REQUIRED_COLUMNS:", REQUIRED_COLUMNS)


In [None]:
# Cell 1c: Helper untuk listing file clustering per subjek

def list_cluster_files_for_subject(subject_name: str, base_dir=None):
    """
    Return the sorted list of clustering CSV files for one subject.

    Files are looked up as ``<base_dir>/<subject_name>/clustering_*.csv``, e.g.
      E:\\1.Clustering_TA\\dataset2\\hasil_clustering\\Afi\\clustering_Jalan1.csv

    Args:
        subject_name: Name of the subject sub-folder.
        base_dir: Root folder that holds one sub-folder per subject. When
            omitted, the module-level ``CLUSTER_BASE_DIR`` is used.

    Returns:
        Sorted list of matching file paths; empty when nothing matches.
    """
    if base_dir is None:
        base_dir = CLUSTER_BASE_DIR

    subj_dir = os.path.join(base_dir, subject_name)
    # Escape the directory part so glob metacharacters ("[", "]", "*", "?") that
    # happen to appear in folder names are matched literally, not as wildcards.
    pattern = os.path.join(glob.escape(subj_dir), "clustering_*.csv")
    files = sorted(glob.glob(pattern))

    # Short progress report
    print(f"[INFO] {subject_name}: ditemukan {len(files)} file clustering")
    return files


🧩 Cell B – Helper: path output torso temporal

In [None]:
# Cell B: Path output untuk torso versi temporal

def build_output_torso_temp_path(subject_name: str, cluster_file_path: str, base_dir=None) -> str:
    r"""
    Map a clustering CSV path to the path of its temporal-torso output CSV.

    Example, from:
        E:\1.Clustering_TA\dataset2\hasil_clustering\Afi\clustering_Jalan1.csv
    to:
        E:\0.TA_Teguh\dataset2\hasil_torso_temporal\Afi\torsoT_Jalan1.csv

    The leading "clustering_" of the file name is replaced by "torsoT_"; a file
    name without that prefix simply gets "torsoT_" prepended.

    Args:
        subject_name: Name of the subject sub-folder in the output tree.
        cluster_file_path: Path of the input clustering CSV (only its base name is used).
        base_dir: Output root folder. When omitted, the module-level
            ``TORSO_TEMP_BASE_DIR`` is used.

    Returns:
        Full path of the output CSV.

    Side effects:
        Creates the subject's output folder if it does not exist yet.
    """
    if base_dir is None:
        base_dir = TORSO_TEMP_BASE_DIR

    prefix = "clustering_"
    fname = os.path.basename(cluster_file_path)  # e.g. "clustering_Jalan1.csv"
    suffix = fname[len(prefix):] if fname.startswith(prefix) else fname
    torso_fname = "torsoT_" + suffix

    subj_out_dir = os.path.join(base_dir, subject_name)
    os.makedirs(subj_out_dir, exist_ok=True)

    return os.path.join(subj_out_dir, torso_fname)

# Sanity check: for every subject that has clustering files, show the output
# path derived from its first file
for subj in SUBJECTS:
    files = list_cluster_files_for_subject(subj)
    if not files:
        continue
    print(subj, "->", build_output_torso_temp_path(subj, files[0]))


🧩 Cell C – Ekstraksi torso versi temporal

In [None]:
# Cell C: Ekstraksi torso versi TEMPORAL (size + distance)

def _select_torso_cluster(counts, centroids, prev_centroid, w_size, w_dist, dist_norm):
    """Return the cluster id maximising w_size * relative size - w_dist * clamped distance."""
    largest = counts.max()
    best_cid = None
    best_score = -np.inf

    # `counts` keeps value_counts() order; strict ">" means the first best candidate wins ties
    for cid, n_points in counts.items():
        size_norm = n_points / largest
        dist = np.linalg.norm(centroids[cid] - prev_centroid)
        dist_clamped = min(dist / dist_norm, 1.0)  # clamp to 0..1
        score = w_size * size_norm - w_dist * dist_clamped
        if score > best_score:
            best_score = score
            best_cid = cid

    # Every score was NaN (e.g. NaN coordinates): fall back to the largest cluster
    if best_cid is None:
        best_cid = counts.idxmax()
    return best_cid


def extract_torso_temporal_for_file(
    cluster_file_path: str,
    torso_temp_file_path: str,
    *,
    w_size=None,
    w_dist=None,
    dist_norm=None,
    gap_reset_frames=None,
    required_columns=None,
):
    """
    Extract the torso cluster of every frame using temporal consistency.

    Per frame (frames visited in ascending order):
      - noise points (cluster_id == -1) are discarded;
      - if no torso has been selected yet, or the frame gap to the previously
        selected torso frame is >= ``gap_reset_frames``, the largest cluster
        is selected (reset);
      - otherwise every cluster is scored with
            w_size * (points / max points in frame)
            - w_dist * min(distance to previous torso centroid / dist_norm, 1)
        and the highest-scoring cluster is selected.

    All points of the selected cluster of each frame are written to
    ``torso_temp_file_path`` as CSV with the same columns as the input.

    Args:
        cluster_file_path: Input clustering CSV.
        torso_temp_file_path: Output CSV path; its parent folder is created if needed.
        w_size, w_dist, dist_norm, gap_reset_frames: Scoring hyperparameters.
            Each defaults to the module-level constant of the same upper-case
            name (W_SIZE, W_DIST, DIST_NORM, GAP_RESET_FRAMES) when omitted.
        required_columns: Columns the input must contain; defaults to the
            module-level REQUIRED_COLUMNS.

    Returns:
        dict with keys "file", "torso_file", "n_frames_total",
        "n_frames_with_torso" and "avg_points_per_torso_frame".

    Raises:
        ValueError: if ``dist_norm`` is not positive, or the input lacks a
            required column.
    """
    # Resolve defaults at call time so edits to the module-level settings are honoured
    if w_size is None:
        w_size = W_SIZE
    if w_dist is None:
        w_dist = W_DIST
    if dist_norm is None:
        dist_norm = DIST_NORM
    if gap_reset_frames is None:
        gap_reset_frames = GAP_RESET_FRAMES
    if required_columns is None:
        required_columns = REQUIRED_COLUMNS
    if dist_norm <= 0:
        raise ValueError(f"dist_norm must be positive, got {dist_norm}")

    print(f"\n=== [TEMPORAL] Proses file clustering: {cluster_file_path}")
    df = pd.read_csv(cluster_file_path)

    missing = [c for c in required_columns if c not in df.columns]
    if missing:
        raise ValueError(f"File {cluster_file_path} tidak memiliki kolom: {missing}")

    n_frames_total = len(df["frame"].unique())
    print(f"  Total frame unik dalam file ini: {n_frames_total}")

    torso_rows = []

    # Temporal state: centroid and frame id of the most recently selected torso
    prev_centroid = None    # np.array([x, y, z])
    prev_frame_id = None

    # Drop noise once, then split by frame in a single pass; frames with only
    # noise simply do not appear and therefore leave the temporal state untouched
    df_non_noise = df[df["cluster_id"] != -1]
    for frame_id, df_valid in df_non_noise.groupby("frame", sort=True):
        counts = df_valid["cluster_id"].value_counts()
        if counts.empty:
            # all cluster ids in this frame are NaN: nothing to choose from
            continue

        centroids = {}
        for cid in counts.index:
            members = df_valid[df_valid["cluster_id"] == cid]
            centroids[cid] = np.array(
                [members[axis].mean() for axis in ("x", "y", "z")], dtype=float
            )

        gap_too_large = (
            prev_frame_id is not None
            and frame_id - prev_frame_id >= gap_reset_frames
        )
        if prev_centroid is None or gap_too_large:
            # First torso frame or reset: take the largest cluster
            torso_cluster_id = counts.idxmax()
        else:
            torso_cluster_id = _select_torso_cluster(
                counts, centroids, prev_centroid, w_size, w_dist, dist_norm
            )

        torso_rows.append(df_valid[df_valid["cluster_id"] == torso_cluster_id])

        prev_centroid = centroids[torso_cluster_id]
        prev_frame_id = frame_id

    if torso_rows:
        df_torso_all = pd.concat(torso_rows, ignore_index=True)
    else:
        df_torso_all = pd.DataFrame(columns=df.columns)

    # Summary: every appended chunk is one distinct frame with at least one point
    n_frames_with_torso = len(torso_rows)
    if n_frames_with_torso == 0:
        print("  PERINGATAN: [TEMPORAL] Tidak ada frame dengan torso.")
        avg_points_per_torso_frame = 0.0
    else:
        n_frames_no_torso = n_frames_total - n_frames_with_torso
        avg_points_per_torso_frame = len(df_torso_all) / n_frames_with_torso

        print(f"  Frame dengan torso   : {n_frames_with_torso}")
        print(f"  Frame tanpa torso    : {n_frames_no_torso}")
        print(f"  Rata-rata titik torso/frame: {avg_points_per_torso_frame:.2f}")

    # A bare file name has an empty dirname, which os.makedirs rejects
    out_dir = os.path.dirname(torso_temp_file_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df_torso_all.to_csv(torso_temp_file_path, index=False)
    print(f"  >> [TEMPORAL] Disimpan ke: {torso_temp_file_path}")

    return {
        "file": cluster_file_path,
        "torso_file": torso_temp_file_path,
        "n_frames_total": int(n_frames_total),
        "n_frames_with_torso": int(n_frames_with_torso),
        "avg_points_per_torso_frame": float(avg_points_per_torso_frame),
    }


🧩 Cell D – Jalankan torso temporal untuk semua subjek & summary

In [None]:
# Cell D: Jalankan torso extraction versi TEMPORAL untuk semua subjek

def run_torso_temporal_for_all_subjects():
    """
    Run the temporal torso extraction on every clustering file of every subject.

    Returns:
        DataFrame with one row per processed file: the summary returned by
        extract_torso_temporal_for_file plus a "subject" column. When no file
        was processed, an empty DataFrame with the summary columns is returned.
    """
    banner = "=" * 70
    collected = []

    for subj in SUBJECTS:
        print("\n" + banner)
        print(f"[TEMPORAL] Subjek: {subj}")
        print(banner)

        source_paths = list_cluster_files_for_subject(subj)
        if source_paths:
            for source_path in source_paths:
                target_path = build_output_torso_temp_path(subj, source_path)
                file_summary = extract_torso_temporal_for_file(source_path, target_path)
                file_summary["subject"] = subj
                collected.append(file_summary)
        else:
            print(f"  Tidak ada file clustering untuk subjek {subj}")

    if not collected:
        # Nothing processed: return an empty frame that still carries the schema
        empty_columns = [
            "subject", "file", "torso_file",
            "n_frames_total", "n_frames_with_torso",
            "avg_points_per_torso_frame",
        ]
        return pd.DataFrame(columns=empty_columns)

    return pd.DataFrame(collected)


# Run the temporal torso extraction for all subjects and keep the per-file summary
df_torso_temp_summary = run_torso_temporal_for_all_subjects()

print("\n=== [TEMPORAL] Ringkasan global torso extraction (head) ===")
# NOTE(review): `display` is neither defined nor imported in this file —
# presumably the IPython/Jupyter builtin; confirm before running as a plain script
display(df_torso_temp_summary.head())


🧩 Cell E – Bandingkan simple vs temporal per subjek

In [None]:
# Cell E: Bandingkan SIMPLE vs TEMPORAL per SUBJEK

def summarize_by_subject(df_summary):
    """
    Aggregate a per-file torso summary into one row per subject.

    A per-file "ratio_frames_with_torso" (n_frames_with_torso / n_frames_total)
    is computed on a copy of the input, then rows are grouped by "subject":
    the two frame counts are summed, while the average points per torso frame
    and the per-file ratio are averaged across files.

    Args:
        df_summary: DataFrame with columns "subject", "n_frames_total",
            "n_frames_with_torso" and "avg_points_per_torso_frame".
            It is not modified.

    Returns:
        DataFrame indexed by subject with the four aggregated columns.
    """
    per_file = df_summary.copy()
    torso_frames = per_file["n_frames_with_torso"]
    all_frames = per_file["n_frames_total"]
    per_file["ratio_frames_with_torso"] = torso_frames / all_frames

    aggregations = {
        "n_frames_total": "sum",
        "n_frames_with_torso": "sum",
        "avg_points_per_torso_frame": "mean",
        "ratio_frames_with_torso": "mean",
    }
    by_subject = per_file.groupby("subject")
    return by_subject.agg(aggregations)

# Disabled comparison against the SIMPLE baseline (largest cluster only).
# NOTE(review): `df_torso_summary` is not defined anywhere in this file —
# presumably produced by the non-temporal extraction run; confirm before re-enabling.
# print("=== SIMPLE (cluster terbesar saja) ===")
# df_simple_by_subj = summarize_by_subject(df_torso_summary)
# print(df_simple_by_subj)

# Per-subject aggregate of the temporal extraction summary
print("\n=== TEMPORAL (size + distance) ===")
df_temp_by_subj = summarize_by_subject(df_torso_temp_summary)
print(df_temp_by_subj)
