🧩 Cell 1 – Import & Konfigurasi Dasar

In [1]:
# Cell 1: Import library & konfigurasi dasar

import os
import glob
import numpy as np
import pandas as pd

# Subjects to process
SUBJECTS = ["Afi", "Tsamara", "Tsania"]

# Input root: clustering (DBSCAN) results
CLUSTER_BASE_DIR = r"E:\1.Clustering_TA\dataset2\hasil_clustering"

# Output root: torso results (main branch, torso-only point cloud)
TORSO_BASE_DIR = r"E:\0.TA_Teguh\dataset2\hasil_torso"

# Echo the active configuration so the run output is self-describing
print(f"SUBJECTS        : {SUBJECTS}")
print(f"CLUSTER_BASE_DIR: {CLUSTER_BASE_DIR}")
print(f"TORSO_BASE_DIR  : {TORSO_BASE_DIR}")


SUBJECTS        : ['Afi', 'Tsamara', 'Tsania']
CLUSTER_BASE_DIR: E:\1.Clustering_TA\dataset2\hasil_clustering
TORSO_BASE_DIR  : E:\0.TA_Teguh\dataset2\hasil_torso


🧩 Cell 2 – Helper: List file clustering per subjek

In [2]:
# Cell 2: Fungsi untuk mengambil daftar file clustering per subjek

def list_cluster_files_for_subject(subject_name: str, base_dir=None):
    r"""
    Return the sorted list of clustering-result CSV paths for one subject.

    Files are looked up as ``<base_dir>/<subject_name>/clustering_*.csv``, e.g.
        E:\1.Clustering_TA\dataset2\hasil_clustering\<SUBJECT>\clustering_JalanN.csv

    (The docstring is a raw string so the Windows path above is not
    interpreted as escape sequences.)

    Args:
        subject_name: Name of the subject sub-folder.
        base_dir: Root folder holding one sub-folder per subject. When omitted
            (``None``), the module-level ``CLUSTER_BASE_DIR`` is used.

    Returns:
        Matching paths in lexicographic order (so ``Jalan10`` sorts before
        ``Jalan2``); an empty list when nothing matches.
    """
    root = CLUSTER_BASE_DIR if base_dir is None else base_dir
    pattern = os.path.join(root, subject_name, "clustering_*.csv")
    return sorted(glob.glob(pattern))

# Sanity check: report how many clustering files each subject has
for subj in SUBJECTS:
    files = list_cluster_files_for_subject(subj)
    print(f"{subj}: {len(files)} file clustering ditemukan.")
    if not files:
        continue
    # Show the first match as an example
    print("  Contoh:", files[0])


Afi: 72 file clustering ditemukan.
  Contoh: E:\1.Clustering_TA\dataset2\hasil_clustering\Afi\clustering_Jalan1.csv
Tsamara: 72 file clustering ditemukan.
  Contoh: E:\1.Clustering_TA\dataset2\hasil_clustering\Tsamara\clustering_Jalan1.csv
Tsania: 72 file clustering ditemukan.
  Contoh: E:\1.Clustering_TA\dataset2\hasil_clustering\Tsania\clustering_Jalan1.csv


🧩 Cell 3 – Helper: Path output torso per file

In [3]:
# Cell 3: Fungsi untuk membangun path output torso per file

def build_output_torso_path(subject_name: str, cluster_file_path: str, base_dir=None) -> str:
    r"""
    Build the torso output path that corresponds to one clustering input file.

    Example: the input path
        E:\1.Clustering_TA\dataset2\hasil_clustering\Afi\clustering_Jalan1.csv
    maps to the output path
        E:\0.TA_Teguh\dataset2\hasil_torso\Afi\torso_Jalan1.csv

    (The docstring is a raw string so the Windows paths above are not
    interpreted as escape sequences such as ``\t`` or ``\0``.)

    Args:
        subject_name: Name of the subject; used as the output sub-folder.
        cluster_file_path: Path of the clustering CSV; only its file name is used.
        base_dir: Root output folder. When omitted (``None``), the module-level
            ``TORSO_BASE_DIR`` is used.

    Returns:
        ``<base_dir>/<subject_name>/torso_<name>``, where ``<name>`` is the
        input file name with a leading ``clustering_`` prefix removed (a file
        name without that prefix is kept whole).

    Side effects:
        Creates the subject output folder if it does not exist yet.
    """
    fname = os.path.basename(cluster_file_path)  # e.g. "clustering_Jalan1.csv"

    # Swap the "clustering_" prefix for "torso_"; otherwise just prepend "torso_".
    prefix = "clustering_"
    stem = fname[len(prefix):] if fname.startswith(prefix) else fname

    root = TORSO_BASE_DIR if base_dir is None else base_dir
    subj_out_dir = os.path.join(root, subject_name)
    os.makedirs(subj_out_dir, exist_ok=True)

    return os.path.join(subj_out_dir, "torso_" + stem)

# Preview the output path derived from each subject's first clustering file
for subj in SUBJECTS:
    files = list_cluster_files_for_subject(subj)
    if not files:
        continue
    print(subj, "->", build_output_torso_path(subj, files[0]))


Afi -> E:\0.TA_Teguh\dataset2\hasil_torso\Afi\torso_Jalan1.csv
Tsamara -> E:\0.TA_Teguh\dataset2\hasil_torso\Tsamara\torso_Jalan1.csv
Tsania -> E:\0.TA_Teguh\dataset2\hasil_torso\Tsania\torso_Jalan1.csv


🧩 Cell 4 – Ekstraksi torso untuk 1 file (versi simple: cluster terbesar per frame)

In [4]:
# Cell 4: Fungsi ekstraksi torso untuk 1 file clustering (BRANCH 1 - SIMPLE)

# Columns every clustering CSV must provide
REQUIRED_COLUMNS = ["timestamp", "frame", "x", "y", "z", "doppler", "SNR", "cluster_id"]


def _select_largest_cluster_per_frame(df):
    """Return, for every frame, all rows of its most populated non-noise cluster."""
    picked = []

    # groupby iterates frames in sorted order and keeps row order within a frame
    for _, df_frame in df.groupby("frame"):
        cluster_ids = df_frame["cluster_id"]

        # Drop noise (-1). Rows with a missing cluster_id are dropped as well:
        # NaN != -1 is True, so they would otherwise survive the filter and
        # leave value_counts() empty, making idxmax() fail.
        df_valid = df_frame[cluster_ids.notna() & (cluster_ids != -1)]
        if df_valid.empty:
            # No valid cluster in this frame -> no torso, skip it
            continue

        # NOTE(review): when two clusters have the same size, the winner is
        # whichever value_counts() lists first — confirm this is acceptable.
        torso_cluster_id = df_valid["cluster_id"].value_counts().idxmax()
        picked.append(df_valid[df_valid["cluster_id"] == torso_cluster_id])

    if picked:
        return pd.concat(picked, ignore_index=True)

    # Empty result that keeps the input's columns and dtypes
    return df.iloc[0:0].copy()


def extract_torso_for_file(cluster_file_path: str, torso_file_path: str,
                           keep_cluster_id: bool = True):
    """
    Extract the torso-only point cloud from one clustering-result CSV.

    Per frame:
      - points with cluster_id == -1 (noise) or a missing cluster_id are dropped
      - if more than one valid cluster remains, the one with the most points
        is taken as the torso
      - ALL points of that cluster are kept (no averaging)

    Frames without any valid cluster contribute no rows.

    Args:
        cluster_file_path: Input CSV; must contain every column listed in
            REQUIRED_COLUMNS.
        torso_file_path: Output CSV path. Its parent folder is created when
            the path has a directory component.
        keep_cluster_id: Keep the ``cluster_id`` column in the output
            (default, same as before). Pass False to drop it.

    Returns:
        dict with keys ``file``, ``torso_file``, ``n_frames_total``,
        ``n_frames_with_torso`` and ``avg_points_per_torso_frame``.

    Raises:
        ValueError: if a required column is missing from the input file.

    Side effects:
        Writes the torso CSV and prints a short per-file summary.
    """
    print(f"\n=== Proses file clustering: {cluster_file_path}")
    df = pd.read_csv(cluster_file_path)

    # Column validation
    missing = [c for c in REQUIRED_COLUMNS if c not in df.columns]
    if missing:
        raise ValueError(f"File {cluster_file_path} tidak memiliki kolom: {missing}")

    # Initial info
    n_frames_total = len(df["frame"].unique())
    print(f"  Total frame unik dalam file ini: {n_frames_total}")

    df_torso_all = _select_largest_cluster_per_frame(df)
    if not keep_cluster_id:
        df_torso_all = df_torso_all.drop(columns=["cluster_id"])

    # Per-file summary
    if df_torso_all.empty:
        print("  PERINGATAN: Tidak ada frame yang memiliki torso valid di file ini!")
        n_frames_with_torso = 0
        avg_points_per_torso_frame = 0.0
    else:
        n_frames_with_torso = df_torso_all["frame"].nunique()
        n_frames_no_torso = n_frames_total - n_frames_with_torso
        avg_points_per_torso_frame = len(df_torso_all) / max(n_frames_with_torso, 1)

        print(f"  Frame dengan torso   : {n_frames_with_torso}")
        print(f"  Frame tanpa torso    : {n_frames_no_torso}")
        print(f"  Rata-rata titik torso/frame: {avg_points_per_torso_frame:.2f}")

    # Save to CSV. A bare file name has an empty dirname, and os.makedirs("")
    # raises, so only create the folder when there is one to create.
    out_dir = os.path.dirname(torso_file_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df_torso_all.to_csv(torso_file_path, index=False)
    print(f"  >> Disimpan ke: {torso_file_path}")

    # Summary returned for global aggregation
    return {
        "file": cluster_file_path,
        "torso_file": torso_file_path,
        "n_frames_total": n_frames_total,
        "n_frames_with_torso": int(n_frames_with_torso),
        "avg_points_per_torso_frame": float(avg_points_per_torso_frame),
    }


🧩 Cell 5 – Jalankan torso extraction untuk semua subjek & semua file

In [5]:
# Cell 5: Jalankan ekstraksi torso (branch utama) untuk semua subjek & semua file

def run_torso_extraction_for_all_subjects(subjects=None):
    """
    Run the torso extraction for every clustering file of every subject.

    For each subject, every file returned by list_cluster_files_for_subject is
    passed to extract_torso_for_file, and the per-file summaries are collected.

    Args:
        subjects: Iterable of subject names to process. When omitted (``None``),
            the module-level ``SUBJECTS`` list is used.

    Returns:
        DataFrame with one row per processed file and the columns ``file``,
        ``torso_file``, ``n_frames_total``, ``n_frames_with_torso``,
        ``avg_points_per_torso_frame`` and ``subject``. When nothing was
        processed, an empty DataFrame with those same columns in the same
        order is returned.
    """
    if subjects is None:
        subjects = SUBJECTS

    summaries = []

    for subj in subjects:
        print("\n" + "=" * 70)
        print(f"Subjek: {subj}")
        print("=" * 70)

        cluster_files = list_cluster_files_for_subject(subj)
        if not cluster_files:
            print(f"  Tidak ada file clustering untuk subjek {subj}")
            continue

        for cluster_path in cluster_files:
            torso_path = build_output_torso_path(subj, cluster_path)
            summary = extract_torso_for_file(cluster_path, torso_path)
            summary["subject"] = subj
            summaries.append(summary)

    # Build the global summary DataFrame
    if summaries:
        return pd.DataFrame(summaries)

    # Same column order as the populated case ("subject" is appended last to
    # each summary dict), so downstream code sees one schema either way.
    return pd.DataFrame(
        columns=["file", "torso_file",
                 "n_frames_total", "n_frames_with_torso",
                 "avg_points_per_torso_frame", "subject"]
    )


# Run the torso extraction for all subjects and keep the per-file summary
# in a notebook-level variable.
df_torso_summary = run_torso_extraction_for_all_subjects()

# Show only the first rows of the global summary.
# NOTE(review): display() is not imported in this file; presumably the
# IPython/Jupyter built-in — confirm if this is ever run as a plain script.
print("\n=== Ringkasan global torso extraction (head) ===")
display(df_torso_summary.head())



Subjek: Afi

=== Proses file clustering: E:\1.Clustering_TA\dataset2\hasil_clustering\Afi\clustering_Jalan1.csv
  Total frame unik dalam file ini: 99
  Frame dengan torso   : 91
  Frame tanpa torso    : 8
  Rata-rata titik torso/frame: 20.45
  >> Disimpan ke: E:\0.TA_Teguh\dataset2\hasil_torso\Afi\torso_Jalan1.csv

=== Proses file clustering: E:\1.Clustering_TA\dataset2\hasil_clustering\Afi\clustering_Jalan10.csv
  Total frame unik dalam file ini: 94
  Frame dengan torso   : 89
  Frame tanpa torso    : 5
  Rata-rata titik torso/frame: 23.76
  >> Disimpan ke: E:\0.TA_Teguh\dataset2\hasil_torso\Afi\torso_Jalan10.csv

=== Proses file clustering: E:\1.Clustering_TA\dataset2\hasil_clustering\Afi\clustering_Jalan11.csv
  Total frame unik dalam file ini: 96
  Frame dengan torso   : 90
  Frame tanpa torso    : 6
  Rata-rata titik torso/frame: 19.86
  >> Disimpan ke: E:\0.TA_Teguh\dataset2\hasil_torso\Afi\torso_Jalan11.csv

=== Proses file clustering: E:\1.Clustering_TA\dataset2\hasil_clusteri

Unnamed: 0,file,torso_file,n_frames_total,n_frames_with_torso,avg_points_per_torso_frame,subject
0,E:\1.Clustering_TA\dataset2\hasil_clustering\A...,E:\0.TA_Teguh\dataset2\hasil_torso\Afi\torso_J...,99,91,20.450549,Afi
1,E:\1.Clustering_TA\dataset2\hasil_clustering\A...,E:\0.TA_Teguh\dataset2\hasil_torso\Afi\torso_J...,94,89,23.764045,Afi
2,E:\1.Clustering_TA\dataset2\hasil_clustering\A...,E:\0.TA_Teguh\dataset2\hasil_torso\Afi\torso_J...,96,90,19.855556,Afi
3,E:\1.Clustering_TA\dataset2\hasil_clustering\A...,E:\0.TA_Teguh\dataset2\hasil_torso\Afi\torso_J...,77,71,29.830986,Afi
4,E:\1.Clustering_TA\dataset2\hasil_clustering\A...,E:\0.TA_Teguh\dataset2\hasil_torso\Afi\torso_J...,99,92,24.532609,Afi


🧩 Cell 6 – Ringkasan agregat per subjek

In [6]:
# Cell 6: Ringkasan agregat per subjek (opsional tapi sangat berguna)

# Roll the per-file summary up to one row per subject
if df_torso_summary is None or df_torso_summary.empty:
    print("Tidak ada summary torso (df_torso_summary kosong).")
else:
    # Fraction of frames in each file that yielded a torso
    df_torso_summary["ratio_frames_with_torso"] = df_torso_summary["n_frames_with_torso"].div(
        df_torso_summary["n_frames_total"]
    )

    print("\n=== Ringkasan agregat per SUBJEK ===")
    # Frame counts are summed; the two per-file metrics are averaged over files
    df_by_subject = df_torso_summary.groupby("subject").agg(
        n_frames_total=("n_frames_total", "sum"),
        n_frames_with_torso=("n_frames_with_torso", "sum"),
        avg_points_per_torso_frame=("avg_points_per_torso_frame", "mean"),
        ratio_frames_with_torso=("ratio_frames_with_torso", "mean"),
    )
    print(df_by_subject)



=== Ringkasan agregat per SUBJEK ===
         n_frames_total  n_frames_with_torso  avg_points_per_torso_frame  \
subject                                                                    
Afi                9453                 8540                   27.985436   
Tsamara           12675                11643                   26.461814   
Tsania            10812                 9827                   30.136372   

         ratio_frames_with_torso  
subject                           
Afi                     0.904585  
Tsamara                 0.917326  
Tsania                  0.906527  
