🧩 Cell 1 – Import & Config dasar

In [4]:
# Cell 1: Import libraries dan konfigurasi dasar

import os
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Data splitting and evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay, classification_report, confusion_matrix

# Deep learning
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks

# Seed the NumPy and TensorFlow global RNGs so results are more repeatable (optional)
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Folder holding the per-frame torso CSV files (ADJUST TO YOUR OWN MACHINE).
# When the FOLDER_TORSO environment variable is set it takes precedence over the
# default below, so the notebook can run elsewhere without editing this cell.
FOLDER_TORSO = os.environ.get(
    "FOLDER_TORSO",
    r"E:\1.Clustering_TA\3output_clustering\02_torso_per_frame",
)

# Mapping subject -> integer class label
SUBJECT_TO_LABEL = {
    "Afi": 0,
    "Kinan": 1,
    "Miftah": 2,
}

# Model / dataset configuration: names of the per-frame features, in column order
FEATURE_NAMES = [
    "torso_x",
    "torso_y",
    "torso_z",
    "vx",
    "vy",
    "vz",
    "is_held_from_prev",
    "t_norm",
]

PAD_VALUE_AFTER_NORM = -10.0  # padding value used after normalisation (meant to lose in the max-pool)

print("Config OK.")


ModuleNotFoundError: No module named 'numpy'

🧩 Cell 2 – Helper parsing nama file & baca satu trial

In [None]:
# Cell 2: Helper untuk parsing nama file & membaca satu trial

def parse_subject_trial_from_filename(fname: str):
    """
    Split a torso CSV file name into subject and trial information.

    Expected names look like:
        Afi_Jalan10_clustered_torso.csv
        Kinan_Jalan72_clustered_torso.csv

    Returns:
        (subject, trial_name, trial_num)
          - subject    : text before the first underscore
          - trial_name : second underscore-separated token, kept verbatim (e.g. "Jalan10")
          - trial_num  : first run of digits found in trial_name as an int,
                         or -1 when the token contains no digit

    Raises:
        ValueError: the file stem has fewer than two underscore-separated tokens.
    """
    # e.g. "Afi_Jalan10_clustered_torso" -> ["Afi", "Jalan10", "clustered", "torso"]
    tokens = Path(fname).stem.split("_")
    if len(tokens) < 2:
        raise ValueError(f"Format nama file tidak dikenali: {fname}")

    subject, trial_name = tokens[0], tokens[1]

    digits = re.search(r"(\d+)", trial_name)
    trial_num = -1 if digits is None else int(digits.group(1))

    return subject, trial_name, trial_num


def load_torso_csv(filepath: str):
    """
    Read one per-frame torso CSV and return it ordered by frame.

    Args:
        filepath: location of the CSV file (passed straight to pandas.read_csv).

    Returns:
        DataFrame sorted ascending by the "frame" column with a fresh 0..N-1 index.
        Rows that share a frame number keep the order they had in the file.

    Raises:
        KeyError: one or more required columns are absent; the message names
            every missing column, not just the first one.
    """
    df = pd.read_csv(filepath)

    required_cols = [
        "frame",
        "torso_cluster_id",
        "torso_x",
        "torso_y",
        "torso_z",
        "num_points_torso",
        "is_held_from_prev",
    ]
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        quoted = ", ".join(f"'{col}'" for col in missing)
        raise KeyError(f"Kolom {quoted} tidak ditemukan di {filepath}")

    # mergesort is stable: ties on "frame" stay in file order, so the row-to-row
    # differences computed downstream do not depend on the sort implementation.
    return df.sort_values("frame", kind="mergesort").reset_index(drop=True)


print("Helper parsing & loader siap.")


🧩 Cell 3 – Feature engineering per trial (x,y,z,vx,vy,vz,is_held,t_norm)

In [None]:
# Cell 3: Feature engineering untuk satu trial (torso per frame)

def compute_frame_features_from_torso_df(df: pd.DataFrame) -> np.ndarray:
    """
    Build the per-frame feature matrix of a single trial.

    Args:
        df: per-frame torso table. Rows are used in the order given, so the
            caller is expected to have sorted it by frame already.

    Returns:
        float32 array of shape (len(df), len(FEATURE_NAMES)) whose columns follow
        the order of FEATURE_NAMES:
          - torso_x, torso_y, torso_z : taken directly from df
          - vx, vy, vz                : position difference between consecutive rows
                                        (Δ per row; the first row gets 0)
          - is_held_from_prev         : taken from df, converted to float
          - t_norm                    : row position spread evenly over [0, 1]

    Raises:
        KeyError: FEATURE_NAMES holds a name this function does not know.
    """
    n_rows = len(df)
    if n_rows == 0:
        # No frame at all: return an empty matrix with the right number of columns
        return np.zeros((0, len(FEATURE_NAMES)), dtype=np.float32)

    def column(col_name):
        return df[col_name].to_numpy(dtype=np.float32)

    def step(values):
        # Row-to-row difference. Prepending the first value makes the leading
        # entry 0 (not NaN): [x1, x2, x3, ...] -> [0, x2-x1, x3-x2, ...].
        # Used as a velocity approximation under a constant time step per row.
        return np.diff(values, prepend=values[0]).astype(np.float32)

    pos_x = column("torso_x")
    pos_y = column("torso_y")
    pos_z = column("torso_z")

    available = {
        "torso_x": pos_x,
        "torso_y": pos_y,
        "torso_z": pos_z,
        "vx": step(pos_x),
        "vy": step(pos_y),
        "vz": step(pos_z),
        # 0.0 / 1.0 flag as float
        "is_held_from_prev": column("is_held_from_prev"),
        # 0..1 along the trial; a single-row trial yields [0.0]
        "t_norm": np.linspace(0.0, 1.0, n_rows, dtype=np.float32),
    }

    # Pick the columns in the order requested by FEATURE_NAMES
    selected = []
    for name in FEATURE_NAMES:
        if name not in available:
            raise KeyError(f"FEATURE_NAMES berisi nama yang tidak dikenali: {name}")
        selected.append(available[name])

    # shape = (N_frame, n_features)
    return np.stack(selected, axis=1).astype(np.float32)


print("Fungsi feature engineering per trial siap.")


🧩 Cell 4 – Kumpulkan semua trial (X_list & metadata)

In [None]:
# Cell 4: Load every torso file and build the list of feature matrices plus metadata

all_trial_features = []  # list of np.ndarray, variable shape (N_frame_i, n_features)
all_subjects = []        # subject name of each trial
all_labels = []          # integer label of each trial
all_trial_names = []     # e.g. "Jalan10"
all_trial_nums = []      # trial number of each trial (int)

folder_path = Path(FOLDER_TORSO)
csv_files = sorted(folder_path.glob("*_clustered_torso.csv"))

print(f"Total file ditemukan: {len(csv_files)}")

for fpath in csv_files:
    subject, trial_name, trial_num = parse_subject_trial_from_filename(fpath.name)
    if subject not in SUBJECT_TO_LABEL:
        print(f"[WARNING] Subject '{subject}' tidak dikenali, skip file: {fpath.name}")
        continue

    df_torso = load_torso_csv(fpath)

    # Skip files without any row
    if df_torso.empty:
        print(f"[WARNING] Data frame kosong, skip file: {fpath.name}")
        continue

    feat_mat = compute_frame_features_from_torso_df(df_torso)
    if feat_mat.shape[0] == 0:
        print(f"[WARNING] Hasil fitur 0 frame, skip file: {fpath.name}")
        continue

    # All five lists stay index-aligned: one entry per accepted trial
    all_trial_features.append(feat_mat)
    all_subjects.append(subject)
    all_labels.append(SUBJECT_TO_LABEL[subject])
    all_trial_names.append(trial_name)
    all_trial_nums.append(trial_num)

print("Total trial valid:", len(all_trial_features))

# Stop here with an explicit message when nothing was loaded (wrong folder, no
# matching files, or every file skipped) instead of hitting an IndexError below.
if not all_trial_features:
    raise RuntimeError(f"Tidak ada trial valid yang dimuat dari folder: {folder_path}")

print("Contoh shape trial pertama:", all_trial_features[0].shape)


🧩 Cell 5 – Hitung N_MAX_FRAMES & build normalisasi global (Z-score)

In [None]:
# Cell 5: Sequence-length analysis and global normalisation parameters

# Number of frames in each trial
lengths = np.array([trial.shape[0] for trial in all_trial_features], dtype=np.int32)
pct90 = np.percentile(lengths, 90)
pct95 = np.percentile(lengths, 95)

print("Statistik panjang frame per trial:")
for caption, stat in (
    ("  min  :", lengths.min()),
    ("  max  :", lengths.max()),
    ("  mean :", lengths.mean()),
    ("  90th :", pct90),
    ("  95th :", pct95),
):
    print(caption, stat)

# Target sequence length: the 95th percentile, clamped to [64, 512]
# (replace with an explicit number if you prefer)
N_MAX_FRAMES = int(np.clip(pct95, 64, 512))
print("N_MAX_FRAMES yang dipakai:", N_MAX_FRAMES)

# Global z-score parameters from every real (un-padded) frame of every trial.
# NOTE(review): these statistics use all trials, including the ones the later
# split assigns to validation/test - confirm this is acceptable.
all_frames_concat = np.vstack(all_trial_features)  # shape (sum_N, n_features)
feature_means = np.mean(all_frames_concat, axis=0)
feature_stds = np.std(all_frames_concat, axis=0) + 1e-8  # epsilon keeps the divisor non-zero

print("Feature means:", feature_means)
print("Feature stds :", feature_stds)


🧩 Cell 6 – Fungsi pad/crop & bangun X, y final

In [None]:
# Cell 6: Fungsi padding/cropping & membangun X, y dalam bentuk numpy array

def normalize_features(mat: np.ndarray, means: np.ndarray, stds: np.ndarray) -> np.ndarray:
    """
    Apply a feature-wise z-score: subtract `means`, then divide by `stds`.

    The caller supplies the statistics; nothing is estimated here.
    """
    centered = mat - means
    return centered / stds


def pad_or_crop_sequence(mat: np.ndarray, N_max: int, pad_value: float = PAD_VALUE_AFTER_NORM) -> np.ndarray:
    """
    Bring a sequence to exactly N_max rows.

    Args:
        mat: (N_frame, n_features) array, already normalised
        N_max: target number of rows
        pad_value: filler written into the rows added at the end

    Returns:
        (N_max, n_features) array:
          - N_frame == N_max: `mat` itself, unchanged
          - N_frame >  N_max: centre crop (the middle N_max rows)
          - N_frame <  N_max: `mat` followed by rows filled with pad_value
    """
    n_rows, n_cols = mat.shape
    surplus = n_rows - N_max

    if surplus == 0:
        return mat

    if surplus > 0:
        # Centre crop: drop roughly half of the surplus on each side
        first = surplus // 2
        return mat[first:first + N_max, :]

    # Too short: pad at the tail
    padded = np.full((N_max, n_cols), pad_value, dtype=mat.dtype)
    padded[:n_rows, :] = mat
    return padded


# Assemble the final X and y arrays
n_trials = len(all_trial_features)
n_features = len(FEATURE_NAMES)

y = np.array(all_labels, dtype=np.int32)
X = np.zeros((n_trials, N_MAX_FRAMES, n_features), dtype=np.float32)

# Normalise each trial with the global statistics, then pad/crop it to N_MAX_FRAMES
for i in range(n_trials):
    mat = all_trial_features[i]
    mat_norm = normalize_features(mat, feature_means, feature_stds)
    X[i] = pad_or_crop_sequence(mat_norm, N_MAX_FRAMES, pad_value=PAD_VALUE_AFTER_NORM)

print("Shape X:", X.shape)  # (n_trial, N_max_frames, n_features)
print("Shape y:", y.shape)


🧩 Cell 7 – Train/Val/Test split (70/15/15, stratified subject)

In [None]:
# Cell 7: Split into train/val/test 70/15/15 (stratified by subject label)
# NOTE(review): X was normalised with statistics computed over all trials before
# this split, so val/test frames contributed to those statistics - confirm intent.

# Step 1: train (70%) vs temp (30%), stratified on y
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=SEED,
    stratify=y,
)

# Step 2: split temp (30%) in half into val (15%) and test (15%)
# 0.5 * 30% = 15%
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.50,
    random_state=SEED,
    stratify=y_temp,
)

# Report the resulting array shapes of each split
print("Train shape:", X_train.shape, y_train.shape)
print("Val   shape:", X_val.shape, y_val.shape)
print("Test  shape:", X_test.shape, y_test.shape)

# Cek distribusi label per split
def label_dist(name, arr):
    """Print how many times each distinct label occurs in `arr`, under a heading built from `name`."""
    classes, freqs = np.unique(arr, return_counts=True)
    print(f"\n{name} label distribution:")
    for idx in range(len(classes)):
        print(f"  label {classes[idx]}: {freqs[idx]}")

# Print the label counts of every split, in train / val / test order
for split_name, split_labels in (("Train", y_train), ("Val", y_val), ("Test", y_test)):
    label_dist(split_name, split_labels)


🧩 Cell 8 – Definisi model PointNet-style (spatial only)

In [None]:
# Cell 8: Definisi model PointNet-style (per-frame MLP + global max pooling)

def build_pointnet_spatial_model(
    n_frames: int,
    n_features: int,
    n_classes: int = 3,
) -> tf.keras.Model:
    """
    Build the PointNet-style classifier: a per-frame MLP shared across frames,
    a global max-pool over the frame axis, then a small classification head.

    Args:
        n_frames   : number of frames per sequence (N_MAX_FRAMES)
        n_features : number of features per frame (len(FEATURE_NAMES))
        n_classes  : number of output classes (Afi/Kinan/Miftah = 3)

    Returns:
        An uncompiled Keras model named "PointNet_Spatial_Only" mapping
        (batch, n_frames, n_features) to (batch, n_classes) softmax scores.

    NOTE(review): no masking is applied, so padded frames go through the shared
    MLP and BatchNormalization like real frames - confirm they really lose in
    the max-pool as the padding value intends.
    """
    inputs = layers.Input(shape=(n_frames, n_features), name="spatial_input")

    # Shared per-frame MLP: Dense + BatchNorm, each wrapped in TimeDistributed
    per_frame = inputs
    for width in (64, 128, 256):
        per_frame = layers.TimeDistributed(layers.Dense(width, activation="relu"))(per_frame)
        per_frame = layers.TimeDistributed(layers.BatchNormalization())(per_frame)

    # Global max pooling over the frame axis -> (batch, 256)
    pooled = layers.GlobalMaxPooling1D()(per_frame)

    # Classification head
    head = layers.Dense(128, activation="relu")(pooled)
    head = layers.Dropout(0.3)(head)
    outputs = layers.Dense(n_classes, activation="softmax", name="class_output")(head)

    return models.Model(inputs=inputs, outputs=outputs, name="PointNet_Spatial_Only")


# One output class per subject in the mapping
n_classes = len(SUBJECT_TO_LABEL)

# Arguments are (n_frames, n_features, n_classes)
model = build_pointnet_spatial_model(N_MAX_FRAMES, len(FEATURE_NAMES), n_classes)

model.summary()


🧩 Cell 9 – Compile & Training

In [None]:
# Cell 9: Compile and train the model

# Hyper-parameters
learning_rate = 1e-3
batch_size = 16
n_epochs = 50

# File that receives the best full model (lowest val_loss) during training
checkpoint_path = "pointnet_spatial_only_best.h5"

# Callbacks: early stopping plus best-model checkpoint, both watching val_loss.
# NOTE(review): whether the in-memory model ends up with the best weights when
# training runs all epochs without stopping early depends on the Keras version -
# confirm, or reload checkpoint_path before evaluating.
cb_early = callbacks.EarlyStopping(monitor="val_loss", patience=8, restore_best_weights=True)
cb_ckpt = callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Integer labels -> sparse categorical cross-entropy
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizers.Adam(learning_rate=learning_rate),
    metrics=["accuracy"],
)

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    validation_data=(X_val, y_val),
    callbacks=[cb_early, cb_ckpt],
    verbose=1,
)

print("Training selesai. Best model disimpan ke:", checkpoint_path)


In [None]:
# Training curves: loss and accuracy per epoch for the train and validation sets

history_dict = history.history

train_loss = history_dict.get("loss", [])
val_loss   = history_dict.get("val_loss", [])
train_acc  = history_dict.get("accuracy", [])
val_acc    = history_dict.get("val_accuracy", [])

epochs = range(1, len(train_loss) + 1)

fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# One panel per metric: (train values, val values, legend suffix, y-label / title suffix)
panels = [
    (train_loss, val_loss, "Loss", "Loss"),
    (train_acc, val_acc, "Acc", "Accuracy"),
]

for ax, (train_values, val_values, short_name, long_name) in zip(axes, panels):
    for prefix, values in (("Train", train_values), ("Val", val_values)):
        # A metric missing from the history falls back to []; plotting it against
        # `epochs` would raise a length-mismatch error, so such a series is skipped.
        if len(values) == len(epochs):
            ax.plot(epochs, values, label=f"{prefix} {short_name}")
    ax.set(xlabel="Epoch", ylabel=long_name, title=f"Training & Validation {long_name}")
    ax.grid(True, linestyle="--", alpha=0.5)
    ax.legend()

fig.tight_layout()
plt.show()


🧩 Cell 10 – Evaluasi di test set

In [None]:
# Cell 10: Evaluate on the test set

test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test loss: {test_loss:.4f}")
print(f"Test acc : {test_acc:.4f}")

# Class names ordered by label id, derived from SUBJECT_TO_LABEL so the report
# cannot drift from the mapping and keeps one row/column per class even when a
# class does not occur in the test split.
label_names = sorted(SUBJECT_TO_LABEL, key=SUBJECT_TO_LABEL.get)
labels_order = [SUBJECT_TO_LABEL[name] for name in label_names]

# Predicted class = index of the highest softmax score
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

label_legend = ", ".join(f"{SUBJECT_TO_LABEL[name]}={name}" for name in label_names)
print(f"\nClassification report (label {label_legend}):")
print(
    classification_report(
        y_test,
        y_pred,
        labels=labels_order,
        target_names=label_names,
        digits=4,
    )
)

# Rows are true labels, columns are predicted labels, both in labels_order
print("Confusion matrix:")
print(confusion_matrix(y_test, y_pred, labels=labels_order))


In [None]:
# Confusion-matrix figure for the test set

# Class names ordered by label id, taken from SUBJECT_TO_LABEL rather than
# repeated by hand, so the axis ticks always match the label mapping.
label_names = sorted(SUBJECT_TO_LABEL, key=SUBJECT_TO_LABEL.get)
labels_order = [SUBJECT_TO_LABEL[name] for name in label_names]

cm = confusion_matrix(y_test, y_pred, labels=labels_order)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_names)
fig, ax = plt.subplots(figsize=(5, 5))
disp.plot(ax=ax, cmap="Blues", colorbar=True)

# Set title and labels on the matrix axes explicitly instead of on pyplot's
# "current" axes.
ax.set(
    title="Confusion Matrix – Test Set",
    xlabel="Predicted label",
    ylabel="True label",
)
fig.tight_layout()
plt.show()


🧩 Cell 11 – Simpan model final (opsional jika mau override)

In [None]:
# Save the model as it currently is in memory to a separate file from the
# checkpoint written during training.
# NOTE(review): ".h5" is the legacy HDF5 format - confirm it is still the desired
# format for the installed Keras version.
final_model_path = "pointnet_spatial_only_final.h5"
model.save(final_model_path)
print("Model final disimpan ke:", final_model_path)
