ðŸŸ¦ Cell 1 â€“ Import & konfigurasi folder

In [10]:
# Cell 1: Import & konfigurasi path

import os
import numpy as np
import pandas as pd

# Folder input: hasil Tahap 3 (gait + steps)
FOLDER_GAITFRAME_STEPS = r"E:\1.Clustering_TA\dataset\gaitframe_steps"

# Folder output utama untuk fitur final v1 (tanpa filter)
FOLDER_GAIT_FEATURES_FINAL_ROOT = r"E:\1.Clustering_TA\dataset\gait_features_final_v1_nofilter"

os.makedirs(FOLDER_GAIT_FEATURES_FINAL_ROOT, exist_ok=True)

print("Input gait_with_steps  :", FOLDER_GAITFRAME_STEPS)
print("Output root (final v1) :", FOLDER_GAIT_FEATURES_FINAL_ROOT)


Input gait_with_steps  : E:\1.Clustering_TA\dataset\gaitframe_steps
Output root (final v1) : E:\1.Clustering_TA\dataset\gait_features_final_v1_nofilter


Fungsi inti: hitung fitur gait final (TANPA filter interval)

In [11]:
# Cell 3: Hitung fitur gait final dari satu DataFrame gait_with_steps (tanpa filter interval)

def compute_final_gait_features_nofilter(df_gait):
    """
    Menghitung fitur gait final v1 (tanpa filter interval):
    - step_count
    - duration_walk_sec  (antara step pertama & terakhir)
    - mean_step_time     (dari semua interval step, tanpa filter)
    - step_time_std
    - cadence_spm
    - distance_2d_m      (perpindahan 2D torso_x, torso_y antara step pertama & terakhir)
    - walking_speed_ms
    - step_length_m

    Catatan: semua interval step dipakai apa adanya (NO filtering).
    """

    df = df_gait.copy()

    # Pastikan timestamp_sec ada; kalau belum, buat dari kolom 'timestamp'
    if "timestamp_sec" not in df.columns:
        if "timestamp" not in df.columns:
            raise ValueError("DataFrame tidak punya 'timestamp_sec' maupun 'timestamp'")
        df["timestamp_sec"] = df["timestamp"].apply(timestamp_to_seconds)

    # Cek kolom penting
    REQUIRED_COLS = ["timestamp_sec", "step_event", "torso_x", "torso_y"]
    for c in REQUIRED_COLS:
        if c not in df.columns:
            raise ValueError(f"Kolom wajib '{c}' tidak ada di DataFrame")

    # Ambil step times (tanpa filter)
    step_mask = df["step_event"] == 1
    step_times = df.loc[step_mask, "timestamp_sec"].dropna().values

    step_count = int(len(step_times))

    # Default nilai kalau step kurang dari 2
    if step_count < 2:
        return {
            "step_count": step_count,
            "duration_walk_sec": np.nan,
            "mean_step_time": np.nan,
            "step_time_std": np.nan,
            "cadence_spm": np.nan,
            "distance_2d_m": np.nan,
            "walking_speed_ms": np.nan,
            "step_length_m": np.nan,
        }

    # Interval antar step (RAW, tanpa filter)
    intervals = np.diff(step_times)  # detik

    if len(intervals) > 0:
        mean_step_time = float(np.mean(intervals))
        step_time_std = float(np.std(intervals, ddof=1)) if len(intervals) > 1 else 0.0
    else:
        mean_step_time = np.nan
        step_time_std = np.nan

    # Durasi berjalan: antara step pertama & terakhir
    t_start = float(step_times[0])
    t_end   = float(step_times[-1])
    duration_walk_sec = t_end - t_start if t_end > t_start else np.nan

    # Cadence (tanpa filter)
    if duration_walk_sec is not None and duration_walk_sec > 0 and not np.isnan(duration_walk_sec):
        cadence_spm = step_count * 60.0 / duration_walk_sec
    else:
        cadence_spm = np.nan

    # ---------- Distance 2D (X,Y) ----------
    x_steps = df.loc[step_mask, "torso_x"].dropna().values
    y_steps = df.loc[step_mask, "torso_y"].dropna().values

    if len(x_steps) < 2 or len(y_steps) < 2:
        distance_2d_m = np.nan
        walking_speed_ms = np.nan
        step_length_m = np.nan
    else:
        x_start, x_end = float(x_steps[0]), float(x_steps[-1])
        y_start, y_end = float(y_steps[0]), float(y_steps[-1])

        dx = x_end - x_start
        dy = y_end - y_start

        distance_2d_m = float(np.sqrt(dx**2 + dy**2))

        # Walking speed (m/s)
        if duration_walk_sec is not None and duration_walk_sec > 0 and not np.isnan(duration_walk_sec):
            walking_speed_ms = distance_2d_m / duration_walk_sec
        else:
            walking_speed_ms = np.nan

        # Step length (m) ~ jarak total / (step_count - 1)
        if step_count > 1 and not np.isnan(distance_2d_m):
            step_length_m = distance_2d_m / (step_count - 1)
        else:
            step_length_m = np.nan

    return {
        "step_count": step_count,
        "duration_walk_sec": duration_walk_sec,
        "mean_step_time": mean_step_time,
        "step_time_std": step_time_std,
        "cadence_spm": cadence_spm,
        "distance_2d_m": distance_2d_m,
        "walking_speed_ms": walking_speed_ms,
        "step_length_m": step_length_m,
    }


ðŸŸ¦ Cell 4 â€“ Proses satu file _gait_with_steps.csv

In [12]:
# Cell 4: Proses satu file gait_with_steps â†’ satu baris fitur

def process_single_gaitfile_final_nofilter(file_path):
    """
    Membaca satu file *_gait_with_steps.csv dan menghitung
    fitur gait final v1 (tanpa filter interval).
    Mengembalikan satu dict (satu baris).
    """

    base_name = os.path.basename(file_path)
    # Contoh: Afi_Jalan8_gait_with_steps.csv â†’ Afi_Jalan8
    trial_name = base_name.replace("_gait_with_steps.csv", "")

    # subject = prefix sebelum "_" pertama â†’ "Afi_Jalan8" â†’ "Afi"
    parts = trial_name.split("_")
    subject = parts[0] if len(parts) > 0 else "UNKNOWN"
    trial_id = "_".join(parts[1:]) if len(parts) > 1 else ""

    print(f"\n=== Proses trial: {trial_name} ===")
    print("File:", file_path)

    df = pd.read_csv(file_path)

    feats = compute_final_gait_features_nofilter(df)

    row = {
        "subject": subject,
        "trial": trial_name,
        "trial_id": trial_id,
    }
    row.update(feats)

    return row


ðŸŸ¦ Cell 5 â€“ Proses semua file & simpan per-subjek + global

In [13]:
# Cell 5: Proses semua *_gait_with_steps.csv dan simpan:
# - satu file global: gait_features_final_all_v1_nofilter.csv
# - satu file per subject: {subject}_gait_features_5_v1_nofilter.csv

def process_all_final_gait_features_nofilter(
    folder_steps=FOLDER_GAITFRAME_STEPS,
    root_out=FOLDER_GAIT_FEATURES_FINAL_ROOT
):
    rows = []
    files = sorted(os.listdir(folder_steps))

    # Kumpulkan semua baris
    for fname in files:
        if not fname.endswith("_gait_with_steps.csv"):
            continue

        fpath = os.path.join(folder_steps, fname)

        try:
            row = process_single_gaitfile_final_nofilter(fpath)
            rows.append(row)
        except Exception as e:
            print(f"ERROR pada file {fname}: {e}")

    if not rows:
        print("Tidak ada file *_gait_with_steps.csv yang berhasil diproses.")
        return pd.DataFrame()

    df_all = pd.DataFrame(rows)

    # Simpan file global
    global_out_path = os.path.join(root_out, "gait_features_final_all_v1_nofilter.csv")
    df_all.to_csv(global_out_path, index=False)
    print("\n=== Global summary disimpan ===")
    print(global_out_path)

    # Simpan per subject
    subjects = df_all["subject"].unique()
    for subj in subjects:
        df_subj = df_all[df_all["subject"] == subj].copy()

        subj_folder = os.path.join(root_out, subj)
        os.makedirs(subj_folder, exist_ok=True)

        subj_out_path = os.path.join(subj_folder, f"{subj}_gait_features_5_v1_nofilter.csv")
        df_subj.to_csv(subj_out_path, index=False)

        print(f"  -> Simpan fitur final v1 (tanpa filter) untuk subject {subj}:")
        print(f"     {subj_out_path}")

    return df_all


ðŸŸ¦ Cell 6 â€“ Eksekusi & cek hasil

In [14]:
# Cell 6: Jalankan dan lihat beberapa baris hasil

df_final_v1 = process_all_final_gait_features_nofilter()
df_final_v1.head()



=== Proses trial: Afi_Jalan10 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan10_gait_with_steps.csv

=== Proses trial: Afi_Jalan11 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan11_gait_with_steps.csv

=== Proses trial: Afi_Jalan12 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan12_gait_with_steps.csv

=== Proses trial: Afi_Jalan13 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan13_gait_with_steps.csv

=== Proses trial: Afi_Jalan14 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan14_gait_with_steps.csv

=== Proses trial: Afi_Jalan15 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan15_gait_with_steps.csv

=== Proses trial: Afi_Jalan16 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan16_gait_with_steps.csv

=== Proses trial: Afi_Jalan17 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan17_gait_with_steps.csv

=== Proses trial: Afi_Jalan18 ===
File: E:\1.Clustering_TA\dataset\gait

Unnamed: 0,subject,trial,trial_id,step_count,duration_walk_sec,mean_step_time,step_time_std,cadence_spm,distance_2d_m,walking_speed_ms,step_length_m
0,Afi,Afi_Jalan10,Jalan10,6,1.708814,0.341763,0.060461,210.672431,1.587076,0.928759,0.317415
1,Afi,Afi_Jalan11,Jalan11,6,2.574859,0.514972,0.345293,139.813481,2.669618,1.036802,0.533924
2,Afi,Afi_Jalan12,Jalan12,7,2.927619,0.487937,0.491226,143.461291,2.86054,0.977087,0.476757
3,Afi,Afi_Jalan13,Jalan13,4,1.878781,0.62626,0.559467,127.742403,2.696253,1.435108,0.898751
4,Afi,Afi_Jalan14,Jalan14,2,3.849878,3.849878,0.0,31.169819,2.140821,0.556075,2.140821


ðŸŸ¦ Cell 7 â€“ Rapikan urutan trial (Jalan1, Jalan2, â€¦) dan simpan versi sorted

In [15]:
# Cell 7: Rapikan urutan trial berdasarkan angka di trial_id (mis: Jalan1, Jalan2, ...)

df_eval = df_final_v1.copy()

# Tambah kolom trial_num dari trial_id, mis "Jalan10" -> 10
df_eval["trial_num"] = (
    df_eval["trial_id"]
    .str.extract(r"(\d+)", expand=False)
    .astype(float)  # pakai float dulu biar kalau ada NaN tidak error
)

# Kalau ada trial tanpa angka (NaN), kita taruh di belakang
df_eval_sorted = (
    df_eval
    .sort_values(["subject", "trial_num"], na_position="last")
    .reset_index(drop=True)
)

print("Total baris:", len(df_eval_sorted))
df_eval_sorted.head(10)


Total baris: 216


Unnamed: 0,subject,trial,trial_id,step_count,duration_walk_sec,mean_step_time,step_time_std,cadence_spm,distance_2d_m,walking_speed_ms,step_length_m,trial_num
0,Afi,Afi_Jalan1,Jalan1,3,0.714816,0.357408,0.040571,251.813054,1.180483,1.651451,0.590242,1.0
1,Afi,Afi_Jalan2,Jalan2,6,2.083814,0.416763,0.253368,172.760141,0.732601,0.351567,0.14652,2.0
2,Afi,Afi_Jalan3,Jalan3,5,1.597363,0.399341,0.052022,187.809534,3.081816,1.929315,0.770454,3.0
3,Afi,Afi_Jalan4,Jalan4,2,0.39193,0.39193,0.0,306.177123,4.584173,11.696408,4.584173,4.0
4,Afi,Afi_Jalan5,Jalan5,5,1.971074,0.492769,0.293835,152.201287,2.492903,1.264743,0.623226,5.0
5,Afi,Afi_Jalan6,Jalan6,4,1.152331,0.38411,0.146658,208.273491,3.92893,3.40955,1.309643,6.0
6,Afi,Afi_Jalan7,Jalan7,7,2.924984,0.487497,0.443471,143.590529,2.54859,0.871318,0.424765,7.0
7,Afi,Afi_Jalan8,Jalan8,5,1.221945,0.305486,0.035183,245.510232,1.633387,1.336711,0.408347,8.0
8,Afi,Afi_Jalan9,Jalan9,5,3.134482,0.783621,0.952409,95.709594,0.781561,0.249343,0.19539,9.0
9,Afi,Afi_Jalan10,Jalan10,6,1.708814,0.341763,0.060461,210.672431,1.587076,0.928759,0.317415,10.0


In [16]:
from pathlib import Path

global_sorted_path = Path(FOLDER_GAIT_FEATURES_FINAL_ROOT) / "gait_features_final_all_v1_nofilter_sorted.csv"
df_eval_sorted.to_csv(global_sorted_path, index=False)
print("Global sorted disimpan di:", global_sorted_path)


Global sorted disimpan di: E:\1.Clustering_TA\dataset\gait_features_final_v1_nofilter\gait_features_final_all_v1_nofilter_sorted.csv


In [17]:
# Cell 8: Sanity check fitur utama

df = df_eval_sorted.copy()

def count_true(series):
    return int(series.sum())

print("=== SANITY CHECK GLOBAL ===")
print("Total trial                :", len(df))
print("Step count < 2             :", count_true(df["step_count"] < 2))
print("Durasi_walk <= 0 (detik)   :", count_true(df["duration_walk_sec"] <= 0))
print("Distance_2d < 0.2 m        :", count_true(df["distance_2d_m"] < 0.2))
print("Speed > 3.0 m/s            :", count_true(df["walking_speed_ms"] > 3.0))
print("Speed < 0.1 m/s (hampir diam):", count_true((df["walking_speed_ms"] > 0) & (df["walking_speed_ms"] < 0.1)))

print("\n=== SANITY CHECK PER SUBJECT ===")
subjects = df["subject"].unique()
for subj in subjects:
    dsub = df[df["subject"] == subj]
    print(f"\n-- {subj} --")
    print("  Total trial           :", len(dsub))
    print("  Step count < 2        :", count_true(dsub["step_count"] < 2))
    print("  Durasi_walk <= 0      :", count_true(dsub["duration_walk_sec"] <= 0))
    print("  Distance_2d < 0.2 m   :", count_true(dsub["distance_2d_m"] < 0.2))
    print("  Speed > 3.0 m/s       :", count_true(dsub["walking_speed_ms"] > 3.0))


=== SANITY CHECK GLOBAL ===
Total trial                : 216
Step count < 2             : 1
Durasi_walk <= 0 (detik)   : 0
Distance_2d < 0.2 m        : 5
Speed > 3.0 m/s            : 7
Speed < 0.1 m/s (hampir diam): 33

=== SANITY CHECK PER SUBJECT ===

-- Afi --
  Total trial           : 72
  Step count < 2        : 0
  Durasi_walk <= 0      : 0
  Distance_2d < 0.2 m   : 4
  Speed > 3.0 m/s       : 4

-- Kinan --
  Total trial           : 72
  Step count < 2        : 0
  Durasi_walk <= 0      : 0
  Distance_2d < 0.2 m   : 0
  Speed > 3.0 m/s       : 0

-- Miftah --
  Total trial           : 72
  Step count < 2        : 1
  Durasi_walk <= 0      : 0
  Distance_2d < 0.2 m   : 1
  Speed > 3.0 m/s       : 3


In [18]:
# Cell 9: Statistik ringkas per subject untuk fitur utama

df = df_eval_sorted.copy()

cols_stats = [
    "step_count",
    "mean_step_time",
    "step_time_std",
    "cadence_spm",
    "walking_speed_ms",
    "step_length_m",
    "duration_walk_sec",
    "distance_2d_m",
]

group_stats = (
    df
    .groupby("subject")[cols_stats]
    .agg(["mean", "std", "min", "max"])
)

group_stats


Unnamed: 0_level_0,step_count,step_count,step_count,step_count,mean_step_time,mean_step_time,mean_step_time,mean_step_time,step_time_std,step_time_std,...,step_length_m,step_length_m,duration_walk_sec,duration_walk_sec,duration_walk_sec,duration_walk_sec,distance_2d_m,distance_2d_m,distance_2d_m,distance_2d_m
Unnamed: 0_level_1,mean,std,min,max,mean,std,min,max,mean,std,...,min,max,mean,std,min,max,mean,std,min,max
subject,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Afi,8.611111,5.572962,2,26,0.640239,0.43244,0.277648,3.849878,0.524061,0.366929,...,0.016241,4.584173,4.91094,4.191356,0.331384,16.689015,1.692213,1.124357,0.087864,4.584173
Kinan,9.013889,4.570244,3,27,0.640795,0.271939,0.274138,1.781407,0.640947,0.378546,...,0.018205,2.04586,4.981714,3.426727,1.048218,17.815456,1.980562,0.998385,0.294696,4.135856
Miftah,9.027778,5.202849,1,27,0.637584,0.230223,0.275882,1.470518,0.597474,0.359306,...,0.002914,2.941452,5.410872,4.15956,0.603805,18.758836,2.096936,1.30369,0.03788,5.882904
