ðŸŸ¦ Cell 1 â€“ Import & konfigurasi folder

In [30]:
import os
import numpy as np
import pandas as pd

# Folder input: hasil Tahap 3 (gait + steps) â€“ SAMA seperti V1
FOLDER_GAITFRAME_STEPS = r"E:\1.Clustering_TA\dataset\gaitframe_steps"

# Folder output untuk V2 (baru)
FOLDER_GAIT_FEATURES_FINAL_ROOT_V2 = r"E:\1.Clustering_TA\dataset\gait_features_final_v2_filtered"
os.makedirs(FOLDER_GAIT_FEATURES_FINAL_ROOT_V2, exist_ok=True)

print("Input gait_with_steps  :", FOLDER_GAITFRAME_STEPS)
print("Output root V2         :", FOLDER_GAIT_FEATURES_FINAL_ROOT_V2)



Input gait_with_steps  : E:\1.Clustering_TA\dataset\gaitframe_steps
Output root V2         : E:\1.Clustering_TA\dataset\gait_features_final_v2_filtered


ðŸŸ¦ Cell 2 â€“ Helper: konversi timestamp (pakai lagi)

In [31]:
def timestamp_to_seconds(ts):
    """
    Mengubah string timestamp "HH:MM:SS.micro" menjadi detik (float).
    Kalau gagal parse â†’ np.nan
    """
    if pd.isna(ts):
        return np.nan
    try:
        hms = str(ts).split(":")
        if len(hms) != 3:
            return np.nan
        h = int(hms[0])
        m = int(hms[1])
        s = float(hms[2])
        return h * 3600 + m * 60 + s
    except Exception:
        return np.nan


ðŸŸ¦ Cell 3 â€“ Fungsi helper filter interval (MAD)

In [32]:
def _filter_intervals_mad(intervals,
                          min_step=0.25,
                          max_step=2.0,
                          k_mad=3.0):
    """
    Filter interval langkah dengan 2 tahap:
    1) Hard range [min_step, max_step]
    2) MAD (Median Absolute Deviation) di sekitar median interval
    """
    intervals = np.asarray(intervals, dtype=float)

    # Hard range
    base_mask = (intervals >= min_step) & (intervals <= max_step)
    if base_mask.sum() < 2:
        # terlalu sedikit, pakai hard range saja
        return base_mask

    valid_vals = intervals[base_mask]
    median = np.median(valid_vals)
    mad = np.median(np.abs(valid_vals - median))

    if mad == 0:
        return base_mask

    robust_z = np.abs(valid_vals - median) / (mad + 1e-8)
    keep_valid = robust_z <= k_mad

    final_mask = np.zeros_like(intervals, dtype=bool)
    final_mask[base_mask] = keep_valid

    return final_mask


ðŸŸ¦ Cell 4 â€“ Fungsi inti V2

In [33]:
def compute_final_gait_features_v2(df_gait):
    """
    V2: fitur gait dengan filtering interval + validasi trial.
    Menghasilkan:
    - step_count_raw
    - step_count_valid
    - duration_walk_sec
    - mean_step_time
    - step_time_std
    - cadence_spm
    - distance_2d_m
    - walking_speed_ms
    - step_length_m
    - is_valid_trial
    """

    df = df_gait.copy()

    # Pastikan timestamp_sec ada
    if "timestamp_sec" not in df.columns:
        if "timestamp" not in df.columns:
            raise ValueError("DataFrame tidak punya 'timestamp_sec' maupun 'timestamp'")
        df["timestamp_sec"] = df["timestamp"].apply(timestamp_to_seconds)

    REQUIRED_COLS = ["timestamp_sec", "step_event", "torso_x", "torso_y"]
    for c in REQUIRED_COLS:
        if c not in df.columns:
            raise ValueError(f"Kolom wajib '{c}' tidak ada di DataFrame: {c}")

    # Ambil step times
    step_mask = df["step_event"] == 1
    ts_steps = df.loc[step_mask, "timestamp_sec"].dropna().values
    idx_steps = df.index[step_mask].values

    step_count_raw = int(len(ts_steps))

    if step_count_raw < 2:
        return {
            "step_count_raw": step_count_raw,
            "step_count_valid": step_count_raw,
            "duration_walk_sec": np.nan,
            "mean_step_time": np.nan,
            "step_time_std": np.nan,
            "cadence_spm": np.nan,
            "distance_2d_m": np.nan,
            "walking_speed_ms": np.nan,
            "step_length_m": np.nan,
            "is_valid_trial": 0,
        }

    intervals = np.diff(ts_steps)

    mask_valid_int = _filter_intervals_mad(intervals,
                                           min_step=0.25,
                                           max_step=2.0,
                                           k_mad=3.0)

    if mask_valid_int.sum() == 0:
        return {
            "step_count_raw": step_count_raw,
            "step_count_valid": 0,
            "duration_walk_sec": np.nan,
            "mean_step_time": np.nan,
            "step_time_std": np.nan,
            "cadence_spm": np.nan,
            "distance_2d_m": np.nan,
            "walking_speed_ms": np.nan,
            "step_length_m": np.nan,
            "is_valid_trial": 0,
        }

    valid_idx_pairs = np.where(mask_valid_int)[0]
    first_valid_step_idx = valid_idx_pairs[0]
    last_valid_step_idx = valid_idx_pairs[-1] + 1

    ts_steps_valid = ts_steps[first_valid_step_idx:last_valid_step_idx+1]
    idx_steps_valid = idx_steps[first_valid_step_idx:last_valid_step_idx+1]

    step_count_valid = int(len(ts_steps_valid))

    if step_count_valid < 2:
        return {
            "step_count_raw": step_count_raw,
            "step_count_valid": step_count_valid,
            "duration_walk_sec": np.nan,
            "mean_step_time": np.nan,
            "step_time_std": np.nan,
            "cadence_spm": np.nan,
            "distance_2d_m": np.nan,
            "walking_speed_ms": np.nan,
            "step_length_m": np.nan,
            "is_valid_trial": 0,
        }

    intervals_valid = np.diff(ts_steps_valid)

    mean_step_time = float(np.mean(intervals_valid))
    step_time_std = float(np.std(intervals_valid, ddof=1)) if len(intervals_valid) > 1 else 0.0

    t_start = float(ts_steps_valid[0])
    t_end = float(ts_steps_valid[-1])
    duration_walk_sec = t_end - t_start if t_end > t_start else np.nan

    if duration_walk_sec is not None and duration_walk_sec > 0 and not np.isnan(duration_walk_sec):
        cadence_spm = step_count_valid * 60.0 / duration_walk_sec
    else:
        cadence_spm = np.nan

    x_steps_valid = df.loc[idx_steps_valid, "torso_x"].values
    y_steps_valid = df.loc[idx_steps_valid, "torso_y"].values

    if len(x_steps_valid) < 2 or len(y_steps_valid) < 2:
        distance_2d_m = np.nan
        walking_speed_ms = np.nan
        step_length_m = np.nan
    else:
        x_start, x_end = float(x_steps_valid[0]), float(x_steps_valid[-1])
        y_start, y_end = float(y_steps_valid[0]), float(y_steps_valid[-1])

        dx = x_end - x_start
        dy = y_end - y_start
        distance_2d_m = float(np.sqrt(dx**2 + dy**2))

        if duration_walk_sec is not None and duration_walk_sec > 0 and not np.isnan(duration_walk_sec):
            walking_speed_ms = distance_2d_m / duration_walk_sec
        else:
            walking_speed_ms = np.nan

        if step_count_valid > 1 and not np.isnan(distance_2d_m):
            step_length_m = distance_2d_m / (step_count_valid - 1)
        else:
            step_length_m = np.nan

    # Validasi trial
    is_valid = 1
    if step_count_valid < 4:
        is_valid = 0
    if (duration_walk_sec is np.nan) or (duration_walk_sec is None) or (duration_walk_sec < 1.5):
        is_valid = 0
    if (distance_2d_m is np.nan) or (distance_2d_m < 0.5):
        is_valid = 0
    if (mean_step_time < 0.25) or (mean_step_time > 1.8):
        is_valid = 0
    if not np.isnan(walking_speed_ms):
        if (walking_speed_ms < 0.2) or (walking_speed_ms > 3.0):
            is_valid = 0

    return {
        "step_count_raw": step_count_raw,
        "step_count_valid": step_count_valid,
        "duration_walk_sec": duration_walk_sec,
        "mean_step_time": mean_step_time,
        "step_time_std": step_time_std,
        "cadence_spm": cadence_spm,
        "distance_2d_m": distance_2d_m,
        "walking_speed_ms": walking_speed_ms,
        "step_length_m": step_length_m,
        "is_valid_trial": int(is_valid),
    }


ðŸŸ¦ Cell 5 â€“ Proses 1 file (V2)

In [34]:
def process_single_gaitfile_final_v2(file_path):
    """
    Membaca satu file *_gait_with_steps.csv dan menghitung
    fitur gait final V2 (dengan filtering interval + validasi).
    """

    base_name = os.path.basename(file_path)
    trial_name = base_name.replace("_gait_with_steps.csv", "")

    parts = trial_name.split("_")
    subject = parts[0] if len(parts) > 0 else "UNKNOWN"
    trial_id = "_".join(parts[1:]) if len(parts) > 1 else ""

    print(f"\n=== [V2] Proses trial: {trial_name} ===")
    print("File:", file_path)

    df = pd.read_csv(file_path)

    feats = compute_final_gait_features_v2(df)

    row = {
        "subject": subject,
        "trial": trial_name,
        "trial_id": trial_id,
    }
    row.update(feats)

    return row


ðŸŸ¦ Cell 6 â€“ Proses semua file + autosort + simpan

In [35]:
def process_all_final_gait_features_v2(
    folder_steps=FOLDER_GAITFRAME_STEPS,
    root_out=FOLDER_GAIT_FEATURES_FINAL_ROOT_V2
):
    rows = []
    files = sorted(os.listdir(folder_steps))

    for fname in files:
        if not fname.endswith("_gait_with_steps.csv"):
            continue

        fpath = os.path.join(folder_steps, fname)

        try:
            row = process_single_gaitfile_final_v2(fpath)
            rows.append(row)
        except Exception as e:
            print(f"ERROR pada file {fname}: {e}")

    if not rows:
        print("Tidak ada file *_gait_with_steps.csv yang berhasil diproses (V2).")
        return pd.DataFrame()

    df_all = pd.DataFrame(rows)

    df_all["trial_num"] = (
        df_all["trial_id"]
        .str.extract(r"(\d+)", expand=False)
        .astype(float)
    )

    df_all_sorted = (
        df_all
        .sort_values(["subject", "trial_num"], na_position="last")
        .reset_index(drop=True)
    )

    global_out_path = os.path.join(root_out, "gait_features_final_all_v2_filtered_sorted.csv")
    df_all_sorted.to_csv(global_out_path, index=False)
    print("\n=== [V2] Global summary (sorted) disimpan ===")
    print(global_out_path)

    subjects = df_all_sorted["subject"].unique()
    for subj in subjects:
        df_subj = df_all_sorted[df_all_sorted["subject"] == subj].copy()

        subj_folder = os.path.join(root_out, subj)
        os.makedirs(subj_folder, exist_ok=True)

        subj_out_path = os.path.join(subj_folder, f"{subj}_gait_features_5_v2_filtered_sorted.csv")
        df_subj.to_csv(subj_out_path, index=False)

        print(f"  -> [V2] Simpan fitur final untuk subject {subj}:")
        print(f"     {subj_out_path}")

    return df_all_sorted


ðŸŸ¦ Cell 7 â€“ Jalankan V2

In [36]:
df_final_v2 = process_all_final_gait_features_v2()
df_final_v2.head()



=== [V2] Proses trial: Afi_Jalan10 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan10_gait_with_steps.csv

=== [V2] Proses trial: Afi_Jalan11 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan11_gait_with_steps.csv

=== [V2] Proses trial: Afi_Jalan12 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan12_gait_with_steps.csv

=== [V2] Proses trial: Afi_Jalan13 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan13_gait_with_steps.csv

=== [V2] Proses trial: Afi_Jalan14 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan14_gait_with_steps.csv

=== [V2] Proses trial: Afi_Jalan15 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan15_gait_with_steps.csv

=== [V2] Proses trial: Afi_Jalan16 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan16_gait_with_steps.csv

=== [V2] Proses trial: Afi_Jalan17 ===
File: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan17_gait_with_steps.csv

=== [V2] Proses trial: Afi_Jala

Unnamed: 0,subject,trial,trial_id,step_count_raw,step_count_valid,duration_walk_sec,mean_step_time,step_time_std,cadence_spm,distance_2d_m,walking_speed_ms,step_length_m,is_valid_trial,trial_num
0,Afi,Afi_Jalan1,Jalan1,3,3,0.714816,0.357408,0.040571,251.813054,1.180483,1.651451,0.590242,0,1.0
1,Afi,Afi_Jalan2,Jalan2,6,6,2.083814,0.416763,0.253368,172.760141,0.732601,0.351567,0.14652,1,2.0
2,Afi,Afi_Jalan3,Jalan3,5,5,1.597363,0.399341,0.052022,187.809534,3.081816,1.929315,0.770454,1,3.0
3,Afi,Afi_Jalan4,Jalan4,2,2,0.39193,0.39193,0.0,306.177123,4.584173,11.696408,4.584173,0,4.0
4,Afi,Afi_Jalan5,Jalan5,5,4,1.049145,0.349715,0.081992,228.757703,1.700045,1.62041,0.566682,0,5.0


In [37]:
from pathlib import Path

global_sorted_path = Path(FOLDER_GAIT_FEATURES_FINAL_ROOT_V2) / "gait_features_final_v2_filtered.csv"
df_final_v2.to_csv(global_sorted_path, index=False)
print("Global sorted disimpan di:", global_sorted_path)



Global sorted disimpan di: E:\1.Clustering_TA\dataset\gait_features_final_v2_filtered\gait_features_final_v2_filtered.csv


In [38]:
# Cell 8: Sanity check fitur utama (V2)

df = df_final_v2.copy()

def count_true(series):
    # otomatis mengabaikan NaN
    return int(series.fillna(False).sum())

print("=== SANITY CHECK GLOBAL (V2) ===")
print("Total trial                    :", len(df))

# Perhatikan: pakai step_count_valid, bukan step_count
print("Step_count_raw  < 2            :", count_true(df["step_count_raw"] < 2))
print("Step_count_valid < 4           :", count_true(df["step_count_valid"] < 4))

print("Durasi_walk <= 0 (detik)       :", count_true(df["duration_walk_sec"] <= 0))
print("Distance_2d < 0.2 m            :", count_true(df["distance_2d_m"] < 0.2))
print("Speed > 3.0 m/s                :", count_true(df["walking_speed_ms"] > 3.0))
print("Speed 0-0.1 m/s (hampir diam)  :", count_true((df["walking_speed_ms"] > 0) & (df["walking_speed_ms"] < 0.1)))

# Tambahan penting: cek valid vs invalid trial
print("Trial dengan is_valid_trial = 0:", count_true(df["is_valid_trial"] == 0))

print("\n=== SANITY CHECK PER SUBJECT (V2) ===")
subjects = df["subject"].unique()
for subj in subjects:
    dsub = df[df["subject"] == subj]
    print(f"\n-- {subj} --")
    print("  Total trial                 :", len(dsub))
    print("  Step_count_raw  < 2         :", count_true(dsub["step_count_raw"] < 2))
    print("  Step_count_valid < 4        :", count_true(dsub["step_count_valid"] < 4))
    print("  Durasi_walk <= 0 (detik)    :", count_true(dsub["duration_walk_sec"] <= 0))
    print("  Distance_2d < 0.2 m         :", count_true(dsub["distance_2d_m"] < 0.2))
    print("  Speed > 3.0 m/s             :", count_true(dsub["walking_speed_ms"] > 3.0))
    print("  is_valid_trial = 0          :", count_true(dsub["is_valid_trial"] == 0))



=== SANITY CHECK GLOBAL (V2) ===
Total trial                    : 216
Step_count_raw  < 2            : 1
Step_count_valid < 4           : 22
Durasi_walk <= 0 (detik)       : 0
Distance_2d < 0.2 m            : 4
Speed > 3.0 m/s                : 11
Speed 0-0.1 m/s (hampir diam)  : 15
Trial dengan is_valid_trial = 0: 101

=== SANITY CHECK PER SUBJECT (V2) ===

-- Afi --
  Total trial                 : 72
  Step_count_raw  < 2         : 0
  Step_count_valid < 4        : 11
  Durasi_walk <= 0 (detik)    : 0
  Distance_2d < 0.2 m         : 4
  Speed > 3.0 m/s             : 6
  is_valid_trial = 0          : 45

-- Kinan --
  Total trial                 : 72
  Step_count_raw  < 2         : 0
  Step_count_valid < 4        : 4
  Durasi_walk <= 0 (detik)    : 0
  Distance_2d < 0.2 m         : 0
  Speed > 3.0 m/s             : 2
  is_valid_trial = 0          : 28

-- Miftah --
  Total trial                 : 72
  Step_count_raw  < 2         : 1
  Step_count_valid < 4        : 7
  Durasi_walk <= 0 

In [39]:
# Cell 9: Statistik ringkas per subject untuk fitur utama (V2)

df = df_final_v2.copy()

cols_stats = [
    "step_count_raw",
    "step_count_valid",
    "mean_step_time",
    "step_time_std",
    "cadence_spm",
    "walking_speed_ms",
    "step_length_m",
    "duration_walk_sec",
    "distance_2d_m",
]

group_stats = (
    df
    .groupby("subject")[cols_stats]
    .agg(["mean", "std", "min", "max"])
)

group_stats



Unnamed: 0_level_0,step_count_raw,step_count_raw,step_count_raw,step_count_raw,step_count_valid,step_count_valid,step_count_valid,step_count_valid,mean_step_time,mean_step_time,...,step_length_m,step_length_m,duration_walk_sec,duration_walk_sec,duration_walk_sec,duration_walk_sec,distance_2d_m,distance_2d_m,distance_2d_m,distance_2d_m
Unnamed: 0_level_1,mean,std,min,max,mean,std,min,max,mean,std,...,min,max,mean,std,min,max,mean,std,min,max
subject,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Afi,8.611111,5.572962,2,26,7.777778,5.297414,0,25,0.497615,0.180189,...,0.017573,4.584173,3.853434,3.774547,0.331384,15.564541,1.70944,1.035593,0.087864,4.584173
Kinan,9.013889,4.570244,3,27,7.902778,4.431512,2,26,0.473002,0.184591,...,0.031068,4.51818,3.578306,3.153748,0.329145,15.230792,1.715115,0.87559,0.368204,4.51818
Miftah,9.027778,5.202849,1,27,8.027778,4.717281,1,26,0.527695,0.218176,...,0.023131,2.941452,4.188775,3.724705,0.603805,16.940505,2.00922,1.197089,0.231308,5.882904
