🟦 CELL 1 – Setup folder & import (boleh taruh di atas / lanjutkan saja)

In [1]:
import os
import numpy as np
import pandas as pd

# Input folder: output of Stage 3 (gait + steps).
# NOTE(review): absolute Windows path, specific to one machine — adjust before running elsewhere.
FOLDER_GAITFRAME_STEPS = r"E:\1.Clustering_TA\dataset\gaitframe_steps"

# Output folder: gait features per trial.
FOLDER_GAIT_FEATURES = r"E:\1.Clustering_TA\dataset\gait_features"
# Create the output folder up front; exist_ok=True keeps re-running this cell harmless.
os.makedirs(FOLDER_GAIT_FEATURES, exist_ok=True)


🟦 CELL 2 – Fungsi hitung gait fitur dari satu df gait_with_steps

In [2]:
def compute_gait_features_from_gait_df(
    df_gait_steps,
    min_step_time=0.3,
    max_step_time=1.5
):
    """
    Compute basic gait features for a single trial.

    Parameters
    ----------
    df_gait_steps : pandas.DataFrame
        Must provide the columns ``timestamp_sec`` and ``step_event``.
        Rows where ``step_event == 1`` are counted as steps.
    min_step_time, max_step_time : float
        Inclusive lower/upper bound (seconds) for an interval between two
        consecutive steps to be kept; intervals outside are discarded.

    Returns
    -------
    dict
        Keys, in this order:
        ``step_count``          number of rows flagged as a step,
        ``duration_sec``        last minus first non-missing timestamp
                                (NaN when fewer than two timestamps exist),
        ``n_valid_intervals``   how many step intervals passed the filter,
        ``mean_step_time``      mean of the kept intervals,
        ``std_step_time``       sample std (ddof=1) of the kept intervals,
                                0.0 when only one interval was kept,
        ``min_step_time_used``  smallest kept interval,
        ``max_step_time_used``  largest kept interval,
        ``cadence_spm``         step_count * 60 / duration_sec.

        The interval statistics and ``cadence_spm`` stay NaN whenever
        fewer than two steps exist or no interval passes the filter.

    Raises
    ------
    ValueError
        If ``timestamp_sec`` or ``step_event`` is missing from the frame.
    """

    frame = df_gait_steps.copy()

    # Fail on the first required column that is absent (checked in this order).
    c = next(
        (name for name in ("timestamp_sec", "step_event") if name not in frame.columns),
        None,
    )
    if c is not None:
        raise ValueError(f"Kolom '{c}' tidak ditemukan di DataFrame gait_with_steps")

    # Timestamps of every row marked as a step.
    is_step = frame["step_event"] == 1
    step_times = frame.loc[is_step, "timestamp_sec"].values

    # Start from "nothing measurable" defaults; entries are overwritten below.
    features = dict(
        step_count=int(is_step.sum()),
        duration_sec=np.nan,
        n_valid_intervals=0,
        mean_step_time=np.nan,
        std_step_time=np.nan,
        min_step_time_used=np.nan,
        max_step_time_used=np.nan,
        cadence_spm=np.nan,
    )

    # Trial duration: span between the first and last non-missing timestamp.
    observed = frame["timestamp_sec"].dropna()
    if len(observed) > 1:
        features["duration_sec"] = float(observed.max() - observed.min())

    # Zero or one step: there is no interval to measure.
    if len(step_times) < 2:
        return features

    # Time between consecutive steps, in seconds.
    gaps = np.diff(step_times)

    # Physiological filter: drop intervals that are too short or too long.
    in_range = (gaps >= min_step_time) & (gaps <= max_step_time)
    kept = gaps[in_range]
    n_kept = len(kept)

    # Nothing survived the filter: timing features remain NaN.
    if n_kept == 0:
        return features

    # A sample standard deviation needs at least two values.
    spread = float(np.std(kept, ddof=1)) if n_kept > 1 else 0.0

    features.update(
        n_valid_intervals=int(n_kept),
        mean_step_time=float(np.mean(kept)),
        std_step_time=spread,
        min_step_time_used=float(kept.min()),
        max_step_time_used=float(kept.max()),
    )

    # Cadence (steps per minute) from the step count and the trial duration.
    duration = features["duration_sec"]
    # NaN compares False against 0, so this one test also rejects an unknown duration.
    if duration > 0:
        features["cadence_spm"] = float(features["step_count"] * 60.0 / duration)

    return features


🟦 CELL 3 – Fungsi proses satu file

In [3]:
def process_single_gait_with_steps_file(
    file_path,
    min_step_time=0.3,
    max_step_time=1.5
):
    """
    Read one gait_with_steps CSV file and compute its gait features.

    Parameters
    ----------
    file_path : str
        Path to a ``*_gait_with_steps.csv`` file.
    min_step_time, max_step_time : float
        Forwarded unchanged to ``compute_gait_features_from_gait_df``.

    Returns
    -------
    tuple
        ``(trial_name, features)`` where ``trial_name`` is the file's base
        name with ``_gait_with_steps.csv`` removed and ``features`` is the
        dict returned by ``compute_gait_features_from_gait_df``.

    Notes
    -----
    When the CSV has no ``timestamp_sec`` column but does have a
    ``timestamp`` column, ``timestamp_sec`` is derived from it by parsing
    each value as ``H:M:S``; values that cannot be parsed become NaN.
    """

    base_name = os.path.basename(file_path)
    trial_name = base_name.replace("_gait_with_steps.csv", "")

    print(f"\n=== Hitung gait fitur: {trial_name} ===")
    print("Load:", file_path)

    df = pd.read_csv(file_path)

    # Make sure timestamp_sec exists; if not, build it from `timestamp`.
    if "timestamp_sec" not in df.columns and "timestamp" in df.columns:

        def convert_timestamp_to_seconds(ts):
            """Convert an 'H:M:S' string to seconds; NaN if it cannot be parsed."""
            try:
                h, m, s = ts.split(":")
                return int(h) * 3600 + int(m) * 60 + float(s)
            except (AttributeError, ValueError, TypeError):
                # AttributeError: non-string value (e.g. a NaN float has no .split).
                # ValueError: wrong number of ':' parts or non-numeric text.
                # TypeError: a value whose .split rejects a str separator.
                # Anything else (KeyboardInterrupt, ...) is allowed to propagate.
                return np.nan

        df["timestamp_sec"] = df["timestamp"].apply(convert_timestamp_to_seconds)

    feats = compute_gait_features_from_gait_df(
        df,
        min_step_time=min_step_time,
        max_step_time=max_step_time
    )

    return trial_name, feats


🟦 CELL 4 – Proses semua trial & simpan summary fitur

In [4]:
def process_all_gait_features(
    folder_steps=FOLDER_GAITFRAME_STEPS,
    folder_out=FOLDER_GAIT_FEATURES,
    min_step_time=0.3,
    max_step_time=1.5
):
    """
    Compute gait features for every trial file in a folder and save a summary.

    Parameters
    ----------
    folder_steps : str
        Folder scanned for files whose name ends with
        ``_gait_with_steps.csv``; other entries are ignored.
    folder_out : str
        Folder that receives ``gait_features_summary.csv``. It is created
        if it does not exist yet.
    min_step_time, max_step_time : float
        Forwarded unchanged to ``process_single_gait_with_steps_file``.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed trial: a ``trial`` column followed
        by the feature columns. An empty DataFrame (and no CSV) when no file
        could be processed.

    Notes
    -----
    Files are handled in sorted file-name order. A file that raises an
    exception is reported on stdout and skipped, so one bad trial does not
    abort the whole batch.
    """
    rows = []

    for fname in sorted(os.listdir(folder_steps)):
        if not fname.endswith("_gait_with_steps.csv"):
            continue

        path = os.path.join(folder_steps, fname)

        try:
            trial_name, feats = process_single_gait_with_steps_file(
                path,
                min_step_time=min_step_time,
                max_step_time=max_step_time
            )
        except Exception as e:
            # Best effort: report the failing file and carry on with the rest.
            print(f"ERROR pada file {fname}: {e}")
            continue

        rows.append({"trial": trial_name, **feats})

    if not rows:
        print("Tidak ada file gait_with_steps yang diproses.")
        return pd.DataFrame()

    df_features = pd.DataFrame(rows)

    # A caller-supplied folder_out may not exist yet; make sure the write
    # below cannot fail on a missing directory.
    os.makedirs(folder_out, exist_ok=True)

    # Save the summary as CSV.
    summary_path = os.path.join(folder_out, "gait_features_summary.csv")
    df_features.to_csv(summary_path, index=False)
    print("\n=== Gait features summary saved ===")
    print(summary_path)

    return df_features


🟦 CELL 5 – Jalankan & lihat hasil

In [5]:
# Run the batch with the default folders and step-time limits.
df_gait_feats = process_all_gait_features()
# Bare last expression so the notebook displays the resulting summary table.
df_gait_feats



=== Hitung gait fitur: Afi_Jalan10 ===
Load: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan10_gait_with_steps.csv

=== Hitung gait fitur: Afi_Jalan11 ===
Load: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan11_gait_with_steps.csv

=== Hitung gait fitur: Afi_Jalan12 ===
Load: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan12_gait_with_steps.csv

=== Hitung gait fitur: Afi_Jalan13 ===
Load: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan13_gait_with_steps.csv

=== Hitung gait fitur: Afi_Jalan14 ===
Load: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan14_gait_with_steps.csv

=== Hitung gait fitur: Afi_Jalan15 ===
Load: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan15_gait_with_steps.csv

=== Hitung gait fitur: Afi_Jalan16 ===
Load: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan16_gait_with_steps.csv

=== Hitung gait fitur: Afi_Jalan17 ===
Load: E:\1.Clustering_TA\dataset\gaitframe_steps\Afi_Jalan17_gait_with_steps.csv

=== Hitung gait fitur: Afi_Jala

Unnamed: 0,trial,step_count,duration_sec,n_valid_intervals,mean_step_time,std_step_time,min_step_time_used,max_step_time_used,cadence_spm
0,Afi_Jalan10,6,4.465575,3,0.385905,0.002022,0.383628,0.387490,80.616718
1,Afi_Jalan11,6,4.521947,3,0.674581,0.378025,0.333739,1.081165,79.611725
2,Afi_Jalan12,7,6.152356,2,0.906615,0.825006,0.323248,1.489982,68.266531
3,Afi_Jalan13,4,3.348574,2,0.802744,0.662668,0.334167,1.271321,71.672300
4,Afi_Jalan14,2,4.402647,0,,,,,
...,...,...,...,...,...,...,...,...,...
211,Miftah_Jalan71,14,14.640848,3,0.403999,0.082463,0.333469,0.494666,57.373726
212,Miftah_Jalan72,26,20.260822,7,0.495260,0.315790,0.328740,1.209755,76.995889
213,Miftah_Jalan7,8,5.742082,1,0.329996,0.000000,0.329996,0.329996,83.593373
214,Miftah_Jalan8,9,5.752037,4,0.520381,0.342960,0.327791,1.033535,93.879786
