In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import GroupShuffleSplit

  from pandas.core import (


In [3]:
# Explicit dtypes for the columns we care about: 16-bit integer ids, 32-bit
# floats for the numeric signal columns, and a categorical activity label.
# Columns not listed here are left to pandas' default type inference.
_FLOAT32_COLUMNS = (
    "acc_x", "acc_y", "acc_z",
    "gyro_x", "gyro_y", "gyro_z",
    "azimuth", "pitch", "roll",
)
dtype_map = {"subject_id": "int16", "trial": "int16"}
dtype_map.update(dict.fromkeys(_FLOAT32_COLUMNS, "float32"))
dtype_map["label"] = "category"

df = pd.read_csv("../data/raw/MobiAct_combined.csv", dtype=dtype_map)

In [4]:
# Preview the first rows to sanity-check the parsed columns and dtypes.
df.head()

Unnamed: 0,timestamp,rel_time,acc_x,acc_y,acc_z,gyro_x,gyro_y,gyro_z,azimuth,pitch,roll,label,subject_id,trial
0,1295405261000,0.0,-1.407311,9.614395,-2.086666,-0.844216,0.40928,0.086437,92.746895,-36.879684,-11.741077,STD,10,1
1,1295410262000,0.005001,-1.406354,9.61296,-2.084512,-0.711047,0.346971,0.076358,92.20536,-37.470173,-11.839779,STD,10,1
2,1295415352000,0.010091,-1.40538,9.611498,-2.08232,-0.598953,0.093462,0.025045,91.74305,-38.09079,-11.880902,STD,10,1
3,1295420307000,0.015046,-1.404432,9.610076,-2.080186,-0.128893,-0.012828,-0.002443,91.267319,-38.842915,-11.933741,STD,10,1
4,1295425257000,0.019996,-1.403484,9.608654,-2.078054,0.04948,0.018326,0.016493,90.819679,-39.538643,-11.957446,STD,10,1


In [5]:
# Raw activity codes that count as a fall, and the code that marks the
# period right after one; every other code is treated as 'ADL'.
fall_labels = ['BSC', 'FKL', 'SDL', 'FOL']
post_fall   = ['LYI']


def _to_fall_label(code):
    """Collapse a raw activity code into 'FALL', 'POST_FALL' or 'ADL'."""
    if code in fall_labels:
        return 'FALL'
    if code in post_fall:
        return 'POST_FALL'
    return 'ADL'


df['fall_label'] = df['label'].apply(_to_fall_label)

In [6]:
# Split by subject_id so that all rows of a given subject land on the same
# side of the split; the fixed random_state makes the split repeatable.
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(
    gss.split(X=df, y=df['fall_label'], groups=df['subject_id'])
)

# Materialise both partitions with a fresh 0..n-1 index.
df_train, df_test = (
    df.iloc[rows].reset_index(drop=True) for rows in (train_idx, test_idx)
)

In [7]:
# Export every test-split row recorded for subject 17 / trial 3 as a sample
# trace for the app.
# NOTE(review): if subject 17 is not in the test split this silently writes a
# header-only file — confirm the selection is non-empty.
is_subject_17 = df_test["subject_id"] == 17
is_trial_3 = df_test["trial"] == 3
df_full = df_test[is_subject_17 & is_trial_3]
df_full.to_csv("../app/samples/full_trace_subject17_trial3.csv", index=False)

In [6]:
def extract_windows(df, window_size=500, step_size=250):
    """Slice ``df`` into fixed-length, possibly overlapping row windows.

    Windows start at rows 0, ``step_size``, 2 * ``step_size``, ... and each
    holds ``window_size`` consecutive rows; a trailing remainder shorter than
    ``window_size`` is not emitted.

    A window is kept only when its ``fall_label`` values contain exactly one
    of 'FALL' / 'ADL':

    * 'FALL' present, 'ADL' absent  -> labelled 'FALL' (other values such as
      'POST_FALL' may also be present in the window);
    * 'ADL' present, 'FALL' absent  -> labelled 'ADL';
    * both present, or neither      -> window is dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``fall_label`` column. It is not modified.
    window_size : int
        Number of rows per window.
    step_size : int
        Row offset between the starts of consecutive windows.

    Returns
    -------
    list of pandas.DataFrame
        Copies of the kept windows, each with an extra constant
        ``window_label`` column.

    NOTE(review): windows are cut from consecutive rows of ``df`` as given;
    nothing here prevents a window from spanning rows of different subjects
    or trials — confirm that is intended.
    """
    kept = []
    last_start = len(df) - window_size
    for offset in range(0, last_start + 1, step_size):
        segment = df.iloc[offset:offset + window_size].copy()
        present = segment['fall_label'].unique()
        has_fall = 'FALL' in present
        has_adl = 'ADL' in present
        if has_fall == has_adl:
            # Both classes mixed together, or neither one present: skip.
            continue
        segment['window_label'] = 'FALL' if has_fall else 'ADL'
        kept.append(segment)
    return kept

In [7]:
# Window each split separately (train first, then test) using the default
# window and step sizes of extract_windows.
train_windows, test_windows = (
    extract_windows(split_df) for split_df in (df_train, df_test)
)

In [11]:
# Write a capped sample of windows per split to disk, one CSV per window,
# under `<output_base>/<split>/window_<i>_<label>.csv`.
output_base = "extracted_windows"
# Single source of truth for the per-split cap: it drives both the slice
# below and the summary message, so the two cannot drift apart.
MAX_SAVED_WINDOWS = 50

saved_counts = {}
for split_name, windows in [("train", train_windows), ("test", test_windows)]:
    out_dir = os.path.join(output_base, split_name)
    os.makedirs(out_dir, exist_ok=True)
    # save the first MAX_SAVED_WINDOWS windows (or all of them if fewer)
    to_save = windows[:MAX_SAVED_WINDOWS]
    for i, win_df in enumerate(to_save):
        # window_label is constant within a window, so the first row suffices
        fn = f"window_{i}_{win_df['window_label'].iloc[0]}.csv"
        win_df.to_csv(os.path.join(out_dir, fn), index=False)
    saved_counts[split_name] = len(to_save)

print(f"Saved {saved_counts['train']} train windows and "
      f"{saved_counts['test']} test windows under `{output_base}/`.")


Saved 50 train windows and 50 test windows under `extracted_windows/`.
