In [1]:
import sys
from pathlib import Path
import re

# Specify patient ID (change this to analyze different patients)
patient_id = "04"  # Extract just the number from 'patient01'

# Specify ADL event (change this to analyze different ADLs)
adl_event = "chair_to_bed_transfer"  # Options below:
# synchronization
# emptywc
# resting
# phone
# computer
# arm_raises  
# eating  
# hand_cycling  
# chair_to_bed_transfer  
# bed_to_chair_transfer  
# pressure_relief  
# laying_on_back  
# laying_on_right  
# laying_on_left  
# laying_on_stomach  
# assisted_propulsion  
# self_propulsion

# Add src to sys.path for imports
sys.path.insert(0, str(Path.cwd().parent / "src"))
from utils import get_project_root, get_data_path

# Set up paths using utility functions
project_root = get_project_root()
raw_data_dir = get_data_path("raw")

# Function to extract patient ID from folder name
def extract_patient_id(folder_name):
    """Extract patient ID from folder name like '20250626_112001_patient01_0.synchronization'"""
    match = re.search(r'patient(\d+)', folder_name)
    return match.group(1) if match else None

# Function to get folders for a specific patient
def get_patient_folders(patient_id):
    """Get all folders for a specific patient ID"""
    if not raw_data_dir.exists():
        return []
    
    patient_folders = []
    for folder in raw_data_dir.iterdir():
        if folder.is_dir():
            folder_patient_id = extract_patient_id(folder.name)
            if folder_patient_id == patient_id:
                patient_folders.append(folder)
    
    return sorted(patient_folders)



# List folders for the specified patient
if raw_data_dir.exists():
    patient_folders = get_patient_folders(patient_id)
    print(f"\nFound {len(patient_folders)} folders for patient{patient_id}:")
    for folder in patient_folders:
        print(f"  - {folder.name}")
else:
    print("Raw data directory not found!")


# 1. Filter for ADL event folders
event_folders = [folder for folder in patient_folders if adl_event in folder.name]

# 2. Sort them (by name, which usually encodes the timestamp)
event_folders = sorted(event_folders)

# 3. Construct the csv_dir for each ADL event
csv_dirs = []
for folder in event_folders:
    # Find the only subdirectory inside the event folder
    subdirs = [d for d in folder.iterdir() if d.is_dir()]
    if len(subdirs) == 1:
        csv_dir = subdirs[0]
        csv_dirs.append(str(csv_dir) + "/")  # Add trailing slash for consistency
    else:
        print(f"Warning: Expected 1 subdirectory in {folder}, found {len(subdirs)}")

# 4. Print the detected csv_dirs
print(f"Detected ADL event csv directories for '{adl_event}':")
for d in csv_dirs:
    print(d)

# Now you can use csv_dirs[0], csv_dirs[1], ... in your downstream code


Found 20 folders for patient04:
  - 20250630_142700_patient04_0.synchronization
  - 20250630_143329_patient04_1.emptywc
  - 20250630_143449_patient04_2.resting
  - 20250630_143605_patient04_3.phone
  - 20250630_143854_patient04_4.computer
  - 20250630_144118_patient04_5.arm_raises
  - 20250630_144240_patient04_6.eating
  - 20250630_144552_patient04_7.hand_cycling
  - 20250630_144704_patient04_8.chair_to_bed_transfer
  - 20250630_144730_patient04_9.bed_to_chair_transfer
  - 20250630_144806_patient04_8.chair_to_bed_transfer
  - 20250630_144832_patient04_9.bed_to_chair_transfer
  - 20250630_144924_patient04_10.pressure_relief
  - 20250630_145100_patient04_11.laying_on_back
  - 20250630_145212_patient04_13.laying_on_right
  - 20250630_145339_patient04_15.laying_on_left
  - 20250630_145512_patient04_16.laying_on_stomach
  - 20250630_145641_patient04_17.assisted_propulsion
  - 20250630_145743_patient04_18.self_propulsion
  - 20250630_150600_patient04_0.synchronization
Detected ADL event csv

In [2]:
from data_preprocessing.m5_parser import parse_m5_sensor_data
from data_preprocessing.polar_parser import parse_polar_sensor_data
from data_preprocessing.sensomative_parser import parse_sensomative_sensor_data

csv_dir = csv_dirs[0] # For synchronzation/chair_to_bed_transfer/bed_to_chair_transfer, you can use csv_dirs[1] also

m5_1_csv_file = csv_dir + "M5StickC_01_data.csv"
m5_wrist_l = parse_m5_sensor_data(m5_1_csv_file)

m5_2_csv_file = csv_dir + "M5StickC_02_data.csv"
m5_wrist_r = parse_m5_sensor_data(m5_2_csv_file)

m5_3_csv_file = csv_dir + "M5StickC_03_data.csv"
m5_wheel = parse_m5_sensor_data(m5_3_csv_file)

polar_csv_file = csv_dir + "polar_acc.csv"
polar_chest = parse_polar_sensor_data(polar_csv_file)

sensomative_csv_file = csv_dir + "pressure1.csv"
sensomative_bottom = parse_sensomative_sensor_data(sensomative_csv_file)

In [3]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np

# Sensor data and labels (using your new variable names)
sensor_data = [
    ("M5 Wrist L", m5_wrist_l, "linear_acceleration", ["linear_acceleration_x", "linear_acceleration_y", "linear_acceleration_z"], {"x": "tab:blue", "y": "tab:orange", "z": "tab:green"}),
    ("M5 Wrist L", m5_wrist_l, "angular_velocity", ["angular_velocity_x", "angular_velocity_y", "angular_velocity_z"], {"x": "tab:blue", "y": "tab:orange", "z": "tab:green"}),
    ("M5 Wrist R", m5_wrist_r, "linear_acceleration", ["linear_acceleration_x", "linear_acceleration_y", "linear_acceleration_z"], {"x": "tab:blue", "y": "tab:orange", "z": "tab:green"}),
    ("M5 Wrist R", m5_wrist_r, "angular_velocity", ["angular_velocity_x", "angular_velocity_y", "angular_velocity_z"], {"x": "tab:blue", "y": "tab:orange", "z": "tab:green"}),
    ("M5 Wheel", m5_wheel, "linear_acceleration", ["linear_acceleration_x", "linear_acceleration_y", "linear_acceleration_z"], {"x": "tab:blue", "y": "tab:orange", "z": "tab:green"}),
    ("M5 Wheel", m5_wheel, "angular_velocity", ["angular_velocity_x", "angular_velocity_y", "angular_velocity_z"], {"x": "tab:blue", "y": "tab:orange", "z": "tab:green"}),
    ("Polar Chest", polar_chest, "linear_acceleration", ["linear_acceleration_x", "linear_acceleration_y", "linear_acceleration_z"], {"x": "tab:blue", "y": "tab:orange", "z": "tab:green"}),
    ("Sensomative Bottom", sensomative_bottom, "pressure", [f"pressure_{i}" for i in range(12)], None),  # All 12 cells
]

# n_sensors = len(sensor_data)
# fig, axes = plt.subplots(n_sensors, 1, figsize=(14, 2.2 * n_sensors), sharex=True)

# if n_sensors == 1:
#     axes = [axes]

# for ax, (sensor_name, parsed, modality_key, cols, color_map) in zip(axes, sensor_data):
#     if modality_key in parsed:
#         df = parsed[modality_key]
#         if sensor_name != "Sensomative Bottom":
#             for axis, col in zip(["x", "y", "z"], cols):
#                 if col in df.columns:
#                     ax.plot(df["datetime"], df[col], label=axis, color=color_map[axis], linewidth=1)
#             ax.legend(title="Axis", loc="upper right", fontsize="small")
#         else:
#             # Use a colormap for 12 pressure cells (updated for matplotlib >=3.7)
#             colors = plt.get_cmap('tab20', 12)
#             for i, col in enumerate(cols):
#                 if col in df.columns:
#                     ax.plot(df["datetime"], df[col], label=f"cell_{i}", color=colors(i), linewidth=1)
#             ax.legend(title="Cell", loc="upper right", fontsize="x-small", ncol=4)
#         ax.set_ylabel(sensor_name)
#     else:
#         ax.set_ylabel(sensor_name)
#         ax.text(0.5, 0.5, "No data", ha="center", va="center", transform=ax.transAxes)
#     ax.grid(True, linestyle=":", alpha=0.5)
#     # Add modality as subplot title
#     ax.set_title(modality_key)

# # Set x-ticks every 1 second and format
# axes[-1].xaxis.set_major_locator(mdates.SecondLocator(interval=2))
# axes[-1].xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))

# for label in axes[-1].get_xticklabels():
#     label.set_rotation(30)
#     label.set_horizontalalignment('right')
#     label.set_fontsize(8)

# axes[-1].set_xlabel("Time")
# plt.suptitle(f"Sensor Data: Linear Acceleration, Angular Velocity, and Sensomative Pressure\nADL event: {adl_event}\nPatient{patient_id}", y=1.02)
# plt.tight_layout(h_pad=0.2)
# plt.show()

In [4]:
import pandas as pd
import numpy as np
from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters
from tsfresh.feature_extraction.settings import from_columns


window_size = 4  # in seconds, or number of samples if not using datetime
step_size = 2    # in seconds, or number of samples


tsfresh_har_features = {
    "mean": None,
    "standard_deviation": None,
    "variance": None,
    "minimum": None,
    "maximum": None,
    "median": None,
    "skewness": None,
    "kurtosis": None,
    "absolute_sum_of_changes": None,
    "mean_abs_change": None,
    "mean_change": None,
    "longest_strike_above_mean": None,
    "longest_strike_below_mean": None,
    "autocorrelation": [{"lag": 1}],
    "cid_ce": [{"normalize": True}],
    "binned_entropy": [{"max_bins": 5}],
    "sample_entropy": None,
    "number_peaks": [{"n": 1}],
    "count_above_mean": None,
    "count_below_mean": None,
    "abs_energy": None,
    "fft_coefficient": [{"coeff": 1, "attr": "abs"}],
    "spkt_welch_density": [{"coeff": 1}],
    "agg_autocorrelation": [{"f_agg": "mean", "maxlag": 3}],
    "number_crossing_m": [{"m": 0}],
}


# --- Helper: Gather all time series into a single DataFrame ---
def gather_all_timeseries(sensor_data):
    dfs = []
    for sensor_name, parsed, modality_key, cols, _ in sensor_data:
        if modality_key in parsed:
            df = parsed[modality_key].copy()
            for col in cols:
                if col in df.columns:
                    dfs.append(
                        df[["datetime", col]].rename(columns={col: "value"}).assign(
                            sensor=sensor_name,
                            modality=modality_key,
                            channel=col
                        )
                    )
    all_df = pd.concat(dfs, ignore_index=True)
    return all_df

all_df = gather_all_timeseries(sensor_data)
all_df = all_df.sort_values("datetime").reset_index(drop=True)

# --- Windowing ---
all_df["timestamp"] = (all_df["datetime"] - all_df["datetime"].min()).dt.total_seconds()
all_df["window_id"] = (all_df["timestamp"] // step_size).astype(int)

# --- Prepare for tsfresh ---
all_df["id"] = (
    all_df["window_id"].astype(str) + "@" +
    all_df["sensor"] + "@" +
    all_df["modality"] + "@" +
    all_df["channel"]
)

# --- Feature Extraction ---
features = extract_features(
    all_df,
    column_id="id",
    column_sort="timestamp",
    column_value="value",
    default_fc_parameters=tsfresh_har_features,
    n_jobs=20,
    disable_progressbar=False
)

# --- Reshape: Each row = one window, columns = sensor@modality@channel|feature ---
# Split the id to get window_id and the rest
feature_rows = []
window_ids = sorted(all_df["window_id"].unique())
for window_id in window_ids:
    # Find all ids for this window
    prefix = f"{window_id}@"
    row = {}
    for idx in features.index:
        if idx.startswith(prefix):
            parts = idx.split("@")
            sensor, modality, channel = parts[1], parts[2], parts[3]
            for feat in features.columns:
                colname = f"{sensor}@{modality}@{channel}@{feat}"
                row[colname] = features.loc[idx, feat]
    feature_rows.append(row)

features_df = pd.DataFrame(feature_rows)
features_df.insert(0, "window_id", window_ids)

# --- Add patient and ADL class columns ---
# You need to provide the patient and ADL class for each window.
# For example, if you have variables patient_id and adl_class:
features_df["patient"] = patient_id  # Replace with your variable or logic
features_df["ADL_class"] = adl_event # Replace with your variable or logic

# --- Done! ---
print("Extracted features shape:", (features_df.shape[0], features_df.shape[1]-3))
# display(features_df.head())

Feature Extraction: 100%|██████████| 91/91 [00:00<00:00, 156.18it/s]


Extracted features shape: (11, 825)


In [5]:
# print(list(features_df.columns))
for col in features_df.columns:
    print(col)
    

window_id
M5 Wheel@angular_velocity@angular_velocity_x@value__mean
M5 Wheel@angular_velocity@angular_velocity_x@value__standard_deviation
M5 Wheel@angular_velocity@angular_velocity_x@value__variance
M5 Wheel@angular_velocity@angular_velocity_x@value__minimum
M5 Wheel@angular_velocity@angular_velocity_x@value__maximum
M5 Wheel@angular_velocity@angular_velocity_x@value__median
M5 Wheel@angular_velocity@angular_velocity_x@value__skewness
M5 Wheel@angular_velocity@angular_velocity_x@value__kurtosis
M5 Wheel@angular_velocity@angular_velocity_x@value__absolute_sum_of_changes
M5 Wheel@angular_velocity@angular_velocity_x@value__mean_abs_change
M5 Wheel@angular_velocity@angular_velocity_x@value__mean_change
M5 Wheel@angular_velocity@angular_velocity_x@value__longest_strike_above_mean
M5 Wheel@angular_velocity@angular_velocity_x@value__longest_strike_below_mean
M5 Wheel@angular_velocity@angular_velocity_x@value__autocorrelation__lag_1
M5 Wheel@angular_velocity@angular_velocity_x@value__cid_ce__n