In [35]:
import numpy as np
import pandas as pd
import os
from scipy.signal import find_peaks

subjects = range(1, 22)  # GP1 to GP21
base_dir = r"C:\Users\opeye\Desktop\ALL GAIT DATA\ALL PARTICIPANTS"  # Update this to your folder path
speeds = np.round(np.arange(0.6, 1.8, 0.1), 1)  # Speeds from 0.6 to 1.7 m/s
sampling_rate_force = 1000
sampling_rate_marker = 200


In [36]:
# ========== HELPER FUNCTIONS ==========
def extract_gait_phase_features(fpz, sampling_rate=1000, threshold=20):
    on_ground = fpz > threshold
    transitions = np.diff(on_ground.astype(int))
    stance_starts = np.where(transitions == 1)[0]
    stance_ends = np.where(transitions == -1)[0]
    min_len = min(len(stance_starts), len(stance_ends))
    stance_starts, stance_ends = stance_starts[:min_len], stance_ends[:min_len]
    stance_times = (stance_ends - stance_starts) / sampling_rate
    swing_times = (stance_starts[1:] - stance_ends[:-1]) / sampling_rate
    impulses = [np.trapz(fpz[start:end], dx=1/sampling_rate) for start, end in zip(stance_starts, stance_ends)]
    return stance_times, swing_times[:len(stance_times)], impulses[:len(stance_times)]


In [37]:
def extract_marker_features(marker_data, step_indices, side='R', sampling_rate=200):
    x_col, y_col, z_col = f'{side}_FCC_x', f'{side}_FCC_y', f'{side}_FCC_z'
    stride_lengths, step_widths, vertical_displacements, velocities = [], [], [], []
    for i in range(len(step_indices) - 1):
        i1, i2 = step_indices[i], step_indices[i + 1]
        dx = marker_data[x_col].iloc[i2] - marker_data[x_col].iloc[i1]
        stride_lengths.append(abs(dx))
        y_left = marker_data['L_FCC_y'].iloc[i1]
        y_right = marker_data['R_FCC_y'].iloc[i1]
        step_widths.append(abs(y_right - y_left))
        z_range = marker_data[z_col].iloc[i1:i2].max() - marker_data[z_col].iloc[i1:i2].min()
        vertical_displacements.append(z_range)
        distance = np.sqrt((marker_data[x_col].iloc[i2] - marker_data[x_col].iloc[i1])**2 +
                           (marker_data[y_col].iloc[i2] - marker_data[y_col].iloc[i1])**2 +
                           (marker_data[z_col].iloc[i2] - marker_data[z_col].iloc[i1])**2)
        time = (i2 - i1) / sampling_rate
        velocities.append(distance / time if time > 0 else 0)
    return stride_lengths, step_widths, vertical_displacements, velocities

In [40]:
# ========== MAIN LOOP ==========
all_features = []

for subject in subjects:
    for speed in speeds:
        try:
            force_file = f"{base_dir}/GP{subject}_{speed}_force.csv"
            marker_file = f"{base_dir}/GP{subject}_{speed}_marker.csv"
            force_data = pd.read_csv(force_file)
            marker_data = pd.read_csv(marker_file)

            # Force signal processing
            fp1_z, fp2_z = force_data['FP1_z'], force_data['FP2_z']
            peaks_fp1, _ = find_peaks(fp1_z, height=100, distance=200)
            peaks_fp2, _ = find_peaks(fp2_z, height=100, distance=200)

            step_times_left = np.diff(peaks_fp1) / sampling_rate_force
            step_times_right = np.diff(peaks_fp2) / sampling_rate_force
            peak_force_left = fp1_z.iloc[peaks_fp1].values
            peak_force_right = fp2_z.iloc[peaks_fp2].values
            force_asymmetry = np.abs(peak_force_left[:len(peak_force_right)] - peak_force_right[:len(peak_force_left)])

            stance_left, swing_left, impulse_left = extract_gait_phase_features(fp1_z)
            stance_right, swing_right, impulse_right = extract_gait_phase_features(fp2_z)

            # Marker feature extraction
            marker_peaks_fp1 = np.clip((peaks_fp1 / 5).astype(int), 0, len(marker_data)-1)
            marker_peaks_fp2 = np.clip((peaks_fp2 / 5).astype(int), 0, len(marker_data)-1)
            stride_L, width_L, disp_L, vel_L = extract_marker_features(marker_data, marker_peaks_fp1, side='L')
            stride_R, width_R, disp_R, vel_R = extract_marker_features(marker_data, marker_peaks_fp2, side='R')

            # üõ† Fix: align all feature arrays to the same length
            feature_arrays = [
                step_times_left, step_times_right, force_asymmetry,
                stance_left, swing_left, impulse_left,
                stride_L, disp_L, vel_L,
                stride_R, width_R, disp_R, vel_R,
                peak_force_left, peak_force_right,
                stance_right, swing_right, impulse_right
            ]
            min_len = min(len(arr) for arr in feature_arrays)

            # Skip if data is too short
            if min_len < 5:
                print(f"Skipped Subject {subject}, Speed {speed}: not enough valid steps")
                continue

            # Build feature row
            df = pd.DataFrame({
                'step_time_left': step_times_left[:min_len],
                'step_time_right': step_times_right[:min_len],
                'force_asymmetry': force_asymmetry[:min_len],
                'stance_time_left': stance_left[:min_len],
                'swing_time_left': swing_left[:min_len],
                'impulse_left': impulse_left[:min_len],
                'stride_length_left': stride_L[:min_len],
                'vertical_disp_left': disp_L[:min_len],
                'foot_velocity_left': vel_L[:min_len],
                'peak_force_left': peak_force_left[:min_len],
                'peak_force_right': peak_force_right[:min_len],
                'step_time_diff': np.abs(step_times_left[:min_len] - step_times_right[:min_len]),
                'stance_time_right': stance_right[:min_len],
                'swing_time_right': swing_right[:min_len],
                'impulse_right': impulse_right[:min_len],
                'vertical_disp_right': disp_R[:min_len],
                'foot_velocity_right': vel_R[:min_len],
                'stride_length_right': stride_R[:min_len],
                'step_width': width_R[:min_len],
                'subject': subject,
                'speed': speed,
                'target': 0
            })

            all_features.append(df)
            print(f"‚úÖ Processed Subject {subject}, Speed {speed}")

        except Exception as e:
            print(f"‚ùå Skipped Subject {subject}, Speed {speed}: {e}")


‚úÖ Processed Subject 1, Speed 0.6
‚úÖ Processed Subject 1, Speed 0.7
‚úÖ Processed Subject 1, Speed 0.8
‚úÖ Processed Subject 1, Speed 0.9
‚úÖ Processed Subject 1, Speed 1.0
‚úÖ Processed Subject 1, Speed 1.1
‚úÖ Processed Subject 1, Speed 1.2
‚úÖ Processed Subject 1, Speed 1.3
‚úÖ Processed Subject 1, Speed 1.4
‚úÖ Processed Subject 1, Speed 1.5
‚úÖ Processed Subject 1, Speed 1.6
‚úÖ Processed Subject 1, Speed 1.7
‚ùå Skipped Subject 1, Speed 1.8: [Errno 2] No such file or directory: 'C:\\Users\\opeye\\Desktop\\ALL GAIT DATA\\ALL PARTICIPANTS/GP1_1.8_force.csv'
‚úÖ Processed Subject 2, Speed 0.6
‚úÖ Processed Subject 2, Speed 0.7
‚úÖ Processed Subject 2, Speed 0.8
‚úÖ Processed Subject 2, Speed 0.9
‚úÖ Processed Subject 2, Speed 1.0
‚úÖ Processed Subject 2, Speed 1.1
‚úÖ Processed Subject 2, Speed 1.2
‚úÖ Processed Subject 2, Speed 1.3
‚úÖ Processed Subject 2, Speed 1.4
‚úÖ Processed Subject 2, Speed 1.5
‚úÖ Processed Subject 2, Speed 1.6
‚úÖ Processed Subject 2, Speed 1.7
‚ùå Skippe

In [41]:

# Combine all and save
final_df = pd.concat(all_features, ignore_index=True)
final_df.to_csv("gait_features_all_subjects.csv", index=False)
print("Feature extraction completed and saved to gait_features_all_subjects.csv")


Feature extraction completed and saved to gait_features_all_subjects.csv


# Simulate Stroke-Like Gait

In [42]:
import numpy as np

# Select 40% of rows randomly to simulate stroke-like gait
np.random.seed(42)
anomaly_indices = np.random.choice(final_df.index, int(0.4 * len(final_df)), replace=False)

# Simulate abnormal gait by modifying key features
final_df.loc[anomaly_indices, 'step_time_left'] *= 1.4
final_df.loc[anomaly_indices, 'step_time_right'] *= 1.3
final_df.loc[anomaly_indices, 'step_time_diff'] *= 2.0

final_df.loc[anomaly_indices, 'force_asymmetry'] *= 1.5

final_df.loc[anomaly_indices, 'stride_length_left'] *= 0.7
final_df.loc[anomaly_indices, 'stride_length_right'] *= 0.75
final_df.loc[anomaly_indices, 'vertical_disp_left'] *= 0.6
final_df.loc[anomaly_indices, 'vertical_disp_right'] *= 0.6

final_df.loc[anomaly_indices, 'foot_velocity_left'] *= 0.7
final_df.loc[anomaly_indices, 'foot_velocity_right'] *= 0.7

# Mark these rows as stroke-like
final_df.loc[anomaly_indices, 'target'] = 1


In [43]:
final_df.to_csv("gait_features_all_subjects_with_stroke.csv", index=False)
print("‚úÖ Feature extraction and stroke simulation completed.")
print("üíæ Saved as: gait_features_all_subjects_with_stroke.csv")


‚úÖ Feature extraction and stroke simulation completed.
üíæ Saved as: gait_features_all_subjects_with_stroke.csv


In [None]:
len(final_df)