In [2]:
# Load the uploaded force data file
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import find_peaks

file_path = r"C:\Users\opeye\Desktop\ALL GAIT DATA\SUBJECT 1\GP1_0.8_force.csv"
force_data = pd.read_csv(file_path)

# Display the first few rows
print(force_data.head())

      FP1_x      FP2_x      FP1_y      FP2_y       FP1_z      FP2_z
0  8.579960 -11.563360 -25.844818 -14.817612  758.271912  13.269334
1  8.738980 -11.459427 -25.997309 -14.683493  758.036011  12.880083
2  8.896370 -11.337660 -26.163317 -14.522822  757.782715  12.474997
3  9.052503 -11.199483 -26.341640 -14.337643  757.513428  12.055962
4  9.207713 -11.046248 -26.531141 -14.129924  757.229675  11.624811


In [4]:
# Extract vertical ground reaction forces
fp1_z = force_data['FP1_z']  # Left foot
fp2_z = force_data['FP2_z']  # Right foot

# Find heel strikes using peak detection on vertical force
# Only consider forces above 100 N to avoid noise
peaks_fp1, _ = find_peaks(fp1_z, height=100, distance=200)
peaks_fp2, _ = find_peaks(fp2_z, height=100, distance=200)


In [5]:
# STEP TIMING OF SUBJECT 1
step_times_left = pd.Series(peaks_fp1).diff().dropna() / 1000  # in seconds
step_times_left
import numpy as np

# Calculate step timing (in seconds) for both feet
step_times_left = np.diff(peaks_fp1) / 1000  # since sampling rate is 1000 Hz
step_times_right = np.diff(peaks_fp2) / 1000

# Calculate peak forces at heel strikes
peak_forces_left = fp1_z.iloc[peaks_fp1].values
peak_forces_right = fp2_z.iloc[peaks_fp2].values

# Calculate symmetry features (using length-limited versions to match lengths)
min_len = min(len(step_times_left), len(step_times_right), len(peak_forces_left), len(peak_forces_right))
features_df = pd.DataFrame({
    'step_time_left': step_times_left[:min_len],
    'step_time_right': step_times_right[:min_len],
    'peak_force_left': peak_forces_left[:min_len],
    'peak_force_right': peak_forces_right[:min_len],
    'step_time_diff': np.abs(step_times_left[:min_len] - step_times_right[:min_len]),
    'force_asymmetry': np.abs(peak_forces_left[:min_len] - peak_forces_right[:min_len])
})

In [6]:
import numpy as np

def extract_gait_phase_features(fpz, sampling_rate=1000, threshold=20):
    """
    Extract stance time, swing time, and impulse from vertical force (FPz) signal.
    
    Parameters:
        fpz (array-like): Vertical ground reaction force for one foot (e.g. FP1_z)
        sampling_rate (int): Sampling frequency in Hz (default 1000)
        threshold (float): Force threshold to detect contact (default 20 N)
    
    Returns:
        stance_times (np.array): Duration of stance phases in seconds
        swing_times (np.array): Duration of swing phases in seconds
        impulses (np.array): Force impulse during stance (Ns)
    """
    
    on_ground = fpz > threshold
    transitions = np.diff(on_ground.astype(int))

    # Detect start and end of stance phases
    stance_starts = np.where(transitions == 1)[0]
    stance_ends   = np.where(transitions == -1)[0]

    # Ensure equal number of starts and ends
    min_len = min(len(stance_starts), len(stance_ends))
    stance_starts = stance_starts[:min_len]
    stance_ends = stance_ends[:min_len]

    # Compute stance times
    stance_times = (stance_ends - stance_starts) / sampling_rate

    # Compute swing times (between consecutive stance phases)
    swing_times = (stance_starts[1:] - stance_ends[:-1]) / sampling_rate

    # Compute impulse for each stance phase
    impulses = []
    for start, end in zip(stance_starts, stance_ends):
        impulse = np.trapz(fpz[start:end], dx=1/sampling_rate)
        impulses.append(impulse)
    impulses = np.array(impulses)

    return stance_times, swing_times, impulses


In [7]:
# Left foot (FP1_z)
stance_left, swing_left, impulse_left = extract_gait_phase_features(fp1_z)

# Right foot (FP2_z)
stance_right, swing_right, impulse_right = extract_gait_phase_features(fp2_z)


In [8]:
min_len = min(len(features_df),
              len(stance_left), len(swing_left), len(impulse_left),
              len(stance_right), len(swing_right), len(impulse_right))

features_df = features_df.iloc[:min_len].copy()
features_df['stance_time_left'] = stance_left[:min_len]
features_df['swing_time_left'] = swing_left[:min_len]
features_df['impulse_left'] = impulse_left[:min_len]
features_df['stance_time_right'] = stance_right[:min_len]
features_df['swing_time_right'] = swing_right[:min_len]
features_df['impulse_right'] = impulse_right[:min_len]


In [9]:
features_df.head()

Unnamed: 0,step_time_left,step_time_right,peak_force_left,peak_force_right,step_time_diff,force_asymmetry,stance_time_left,swing_time_left,impulse_left,stance_time_right,swing_time_right,impulse_right
0,0.899,0.386,843.106689,808.012573,0.513,35.094116,-0.443,1.82,0.0,0.852,0.493,500.868937
1,0.406,0.957,776.911133,785.990967,0.551,9.079834,-0.489,1.841,0.0,0.885,0.478,535.590799
2,0.982,0.378,804.047974,783.152161,0.604,20.895813,-0.465,1.825,0.0,0.904,0.472,530.582168
3,0.432,0.995,775.748108,780.385986,0.563,4.637878,-0.483,1.832,0.0,0.889,0.459,545.180584
4,0.949,0.39,805.030823,776.606934,0.559,28.423889,-0.461,1.828,0.0,0.866,0.488,523.873655


In [10]:
# Load the uploaded marker data file
file_path = r"C:\Users\opeye\Desktop\ALL GAIT DATA\SUBJECT 1\GP1_0.8_marker.csv"
marker_data = pd.read_csv(file_path)

# Display the first few rows
print(marker_data.head())


    L_FCC_x   L_FM1_x   L_FM2_x   L_FM5_x   R_FCC_x   R_FM1_x   R_FM2_x  \
0  0.570443  0.779472  0.781610  0.764915  0.752671  0.963455  0.961456   
1  0.567284  0.776336  0.778602  0.761761  0.759216  0.970223  0.968129   
2  0.563920  0.772992  0.775377  0.758397  0.766173  0.977402  0.975211   
3  0.560371  0.769458  0.771950  0.754845  0.773495  0.984937  0.982647   
4  0.556662  0.765758  0.768342  0.751126  0.781122  0.992765  0.990378   

    R_FM5_x   L_FCC_y   L_FM1_y  ...   R_FM2_y   R_FM5_y   L_FCC_z   L_FM1_z  \
0  0.931390  0.561532  0.535632  ...  0.414943  0.369149  0.081882  0.064970   
1  0.938391  0.561540  0.535614  ...  0.414677  0.368786  0.081948  0.064903   
2  0.945811  0.561546  0.535597  ...  0.414398  0.368410  0.082017  0.064828   
3  0.953592  0.561546  0.535579  ...  0.414108  0.368026  0.082089  0.064745   
4  0.961668  0.561541  0.535561  ...  0.413811  0.367639  0.082163  0.064655   

    L_FM2_z   L_FM5_z   R_FCC_z   R_FM1_z   R_FM2_z   R_FM5_z  
0  0

In [11]:
import numpy as np

def extract_marker_features(marker_data, step_indices, side='R', sampling_rate=200):
    """
    Extract stride length, step width, vertical displacement, and foot velocity
    from marker data for one foot side ('L' or 'R').
    
    Parameters:
        marker_data (DataFrame): Marker positions (L_FCC_x, L_FCC_y, etc.)
        step_indices (list or array): Frame indices of heel strikes (e.g., from force data)
        side (str): 'L' for left or 'R' for right foot
        sampling_rate (int): Marker sampling rate in Hz (default 200)

    Returns:
        stride_lengths (np.array)
        step_widths (np.array)
        vertical_displacements (np.array)
        velocities (np.array)
    """
    x_col = f'{side}_FCC_x'
    y_col = f'{side}_FCC_y'
    z_col = f'{side}_FCC_z'

    stride_lengths = []
    step_widths = []
    vertical_displacements = []
    velocities = []

    for i in range(len(step_indices) - 1):
        i1, i2 = step_indices[i], step_indices[i + 1]

        # Stride Length (x-direction)
        dx = marker_data[x_col].iloc[i2] - marker_data[x_col].iloc[i1]
        stride_lengths.append(abs(dx))

        # Step Width (y-direction, between L and R at same time)
        if side == 'R':
            y_left = marker_data['L_FCC_y'].iloc[i1]
            y_right = marker_data['R_FCC_y'].iloc[i1]
        else:
            y_left = marker_data['L_FCC_y'].iloc[i1]
            y_right = marker_data['R_FCC_y'].iloc[i1]
        step_widths.append(abs(y_right - y_left))

        # Vertical Displacement (z-range between steps)
        z_range = marker_data[z_col].iloc[i1:i2].max() - marker_data[z_col].iloc[i1:i2].min()
        vertical_displacements.append(z_range)

        # Velocity (distance / time)
        distance = np.sqrt(
            (marker_data[x_col].iloc[i2] - marker_data[x_col].iloc[i1])**2 +
            (marker_data[y_col].iloc[i2] - marker_data[y_col].iloc[i1])**2 +
            (marker_data[z_col].iloc[i2] - marker_data[z_col].iloc[i1])**2
        )
        time = (i2 - i1) / sampling_rate
        velocities.append(distance / time if time > 0 else 0)

    return (np.array(stride_lengths),
            np.array(step_widths),
            np.array(vertical_displacements),
            np.array(velocities))


In [12]:
# Convert force indices to marker indices
peaks_fp2_marker = (peaks_fp2 / 5).astype(int)
peaks_fp1_marker = (peaks_fp1 / 5).astype(int)


In [13]:
# For right foot
stride_R, width_R, zdisp_R, vel_R = extract_marker_features(marker_data, peaks_fp2_marker, side='R')

# For left foot
stride_L, width_L, zdisp_L, vel_L = extract_marker_features(marker_data, peaks_fp1_marker, side='L')


In [14]:
max_index = len(marker_data) - 1
peaks_fp2_marker = np.clip(peaks_fp2_marker, 0, max_index)
peaks_fp1_marker = np.clip(peaks_fp1_marker, 0, max_index)


In [16]:
min_len = min(len(features_df), len(stride_L), len(zdisp_L), len(vel_L), len(stride_R))

features_df = features_df.iloc[:min_len].copy()
features_df['stride_length_left'] = stride_L[:min_len]
features_df['stride_length_right'] = stride_R[:min_len]
features_df['step_width'] = width_R[:min_len]  # shared value
features_df['vertical_disp_left'] = zdisp_L[:min_len]
features_df['vertical_disp_right'] = zdisp_R[:min_len]
features_df['foot_velocity_left'] = vel_L[:min_len]
features_df['foot_velocity_right'] = vel_R[:min_len]
features_df['target'] = 0  # initially mark all as healthy
features_df.head()


Unnamed: 0,step_time_left,step_time_right,peak_force_left,peak_force_right,step_time_diff,force_asymmetry,stance_time_left,swing_time_left,impulse_left,stance_time_right,swing_time_right,impulse_right,stride_length_left,stride_length_right,step_width,vertical_disp_left,vertical_disp_right,foot_velocity_left,foot_velocity_right,target
0,0.899,0.386,843.106689,808.012573,0.513,35.094116,-0.443,1.82,0.0,0.852,0.493,500.868937,0.365202,0.314396,0.093273,0.222142,0.016576,0.406036,0.817861,0
1,0.406,0.957,776.911133,785.990967,0.551,9.079834,-0.489,1.841,0.0,0.885,0.478,535.590799,0.331912,0.360721,0.104885,0.018828,0.209718,0.820999,0.378281,0
2,0.982,0.378,804.047974,783.152161,0.604,20.895813,-0.465,1.825,0.0,0.904,0.472,530.582168,0.3245,0.311836,0.081727,0.219465,0.013464,0.332391,0.821476,0
3,0.432,0.995,775.748108,780.385986,0.563,4.637878,-0.483,1.832,0.0,0.889,0.459,545.180584,0.357742,0.286006,0.120947,0.019713,0.208221,0.823803,0.288242,0
4,0.949,0.39,805.030823,776.606934,0.559,28.423889,-0.461,1.828,0.0,0.866,0.488,523.873655,0.308175,0.320884,0.102005,0.220906,0.011543,0.325678,0.823372,0


# Modify Their Feature Values (Simulate Stroke Patterns)


In [17]:
# generating synthetic anomalies, you would similarly pick some normal rows and tweak features,
import numpy as np

# Define percentage of rows to simulate
n_anomalies = int(0.3 * len(features_df))

# Select random rows to simulate
np.random.seed(42)
anomaly_indices = np.random.choice(features_df.index, n_anomalies, replace=False)

# Increase step time and step time difference
features_df.loc[anomaly_indices, 'step_time_left'] *= 1.4
features_df.loc[anomaly_indices, 'step_time_right'] *= 1.3
features_df.loc[anomaly_indices, 'step_time_diff'] *= 2.0

# Boost force asymmetry
features_df.loc[anomaly_indices, 'force_asymmetry'] *= 1.5

# Reduce stride length and vertical displacement
features_df.loc[anomaly_indices, 'stride_length_left'] *= 0.7
features_df.loc[anomaly_indices, 'stride_length_right'] *= 0.75
features_df.loc[anomaly_indices, 'vertical_disp_left'] *= 0.6
features_df.loc[anomaly_indices, 'vertical_disp_right'] *= 0.6

# Reduce foot velocity (dragging foot)
features_df.loc[anomaly_indices, 'foot_velocity_left'] *= 0.7
features_df.loc[anomaly_indices, 'foot_velocity_right'] *= 0.7


In [18]:
features_df.loc[anomaly_indices, 'target'] = 1
features_df.head()

Unnamed: 0,step_time_left,step_time_right,peak_force_left,peak_force_right,step_time_diff,force_asymmetry,stance_time_left,swing_time_left,impulse_left,stance_time_right,swing_time_right,impulse_right,stride_length_left,stride_length_right,step_width,vertical_disp_left,vertical_disp_right,foot_velocity_left,foot_velocity_right,target
0,0.899,0.386,843.106689,808.012573,0.513,35.094116,-0.443,1.82,0.0,0.852,0.493,500.868937,0.365202,0.314396,0.093273,0.222142,0.016576,0.406036,0.817861,0
1,0.406,0.957,776.911133,785.990967,0.551,9.079834,-0.489,1.841,0.0,0.885,0.478,535.590799,0.331912,0.360721,0.104885,0.018828,0.209718,0.820999,0.378281,0
2,0.982,0.378,804.047974,783.152161,0.604,20.895813,-0.465,1.825,0.0,0.904,0.472,530.582168,0.3245,0.311836,0.081727,0.219465,0.013464,0.332391,0.821476,0
3,0.432,0.995,775.748108,780.385986,0.563,4.637878,-0.483,1.832,0.0,0.889,0.459,545.180584,0.357742,0.286006,0.120947,0.019713,0.208221,0.823803,0.288242,0
4,1.3286,0.507,805.030823,776.606934,1.118,42.635834,-0.461,1.828,0.0,0.866,0.488,523.873655,0.215723,0.240663,0.102005,0.132544,0.006926,0.227975,0.576361,1


In [19]:
# Save to CSV
output_file = 'gait_feature_matrix_with_simulated_stroke_subject1_0.8.csv'
features_df.to_csv(output_file, index=False)

output_file


'gait_feature_matrix_with_simulated_stroke_subject1_0.8.csv'