In [1]:
from matplotlib import pyplot as plt
import numpy as np
import cv2
import os
from tqdm import tqdm
import tensorflow_hub as hub
import tensorflow as tf
import re

# Restrict this kernel to a single GPU. Devices are enumerated in PCI bus order and
# only device index 1 is exposed; CUDA indices start at 0, so this is the *second*
# card. NOTE(review): set CUDA_VISIBLE_DEVICES=0 if the first GPU is the intended one.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=1

# Ask the XLA Python client (JAX) not to pre-allocate GPU memory up front.
# TensorFlow's own allocation is handled by the memory-growth setting below.
%env XLA_PYTHON_CLIENT_PREALLOCATE=false

# List the GPUs TensorFlow can see (after the CUDA_VISIBLE_DEVICES restriction above)
gpus = tf.config.list_physical_devices("GPU")

if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices("GPU")
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized;
        # the error is only reported, the notebook keeps running.
        print(e)


# Load the MeTRAbs pose model from TensorFlow Hub. `skeleton` is the joint-set name
# that is later passed to model.detect_poses_batched(..., skeleton=skeleton).
model = hub.load('https://bit.ly/metrabs_l')  # Takes about 3 minutes
skeleton = 'mpi_inf_3dhp_17'

# Load the gait transformer model (project-local package `gait_transformer`)
from gait_transformer.gait_phase_transformer import load_default_model, get_gait_phase_stride_transformer, gait_phase_stride_inference
from gait_transformer.gait_phase_kalman import gait_kalman_smoother, compute_phases, get_event_times
from tensorflow import keras

# pos_divider is handed to load_default_model; the processing functions in this
# notebook default to the same value (2) and multiply the window length L by it.
pos_divider = 2
transformer_model = load_default_model(pos_divider=pos_divider)

# Joint reordering
# Names of the 17 joints in their incoming order (presumably the order of the
# 'mpi_inf_3dhp_17' skeleton requested from the pose model — confirm).
joint_names = np.array([
    'htop', 'neck', 'rsho', 'relb', 'rwri', 'lsho', 'lelb', 'lwri',
    'rhip', 'rkne', 'rank', 'lhip', 'lkne', 'lank', 'pelv', 'spin',
    'head',
])
# this is the order of joints from the Gast-NET algorithm that the gait transformer was originally trained on
expected_order = [
    'pelv', 'rhip', 'rkne', 'rank', 'lhip', 'lkne', 'lank', 'spin', 'neck',
    'head', 'htop', 'lsho', 'lelb', 'lwri', 'rsho', 'relb', 'rwri',
]
# For every joint in expected_order, its position inside joint_names; indexing a
# (frames, joints, ...) array with this on the joint axis yields expected_order.
expected_order_idx = np.array(list(map(joint_names.tolist().index, expected_order)))

2025-05-26 13:24:56.208177: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-26 13:24:56.223050: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748280296.240309  166816 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748280296.245684  166816 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-26 13:24:56.263835: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=1
env: XLA_PYTHON_CLIENT_PREALLOCATE=false
1 Physical GPUs, 1 Logical GPUs


I0000 00:00:1748280298.795650  166816 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 28448 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:73:00.0, compute capability: 8.6
2025-05-26 13:26:19.052707: W external/xla/xla/service/gpu/nvptx_compiler.cc:765] The NVIDIA driver's CUDA version is 12.4 which is older than the ptxas CUDA version (12.5.82). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


In [4]:
# Main code

# --- Settings ---
height_cm = 166
input_dir = "./DEMO"

# Results are written next to the input folder; mirrored videos get a sub-folder.
# NOTE: `Output_dir` (results root) and `output_dir` (mirrored videos) differ only by case.
Output_dir = os.path.join(os.path.dirname(input_dir), "Results")
output_dir = os.path.join(Output_dir, "mirror_video")
os.makedirs(output_dir, exist_ok=True)

# All .mp4 files of the input folder, ordered by the extract_parts sort key
file_names = sorted(
    (f for f in os.listdir(input_dir) if f.endswith(".mp4")),
    key=extract_parts,
)

# Base names that already have a mirrored .mp4 in the output folder
processed_files = {os.path.splitext(f)[0] for f in os.listdir(output_dir) if f.endswith(".mp4")}

# Mirror every video that has not been mirrored yet
for file_name in file_names:
    video_name = os.path.splitext(file_name)[0]
    if video_name not in processed_files:
        input_path = os.path.join(input_dir, file_name)
        output_path = os.path.join(output_dir, file_name)
        mirror_video(input_path, output_path)
    else:
        print(f"Skipping {file_name} — already processed.")


# Run video processing
# The same pipeline is run twice with identical settings: once on the original
# videos and once on the mirrored copies. Only the folders/file names differ.
for _video_dir, _keypoints_name, _json_name, _excel_name in (
    (input_dir, "keypoints", "JSON", "gaitevents_video.xlsx"),
    (output_dir, "mirror_keypoints", "mirror_JSON", "gaitevents_mirror.xlsx"),
):
    run_full_gait_processing_pipeline(
        video_directory=_video_dir,
        keypoints_output_dir=os.path.join(Output_dir, _keypoints_name),
        gait_json_output_dir=os.path.join(Output_dir, _json_name),
        final_excel_path=os.path.join(Output_dir, _excel_name),
        model=model,
        skeleton=skeleton,
        video_reader=video_reader,
        transformer_model=transformer_model,
        height_mm=height_cm * 10,  # cm -> mm
        expected_order_idx=expected_order_idx,
        extract_parts=extract_parts,
        convert_to_int=convert_to_int,
        shift_invalid_rows=shift_invalid_rows
    )


# Run feature extraction
import os

import numpy as np
import pandas as pd

# Load the gait-event tables written by the two pipeline runs above
df_video = pd.read_excel(os.path.join(Output_dir, "gaitevents_video.xlsx"))
df_mirror = pd.read_excel(os.path.join(Output_dir, "gaitevents_mirror.xlsx"))

# Set every numeric entry below the threshold to NaN (non-numeric columns are kept)
df_video_filtered = filter_numeric_columns(df_video, threshold=12)
df_mirror_filtered = filter_numeric_columns(df_mirror, threshold=12)

df_video_results = analyze_gait_video_features(
    df_video=df_video_filtered,
    keypoints_dir=os.path.join(Output_dir, "keypoints"),
    expected_order_idx=expected_order_idx,
    extract_parts=extract_parts
)

df_mirror_results = analyze_gait_video_features(
    df_video=df_mirror_filtered,
    keypoints_dir=os.path.join(Output_dir, "mirror_keypoints"),
    expected_order_idx=expected_order_idx,
    extract_parts=extract_parts
)

## swap mirror file columns
# In the mirrored videos left and right are exchanged, so every "..._left" column
# is relabelled "..._right" and vice versa. The swap is done by column name rather
# than by position so it stays correct if the result columns are ever reordered.
columns = []
for name in df_mirror_results.columns:
    if name.endswith("_left"):
        name = name[: -len("_left")] + "_right"
    elif name.endswith("_right"):
        name = name[: -len("_right")] + "_left"
    columns.append(name)
df_mirror_results.columns = columns

# average
# The merge below is an inner join on trial_name: trials present in only one of the
# two result tables would vanish silently, so report them first.
unmatched_trials = set(df_video_results['trial_name']) ^ set(df_mirror_results['trial_name'])
if unmatched_trials:
    print(f"Warning: trials without both a video and a mirror result are dropped: {sorted(unmatched_trials)}")

merged = pd.merge(df_video_results, df_mirror_results, on='trial_name', suffixes=('_video', '_mirror'))
# Numeric feature columns to average ('trial_name' is not numeric and is excluded)
numeric_cols = df_video_results.select_dtypes(include='number').columns
# One row per trial: mean of the original-video and mirrored-video estimate
df_avg = pd.DataFrame()
df_avg['trial_name'] = merged['trial_name']
for col in numeric_cols:
    col_video = f"{col}_video"
    col_mirror = f"{col}_mirror"
    df_avg[col] = (merged[col_video] + merged[col_mirror]) / 2


# Save to Excel
df_avg.to_excel(os.path.join(Output_dir, "feature_results.xlsx"), index=False)
print("Excel file 'feature_results.xlsx' has been saved successfully.")

Mirrored video saved as ../Downloads/Results/mirror_video/demo.mp4
Step 1: Extracting pose keypoints from video files...


Processing demo.mp4: 0it [00:00, ?it/s]2025-05-26 13:30:08.725586: E tensorflow/core/util/util.cc:131] oneDNN supports DT_HALF only on platforms with AVX-512. Falling back to the default Eigen-based implementation if present.
I0000 00:00:1748280611.477897  382166 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1748280613.446371  382124 cuda_solvers.cc:178] Creating GpuSolver handles for stream 0xfead710
Processing demo.mp4: 34it [00:55,  1.62s/it]


Processed demo.mp4 and saved keypoints to demo.npy
Step 2: Running Gait Transformer model on extracted keypoints...


2it [00:00,  2.00it/s]


Processed and saved: ../Downloads/Results/JSON/demo_gait_events_L60.json
Step 3: Arranging gait events and exporting to Excel...
Saved arranged gait events to ../Downloads/Results/gaitevents_video.xlsx
✅ Full gait processing pipeline complete.
Step 1: Extracting pose keypoints from video files...


Processing demo.mp4: 34it [00:26,  1.28it/s]


Processed demo.mp4 and saved keypoints to demo.npy
Step 2: Running Gait Transformer model on extracted keypoints...


2it [00:00,  9.58it/s]


Processed and saved: ../Downloads/Results/mirror_JSON/demo_gait_events_L60.json
Step 3: Arranging gait events and exporting to Excel...
Saved arranged gait events to ../Downloads/Results/gaitevents_mirror.xlsx
✅ Full gait processing pipeline complete.
Excel file 'feature_results.xlsx' has been saved successfully.


In [3]:
# Function to convert values to integers
def convert_to_int(data):
    """Return a new dict with the same keys as ``data`` and every value cast by ``int``.

    Parameters:
        data (dict): Maps each key to an iterable of values accepted by ``int()``.

    Returns:
        dict: ``{key: [int(v) for v in values]}``; the input dict is not modified.
    """
    return {key: list(map(int, values)) for key, values in data.items()}

def extract_parts(filename):
    """Sort key for file names that start with ``Diz_<n>_T<m>`` or ``Val_<n>_T<m>``.

    Returns:
        tuple: ``(prefix_order, n, m)`` where prefix_order is 0 for "Diz" and 1 for
        "Val" and n, m are the two numbers as ints. Names that do not start with
        this pattern all get ``(inf, inf, inf)`` so they sort after every match
        (and compare equal to one another).
    """
    found = re.match(r'(Diz|Val)_(\d+)_T(\d+)', filename)
    if found is None:
        # Unmatched files last
        return (float('inf'),) * 3

    prefix, first_number, t_number = found.groups()
    # "Diz" (0) sorts before "Val" (1)
    return (0 if prefix == "Diz" else 1, int(first_number), int(t_number))


# read video and get keypoints 
def video_reader(filename: str, batch_size: int = 8, width=320):
    """Generator that yields the frames of a video in batches.

    Each frame is converted from OpenCV's BGR order to RGB and, unless ``width`` is
    None, resized to ``width`` pixels wide with the height scaled by the same factor.

    Parameters:
        filename (str): Path of the video file to read.
        batch_size (int): Number of frames per yielded batch.
        width (int or None): Target frame width in pixels; None keeps the original size.

    Yields:
        np.ndarray: ``np.array`` of up to ``batch_size`` frames; the last batch may be
        shorter. Nothing is yielded when no frame can be read.
    """
    cap = cv2.VideoCapture(filename)
    frames = []

    # try/finally so the capture is released even when the consumer stops iterating
    # early (break / generator.close()) or an exception is raised mid-stream.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            if width is not None:
                # downsample to keep the aspect ratio and output the specified width
                scale = width / frame.shape[1]
                height = int(frame.shape[0] * scale)
                frame = cv2.resize(frame, (width, height))

            frames.append(frame)

            if len(frames) >= batch_size:
                yield np.array(frames)
                frames = []

        # Flush the final, possibly partial, batch
        if len(frames) > 0:
            yield np.array(frames)
    finally:
        cap.release()

    
def shift_invalid_rows(data):
    """Shift rows one column to the right when they start later than the first row.

    The first value of the first row is the reference. Every later row whose first
    value is greater than that reference is replaced by ``[nan, row[0], ..., row[-2]]``
    (its last value is dropped). Rows are modified in place.

    Parameters:
        data (np.ndarray): 2-D array; it is mutated.

    Returns:
        np.ndarray: The same ``data`` object.
    """
    reference_value = data[0, 0]
    for row in data[1:]:
        if row[0] > reference_value:
            # prepend NaN, drop the last element, write back into the same row
            row[:] = np.concatenate(([np.nan], row[:-1]))
    return data

# mirror video function 
def mirror_video(input_path, output_path):
    """
    Mirrors a single video horizontally and saves the output.

    The output keeps the input's frame size and frame rate and is encoded with the
    'mp4v' codec.

    Parameters:
        input_path (str): Path to the input video file.
        output_path (str): Path to save the mirrored video.

    Raises:
        IOError: If the input video cannot be opened or the output writer cannot
            be created.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Cannot open video: {input_path}")

    out = None
    # try/finally so both handles are released even if reading or writing fails
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            # Without this check nothing is written but success is still reported
            raise IOError(f"Cannot open video writer: {output_path}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # flipCode=1 flips around the vertical axis (left <-> right)
            out.write(cv2.flip(frame, 1))
    finally:
        cap.release()
        if out is not None:
            out.release()

    print(f"Mirrored video saved as {output_path}")


# pose estimation function 
def process_video_for_keypoints(
    file_name,
    directory_path,
    output_dir,
    model,
    skeleton,
    video_reader,
    processed_files=None
):
    """
    Extracts 3D pose keypoints from a video and saves them as ``<video name>.npy``.

    For every frame the first detected person's 3D pose is kept. If any frame
    contains more than one detection a notice is printed and processing continues
    (still using the first detection). If any frame has no detection, or no frame
    can be read at all, the video is skipped and nothing is saved.

    Parameters:
        file_name (str): Name of the video file.
        directory_path (str): Directory where the input video is located.
        output_dir (str): Directory to save the output .npy file.
        model: Pose detection model with .detect_poses_batched(); its result is
            indexed with the keys 'poses2d' and 'poses3d'.
        skeleton: Skeleton structure used by the model.
        video_reader: Callable ``video_reader(path, width=None)`` returning an
            iterable of frame batches.
        processed_files (set): Set of already processed file base names (without extension).
    """
    video_name = os.path.splitext(file_name)[0]

    if processed_files and video_name in processed_files:
        print(f"Skipping {file_name} — already processed.")
        return

    video_filepath = os.path.join(directory_path, file_name)
    vid = video_reader(video_filepath, width=None)

    multiple_people_detected = False
    nonvalid_pose_detected = False
    # One (joints, 3) array per frame; collected in a list so each batch is handled
    # once instead of re-concatenating and re-scanning everything seen so far.
    poses3d_per_frame = []

    for frame_batch in tqdm(vid, desc=f"Processing {file_name}"):
        pred = model.detect_poses_batched(frame_batch, skeleton=skeleton)

        # Number of detected people in each frame of this batch
        num_people = [p.shape[0] for p in pred['poses2d']]

        # More than one detection in a frame (a differing count between frames is
        # not the same thing: a constant 2 would otherwise go unnoticed).
        if any(n > 1 for n in num_people):
            multiple_people_detected = True

        if any(n == 0 for n in num_people):
            nonvalid_pose_detected = True
            break

        # Keep the first detected person of every frame
        poses3d_per_frame.extend(np.asarray(p[0]) for p in pred['poses3d'])

    if multiple_people_detected:
        print(f"2 - {file_name} has multiple people detected.")

    if nonvalid_pose_detected:
        print(f"Skipping {file_name} — one or more frames have no detected person.")
        return

    if not poses3d_per_frame:
        # The reader produced no frames (e.g. unreadable or empty video)
        print(f"Skipping {file_name} — no frames could be read.")
        return

    pose3d = np.array(poses3d_per_frame)
    output_file_name = f"{video_name}.npy"
    np.save(os.path.join(output_dir, output_file_name), pose3d)
    print(f"Processed {file_name} and saved keypoints to {output_file_name}")


# gait transformer function 
import os
import numpy as np
import json
from gait_transformer.gait_phase_transformer import gait_phase_stride_inference
from gait_transformer.gait_phase_kalman import gait_kalman_smoother, get_event_times

def process_gait_keypoints_to_json(
    file_path,
    output_directory,
    transformer_model,
    height,
    expected_order_idx,
    L=60,
    pos_divider=2
):
    """
    Runs the gait transformer on one keypoints .npy file and writes the gait events to JSON.

    The result is saved in ``output_directory`` under the input file name with
    ``.npy`` replaced by ``_gait_events_L<L>.json``.

    Parameters:
        file_path (str): Path to the .npy file containing 3D keypoints.
        output_directory (str): Directory for the JSON file (created if missing).
        transformer_model: Loaded gait transformer model.
        height (float): Subject height, forwarded unchanged to
            gait_phase_stride_inference (the pipeline in this notebook passes millimetres).
        expected_order_idx (np.array): Indices used to reorder the joint axis.
        L (int): Window length; ``L * pos_divider`` is passed to the inference call.
        pos_divider (int): Multiplier applied to ``L`` (default: 2).
    """
    npy_name = os.path.basename(file_path)

    # Reorder joints, divide by 1000 (mm → m), centre each frame on its joint mean
    pose = np.load(file_path)[:, expected_order_idx] / 1000.0
    pose = pose - np.mean(pose, axis=1, keepdims=True)
    # Swap the last two coordinate axes, then negate the new third axis
    pose = pose[:, :, [0, 2, 1]]
    pose[:, :, 2] *= -1

    # Inference; the stride output is not used here
    phase, _stride = gait_phase_stride_inference(pose, height, transformer_model, L * pos_divider)

    # Reorder the phase channels for the Kalman smoother, then read off event times
    # using the frame index as the timestamp
    smoother_input = np.take(phase, [0, 4, 1, 5, 2, 6, 3, 7], axis=-1)
    state, _, _ = gait_kalman_smoother(smoother_input)
    frame_index = np.arange(state.shape[0])
    events = get_event_times(state, frame_index)

    # Write the events as lists to JSON
    os.makedirs(output_directory, exist_ok=True)
    output_path = os.path.join(output_directory, npy_name.replace('.npy', f'_gait_events_L{L}.json'))
    serialisable = {k: v.tolist() for k, v in events.items()}
    with open(output_path, 'w') as f:
        json.dump(serialisable, f, indent=4)

    print(f"Processed and saved: {output_path}")


# arrange gait events function 
import os
import json
import pandas as pd
import numpy as np

def arrange_gait_events_to_excel(
    directory_path,
    output_excel_path,
    extract_parts,
    convert_to_int,
    shift_invalid_rows
):
    """
    Collects the gait-event JSON files of a folder into one Excel sheet.

    Every file whose name ends in 'L60.json' contributes one row per key of its
    JSON object; the code assumes four rows per file. Within each 4-row block the
    second and third rows are exchanged, then the block's value columns are passed
    through ``shift_invalid_rows``. The sheet has the columns 'Trial', 'gait_event'
    and one column per event position.

    Parameters:
        directory_path (str): Directory containing gait event JSON files.
        output_excel_path (str): Path to save the final Excel file.
        extract_parts (function): Sort key applied to the file names.
        convert_to_int (function): Converts the loaded JSON dict's values to integers.
        shift_invalid_rows (function): Applied to each 4-row block of values.

    Returns:
        None. Nothing is written when no file could be loaded.
    """
    # L60 JSON files in sort-key order
    json_names = sorted(
        (name for name in os.listdir(directory_path) if name.endswith('L60.json')),
        key=extract_parts,
    )

    frames = []
    for json_name in json_names:
        file_path = os.path.join(directory_path, json_name)
        try:
            with open(file_path, 'r') as handle:
                events = json.load(handle)
            frames.append(pd.DataFrame.from_dict(convert_to_int(events), orient='index'))
        except FileNotFoundError:
            print(f"File not found: {file_path}")
        except Exception as e:
            print(f"Error processing {file_path}: {e}")

    if len(frames) == 0:
        print("No dataframes created. Exiting.")
        return

    combined_df = pd.concat(frames)
    block_count = int(combined_df.shape[0] / 4)

    # Trial label = first three '_'-separated parts of the file name, repeated for
    # the four rows of that file
    trial_labels = []
    for json_name in json_names:
        trial_labels.extend(["_".join(json_name.split("_")[:3])] * 4)

    # The former index (the JSON keys) becomes the 'gait_event' column
    combined_df = combined_df.reset_index().rename(columns={'index': 'gait_event'})
    combined_df.insert(0, 'Trial', trial_labels)

    # Exchange the 2nd and 3rd row of every 4-row block
    for block_start in range(0, 4 * block_count, 4):
        i1, i2 = block_start + 1, block_start + 2
        combined_df.iloc[[i1, i2]] = combined_df.iloc[[i2, i1]].values

    # Run the value columns (everything after 'Trial' and 'gait_event') of each
    # block through shift_invalid_rows
    processed_blocks = [
        shift_invalid_rows(combined_df.iloc[start:start + 4].values[:, 2:])
        for start in range(0, len(combined_df), 4)
    ]

    value_columns = combined_df.columns[2:].tolist()
    final_df = pd.DataFrame(
        np.hstack([combined_df[['Trial', 'gait_event']], np.vstack(processed_blocks)]),
        columns=['Trial', 'gait_event'] + value_columns,
    )

    final_df.to_excel(output_excel_path, index=False)
    print(f"Saved arranged gait events to {output_excel_path}")



def run_full_gait_processing_pipeline(
    video_directory,
    keypoints_output_dir,
    gait_json_output_dir,
    final_excel_path,
    model,
    skeleton,
    video_reader,
    transformer_model,
    height_mm,
    expected_order_idx,
    extract_parts,
    convert_to_int,
    shift_invalid_rows,
    pos_divider=2,
    L=60
):
    """
    Runs the three processing steps for one folder of .mp4 videos.

    1. Pose estimation: every video without an existing ``.npy`` in
       ``keypoints_output_dir`` is passed to process_video_for_keypoints.
    2. Gait transformer: every ``.npy`` in ``keypoints_output_dir`` is passed to
       process_gait_keypoints_to_json (existing JSON files are regenerated).
    3. Export: the JSON files in ``gait_json_output_dir`` are arranged into
       ``final_excel_path`` by arrange_gait_events_to_excel.

    Files are handled in the order given by the ``extract_parts`` sort key.
    ``height_mm``, ``L`` and ``pos_divider`` are forwarded to step 2; the remaining
    arguments are forwarded unchanged to the step that uses them.
    """
    import os

    # --- STEP 1: Pose Estimation from videos ---
    print("Step 1: Extracting pose keypoints from video files...")

    # The keypoints folder must exist before it is listed below
    os.makedirs(keypoints_output_dir, exist_ok=True)

    mp4_names = sorted(
        (name for name in os.listdir(video_directory) if name.endswith(".mp4")),
        key=extract_parts,
    )
    already_extracted = {
        os.path.splitext(name)[0]
        for name in os.listdir(keypoints_output_dir)
        if name.endswith('.npy')
    }

    for mp4_name in mp4_names:
        process_video_for_keypoints(
            file_name=mp4_name,
            directory_path=video_directory,
            output_dir=keypoints_output_dir,
            model=model,
            skeleton=skeleton,
            video_reader=video_reader,
            processed_files=already_extracted
        )

    # --- STEP 2: Run Gait Transformer on keypoints ---
    print("Step 2: Running Gait Transformer model on extracted keypoints...")
    npy_names = [name for name in os.listdir(keypoints_output_dir) if name.endswith('.npy')]
    npy_names.sort(key=extract_parts)

    for npy_name in npy_names:
        process_gait_keypoints_to_json(
            file_path=os.path.join(keypoints_output_dir, npy_name),
            output_directory=gait_json_output_dir,
            transformer_model=transformer_model,
            height=height_mm,
            expected_order_idx=expected_order_idx,
            L=L,
            pos_divider=pos_divider
        )

    # --- STEP 3: Arrange Gait Events and Save to Excel ---
    print("Step 3: Arranging gait events and exporting to Excel...")
    arrange_gait_events_to_excel(
        directory_path=gait_json_output_dir,
        output_excel_path=final_excel_path,
        extract_parts=extract_parts,
        convert_to_int=convert_to_int,
        shift_invalid_rows=shift_invalid_rows
    )

    print("✅ Full gait processing pipeline complete.")


# filter function 
import pandas as pd
def filter_numeric_columns(df: pd.DataFrame, threshold: float = 12) -> pd.DataFrame:
    """
    Replace numeric values below ``threshold`` with NaN.

    Non-numeric columns are passed through untouched. In the result the
    non-numeric columns come first, followed by the numeric ones; within each
    group the original column order is kept.

    Parameters:
        df (pd.DataFrame): Input DataFrame with mixed types (not modified).
        threshold (float): Smallest numeric value that is kept.

    Returns:
        pd.DataFrame: New DataFrame with numeric values below threshold set to NaN.
    """
    numeric_part = df.select_dtypes(include='number')
    other_part = df.select_dtypes(exclude='number')
    # where() keeps entries satisfying the condition and puts NaN everywhere else
    kept = numeric_part.where(numeric_part >= threshold)
    return pd.concat([other_part, kept], axis=1)


# get gait features function
import os
import numpy as np
import pandas as pd

def analyze_gait_video_features(
    df_video: pd.DataFrame,
    keypoints_dir: str,
    expected_order_idx: list,
    fps: int = 60,
    extract_parts=None
) -> pd.DataFrame:
    """
    Analyze spatiotemporal gait features from gait event timing and 3D keypoints.

    The m-th 4-row block of ``df_video`` is paired with the m-th entry of the
    sorted ``keypoints_dir`` listing, so both must contain the same trials in the
    same order (and ``keypoints_dir`` should hold nothing but the keypoint files).

    Parameters:
        df_video (pd.DataFrame): Gait event data organized in 4-row blocks (LHS, LTO, RHS, RTO).
            Rows are addressed by label 0, 1, 2, ... and the event values are read
            from the columns labelled 0 onward.
        keypoints_dir (str): Directory containing .npy keypoint files.
        expected_order_idx (list): Joint reordering index.
        fps (int): Frames per second, used to convert frame counts to seconds.
        extract_parts (function): Sort key for the directory listing; None sorts by name.

    Returns:
        pd.DataFrame: DataFrame of extracted gait features, one row per trial.
    """
    # Pre-allocate lists
    trial_name_list, avg_swing_left_list, avg_swing_right_list = [], [], []
    avg_stance_left_list, avg_stance_right_list = [], []
    avg_steptime_left_list, avg_steptime_right_list = [], []
    avg_step_length_left_list, avg_step_length_right_list = [], []
    cadence_list, avg_swing_list, avg_stance_list = [], [], []
    avg_double_list, avg_velocity_list, avg_steplength_list = [], [], []
    avg_steptime_list, correlation_list = [], []

    files = sorted(os.listdir(keypoints_dir), key=extract_parts)
    loop = len(df_video)

    for m in range(loop // 4):
        # Extract gait events: the four rows of block m, columns from label 0 onward
        lhs, ltf = df_video.loc[4*m,0:], df_video.loc[4*m+1,0:]
        rhs, rtf = df_video.loc[4*m+2,0:], df_video.loc[4*m+3,0:]

        # Compute temporal phases (in frames). Same-position differences pair event k
        # with event k; the [1:] + reset_index forms pair event k+1 with event k.
        L_swing = (lhs - ltf).dropna()
        L_stance = (ltf[1:].reset_index(drop=True) - lhs.reset_index(drop=True)).dropna()
        R_swing = (rhs - rtf).dropna()
        R_stance = (rtf[1:].reset_index(drop=True) - rhs.reset_index(drop=True)).dropna()
        L_steptime = (lhs - rhs).dropna()
        R_steptime = (rhs[1:].reset_index(drop=True) - lhs.reset_index(drop=True)).dropna()

        # Combine phases
        swing = np.concatenate([L_swing, R_swing])
        stance = np.concatenate([L_stance, R_stance])
        steptime = np.concatenate([L_steptime, R_steptime])
        # Double support: sum of the two series below; += aligns them on their index
        double = (rtf[1:].reset_index(drop=True) - lhs.reset_index(drop=True)).dropna().reset_index(drop=True)
        double += (ltf - rhs).reset_index(drop=True).dropna().reset_index(drop=True)

        # Averages (temporal), converted from frames to seconds
        avg_swing_left_list.append(np.mean(L_swing)/fps)
        avg_swing_right_list.append(np.mean(R_swing)/fps)
        avg_stance_left_list.append(np.mean(L_stance)/fps)
        avg_stance_right_list.append(np.mean(R_stance)/fps)
        avg_steptime_left_list.append(np.mean(L_steptime)/fps)
        avg_steptime_right_list.append(np.mean(R_steptime)/fps)
        avg_swing_list.append(np.mean(swing)/fps)
        avg_stance_list.append(np.mean(stance)/fps)
        avg_double_list.append(np.mean(double)/fps)
        avg_steptime = np.mean(steptime)/fps
        avg_steptime_list.append(avg_steptime)
        # Steps per minute
        cadence_list.append(60 / avg_steptime)

        # Load keypoints of the m-th file in the sorted listing
        npy_file_path = os.path.join(keypoints_dir, files[m].split(".")[0] + '.npy')
        keypoints = np.load(npy_file_path)
        keypoints = keypoints[:, expected_order_idx] / 1000.0
        keypoints[:, :, 1] *= -1  # Flip Y axis
        # Third coordinate of joint 0 (pelv in this notebook's expected_order), per frame
        z_hip = keypoints[:, 0, 2]

        # Step lengths: absolute change of z_hip between consecutive heel strikes
        lhs_int, rhs_int = lhs.dropna().astype(int), rhs.dropna().astype(int)
        if (np.isnan(rhs.iloc[0])) and (not np.isnan(lhs.iloc[0])):
            # First right heel strike is missing: pair each right strike with the following left one
            min_len = min(len(lhs_int)-1, len(rhs_int))
            step_length_left = abs(z_hip[lhs_int.iloc[1:(min_len + 1)]] - z_hip[rhs_int.iloc[:min_len]])
            step_length_right = abs(z_hip[rhs_int.iloc[:min_len]] - z_hip[lhs_int.iloc[:min_len]])
        else:
            min_len = min(len(lhs_int), len(rhs_int)-1)
            step_length_left = abs(z_hip[lhs_int.iloc[:min_len]] - z_hip[rhs_int.iloc[:min_len]])
            step_length_right = abs(z_hip[rhs_int.iloc[1:(min_len + 1)]] - z_hip[lhs_int.iloc[:min_len]])

        step_length = np.concatenate([step_length_left, step_length_right])
        # Velocity: z_hip change between the first and last heel strike per second
        sort_heelstrike = sorted(lhs_int.tolist() + rhs_int.tolist())
        avg_velocity = abs((z_hip[sort_heelstrike[-1]] - z_hip[sort_heelstrike[0]]) / 
                           (sort_heelstrike[-1] - sort_heelstrike[0]) * fps)

        # Arm swing symmetry: correlation of the two elbows' third coordinate,
        # measured relative to joint 0
        keypoints_centered = keypoints - keypoints[:, 0:1]
        correlation = np.corrcoef(
            keypoints_centered[:, 15, 2],  # index 15 is relb in this notebook's expected_order
            keypoints_centered[:, 12, 2]   # index 12 is lelb in this notebook's expected_order
        )[0, 1]

        # Append spatial metrics
        avg_step_length_left_list.append(np.mean(step_length_left))
        avg_step_length_right_list.append(np.mean(step_length_right))
        avg_steplength_list.append(np.mean(step_length))
        avg_velocity_list.append(avg_velocity)
        correlation_list.append(correlation)
        trial_name_list.append(files[m].split('.')[0])

    # Results to DataFrame
    results = pd.DataFrame({
        "trial_name": trial_name_list,
        "avg_stancetime": avg_stance_list,
        "avg_swingtime": avg_swing_list,
        "avg_doublesupporttime": avg_double_list,
        "avg_steptime": avg_steptime_list,
        "avg_steplength": avg_steplength_list,
        "avg_velocity": avg_velocity_list,
        "avg_cadence": cadence_list,
        "avg_stancetime_left": avg_stance_left_list,
        "avg_stancetime_right": avg_stance_right_list,
        "avg_swingtime_left": avg_swing_left_list,
        "avg_swingtime_right": avg_swing_right_list,
        "avg_steptime_left": avg_steptime_left_list,
        "avg_steptime_right": avg_steptime_right_list,
        "avg_steplength_left": avg_step_length_left_list,
        "avg_steplength_right": avg_step_length_right_list,
        "arm_swing_corr": correlation_list
    })

    
    return results

In [5]:
# Read back the file written by the main cell. The path is built from Output_dir so
# it follows input_dir instead of pointing at a hard-coded "../Downloads/Results".
df = pd.read_excel(os.path.join(Output_dir, "feature_results.xlsx"))

In [6]:
df

Unnamed: 0,trial_name,avg_stancetime,avg_swingtime,avg_doublesupporttime,avg_steptime,avg_steplength,avg_velocity,avg_cadence,avg_stancetime_left,avg_stancetime_right,avg_swingtime_left,avg_swingtime_right,avg_steptime_left,avg_steptime_right,avg_steplength_left,avg_steplength_right,arm_swing_corr
0,demo,0.677222,0.409524,0.266667,0.549802,0.825167,1.485678,109.13268,0.681944,0.677778,0.4125,0.413889,0.542361,0.552778,0.789667,0.860667,-0.959253
