In [None]:
import pandas as pd
from pandas.api.types import CategoricalDtype
import numpy as np
import os
import time

In [None]:
# Folder holding one sub-folder per subject with the gait sequence and gait
# characteristics predictions derived from the lower back signal
gait_dir = "../Jake/results_mobilise_ionescu_full_redo/"
# Folder holding one sub-folder per subject with the signal from the wrist sensor
# NOTE(review): "dir" looks like a placeholder path — confirm before running
wrist_dir = "dir"
# Output directory (one sub-folder per subject is created underneath it)
output_dir = 'Outputs/Lower Back Predictions Mapped To Wrist/'

# Extract list of subjects with lower back predictions.
# os.listdir returns entries in arbitrary order and also lists plain files
# (e.g. ".DS_Store"), so keep only sub-folders and sort them to make the
# processing order reproducible between runs.
subjects = sorted(
    entry for entry in os.listdir(gait_dir)
    if os.path.isdir(os.path.join(gait_dir, entry))
)

In [None]:
# Half-width (in samples) of the window each gait analysis row is spread over:
# a row centred on sample c covers c - 50 ... c + 49 inclusive, i.e. 100 samples.
# NOTE(review): presumably one second at 100 Hz — confirm against the gait pipeline.
ANALYSIS_HALF_WINDOW = 50
# Per-window gait characteristics carried over to the wrist signal
GAIT_FEATURES = ['cadence_spm', 'stride_length_m', 'walking_speed_mps']


def _expand_inclusive_ranges(starts, ends):
    """Expand inclusive integer ranges into one flat array of sample indices.

    Parameters
    ----------
    starts, ends : array-like of int
        First and last sample of each range (both inclusive). A range whose
        end lies before its start contributes no samples.

    Returns
    -------
    indices : np.ndarray of int64
        Concatenation of ``start, start + 1, ..., end`` for every range, in
        input order. Empty when no ranges are given.
    source_rows : np.ndarray of int64
        For every element of ``indices``, the position of the range it came from.
    """
    starts = np.asarray(starts, dtype=np.int64)
    ends = np.asarray(ends, dtype=np.int64)
    lengths = np.maximum(ends - starts + 1, 0)
    source_rows = np.repeat(np.arange(len(starts)), lengths)
    # Position of each range's first element in the flat output; subtracting it
    # from the running position gives the offset of an element within its range.
    first_positions = np.cumsum(lengths) - lengths
    offsets = np.arange(lengths.sum()) - np.repeat(first_positions, lengths)
    return starts[source_rows] + offsets, source_rows


# Loop through all subjects
for folder_name in subjects:
    print(f"Processing subject: {folder_name}")

    # Start timer (perf_counter is monotonic, unlike the wall clock)
    start_time = time.perf_counter()

    # Resolve every input up front so that one incomplete subject is skipped
    # instead of aborting the whole batch half-way through
    csv_path = os.path.join(wrist_dir, folder_name, 'combined_ax6_df.csv')
    gs_path = os.path.join(gait_dir, folder_name, 'gs_list.csv')
    analysis_path = os.path.join(gait_dir, folder_name, 'gait_analysis_results.csv')
    missing_inputs = [path for path in (csv_path, gs_path, analysis_path)
                      if not os.path.isfile(path)]
    if missing_inputs:
        print(f"Skipping subject {folder_name}: missing input file(s): {missing_inputs}")
        continue

    # Load wrist signal data; reset_index exposes the row number as an "index"
    # column, which is the key the lower back predictions are joined on.
    # NOTE(review): joining by row number assumes the wrist and lower back
    # recordings are sample-aligned at the same sampling rate — confirm.
    wrist_df = pd.read_csv(csv_path, usecols=['accel_x', 'accel_y', 'accel_z']).reset_index()

    signal_load_time = time.perf_counter() - start_time
    print(f"Wrist file load time: {signal_load_time:.2f} seconds")

    # Start timer for walking_df creation
    walking_df_start_time = time.perf_counter()

    # Load gait sequence data
    gs = pd.read_csv(gs_path)

    # Expand gait sequences to one row per walking sample ("end" is inclusive).
    # Reading the columns directly (rather than iterrows) keeps the key integral
    # even when the file has other float columns, and an empty gait sequence
    # list yields an empty frame instead of a np.concatenate error.
    walking_indices, _ = _expand_inclusive_ranges(gs['start'].astype(np.int64),
                                                  gs['end'].astype(np.int64))
    walking_df = pd.DataFrame({'index': walking_indices})
    walking_df['lower_back_mapped_value'] = 1

    # Overlapping gait sequences would duplicate samples; keep the first occurrence
    if walking_df.duplicated(subset=['index']).any():
        print(f"Duplicate indices found in walking_df for subject: {folder_name}")
        walking_df = walking_df.drop_duplicates(subset=['index'], keep='first')

    # Load gait analysis data
    gait_analysis = pd.read_csv(analysis_path, usecols=['sec_center_samples'] + GAIT_FEATURES)

    # Spread every analysis row over its window of samples and repeat its
    # feature values for each of them (vectorised; no per-sample Python objects)
    window_starts = (gait_analysis['sec_center_samples'] - ANALYSIS_HALF_WINDOW).astype(np.int64)
    window_ends = (gait_analysis['sec_center_samples'] + ANALYSIS_HALF_WINDOW - 1).astype(np.int64)
    window_indices, source_rows = _expand_inclusive_ranges(window_starts, window_ends)
    analysis_intervals = gait_analysis[GAIT_FEATURES].iloc[source_rows].reset_index(drop=True)
    analysis_intervals.insert(0, 'index', window_indices)

    # Overlapping windows would duplicate samples; the earliest window wins
    if analysis_intervals.duplicated(subset=['index']).any():
        print(f"Duplicate indices found in analysis_intervals for subject: {folder_name}")
        analysis_intervals = analysis_intervals.drop_duplicates(subset=['index'], keep='first')

    # Merge the walking intervals and analysis features; walking samples that
    # fall outside every analysis window keep NaN features (left join)
    walking_df = walking_df.merge(analysis_intervals, on='index', how='left')

    walking_df_time = time.perf_counter() - walking_df_start_time
    print(f"Time to create walking_df: {walking_df_time:.2f} seconds")

    # Step 2: Inner join walking data to wrist signal data, so only wrist
    # samples that lie inside a gait sequence are kept
    mapped_df = wrist_df.merge(walking_df, on='index', how='inner')

    # Subset and save the result
    mapped_df = mapped_df[['index', 'accel_x', 'accel_y', 'accel_z',
                           'lower_back_mapped_value'] + GAIT_FEATURES]

    output_folder = os.path.join(output_dir, folder_name)
    os.makedirs(output_folder, exist_ok=True)
    output_path = os.path.join(output_folder, 'wrist_lower_back_df.csv')
    mapped_df.to_csv(output_path, index=False, chunksize=10**6)

    # End timer
    total_time = time.perf_counter() - start_time
    print(f"Finished processing subject: {folder_name} in {total_time:.2f} seconds")