In [1]:
import os
import re
import pandas as pd
from datetime import datetime
import mobgap  # Ensure mobgap is properly installed and imported
from mobgap.pipeline import MobilisedPipelineHealthy, GsIterator
from mobgap.gait_sequences import GsdIluz, GsdIonescu, GsdAdaptiveIonescu
from mobgap.utils.conversions import to_body_frame
from mobgap.consts import GRAV_MS2
from mobgap.initial_contacts import IcdShinImproved, refine_gs
from mobgap.laterality import LrcUllrich
from mobgap.stride_length import SlZijlstra
from mobgap.turning import TdElGohary
from mobgap.walking_speed import WsNaive
from mobgap.cadence import CadFromIc



# Define directories.
# The defaults are the original hard-coded locations. Set MOBGAP_SUBJECTS_DIR and/or
# MOBGAP_OUTPUT_DIR in the environment to point the notebook at another dataset or
# output location without editing the code.
subjects_dir = os.environ.get('MOBGAP_SUBJECTS_DIR', '/domino/datasets/local/dataset/idea_fast/for_s3/')
output_folder = os.environ.get('MOBGAP_OUTPUT_DIR', '/mnt/Jake/final_test/')

# Ensure output folder exists (no error if it is already there)
os.makedirs(output_folder, exist_ok=True)

# Preprocessing function for each file
def preprocess_file(file_path):
    """Load one raw IMU CSV file and return it in the column layout used by the analysis.

    Header names are stripped of surrounding whitespace, the raw export names are
    mapped to ``timestamp`` / ``acc_*`` / ``gyr_*``, the timestamp column is parsed
    to datetimes, and the three accelerometer columns are multiplied by
    ``GRAV_MS2`` (the raw headers label them as being in g).

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        A new frame with exactly the columns ``timestamp``, ``acc_x``, ``acc_y``,
        ``acc_z``, ``gyr_x``, ``gyr_y``, ``gyr_z`` (in that order). All other
        columns of the file are dropped.

    Raises
    ------
    KeyError
        If any of the required columns is absent after renaming.
    """
    print(f"Starting preprocessing for file: {file_path}")

    # Raw export header -> name used downstream. Keys that are not present in the
    # file are simply ignored by ``rename``, so a file that already uses
    # 'timestamp' instead of 'Time' is handled as well.
    column_map = {
        'Time': 'timestamp',
        'Accel-X (g)': 'acc_x',
        'Accel-Y (g)': 'acc_y',
        'Accel-Z (g)': 'acc_z',
        'Gyro-X (d/s)': 'gyr_x',
        'Gyro-Y (d/s)': 'gyr_y',
        'Gyro-Z (d/s)': 'gyr_z',
    }
    required_columns = ['timestamp', 'acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']
    acc_columns = ['acc_x', 'acc_y', 'acc_z']

    # Load the CSV file; headers may carry stray spaces around the names
    raw = pd.read_csv(file_path)
    raw.columns = raw.columns.str.strip()
    renamed = raw.rename(columns=column_map)

    # Fail early with a message that names the file and the absent columns
    missing = [name for name in required_columns if name not in renamed.columns]
    if missing:
        raise KeyError(
            f"{file_path}: missing required column(s) {missing}; "
            f"columns found: {list(renamed.columns)}"
        )

    # Explicit copy: the assignments below must write to an independent frame,
    # not to a slice of ``renamed`` (which triggers SettingWithCopyWarning).
    df = renamed[required_columns].copy()

    # Convert timestamp column to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    # Scale the accelerometer axes by the gravity constant
    df[acc_columns] = df[acc_columns] * GRAV_MS2

    print(f"Completed preprocessing for file: {file_path}")
    return df

# Algorithm templates. Every run below calls `.clone()` on these and works with the
# returned object, so the instances defined here are never fitted themselves.
icd = IcdShinImproved()
lrc = LrcUllrich()
cad = CadFromIc()
sl = SlZijlstra()
speed = WsNaive()
turn = TdElGohary()

# Initial value only; it is replaced by the rate estimated from each subject's timestamps.
sampling_rate_hz = 100

# Sensor height handed to the stride-length calculation.
# NOTE(review): 1.8 m looks like a body height rather than the height of a worn
# sensor - confirm the value, or read it from the subject's meta.csv.
SENSOR_HEIGHT_M = 1.8


def _find_recording_files(subject_path, subject_id):
    """Return the sorted names of files `<subject_id>-<9 word chars>-<8 digits>-<8 digits>.csv`."""
    # re.escape: the folder name is data, not a pattern. fullmatch: reject names with
    # trailing characters such as "….csv.bak", which an unanchored match would accept.
    pattern = re.compile(rf"{re.escape(subject_id)}-\w{{9}}-\d{{8}}-\d{{8}}\.csv")
    return sorted(name for name in os.listdir(subject_path) if pattern.fullmatch(name))


def _estimate_sampling_rate_hz(timestamps):
    """Return 1 / (mean spacing in seconds) of a datetime Series, or None if it is undefined."""
    deltas = timestamps.diff().dropna()
    if deltas.empty:
        # Fewer than two valid timestamps: no spacing to measure
        return None
    mean_period_s = deltas.mean().total_seconds()
    if mean_period_s == 0:
        # Would be a division by zero
        return None
    return abs(1 / mean_period_s)


# Process each subject folder (sorted so that repeated runs visit them in the same order)
for subject_folder in sorted(os.listdir(subjects_dir)):
    subject_path = os.path.join(subjects_dir, subject_folder)

    # Ensure it's a directory
    if not os.path.isdir(subject_path):
        continue
    print(f"\nProcessing subject folder: {subject_folder}")

    # Find relevant CSV files in the folder based on the naming pattern
    csv_files = _find_recording_files(subject_path, subject_folder)
    if not csv_files:
        print(f"No relevant CSV files found in {subject_folder}. Skipping this folder.")
        continue
    print(f"Found {len(csv_files)} relevant CSV file(s) in {subject_folder}: {csv_files}")

    meta_file = os.path.join(subject_path, 'meta.csv')
    if not os.path.exists(meta_file):
        print(f"Meta file not found for subject {subject_path}. Skipping...")
        continue

    try:
        meta_df = pd.read_csv(meta_file, header=None)
        # With header=None this is a nested dict {column position: {row position: value}}.
        # NOTE(review): it is only stored in `attrs` below; confirm that this layout is
        # what any consumer of "participant_metadata" expects.
        participant_metadata = meta_df.to_dict()
    except Exception as e:
        print(f"Error reading metadata for subject {subject_folder}: {str(e)}. Skipping...")
        continue

    # Preprocess each relevant CSV file
    subject_data = []
    for csv_file in csv_files:
        file_path = os.path.join(subject_path, csv_file)
        print(f"Processing file: {csv_file}")
        subject_data.append(preprocess_file(file_path))

    print(f"Combining data from {len(subject_data)} file(s) for subject {subject_folder}")
    combined_data = pd.concat(subject_data, ignore_index=True)

    # The rate is estimated from a single file on purpose: in the combined frame the
    # jump between two recordings would distort the mean spacing.
    estimated_rate_hz = _estimate_sampling_rate_hz(subject_data[0]['timestamp'])
    if estimated_rate_hz is None:
        print(f"Could not estimate sampling rate for subject {subject_folder}. Skipping...")
        continue
    sampling_rate_hz = estimated_rate_hz
    print("sampling rate: ", sampling_rate_hz)

    # Sort data by timestamp in case of overlapping records. A stable sort keeps the
    # file order for equal timestamps, and ignore_index gives a clean 0..n-1 index.
    # NOTE(review): separate recordings are stacked into one continuous signal; a time
    # gap between files is not represented on the sample axis - confirm this is intended.
    combined_data = combined_data.sort_values(by='timestamp', kind='stable', ignore_index=True)
    combined_data.attrs["participant_metadata"] = participant_metadata

    # Gait sequence detection on the whole recording
    gsd = GsdIonescu()
    imu_data = to_body_frame(combined_data)
    gsd.detect(data=imu_data, sampling_rate_hz=sampling_rate_hz)
    gait_sequences = gsd.gs_list_

    # Per gait sequence: initial contacts, left/right labels and turns; then cadence,
    # stride length and walking speed on the refined sequence.
    gs_iterator = GsIterator()
    for (_, gs_data), r in gs_iterator.iterate(imu_data, gait_sequences):
        gs_icd = icd.clone().detect(gs_data, sampling_rate_hz=sampling_rate_hz)
        gs_lrc = lrc.clone().predict(gs_data, gs_icd.ic_list_, sampling_rate_hz=sampling_rate_hz)
        r.ic_list = gs_lrc.ic_lr_list_
        gs_turn = turn.clone().detect(gs_data, sampling_rate_hz=sampling_rate_hz)
        r.turn_list = gs_turn.turn_list_

        refined_gs, refined_ic_list = refine_gs(r.ic_list)

        with gs_iterator.subregion(refined_gs) as ((_, refined_gs_data), rr):
            gs_cad = cad.clone().calculate(
                refined_gs_data,
                initial_contacts=refined_ic_list,
                sampling_rate_hz=sampling_rate_hz
            )
            rr.cadence_per_sec = gs_cad.cadence_per_sec_
            gs_sl = sl.clone().calculate(
                refined_gs_data,
                initial_contacts=refined_ic_list,
                sampling_rate_hz=sampling_rate_hz,
                sensor_height_m=SENSOR_HEIGHT_M
            )
            rr.stride_length_per_sec = gs_sl.stride_length_per_sec_
            gs_speed = speed.clone().calculate(
                refined_gs_data,
                initial_contacts=refined_ic_list,
                cadence_per_sec=gs_cad.cadence_per_sec_,
                stride_length_per_sec=gs_sl.stride_length_per_sec_,
                sampling_rate_hz=sampling_rate_hz
            )
            rr.walking_speed_per_sec = gs_speed.walking_speed_per_sec_

    # Collect the per-second parameters of all gait sequences side by side
    results = gs_iterator.results_
    gait_analysis_results = pd.concat(
        [
            results.cadence_per_sec,
            results.stride_length_per_sec,
            results.walking_speed_per_sec,
        ],
        axis=1,
    )
    print(gait_analysis_results)

    # Write one output folder per subject
    subject_output_dir = os.path.join(output_folder, subject_folder)
    os.makedirs(subject_output_dir, exist_ok=True)
    gs_list_file = os.path.join(subject_output_dir, "gs_list.csv")
    gait_sequences.to_csv(gs_list_file)
    gait_analysis_results_file = os.path.join(subject_output_dir, "gait_analysis_results.csv")
    gait_analysis_results.to_csv(gait_analysis_results_file)
    
    



Processing subject folder: NJQV67D
Found 2 relevant CSV file(s) in NJQV67D: ['NJQV67D-MMMCK69HP-20210325-20210331.csv', 'NJQV67D-MMMXHXEE6-20210401-20210407.csv']
Processing file: NJQV67D-MMMCK69HP-20210325-20210331.csv
Starting preprocessing for file: /domino/datasets/local/dataset/idea_fast/for_s3/NJQV67D/NJQV67D-MMMCK69HP-20210325-20210331.csv
Completed preprocessing for file: /domino/datasets/local/dataset/idea_fast/for_s3/NJQV67D/NJQV67D-MMMCK69HP-20210325-20210331.csv
Processing file: NJQV67D-MMMXHXEE6-20210401-20210407.csv
Starting preprocessing for file: /domino/datasets/local/dataset/idea_fast/for_s3/NJQV67D/NJQV67D-MMMXHXEE6-20210401-20210407.csv
Completed preprocessing for file: /domino/datasets/local/dataset/idea_fast/for_s3/NJQV67D/NJQV67D-MMMXHXEE6-20210401-20210407.csv
Combining data from 2 file(s) for subject NJQV67D
sampling rate:  100.0
                          cadence_spm  stride_length_m  walking_speed_mps
gs_id sec_center_samples                                  


KeyboardInterrupt

