## IMeasureU Data & Sync File Cropping

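This notebook crops data files that contain multiple start and stop times (i.e., several trials, good or bad) into one file per trial, using the timestamps in the matching sync files.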

In [1]:
# Import functions ---
import functions.file_import_gui as gui

# For dataframes ---
import pandas as pd

# For saving files ---
import os

In [2]:
# Bring in data and sync files ---
# Loads the data files and the sync files for one subject / run type by
# calling the project's file-import helper with a starting directory.
# NOTE(review): the first value returned by the helper is used later as a
# mapping of file id -> DataFrame (it is iterated with .items()/.keys());
# the second is presumably the list of those ids - confirm against
# functions/file_import_gui.

# Subject to process
sub_id = 'imu_val_001'
run_type = 'walk'

# data files ---
# set directory
initialdir = f"data/validity_og/{sub_id}/DataFiles/{run_type}"
# bring in csv files with data
dfs_datafiles, keys_list = gui.read_csv_files_gui_2(initialdir)

# sync files ---
# set directory (rebinds `initialdir` to the sync-file folder)
initialdir = f"data/validity_og/{sub_id}/SyncFiles/{run_type}"
# bring in csv files with data
# NOTE: `keys_list` is rebound here, so the value returned for the data
# files above is no longer available after this line.
dfs_syncfiles, keys_list = gui.read_csv_files_gui_2(initialdir)

In [None]:
# Collect the (start, stop) timestamp pairs found in each sync file ---
# Rows with address == 0 are treated as starts and rows with address == 1
# as stops. The i-th start is paired with the i-th stop, in file order.
results = {}

# `file_id` (rather than `id`) keeps the builtin id() usable in later cells
for file_id, sync_df in dfs_syncfiles.items():
    # Select the timestamp column for each marker type in a single .loc step
    start_times = sync_df.loc[sync_df['address'] == 0, 'timestamp'].tolist()
    stop_times = sync_df.loc[sync_df['address'] == 1, 'timestamp'].tolist()

    # Sequential pairing only makes sense when every start has a stop
    if len(start_times) != len(stop_times):
        raise ValueError(
            f"DataFrame with id {file_id} has unequal numbers of start and stop times."
        )

    # Store the list of (start_time, stop_time) tuples for this file
    results[file_id] = list(zip(start_times, stop_times))

# Convert to a DataFrame with one row per id; the 'time_pairs' column holds
# that file's list of (start_time, stop_time) tuples
timestamps_df = pd.DataFrame(list(results.items()), columns=['id', 'time_pairs'])

In [None]:
# Crop out data in datafiles based on start and stop timestamps
#
# For every data file whose id also appears in timestamps_df, each
# (start, stop) pair produces one cropped DataFrame, stored in dfs_cropped
# under "<id>_trial<N>" with N counting from 1. Series.between is inclusive
# on both ends by default, so rows stamped exactly at start or stop are kept.

dfs_cropped = {}

# Build the id -> time_pairs lookup once instead of scanning timestamps_df
# for every data file. setdefault keeps the first row seen for an id.
pairs_by_id = {}
for sync_id, pairs in zip(timestamps_df['id'], timestamps_df['time_pairs']):
    pairs_by_id.setdefault(sync_id, pairs)

for df_id, df in dfs_datafiles.items():
    if df_id not in pairs_by_id:
        # Handle the case where there is no matching ID in timestamps_df
        print(f"No matching timestamp found for DataFrame with ID: {df_id}")
        continue

    # Iterate over each pair of start and stop times
    for trial_number, (start_time, end_time) in enumerate(pairs_by_id[df_id], start=1):
        # Boolean mask of the rows that fall inside this trial's window
        in_window = df['timestamp'].between(start_time, end_time)

        # Create a unique identifier for the cropped DataFrame
        cropped_df_id = f"{df_id}_trial{trial_number}"

        # An empty window is still stored, but reported so that an empty
        # trial does not go unnoticed
        if not in_window.any():
            print(f"Warning: {cropped_df_id} has no samples between {start_time} and {end_time}")

        # Store the cropped DataFrame
        dfs_cropped[cropped_df_id] = df[in_window]


In [None]:
# Save files ---
# Writes every cropped trial in dfs_cropped to
# <base_dir>/<sub_id>/<run_type>/<trial id>.csv

# Base directory
base_dir = "data/validity_og/individual_trials"

# The output folder depends only on the subject and run type, so build the
# path once instead of on every iteration
dir_path = os.path.join(base_dir, sub_id, run_type)

# Only create the folder when there is something to save. exist_ok=True
# replaces the separate exists() check and the gap between check and creation.
if dfs_cropped:
    os.makedirs(dir_path, exist_ok=True)

# Iterate through the cropped DataFrames
for df_id, df in dfs_cropped.items():
    # Construct the file path
    file_path = os.path.join(dir_path, f"{df_id}.csv")

    # Save the DataFrame as a CSV (index=False: the row index is not written)
    df.to_csv(file_path, index=False)

    print(f"{df_id} saved to {file_path}")