## IMU Validation Study Low Back Processing

IMeasureU Blue Thunder Specs:
- Sampling freq for lowg = 500hz
- 5 minutes of data = 150,000 rows (500hz), 337,500 rows (1125hz), or 480,000 rows (1600hz)

In [None]:
# Import custom functions ---
import functions.file_import_gui as gui
import functions.data_prep as prep
import functions.custom_plots as plots
import functions.low_back_measures as back
import functions.peak_detection as peaks
import functions.stats as stats

# For saving files
import os

# For dataframes ---
import pandas as pd

# For plotting ---
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Recording to process: subject id, session time point, and gait condition.
# run_type must be 'run' or 'walk' (it selects the peak spacing further down).
sub_id = 'imu_val_002'
time_pt = 'time2'
run_type = 'run'

# low g (500hz) ---
# Directory handed to the csv import GUI for this subject / time point / run type
# (presumably the folder the file dialog opens in -- confirm in functions.file_import_gui)
initialdir = (
    "data/five_min_runs/imu_validation_study/"
    f"{sub_id}/{time_pt}/low_back/{run_type}"
)
# Read the selected csv files: dfs_lowg is iterated as a dict of DataFrames later on;
# keys_list is returned alongside it
dfs_lowg, keys_list = gui.read_csv_files_gui(initialdir)

Prep

In [None]:
# Data prep ---
# NOTE: return values are not captured, so these helpers are expected to update
# the DataFrames inside dfs_lowg directly.

# Axis / resultant column names shared by the resultant and g-conversion steps
accel_column_names = dict(
    column_x='accel_x (m/s2)',
    column_y='accel_y (m/s2)',
    column_z='accel_z (m/s2)',
    name_of_res_column='res_m/s/s',
)

# 1) crop to exactly 5 mins of data (extra rows at the beginning are removed)
prep.crop_df_five_mins(dfs_lowg, sample_freq=500)
# 2) add the resultant acceleration column
prep.add_resultant_column(dfs_lowg, **accel_column_names)
# 3) convert the acceleration columns to gs
prep.accel_to_gs_columns(dfs_lowg, **accel_column_names)
# 4) shift the time scale so it starts at 0
prep.shift_time_s_to_zero(dfs_lowg, time_col='timestamp')


In [None]:
# Meanshift ---
# Mean-shift every axis, both in m/s2 and in g
# (the '<column>_meanshift' names used below are expected to come from this call)
columns_to_meanshift = [
    'accel_x (m/s2)', 'accel_y (m/s2)', 'accel_z (m/s2)',
    'ax_g', 'ay_g', 'az_g',
]
prep.calc_mean_shift(dfs_lowg, columns_to_meanshift)

# Resultant of the mean-shifted axes, once per unit system: (x, y, z, resultant name)
meanshift_resultant_specs = [
    ('accel_x (m/s2)_meanshift', 'accel_y (m/s2)_meanshift', 'accel_z (m/s2)_meanshift', 'res_m/s/s_meanshift'),
    ('ax_g_meanshift', 'ay_g_meanshift', 'az_g_meanshift', 'res_g_meanshift'),
]
for col_x, col_y, col_z, res_name in meanshift_resultant_specs:
    prep.add_resultant_column(
        dfs_lowg, column_x=col_x, column_y=col_y,
        column_z=col_z, name_of_res_column=res_name
    )

In [None]:
# Filter data ---
# Low-pass (butter) filter settings shared by every column below
cutoff_frequency = 50  # hz
filter_order = 4  # th

# lowg ---
sampling_frequency = 500  # hz

# Raw signals (m/s2 and g, axes + resultant)
raw_columns = [
    'accel_x (m/s2)', 'accel_y (m/s2)', 'accel_z (m/s2)', 'res_m/s/s',
    'ax_g', 'ay_g', 'az_g', 'res_g',
]
# Mean-shifted counterparts of the same signals
meanshift_columns = [
    'accel_x (m/s2)_meanshift', 'accel_y (m/s2)_meanshift', 'accel_z (m/s2)_meanshift', 'res_m/s/s_meanshift',
    'ax_g_meanshift', 'ay_g_meanshift', 'az_g_meanshift', 'res_g_meanshift',
]

# Apply the identical filter to the raw columns first, then the mean-shifted ones
# (later cells read the results from '<column>_filtered' columns)
for columns_to_filter in (raw_columns, meanshift_columns):
    prep.apply_butter_lowpass_filter_to_dfs(
        dfs_lowg, columns_to_filter, sampling_frequency, cutoff_frequency, filter_order
    )

### Center of Mass (CoM) Measures
- **Root Mean Squared (RMS)** - a single value for each axis of acceleration (m/s/s) VT, ML, AP, resultant (RES)
- **RMS Ratio** - the RMS of each axis is divided by the resultant root mean squared, for example VT_RMS/RES_RMS
- **AVG Peak Acceleration of Resultant** - finds peaks of resultant and calculates the avg of these peaks

**NOTE:** 
- The RMS and RMS Ratio are *mean-shifted* and calculated from the *filtered* signal
- AVG Peak Accel uses the *raw* signal (no mean-shift or filtering)

Axis Orientation:
- **X-axis**: represents the **medial-lateral (ML)** direction, with positive values pointing to the right and negative values pointing to the left. This corresponds to the side-to-side movement of the body.
- **Y-axis**: aligned with the **vertical (VT)** direction, with positive values indicating a superior (upward) direction and negative values indicating an inferior (downward) direction. This corresponds to the up and down movement of the body.
- **Z-axis**: oriented in the **anterior-posterior (AP)** direction, with positive values pointing anterior (forward) and negative values pointing posterior (backward). This represents the forward and backward movement of the body.

#### RMS

In [None]:
# RMS Calculations ---

# Base signals in m/s2: three axes plus the resultant
base_rms_columns = ['accel_x (m/s2)', 'accel_y (m/s2)', 'accel_z (m/s2)', 'res_m/s/s']

# RMS is computed on three versions of every base signal, in this order:
# raw, filtered, and mean shifted & filtered
columns_for_rms = (
    base_rms_columns
    + [f"{col}_filtered" for col in base_rms_columns]
    + [f"{col}_meanshift_filtered" for col in base_rms_columns]
)

# back.apply_rms_to_dfs returns a long-format table
# (variable names in one column, their values in another)
rms_df_lowg = back.apply_rms_to_dfs(dfs_lowg, columns_for_rms)

# Tag every variable name with the sampling rate (needed later to know which hz)
sampling_rate_suffix = '_500hz'
rms_df_lowg['variable'] = rms_df_lowg['variable'] + sampling_rate_suffix

In [None]:
# Build the RMS export table and append its rows to the processed variables Excel workbook ---

# Destination workbook and sheet
file_path = "data/processed_variables/imu_training_load_variables.xlsx"
sheet_name = "variables"

# Reshape the RMS table for export, then append it to the sheet
df_to_export = prep.export_tbl_imu_val(rms_df_lowg)
prep.append_df_to_excel(df_to_export, file_path, sheet_name)

#### RMS Ratios

In [None]:
# RMS Ratio Calculations ---

def _rms_ratios_long(rms_long_df, axis_variables, resultant_variable):
    """Divide each axis RMS by the resultant RMS and return the ratios in long format.

    Parameters
    ----------
    rms_long_df : pd.DataFrame
        Long-format RMS table with 'key', 'variable' and 'value' columns.
    axis_variables : list of str
        Names (entries of the 'variable' column) of the per-axis RMS values.
    resultant_variable : str
        Name of the resultant RMS variable used as the denominator.

    Returns
    -------
    pd.DataFrame
        Long-format table with 'key', 'variable' and 'value' columns, one row per
        key and axis; each variable is the axis name with '_ratio' appended.

    Raises
    ------
    KeyError
        If any requested variable is missing from the table.
    """
    # pivot so every variable is its own column (one row per key)
    wide = rms_long_df.pivot(index='key', columns='variable', values='value')
    # row-wise division of each axis column by the resultant column
    ratios = wide[axis_variables].div(wide[resultant_variable], axis=0).add_suffix('_ratio')
    # melt back to the long layout used by the export helpers
    return ratios.reset_index().melt(id_vars='key', var_name='variable', value_name='value')


# filtered signal ---
rms_filtered_ratio_df_lowg = _rms_ratios_long(
    rms_df_lowg,
    ['accel_x (m/s2)_filtered_rms_500hz', 'accel_y (m/s2)_filtered_rms_500hz', 'accel_z (m/s2)_filtered_rms_500hz'],
    'res_m/s/s_filtered_rms_500hz',
)

# mean shifted filtered signal ---
rms_meanshift_filtered_ratio_df_lowg = _rms_ratios_long(
    rms_df_lowg,
    ['accel_x (m/s2)_meanshift_filtered_rms_500hz', 'accel_y (m/s2)_meanshift_filtered_rms_500hz', 'accel_z (m/s2)_meanshift_filtered_rms_500hz'],
    'res_m/s/s_meanshift_filtered_rms_500hz',
)

# combine tables above
rms_ratio_df_lowg = pd.concat([rms_filtered_ratio_df_lowg, rms_meanshift_filtered_ratio_df_lowg], axis=0, ignore_index=True)

In [None]:
# Build the RMS ratio export table and append its rows to the processed variables Excel workbook ---

# Destination workbook and sheet
file_path = "data/processed_variables/imu_training_load_variables.xlsx"
sheet_name = "variables"

# Reshape the RMS ratio table for export, then append it to the sheet
df_to_export = prep.export_tbl_imu_val(rms_ratio_df_lowg)
prep.append_df_to_excel(df_to_export, file_path, sheet_name)

#### AVG Peak Acceleration of Resultant

In [None]:
# Set parameters for functions below
# Minimum spacing between peaks (i.e. footstrikes), expressed in samples of the 500hz low-g signal:
#   run  -> 125 samples (0.25 secs)
#   walk -> 175 samples (0.35 secs)

# Reject anything other than the two supported gait types up front
if run_type not in ("run", "walk"):
    raise ValueError("Invalid run_type. Please set run_type to 'run' or 'walk'.")

lowg_time_between_peaks = 125 if run_type == "run" else 175

In [None]:
# STEP 1:

# Detect peaks on the resultant (g) with no height limits at all.
# NOTE: These unthresholded peaks are only used to derive each subject's
#       individual peak height thresholds in the next step.

no_threshold_results = peaks.calc_avg_positive_peaks(
    dfs_lowg,
    columns=['res_g'],
    time_column='time_s_scaled',
    min_peak_height=None,
    max_peak_height=None,
    min_samples_between_peaks=lowg_time_between_peaks,
)
# Only the per-run peak values are needed here; the first returned item is discarded
_, dfs_res_peak_values_lowg_no_threshold = no_threshold_results

In [None]:
# STEP 2:

# Derive the subject's individual max_peak_height and min_peak_height thresholds
# from the peaks identified with no threshold

k = 4  # IQR
z = 4  # SDs

# Summary table with the bounds; the next step reads its "id", "lower_bound_k"
# and "upper_bound_k" columns
peak_value_columns = ['peak_values']
summary_tbl_lowg = stats.create_summary_tbl(
    dfs_res_peak_values_lowg_no_threshold, peak_value_columns, k=k, z=z
)

In [None]:
# STEP 3:

# Re-run peak detection using the individualized min / max peak heights as limits.
# NOTE: This peak function takes the summary table as input and steps through
#       the individual rows of each run/sensor.

# Summary-table columns that hold the run id and the height limits
threshold_columns = dict(
    id_column="id",
    min_peak_height_column="lower_bound_k",
    max_peak_height_column="upper_bound_k",
)

res_peak_accel_lowg_df, dfs_res_peak_values_lowg_threshold = peaks.calc_avg_positive_peaks_from_tbl(
    dfs_lowg,
    ['res_g'],
    time_column='time_s_scaled',
    summary_table=summary_tbl_lowg,
    min_samples_between_peaks=lowg_time_between_peaks,
    **threshold_columns,
)

Visually Inspect Peaks

In [None]:
# Choose which runs to visualize below ---

# All runs are plotted: take a shallow copy of the full dictionary
dfs_lowg_plots = dict(dfs_lowg)

# Build and store one line plot per run for the chosen columns ---

x_col = 'time_s_scaled'  # x axis column
y_cols = ['res_g']  # y axis column(s)

line_plots_lowg = plots.create_line_plots(dfs_lowg_plots, x_col, y_cols)

In [None]:
# Peaks to overlay: the ones found with the individualized thresholds
peaks_to_plot = dfs_res_peak_values_lowg_threshold

# Walk through every run, overlay its peaks on the stored line plot, and show it
for key in dfs_lowg:
    fig = line_plots_lowg.get(key)
    if fig is None:
        print(f"No plot found with key {key}")
        continue

    # Drop a 'peaks' trace left over from an earlier run of this cell
    # so markers are not stacked on top of each other
    fig.data = [trace for trace in fig.data if trace.name != 'peaks']

    # Runs without detected peaks are shown as the bare line plot
    if key in peaks_to_plot:
        peak_points = peaks_to_plot[key]
        peak_markers = go.Scatter(
            x=peak_points['time_s_scaled'],
            y=peak_points['peak_values'],
            mode='markers',
            marker=dict(size=8, color='black'),  # black markers stand out against the line
            name='peaks',  # trace name shown in the legend and used for the cleanup above
        )
        fig.add_trace(peak_markers)

    fig.show()

In [None]:
# Add suffixes to variables

# lowg ---
# Only tag names that do not carry the suffix yet, so re-running this cell
# cannot produce '..._back_500hz_back_500hz' names in the exported table.
peak_variable_suffix = '_back_500hz'
peak_variable_names = res_peak_accel_lowg_df['variable']
res_peak_accel_lowg_df['variable'] = peak_variable_names.where(
    peak_variable_names.str.endswith(peak_variable_suffix, na=False),  # already tagged -> keep as is
    peak_variable_names + peak_variable_suffix,  # otherwise append the suffix
)

In [None]:
# Build the peak acceleration export table and append its rows to the processed variables Excel workbook ---

# Destination workbook and sheet
file_path = "data/processed_variables/imu_training_load_variables.xlsx"
sheet_name = "variables"

# Reshape the avg peak acceleration table for export, then append it to the sheet
df_to_export = prep.export_tbl_imu_val(res_peak_accel_lowg_df)
prep.append_df_to_excel(df_to_export, file_path, sheet_name)