## IMU Validation Study Tibia Processing

IMeasureU Blue Thunder Specs:
- Sampling freq for lowg = 500hz
- 5 minutes of data = 337,500 rows (1125hz) or 478,000 (1600hz)

In [None]:
# Import custom functions ---
import functions.file_import_gui as gui
import functions.data_prep as prep
import functions.custom_plots as plots
import functions.peak_detection as peaks
import functions.stats as stats
import functions.stride_variables as stride

# For saving files
import os

# For dataframes ---
import pandas as pd

# For plotting ---
import plotly.io as pio
import plotly.graph_objects as go
import matplotlib.pyplot as plt

# For DFA analysis
from sklearn.linear_model import LinearRegression

In [None]:
# Subject / session to process.
# These three values select the data folder below; run_type is also checked later in the
# notebook, where only 'run' and 'walk' are accepted.
sub_id = 'imu_val_002'
time_pt = 'time2'
run_type = 'run'

# low g (500hz) ---
# Left tibia: starting folder handed to the file-import helper
initialdir = f"data/five_min_runs/imu_validation_study/{sub_id}/{time_pt}/left_tibia/{run_type}"
# Bring in the csv files with data (helper returns the loaded dataframes plus a keys list)
dfs_lt_lowg, keys_list = gui.read_csv_files_gui(initialdir)
# Right tibia: same subject / time point / run type, other leg
initialdir = f"data/five_min_runs/imu_validation_study/{sub_id}/{time_pt}/right_tibia/{run_type}"
# Bring in the csv files with data
# NOTE: keys_list is reassigned here, so after this cell it holds the right-tibia value only
dfs_rt_lowg, keys_list = gui.read_csv_files_gui(initialdir)

Prep

In [None]:
# Data prep ---
# Each prep helper is called for its effect on the collection it is given (return values are
# not used). Every step runs on the left tibia first, then the right tibia.
_tibia_dfs = (dfs_lt_lowg, dfs_rt_lowg)

# Acceleration column names shared by the resultant and unit-conversion steps
_accel_columns = dict(
    column_x='accel_x (m/s2)',
    column_y='accel_y (m/s2)',
    column_z='accel_z (m/s2)',
    name_of_res_column='res_m/s/s',
)

# Step 1 - crop to 5 mins (removes extra rows at the beginning so that there are exactly 5 mins of data)
for _dfs in _tibia_dfs:
    prep.crop_df_five_mins(_dfs, sample_freq=500)

# Step 2 - calculate and add the resultant column
for _dfs in _tibia_dfs:
    prep.add_resultant_column(_dfs, **_accel_columns)

# Step 3 - convert the accel columns to gs
for _dfs in _tibia_dfs:
    prep.accel_to_gs_columns(_dfs, **_accel_columns)

# Step 4 - shift the time scale to start at 0
for _dfs in _tibia_dfs:
    prep.shift_time_s_to_zero(_dfs, time_col='timestamp')

### Tibial Acceleration of Resultant
- AVG Peak Tibial Acceleration - Finds peaks of the resultant and then calculates the average from these peaks

Find Peaks

In [None]:
# Peak-detection spacing used by the peak-finding cells below.
# Minimum number of samples between peaks (i.e. footstrikes) at the 500 Hz low-g rate:
#   running -> 250 samples (0.50 secs), walking -> 350 samples (0.70 secs)
_min_samples_by_run_type = {"run": 250, "walk": 350}

# Fail loudly on anything other than the two supported activities
if run_type not in _min_samples_by_run_type:
    raise ValueError("Invalid run_type. Please set run_type to 'run' or 'walk'.")

lowg_time_between_peaks = _min_samples_by_run_type[run_type]

In [None]:
# STEP 1: find peaks of the resultant with no height thresholds.
# NOTE: these unthresholded peaks are only used to derive each subject's individual
# peak-height thresholds; the first return value is discarded.

# Settings shared by both legs (no min/max height, spacing from the parameters cell)
_no_threshold_settings = dict(
    time_column='time_s_scaled',
    min_peak_height=None,
    max_peak_height=None,
    min_samples_between_peaks=lowg_time_between_peaks,
)

# lowg ---
# left
_, dfs_lt_res_peak_values_lowg_no_threshold = peaks.calc_avg_positive_peaks(
    dfs_lt_lowg, columns=['res_g'], **_no_threshold_settings
)
# right
_, dfs_rt_res_peak_values_lowg_no_threshold = peaks.calc_avg_positive_peaks(
    dfs_rt_lowg, columns=['res_g'], **_no_threshold_settings
)

In [None]:
# STEP 2: determine the subject's individual thresholds for max_peak_height and
# min_peak_height from the peaks identified with no threshold.

# Outlier-bound settings passed to the summary table (per the original notes: k -> IQR, z -> SDs)
k, z = 4, 4
_bound_settings = dict(k=k, z=z)

# lowg ---
# left
summary_tbl_lt_lowg = stats.create_summary_tbl(
    dfs_lt_res_peak_values_lowg_no_threshold, ['peak_values'], **_bound_settings
)
# right
summary_tbl_rt_lowg = stats.create_summary_tbl(
    dfs_rt_res_peak_values_lowg_no_threshold, ['peak_values'], **_bound_settings
)

In [None]:
# STEP 3: find peaks again, this time bounded by the individualized min / max peak heights.
# NOTE: this peak function takes the summary table as input and reads the bounds for each
# run/sensor from the named columns.

# Column names and peak spacing shared by both legs
_threshold_settings = dict(
    time_column='time_s_scaled',
    id_column="id",
    min_peak_height_column="lower_bound_k",
    max_peak_height_column="upper_bound_k",
    min_samples_between_peaks=lowg_time_between_peaks,
)

# lowg ---
# left
lt_res_peak_accel_lowg_df, dfs_lt_res_peak_values_lowg_threshold = peaks.calc_avg_positive_peaks_from_tbl(
    dfs_lt_lowg, ['res_g'], summary_table=summary_tbl_lt_lowg, **_threshold_settings
)

# right
rt_res_peak_accel_lowg_df, dfs_rt_res_peak_values_lowg_threshold = peaks.calc_avg_positive_peaks_from_tbl(
    dfs_rt_lowg, ['res_g'], summary_table=summary_tbl_rt_lowg, **_threshold_settings
)

Visually Inspect Peaks

In [None]:
# Choose which runs to visualize below ---
# Every key is kept: each *_plots dict is a shallow copy of the full dict of runs.
dfs_lt_lowg_plots = dict(dfs_lt_lowg.items())
dfs_rt_lowg_plots = dict(dfs_rt_lowg.items())

# Axes for the line plots: scaled time on x, resultant (in g) on y
x_col = 'time_s_scaled'
y_cols = ['res_g']

# Create and store one set of plots per leg
line_plots_lt_lowg = plots.create_line_plots(dfs_lt_lowg_plots, x_col, y_cols)
line_plots_rt_lowg = plots.create_line_plots(dfs_rt_lowg_plots, x_col, y_cols)


In [None]:
# lowg ---

def show_line_plots_with_peaks(dfs, line_plots, peaks_by_key,
                               time_column='time_s_scaled', value_column='peak_values'):
    """Show each stored line plot with its detected peaks overlaid as black markers.

    Replaces the two copy-pasted left/right loops with one routine so both legs are always
    drawn the same way.

    Parameters
    ----------
    dfs : mapping
        Runs to iterate over; only its keys are used.
    line_plots : mapping
        Figures keyed like ``dfs``. A key with no figure prints a message and is skipped.
    peaks_by_key : mapping
        Peak tables keyed like ``dfs``. A key with no peak table is shown without markers.
    time_column, value_column : str
        Columns of each peak table used for the marker x and y positions.
    """
    for key in dfs:
        fig = line_plots.get(key)
        if fig is None:
            print(f"No plot found with key {key}")
            continue

        # Drop any 'peaks' trace left over from a previous run of this cell so that
        # re-running does not stack duplicate marker layers on the same figure
        fig.data = [trace for trace in fig.data if trace.name != 'peaks']

        if key in peaks_by_key:
            peak_df = peaks_by_key[key]
            fig.add_trace(go.Scatter(
                x=peak_df[time_column],
                y=peak_df[value_column],
                mode='markers',
                marker=dict(
                    size=8,
                    color='black',  # chosen to stand out against the line
                ),
                name='peaks'  # trace name shown in the legend and used for the cleanup above
            ))
        fig.show()


# left
peaks_to_plot = dfs_lt_res_peak_values_lowg_threshold
show_line_plots_with_peaks(dfs_lt_lowg, line_plots_lt_lowg, peaks_to_plot)

# right
peaks_to_plot = dfs_rt_res_peak_values_lowg_threshold
show_line_plots_with_peaks(dfs_rt_lowg, line_plots_rt_lowg, peaks_to_plot)


Export Variables to Excel Table

In [None]:
# Add a leg / sampling-rate suffix to the variable names.
# The suffix is only added where it is missing, so re-running this cell does not produce
# doubled names such as '..._lt_500hz_lt_500hz' in the table that gets exported.
for _tbl, _suffix in (
    (lt_res_peak_accel_lowg_df, '_lt_500hz'),   # left
    (rt_res_peak_accel_lowg_df, '_rt_500hz'),   # right
):
    _names = _tbl['variable']
    # na=False: missing names count as "not suffixed"; name + suffix then stays missing
    _already_suffixed = _names.str.endswith(_suffix, na=False)
    _tbl['variable'] = _names.where(_already_suffixed, _names + _suffix)

In [None]:
# Create the export tables and append their rows to my processed variables table in Excel ---
# NOTE(review): presumably every run of this cell appends the rows again - confirm before re-running.
file_path = "data/processed_variables/imu_training_load_variables.xlsx"
sheet_name = "variables"

# left first, then right
for _peak_accel_tbl in (lt_res_peak_accel_lowg_df, rt_res_peak_accel_lowg_df):
    df_to_export = prep.export_tbl_imu_val(_peak_accel_tbl)
    prep.append_df_to_excel(df_to_export, file_path, sheet_name)

### Stride Time

Calculate Stride Times (STs) using the time between consecutive footstrikes of a single leg (time of step 2 - time of step 1, time of step 3 - time of step 2, etc.)
 - Mean
 - Standard Deviation (SD)
 - Coefficient of Variation (CV)
 - Strides per min (SPM)
 - Fractal Scaling Index (FSI) via Detrended Fluctuation Analysis (DFA)

 NOTE: SPM needs to be doubled (it is reflective of just one leg)

In [None]:
# Stride times: difference between consecutive foot strikes, taken from the time column of
# the thresholded peaks. Per the original notes, this creates a stride_times column for each run.

# left
dfs_lt_stride_times_lowg = stride.calc_stride_times(
    dfs=dfs_lt_res_peak_values_lowg_threshold,
    time_column="time_s_scaled",
)
# right
dfs_rt_stride_times_lowg = stride.calc_stride_times(
    dfs=dfs_rt_res_peak_values_lowg_threshold,
    time_column="time_s_scaled",
)

In [None]:
# Summary table for stride times (source of the outlier bounds used in the next step)

# Outlier-bound settings (per the original notes: k -> IQR, z -> SDs)
k, z = 3, 3
_st_bound_settings = dict(k=k, z=z)

# left
st_summary_tbl_lt_lowg = stats.create_summary_tbl(
    dfs_lt_stride_times_lowg, ['stride_times'], **_st_bound_settings
)
# right
st_summary_tbl_rt_lowg = stats.create_summary_tbl(
    dfs_rt_stride_times_lowg, ['stride_times'], **_st_bound_settings
)

In [None]:
# Remove stride time outliers using the lower / upper threshold columns of the summary tables

# Summary-table column names shared by both legs
_outlier_columns = dict(
    id_column='id',
    lower_threshold_column='lower_bound_k',
    upper_threshold_column='upper_bound_k',
)

# left
dfs_lt_stride_times_lowg_no_outliers, counts_dfs_lt_stride_times_lowg = stats.remove_outliers(
    dfs_lt_stride_times_lowg, 'stride_times', st_summary_tbl_lt_lowg, **_outlier_columns
)
# right
dfs_rt_stride_times_lowg_no_outliers, counts_dfs_rt_stride_times_lowg = stats.remove_outliers(
    dfs_rt_stride_times_lowg, 'stride_times', st_summary_tbl_rt_lowg, **_outlier_columns
)

In [None]:
# Calculate stride time variables
# Returns a single table with the columns key, variable, and value
#
# The outlier-filtered stride times are used here. Previously the unfiltered dicts were
# passed, so the outlier removal in the preceding cell had no effect on the results.
# NOTE(review): assumes stats.remove_outliers returns dicts with the same layout as its
# input (same keys, a 'stride_times' column) - confirm against the helper.

# left
st_vars_lt_lowg_df = stride.calc_stride_times_vars(
    dfs_lt_stride_times_lowg_no_outliers, 'stride_times', total_run_time_mins=5
)
# right
st_vars_rt_lowg_df = stride.calc_stride_times_vars(
    dfs_rt_stride_times_lowg_no_outliers, 'stride_times', total_run_time_mins=5
)

Export Variables to Excel Table

In [None]:
# Add a leg / sampling-rate suffix to the variable names.
# The suffix is only added where it is missing, so re-running this cell does not produce
# doubled names such as '..._lt_500hz_lt_500hz' in the table that gets exported.
for _tbl, _suffix in (
    (st_vars_lt_lowg_df, '_lt_500hz'),   # left
    (st_vars_rt_lowg_df, '_rt_500hz'),   # right
):
    _names = _tbl['variable']
    # na=False: missing names count as "not suffixed"; name + suffix then stays missing
    _already_suffixed = _names.str.endswith(_suffix, na=False)
    _tbl['variable'] = _names.where(_already_suffixed, _names + _suffix)

In [None]:
# Create the export tables and append their rows to my processed variables table in Excel ---
# NOTE(review): presumably every run of this cell appends the rows again - confirm before re-running.
file_path = "data/processed_variables/imu_training_load_variables.xlsx"
sheet_name = "variables"

# left first, then right
for _stride_vars_tbl in (st_vars_lt_lowg_df, st_vars_rt_lowg_df):
    df_to_export = prep.export_tbl_imu_val(_stride_vars_tbl)
    prep.append_df_to_excel(df_to_export, file_path, sheet_name)