## Heart Rate Processing

#### Edwards eTRIMP

Edwards TRIMP (eTRIMP) was calculated as the sum, across 5 HR zones, of the time spent in each zone multiplied by a zone-specific weighting factor: duration in zone 1 (50%–59% of HRmax) multiplied by 1, duration in zone 2 (60%–69% of HRmax) multiplied by 2, duration in zone 3 (70%–79% of HRmax) multiplied by 3, duration in zone 4 (80%–89% of HRmax) multiplied by 4, and duration in zone 5 (90%–100% of HRmax) multiplied by 5.

In [None]:
# Import functions & packages ---
import functions.file_import_gui as gui
import os
import pandas as pd

In [None]:
# Bring in data ---

# Directory handed to the file-selection helper as its starting location.
# (Plain string: the original f-string had no placeholders.)
initialdir = "data/polar_hr"

# Select the heart-rate CSV files (4 days light and 4 days heavy).
# `dfs` is consumed below as a mapping of key -> raw DataFrame;
# NOTE(review): `keys_list` presumably lists the same keys - confirm against
# functions/file_import_gui.
dfs, keys_list = gui.read_csv_files_gui(initialdir)

# Max heart rate for each subject; the 'sub_id' and 'max_hr' columns are read
# when eTRIMP is calculated.
max_hr_df = pd.read_csv('data/polar_hr/max_hr.csv')

In [None]:
# Data Prep ---

def prep_hr_data(dfs, cutoff_time="0:34:00"):
    """Clean raw heart-rate exports and trim each one at a cutoff time.

    For every raw DataFrame the function keeps the 2nd and 3rd columns,
    discards the first three rows (two metadata rows plus the embedded
    header row), names the columns ``time`` and ``hr_bpm``, drops every row
    after the last occurrence of ``cutoff_time`` and finally removes rows
    whose heart rate is missing or not numeric.

    Args:
        dfs: Mapping of recording key -> raw DataFrame. The input frames are
            not modified.
        cutoff_time: Value of the ``time`` column marking the last row to
            keep (inclusive). If the value does not occur in a recording,
            that recording is left untrimmed. Defaults to ``"0:34:00"``.

    Returns:
        dict: Recording key -> cleaned DataFrame with columns ``time`` and
        ``hr_bpm`` (numeric).
    """
    updated_dfs = {}

    for key, raw_df in dfs.items():
        # Columns 2-3, rows from the 4th onwards. reset_index() returns a new
        # frame, so nothing below writes into a slice of the caller's data.
        df = raw_df.iloc[3:, 1:3].reset_index(drop=True)
        df.columns = ['time', 'hr_bpm']

        # Keep everything up to and including the last row stamped with the
        # cutoff time (the index is 0..n-1 after the reset, .loc is inclusive)
        cutoff_matches = df.index[df['time'] == cutoff_time]
        if len(cutoff_matches) > 0:
            df = df.loc[:cutoff_matches.max()]

        # Coerce heart rate to numeric (unparseable values become NaN) and
        # drop those rows. assign() builds a new frame instead of setting a
        # column on a slice, which avoids pandas' SettingWithCopyWarning.
        initial_row_count = len(df)
        df = df.assign(hr_bpm=pd.to_numeric(df['hr_bpm'], errors='coerce'))
        df = df.dropna(subset=['hr_bpm'])
        rows_removed = initial_row_count - len(df)

        updated_dfs[key] = df

        # Only report recordings that actually lost rows
        if rows_removed > 0:
            print(f"For '{key}', {rows_removed} rows were removed due to NaN in 'hr_bpm'.")

    return updated_dfs

# Clean every selected recording and trim it at the 0:34:00 mark
dfs_34mins = prep_hr_data(dfs)

In [None]:
# Calculate eTRIMP ---

import pandas as pd

def calculate_eTRIMP(dfs_dict, max_hrs_df):
    """Calculate Edwards TRIMP (eTRIMP) for every heart-rate recording.

    Each heart-rate sample is expressed as a percentage of the subject's
    maximum heart rate and assigned to one of five contiguous zones:
    zone 1 = [50, 60), zone 2 = [60, 70), zone 3 = [70, 80),
    zone 4 = [80, 90) and zone 5 = 90 and above. Samples below 50% count
    towards no zone. eTRIMP is the sum over zones of
    ``samples in zone * zone number``.

    Zone durations are sample counts.
    NOTE(review): they equal seconds only if the recordings are sampled at
    1 Hz - confirm against the export settings.

    Args:
        dfs_dict: Mapping of recording key -> DataFrame with a numeric
            ``hr_bpm`` column. The text before the first underscore of the
            key is used as the subject ID.
        max_hrs_df: DataFrame with columns ``sub_id`` and ``max_hr``.

    Returns:
        pandas.DataFrame: One row per recording with columns ``key``,
        ``max_hr``, ``zone1`` .. ``zone5`` and ``etrimp``.

    Raises:
        IndexError: If a recording's subject ID has no row in ``max_hrs_df``.
    """
    # (zone number / weight, inclusive lower bound, exclusive upper bound),
    # bounds in % of HRmax. Half-open intervals leave no gaps, so fractional
    # percentages such as 59.5 or exactly 90 are always counted.
    zone_bounds = (
        (1, 50, 60),
        (2, 60, 70),
        (3, 70, 80),
        (4, 80, 90),
        (5, 90, float('inf')),
    )
    columns = ['key', 'max_hr', *(f'zone{zone}' for zone, _, _ in zone_bounds), 'etrimp']

    results = []
    for key, df in dfs_dict.items():
        # Extract subject ID from key (assuming format 'runXXX_...')
        subject_id = key.split('_')[0]

        # Look up this subject's max heart rate, failing with a clear message
        subject_max_hr = max_hrs_df.loc[max_hrs_df['sub_id'] == subject_id, 'max_hr']
        if subject_max_hr.empty:
            raise IndexError(
                f"No max_hr entry found for subject '{subject_id}' (key '{key}')."
            )
        max_hr = pd.to_numeric(subject_max_hr).iloc[0]

        # Percentage of HRmax per sample. Multiplying before dividing keeps
        # exact zone boundaries (e.g. 120 bpm of 200 -> 60.0) free of
        # floating-point rounding error.
        per_max_hr = df['hr_bpm'] * 100 / max_hr

        # Number of samples falling in each zone
        zone_durations = {
            f'zone{zone}': ((per_max_hr >= lower) & (per_max_hr < upper)).sum()
            for zone, lower, upper in zone_bounds
        }

        # Weight each zone's duration by its zone number and add them up
        eTRIMP = sum(zone_durations[f'zone{zone}'] * zone for zone, _, _ in zone_bounds)

        results.append({
            'key': key,
            'max_hr': max_hr,
            **zone_durations,
            'etrimp': eTRIMP,
        })

    # Explicit columns keep the schema stable even when no recordings are given
    return pd.DataFrame(results, columns=columns)

# Compute zone durations and eTRIMP for every cleaned recording
etrimp_df = calculate_eTRIMP(dfs_34mins, max_hr_df)

In [None]:
# Export data ---

# Extract 'sub_id' (the text before the first underscore of 'key') and insert
# it at the beginning (position 0)
sub_ids = etrimp_df['key'].apply(lambda x: x.split('_')[0])
etrimp_df.insert(0, 'sub_id', sub_ids)

# Extract 'run_type' (everything after the first underscore of 'key'),
# upper-case it, and insert it at position 1 (after 'sub_id')
run_types = etrimp_df['key'].apply(lambda x: '_'.join(x.split('_')[1:])).str.upper()
etrimp_df.insert(1, 'run_type', run_types)

# Remove the 'key' and 'max_hr' columns, which are not part of the export.
# NOTE: this cell modifies etrimp_df in place, so re-running it without
# recomputing etrimp_df first will fail on the insert above.
etrimp_df.drop(['key', 'max_hr'], axis=1, inplace=True)

# Export the DataFrame to a CSV file. to_csv does not create missing parent
# folders, so make sure the results directory exists first.
output_path = 'data/polar_hr/results/etrimp.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
etrimp_df.to_csv(output_path, index=False)