In [None]:
# Moments - S3:
# Baseline 1,2,3,4,5 minute (Marker name: 201), 
# Before the 1st match 1st minute (211), 
# Before the 1st match 2nd minute (212), 
# During the match 1st minute (213), 
# During the match 2nd minute (214), 
# After the 1st match 1st minute (215), 
# After the 1st match 2nd minute (216),
# and similarly for the 2nd to 8th matches -   
# a total of 53 one-minute intervals from S3 (5 from the baseline and 6 from each match).

In [1]:
import neurokit2 as nk
import pandas as pd
import os
import re
import warnings


In [2]:
def replace_channel(cols_template, channel):
    '''
    Function to modify a template of column names to adapt to each signal.

    The placeholder is replaced literally, so a channel name containing
    backslashes or other regex-replacement syntax is inserted unchanged.

    Input:
    cols_template (list of str): A list of column names containing a placeholder "[channel]".
    channel (str): The name of the channel to replace the placeholder.

    Returns:
    list of str: A new list of column names with every "[channel]" placeholder replaced by the provided channel name.
                 Names without the placeholder are returned unchanged.
    '''
    placeholder = "[channel]"
    # str.replace instead of re.sub: re.sub would interpret `channel` as a
    # replacement template (e.g. "\1", "\d") rather than as plain text.
    return [col.replace(placeholder, channel) for col in cols_template]

def process_hr_hrv_s2(file_path, bunch_of_cols, filename):
    '''
    Function to process heart rate (HR) and heart rate variability (HRV) data from ECG signals in a CSV file.

    Each procedure marker opens one window of 60000 samples (one minute at the 1000 Hz sampling rate
    passed to neurokit2) that starts at the first row carrying the marker. The baseline marker 201
    opens five consecutive windows. Every window is processed once and both values are read from
    that single result.

    Input:
    file_path (str): The path to the CSV file containing the ECG data; the columns "ECG" and "marker" are read.
    bunch_of_cols (list of lists of str): A list of lists of column names. Only the first two sublists are used:
                  the HR names, then the HRV names, each holding one name per window (53).
    filename (str): The name of the file; with ".csv" removed it becomes the index of the resulting DataFrame.

    Returns:
    pd.DataFrame: A single-row DataFrame holding the HR values followed by the HRV values. The index is based
                  on the provided filename, and the columns are the first two sublists of `bunch_of_cols`.

    Raises:
    ValueError: If a marker does not occur in the file, or if the HR/HRV name lists do not hold
                exactly one name per window.
    '''
    sampling_rate = 1000
    window = 60000  # samples per window: one minute at 1000 Hz
    baseline_marker = 201
    baseline_minutes = 5
    # Procedure markers: 201 (baseline), then 211-216, 221-226, ..., 281-286 (six per match, eight matches).
    markers = [baseline_marker] + [200 + 10 * match + phase
                                   for match in range(1, 9)
                                   for phase in range(1, 7)]

    df_sub = pd.read_csv(file_path)

    # Start row of every window, in output-column order.
    starts = []
    for marker in markers:
        hits = df_sub.index[df_sub['marker'] == marker]
        if len(hits) == 0:
            raise ValueError(f"marker {marker} not found in {file_path}")
        n_windows = baseline_minutes if marker == baseline_marker else 1
        starts.extend(hits[0] + minute * window for minute in range(n_windows))

    hr_cols, hrv_cols = bunch_of_cols[0], bunch_of_cols[1]
    if len(hr_cols) != len(starts) or len(hrv_cols) != len(starts):
        raise ValueError(
            f"expected {len(starts)} HR and HRV column names, "
            f"got {len(hr_cols)} and {len(hrv_cols)}"
        )

    hr_hrv_cols = list(hr_cols) + list(hrv_cols)
    df_hr = pd.DataFrame(columns=hr_hrv_cols)
    df_hr.loc[0] = [None] * len(hr_hrv_cols)

    hrv_offset = len(hr_cols)  # HRV columns follow the HR columns
    for pos, start in enumerate(starts):
        # The same window feeds both HR and HRV, so the two values describe identical samples.
        ecg_signals, _ = nk.ecg_process(df_sub["ECG"].iloc[start:start + window], sampling_rate=sampling_rate)
        features = nk.ecg_intervalrelated(ecg_signals)
        # NOTE(review): positional lookup; presumably column 0 is ECG_Rate_Mean and column 9 is
        # HRV_RMSSD - confirm against the installed neurokit2 version.
        df_hr.iloc[0, pos] = features.iloc[0, 0]
        df_hr.iloc[0, hrv_offset + pos] = features.iloc[0, 9]

    df_hr.index = [filename.replace('.csv', '')]  # the subject's id becomes the row label
    return df_hr

def process_others_s2(df, file_path, bunch_of_cols, filename):
    '''
    Function to average the blood pressure channels and accelerometer channels over one-minute windows
    and join the result onto the existing DataFrame.

    Each procedure marker opens one window of 60000 rows that starts at the first row carrying the
    marker; the baseline marker 201 opens five consecutive windows. The channels are selected by
    name, so the result does not depend on the column order of the CSV file.

    Input:
    df (pd.DataFrame): The existing DataFrame with calculated HR and HRV values to join the new data onto.
    file_path (str): The path to the CSV file; the columns 'SBP', 'DBP', 'CO', 'TPR', 'wr', 'tl', 'tr' and 'marker' are read.
    bunch_of_cols (list of lists of str): A list of lists of column names. The first two sublists are skipped;
                  the following ones are matched, in order, to SBP, DBP, CO, TPR, wr, tl and tr.
    filename (str): The name of the file; with ".csv" removed it becomes the index of the new columns.

    Returns:
    pd.DataFrame: `df` outer-joined (on the index) with one row of window means, whose columns are named by
                  the sublists of `bunch_of_cols` from the third one onward.

    Raises:
    ValueError: If a marker does not occur in the file.
    '''
    channels = ['SBP', 'DBP', 'CO', 'TPR', 'wr', 'tl', 'tr']
    window = 60000  # rows per window
    baseline_marker = 201
    baseline_minutes = 5
    # Procedure markers: 201 (baseline), then 211-216, 221-226, ..., 281-286 (six per match, eight matches).
    markers = [baseline_marker] + [200 + 10 * match + phase
                                   for match in range(1, 9)
                                   for phase in range(1, 7)]

    # read_csv ignores the order of `usecols`, so every channel below is addressed by name, never by position.
    df_sub = pd.read_csv(file_path, usecols=channels + ['marker'])

    # Start row of every window, in output-column order.
    starts = []
    for marker in markers:
        hits = df_sub.index[df_sub['marker'] == marker]
        if len(hits) == 0:
            raise ValueError(f"marker {marker} not found in {file_path}")
        n_windows = baseline_minutes if marker == baseline_marker else 1
        starts.extend(hits[0] + minute * window for minute in range(n_windows))

    # One Series of per-channel means per window, computed once and shared by all column sets.
    signals = df_sub[channels]
    window_means = [signals.iloc[start:start + window].mean() for start in starts]

    dataframes = []
    for set_idx, set_cols in enumerate(bunch_of_cols[2:]):  # sets of columns excluding hr and hrv
        channel = channels[set_idx]
        df_temp = pd.DataFrame(columns=set_cols)
        df_temp.loc[0] = [None] * len(set_cols)
        for pos, means in enumerate(window_means):
            df_temp.iloc[0, pos] = means[channel]
        dataframes.append(df_temp)

    df_final = pd.concat(dataframes, axis=1)  # all channel means side by side in one row
    df_final.index = [filename.replace('.csv', '')]

    # Attach the new columns to the HR/HRV row that shares the same subject id.
    return df.join(df_final, how='outer')


In [3]:
# NOTE(review): these look like the source names behind each column set
# (two ECG-derived features, then the seven raw channels) - confirm; the list
# is not referenced by the other visible cells.
cols = [
    'ECG_Rate_mean', 'HRV_RMSSD',
    'SBP', 'DBP', 'CO', 'TPR',
    'wr', 'tl', 'tr',
]

In [4]:
# Channel labels substituted into the column-name template, one per column set
list_of_channels = [
    'HR', 'HRV',
    'SBP', 'DBP', 'CO', 'TPR',
    'wr', 'tl', 'tr',
]

In [5]:
# Column-name template shared by all channels: five baseline minutes, then for
# each of the eight tournaments two minutes each of baseline, gameplay and recovery.
cols_template = [f"baseline_visit2_min{minute}_[channel]" for minute in range(1, 6)]
cols_template += [
    f"tournament{tournament}_{phase}_min{minute}_[channel]"
    for tournament in range(1, 9)
    for phase in ("baseline", "gameplay", "recovery")
    for minute in (1, 2)
]

In [6]:
# Expand the shared template once per channel, following the order of the first nine entries of list_of_channels
(hr_cols, hrv_cols, sbp_cols, dbp_cols, co_cols,
 tpr_cols, wr_cols, tl_cols, tr_cols) = [
    replace_channel(cols_template, channel_name)
    for channel_name in list_of_channels[:9]
]

In [7]:
# Column-name sets in processing order: the first two are consumed by
# process_hr_hrv_s2, the remaining seven by process_others_s2.
bunch = [
    hr_cols, hrv_cols,
    sbp_cols, dbp_cols, co_cols, tpr_cols,
    wr_cols, tl_cols, tr_cols,
]

In [8]:
# Silence every warning for the rest of the session (applies process-wide, to all categories and modules)
warnings.filterwarnings(action='ignore')

In [12]:
# Processing all CSV files in the folder
input_folder = '/home/ubuntu/eSportData/jupyter-data/VU_AMS/s2_output/'
subject_frames = []  # one single-row DataFrame per successfully processed subject

for file_name in os.listdir(input_folder):  # iterating through the subjects' files
    if not file_name.endswith('.csv'):
        continue
    print(file_name)
    file_path = os.path.join(input_folder, file_name)
    try:
        # HR and HRV values first, then the blood pressure / accelerometer means joined onto them
        processed_df = process_hr_hrv_s2(file_path, bunch, file_name)
        final_df = process_others_s2(processed_df, file_path, bunch, file_name)
    except Exception as exc:
        # Best effort: a subject that cannot be processed is reported and skipped, not silently dropped.
        print(f'Skipping {file_name}: {type(exc).__name__}: {exc}')
        continue
    subject_frames.append(final_df)

# Concatenate once at the end instead of growing the DataFrame inside the loop
output_df = pd.concat(subject_frames) if subject_frames else pd.DataFrame()

# Saving the resulting DataFrame to a CSV file
output_df.to_csv('/home/ubuntu/eSportData/jupyter-data/VU_AMS/processed_means_s2_v2_patched.csv')

S2_p281.csv
S2_p17.csv
S2_p226.csv
S2_p110.csv
S2_p53.csv
S2_p192.csv
S2_p21.csv
S2_p175.csv
S2_p129.csv
S2_p45.csv
S2_p299.csv
S2_p215.csv
S2_p207.csv
S2_p180.csv
S2_p252.csv
S2_p263.csv
S2_p199.csv
S2_p154.csv
S2_p217.csv
S2_p242.csv
S2_p107.csv
S2_p169.csv
S2_p298.csv
S2_p188.csv
S2_p34.csv
S2_p1.csv
S2_p114.csv
S2_p294.csv
S2_p190.csv
S2_p291.csv
S2_p138.csv
S2_p60.csv
S2_p232.csv
S2_p258.csv
S2_p153.csv
S2_p255.csv
S2_p233.csv
S2_p295.csv
S2_p218.csv
S2_p117.csv
S2_p282.csv
S2_p278.csv
S2_p5.csv
S2_p93.csv
S2_p225.csv
S2_p83.csv
S2_p24.csv
S2_p267.csv
S2_p155.csv
S2_p201.csv
S2_p41.csv
S2_p134.csv
S2_p300.csv
S2_p146.csv
S2_p51.csv
S2_p81.csv
S2_p116.csv
S2_p147.csv
S2_p29.csv
S2_p160.csv
S2_p203.csv
S2_p66.csv
S2_p256.csv
S2_p230.csv
S2_p148.csv
S2_p174.csv
S2_p183.csv
S2_p196.csv
S2_p219.csv
S2_p32.csv
S2_p54.csv
S2_p261.csv
S2_p168.csv
S2_p44.csv
S2_p16.csv
S2_p46.csv
S2_p131.csv
S2_p202.csv
S2_p142.csv
S2_p251.csv
S2_p77.csv
S2_p189.csv
S2_p97.csv
S2_p22.csv
S2_p40.csv
S2_p86.