Create dataframes (in `dfs`) with one column per fNIRS channel, plus the mean HbR/HbO over all channels and the relative mean HbR/HbO over all channels (the mean HbR/HbO normalized to 0 at time 0 of each trial), with one row per time step

**Notes:**
- I strongly recommend running this on your local machine to avoid uploading large files to Drive, because this code requires the original cleaned data files which are huge
- This code takes approximately 10 minutes to run per dataset (a single participant / session)
- To visualize, save the processed data to csv and use the visualization notebook

**Instructions:**
- Download `\cleaned-data-for-visualization\data-clean-2x` from OneDrive and put the folder into the same path as this Jupyter notebook if running locally or upload to Google Drive if running on Google Colab
- To run on Google Colab, set the `path` variable to the path of the `data-clean-2x` folder in the "Create variables" section of the imports cell below

In [18]:
# Mount Google Drive when running on Google Colab.
# The `google.colab` package only exists inside Colab, so the import is
# guarded: on a local machine this cell becomes a no-op instead of raising
# ImportError and stopping "Run all".
try:
    from google.colab import drive
except ImportError:
    # Not on Colab: keep the name defined so later checks can test for it.
    drive = None
    print('google.colab not available - skipping Drive mount (running locally)')
else:
    drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from numpy.random import normal
from numpy.random import randn
from numpy.random import seed
from scipy import stats

# Create variables
# Path of the "data-clean-2x" folder when running locally
# (folder placed next to this notebook)
local_path = "data-clean-2x/"

# Path of the same folder on a mounted Google Drive (Colab only);
# update this if the folder lives elsewhere in your Drive
colab_path = "/content/drive/MyDrive/BrAIn_Jam/data/data-clean-2x/"

# Use the Drive folder when it exists (Colab with Drive mounted), otherwise
# fall back to the local folder. This avoids having to comment a line in or
# out when switching between Colab and a local machine.
path = colab_path if os.path.isdir(colab_path) else local_path

# Names searched for inside the trigger text; the position in this list is
# the index of the matching dataframe in `dfs`
conditions = ['Control', 'Condition 1', 'Condition 2', 'Condition 3']
# Index into `conditions` of the condition currently being recorded,
# or -1 when no condition is active
current_condition = -1

In [21]:
# Read the cleaned recording and parse its timestamps
df = pd.read_csv(f'{path}sam-1.csv')
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Channel names are the distinct 'Type' values found in the first 100 rows.
# The first distinct value is dropped (presumably it is not a channel name -
# confirm against the data file).
channels = list(df['Type'][:100].unique())[1:]

# Any name containing "HbO" is an HbO channel; everything else is counted as HbR
hbo_channels = [name for name in channels if "HbO" in name]
hbr_channels = [name for name in channels if "HbO" not in name]

num_channels = len(hbr_channels)

# One empty dataframe per condition, all sharing the same column layout:
# bookkeeping columns first, then the HbO channels, then the HbR channels
cols = ['Timestamp', 'Trial', 'Average HbR', 'Average HbO'] + hbo_channels + hbr_channels
dfs = [pd.DataFrame(columns=cols) for _ in range(4)]
df_c, df_c1, df_c2, df_c3 = dfs



In [27]:
# Format data
#
# Walk the long-format recording (`df`) row by row and pivot it into the
# per-condition dataframes in `dfs`: one output row per time step, one column
# per channel. State carried between rows:
#   - current_condition: index into `conditions`/`dfs`, or -1 when no condition
#     is active (must be -1 before this cell runs; it is initialised in the
#     variables cell)
#   - df_curr: the dataframe of the active condition
#   - trial: trial number written to every row added for the active condition
# `df_curr` and `trial` are only defined once a "Start" trigger has been seen.
# NOTE(review): rows are appended one at a time with .loc, which is likely the
# reason for the long runtime mentioned in the notes - confirm by profiling.
for index, row in df.iterrows():
    timestamp = row['Timestamp']
    # str() so the substring tests below also work when the cell is not a
    # string (presumably NaN on non-trigger rows - confirm)
    trigger = str(row['Trigger'])
    value_type = str(row['Type'])
    value = row['Value']

    # Trigger rows switch the active condition on or off
    if "Trigger" in trigger:

        for i, condition in enumerate(conditions):
            if condition in trigger and "Start" in trigger:
                current_condition = i
                df_curr = dfs[current_condition]
                # Trial numbers start at 1 and continue from the last row
                # already stored for this condition
                if len(df_curr.index) == 0:
                    trial = 1
                else:
                    trial = df_curr.loc[len(df_curr.index)-1,'Trial']+1

            elif condition in trigger and "End" in trigger:
                # Samples after an "End" trigger are ignored until the next "Start"
                current_condition = -1


    # Store the sample only while a condition is active and a value is present
    if current_condition != -1 and not pd.isnull(value):

        # New time step: the last row already holds a non-zero value for this
        # channel (or there is no row yet), so this sample belongs to a new row.
        # Caveat: 0 is the "not filled yet" placeholder, so a measured value of
        # exactly 0 is indistinguishable from an empty cell.
        if len(df_curr.index) == 0 or df_curr.loc[len(df_curr.index)-1,value_type] != 0:
            # Add new row with every column set to the placeholder 0
            df_curr.loc[len(df_curr.index)] = 0

            df_curr.loc[len(df_curr.index)-1,'Timestamp'] = timestamp
            df_curr.loc[len(df_curr.index)-1,'Trial'] = trial

        # Write the sample into its channel column of the current (last) row
        df_curr.loc[len(df_curr.index)-1,value_type] = value

In [23]:
# Calculate Average HbO and HbR over all channels
#
# Channels are selected by column name (`hbo_channels` / `hbr_channels`)
# instead of by position. Positional slices silently pick the wrong columns
# if this cell is re-run after the 'Time' / 'Relative ...' columns have been
# inserted, or if the number of HbO and HbR channels differs.
# astype(float) is needed because the frames are built cell by cell and the
# channel columns may have object dtype.
for df_curr in dfs:
    df_curr['Average HbO'] = df_curr[hbo_channels].astype(float).mean(axis=1)
    df_curr['Average HbR'] = df_curr[hbr_channels].astype(float).mean(axis=1)

In [24]:
# Calculate time since start of each trial
#
# 'Time' is the number of seconds between a row's timestamp and the timestamp
# of the first row of the same trial, so it is 0 at the start of every trial.
for df_curr in dfs:
    # Insert the column only once so that re-running this cell does not
    # create duplicate 'Time' columns; float so it can hold fractional seconds
    if 'Time' not in df_curr.columns:
        df_curr.insert(1, 'Time', 0.0)

    # Nothing to compute for a condition that has no rows
    if df_curr.empty:
        continue

    # The 'Timestamp' column is filled cell by cell and may have object dtype
    timestamps = pd.to_datetime(df_curr['Timestamp'])
    # Timestamp of the first row of each trial, broadcast to every row of it
    trial_start = timestamps.groupby(df_curr['Trial']).transform('first')
    df_curr['Time'] = (timestamps - trial_start).dt.total_seconds()

In [26]:
# Normalize mean HbR/HbOs to 0 at time 0 for each trial
#
# For every trial the baseline is the mean of the first `num_trials` rows of
# 'Average HbO' / 'Average HbR'; the 'Relative ...' columns hold the averages
# minus that baseline.

# Number of initial samples (rows) of each trial averaged when computing the
# HbR/HbO baseline at time 0
num_trials = 5

for df_curr in dfs:
    # Insert the result columns only once so that re-running this cell does
    # not create duplicate columns
    for position, column in ((5, 'Relative HbR'), (6, 'Relative HbO')):
        if column not in df_curr.columns:
            df_curr.insert(position, column, 0.0)

    # Nothing to normalize for a condition that has no rows
    if df_curr.empty:
        continue

    for source, target in (('Average HbO', 'Relative HbO'),
                           ('Average HbR', 'Relative HbR')):
        # The average columns may have object dtype when filled cell by cell
        averages = df_curr[source].astype(float)
        # iloc[:num_trials] takes exactly `num_trials` rows; a label slice
        # with .loc would include both endpoints and average one row too many
        baseline = averages.groupby(df_curr['Trial']).transform(
            lambda trial_values: trial_values.iloc[:num_trials].mean()
        )
        df_curr[target] = averages - baseline

In [None]:
# # Save csv
# df_c.to_csv(f'niko-2-c.csv', sep=',', index=False, encoding='utf-8')
# df_c1.to_csv(f'niko-2-c1.csv', sep=',', index=False, encoding='utf-8')
# df_c2.to_csv(f'niko-2-c2.csv', sep=',', index=False, encoding='utf-8')
# df_c3.to_csv(f'niko-2-c3.csv', sep=',', index=False, encoding='utf-8')