In [64]:
# Import common libraries
import pandas as pd
import numpy as np
from copy import deepcopy
import importlib
import mne


# Import MNE processing
from mne.viz import plot_compare_evokeds
from mne import Epochs, events_from_annotations, set_log_level

# Scikit Learn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Import StatsModels
import statsmodels.formula.api as smf

# Import Plotting Library
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
from mne_nirs.statistics import run_glm

# Internal Packages
import analysis_tools
import system_file_io
import quality_eval
import dynamic_interval_tools
import glm_analysis
import visualizations

# Reset the cache
importlib.reload(analysis_tools)
importlib.reload(system_file_io)
importlib.reload(dynamic_interval_tools)
importlib.reload(glm_analysis)
importlib.reload(visualizations)



<module 'visualizations' from '/Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/visualizations.py'>

## Notes for Future Features

In [65]:
# TODO: Allowing for custom timestamps to be imported

"""
Apparently the way to do this is: 
    events = np.array([[sample, 0, event_id, timestamp], ...])
where:
    sample: the sample number of the event in the data. This is the position in the data where the event occurs.
    0: this column is usually used as a reserved column in MNE functions, it's not used in this case.
    event_id: an integer that identifies the type of event. This can be used to distinguish different types of triggers, for example, different stimulus conditions, button presses, or other experimental events.
    timestamp: the time in seconds of the event.
"""

"\nApparently the way to do this is: \n    events = np.array([[sample, 0, event_id, timestamp], ...])\nwhere:\n    sample: the sample number of the event in the data. This is the position in the data where the event occurs.\n    0: this column is usually used as a reserved column in MNE functions, it's not used in this case.\n    event_id: an integer that identifies the type of event. This can be used to distinguish different types of triggers, for example, different stimulus conditions, button presses, or other experimental events.\n    timestamp: the time in seconds of the event.\n"

In [66]:
# Tip for using machine learning for exploratory type data analysis

# Use principal component analysis (PCA) or independent component analysis (ICA), 
# to identify patterns in the fNIRS data that are not immediately obvious. 
# This can be used to identify latent neural networks or to identify different sources of signal variation.

# ==========================================================

# I'm curious how we could incorportate these into the data analsyis pipeline

# Anomaly detection
# Unsupervised learning algorithms such as one-class SVM, Autoencoder, and Isolation Forest can be used to identify outliers or abnormal patterns in the data.

# Time-series analysis
# Techniques such as time-series decomposition, ARIMA, and LSTM can be used to analyze the temporal dynamics of the fNIRS data and identify trends or patterns over time.

In [67]:
# ------------------------------------------------------------------
# Recurring values that we will allow for users to change in the GUI
# ------------------------------------------------------------------

# Length of the measured interval
interval_length = 30

# Time from trigger point being evaluated
tmin = -1
tmax = 30

# How you would like to rename the numeric triggers from Aurora
# trigger_id = None
# trigger_id = {'4': 'Control', '2': 'Neutral', '3': 'Inflam', '1':'Practice'}
trigger_id = {'1': 'Start', '2': 'Neutral', '3': 'Concern'}
# trigger_id = {'1': 'Stroop', '2': 'Rest'}

# What files would you like to ignore while looping through subjects
# ignore = [".DS_Store", "sub-03", ".git", ".gitignore"]
ignore = [".DS_Store", ".git", ".gitignore"]

# Mock data folder to test pulling meta data from folder structure.
# path = "../../LabResearch/IndependentStudy/Data/BIDS-NIRS-Tapping"
# path = "../../LabResearch/IndependentStudy/Data/FA"
path = '../../FV_Prelim_Data'
# path = '../../../Desktop/oculus-test'

# Does the study have variable task lengths or does the study follow a block design?
variable_epoch_time = False

# Specifies what columns to use for the GLM constrast. If None, contrast is skipped
# Only two columns can be specified
# columns_for_glm_contrast = ['Neutral', 'Inflam']
columns_for_glm_contrast = ['Neutral', 'Concern']
# columns_for_glm_contrast = ['Stroop', 'Rest']

# Specifies what columns to use for group level analysis.
# columns_for_group_analysis = ['Neutral', 'Inflam']
columns_for_group_analysis = ['Neutral', 'Concern']
# columns_for_group_analysis = ['Stroop', 'Rest']


## Find Groups and Subjects from Folder Structure

In [68]:
importlib.reload(system_file_io)
groups, df = system_file_io.import_data_folder(path, ignore)
df

Unnamed: 0,group,sub_name,snirf_path
0,all_groups,sub-12,../../FV_Prelim_Data/all_groups/sub-12/nirs/20...
1,all_groups,sub-08,../../FV_Prelim_Data/all_groups/sub-08/nirs/20...
2,all_groups,sub-01,../../FV_Prelim_Data/all_groups/sub-01/nirs/20...
3,all_groups,sub-06,../../FV_Prelim_Data/all_groups/sub-06/nirs/20...
4,all_groups,sub-07,../../FV_Prelim_Data/all_groups/sub-07/nirs/20...
5,all_groups,sub-09,../../FV_Prelim_Data/all_groups/sub-09/nirs/20...
6,all_groups,sub-10,../../FV_Prelim_Data/all_groups/sub-10/nirs/20...
7,all_groups,sub-11,../../FV_Prelim_Data/all_groups/sub-11/nirs/20...
8,all_groups,sub-05,../../FV_Prelim_Data/all_groups/sub-05/nirs/20...
9,all_groups,sub-02,../../FV_Prelim_Data/all_groups/sub-02/nirs/20...


In [69]:
groups

['all_groups']

# Loop through subjects for individual analysis

In [70]:
# TODO: Need to loop through this in order to get both groups
group = 'all_groups'
paths = df[df.eq(group).any(1)]['snirf_path'].to_numpy()
print(paths)

['../../FV_Prelim_Data/all_groups/sub-12/nirs/2023-02-10_003.snirf'
 '../../FV_Prelim_Data/all_groups/sub-08/nirs/2023-02-17_002.snirf'
 '../../FV_Prelim_Data/all_groups/sub-01/nirs/2023-02-17_002.snirf'
 '../../FV_Prelim_Data/all_groups/sub-06/nirs/2023-02-10_004.snirf'
 '../../FV_Prelim_Data/all_groups/sub-07/nirs/2023-02-17_001.snirf'
 '../../FV_Prelim_Data/all_groups/sub-09/nirs/2023-03-01_004.snirf'
 '../../FV_Prelim_Data/all_groups/sub-10/nirs/2023-03-08_001.snirf'
 '../../FV_Prelim_Data/all_groups/sub-11/nirs/2023-02-10_001.snirf'
 '../../FV_Prelim_Data/all_groups/sub-05/nirs/2023-02-10_003.snirf'
 '../../FV_Prelim_Data/all_groups/sub-02/nirs/2023-02-23_002.snirf'
 '../../FV_Prelim_Data/all_groups/sub-03/nirs/2023-02-10_001.snirf'
 '../../FV_Prelim_Data/all_groups/sub-04/nirs/2023-02-10_002.snirf']


In [71]:
# Get Meta Data on the scans
columns = ['subject', 'length', 'starttime']
df = pd.DataFrame(columns=columns)
for path in paths:
    raw = mne.io.read_raw_snirf(path, verbose=True, preload=False)
    start_time = raw.info['meas_date']

    # Get Subject ID
    ls = path.split('/')
    res = list(filter(lambda a: 'sub' in a, ls))
    id = int(res[0].split('-')[-1])
    # Variables
    subject = f'sub-{id}'
    length = f'{raw.times[-1]/60} minutes' 
    start_time_str = start_time.strftime('%Y-%m-%d %H:%M:%S.%f')

    this_df = pd.DataFrame({ 'subject': subject, 'length': length, 'starttime': start_time_str}, index=[0])
    df = pd.concat([df, this_df], ignore_index=True)

# df.to_csv('scan_metadata.csv')
df

Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-12/nirs/2023-02-10_003.snirf
Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-08/nirs/2023-02-17_002.snirf
Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-01/nirs/2023-02-17_002.snirf
Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-06/nirs/2023-02-10_004.snirf
Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-07/nirs/2023-02-17_001.snirf
Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-09/nirs/2023-03-01_004.snirf
Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-10/nirs/2023-03-08_001.snirf
Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-11/nirs/2023-02-10_00

Unnamed: 0,subject,length,starttime
0,sub-12,14.335416666666665 minutes,2023-02-10 13:56:36.000000
1,sub-8,14.889173333333332 minutes,2023-02-17 15:07:06.000000
2,sub-1,14.274798333333333 minutes,2023-02-17 09:30:45.000000
3,sub-6,15.207009999999999 minutes,2023-02-10 15:57:57.000000
4,sub-7,15.498633333333332 minutes,2023-02-17 14:44:01.000000
5,sub-9,21.46544333333333 minutes,2023-03-01 14:43:12.000000
6,sub-10,18.0872 minutes,2023-03-08 14:52:59.000000
7,sub-11,16.547166666666666 minutes,2023-02-10 09:32:52.000000
8,sub-5,15.672296666666664 minutes,2023-02-10 15:39:55.000000
9,sub-2,14.384566666666666 minutes,2023-02-23 15:58:52.000000


In [72]:
importlib.reload(quality_eval)
# Run a Signal Quality Check

# for path in paths:
    # quality_eval.evaluate_raw_signal(path)
    # quality_eval.evaluate_sci(path)


<module 'quality_eval' from '/Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/quality_eval.py'>

In [73]:
importlib.reload(analysis_tools)

# Works for both block and variable interval lengths
all_epochs, all_data = analysis_tools.aggregate_epochs(paths=paths, trigger_id=trigger_id, variable_epoch_time = variable_epoch_time)


Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-12/nirs/2023-02-10_003.snirf
Reading 0 ... 8750  =      0.000 ...   860.125 secs...
<Epochs |  25 events (all good), -1.25 - 15 sec, baseline -1.25 – 0 sec, ~258 kB, data loaded,
 'Concern': 12
 'Neutral': 12
 'Start': 1>
Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-08/nirs/2023-02-17_002.snirf
Reading 0 ... 9088  =      0.000 ...   893.350 secs...
<Epochs |  25 events (all good), -1.25 - 15 sec, baseline -1.25 – 0 sec, ~258 kB, data loaded,
 'Concern': 12
 'Neutral': 12
 'Start': 1>
Loading /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../FV_Prelim_Data/all_groups/sub-01/nirs/2023-02-17_002.snirf
Reading 0 ... 8713  =      0.000 ...   856.488 secs...
<Epochs |  25 events (all good), -1.25 - 15 sec, baseline -1.25 – 0 sec, ~258 kB, data loaded,
 'Concern': 12
 'Neutral': 12
 'Start': 1>
Loading /Users/nolanbrady/Desktop/fNIRs-dat

ValueError: too many values to unpack (expected 2)

In [None]:
glm_data = glm_analysis.create_design_matrix(all_data)


None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48
None None
48

In [None]:

df_cha, df_con = glm_analysis.create_glm_df(glm_data, columns_for_glm_contrast)


In [None]:
importlib.reload(glm_analysis)
importlib.reload(analysis_tools)

model = glm_analysis.group_level_glm_analysis(df_cha, columns_for_group_analysis)
significant_channels = analysis_tools.find_significant_channels(df_cha)
significant_channels



variable,Condition,df,mse,p_value,se,t,theta,Source,Detector,Chroma,Significant,ch_name
648,Neutral,38.0,8.573442e-11,0.037322,3.759364e-06,-2.157878,-0.000008,8,7,hbo,True,S8_D7 hbo
1332,Neutral,38.0,1.290143e-11,0.037322,1.458330e-06,2.157878,0.000003,8,7,hbr,True,S8_D7 hbr
2016,Neutral,38.0,8.573442e-11,0.037322,3.759364e-06,-2.157878,-0.000008,8,7,hbo,True,S8_D7 hbo
2700,Neutral,38.0,1.290143e-11,0.037322,1.458330e-06,2.157878,0.000003,8,7,hbr,True,S8_D7 hbr
3384,Neutral,38.0,8.573442e-11,0.037322,3.759364e-06,-2.157878,-0.000008,8,7,hbo,True,S8_D7 hbo
...,...,...,...,...,...,...,...,...,...,...,...,...
47181,Practice,38.0,7.055283e-12,0.004212,1.244802e-06,3.045002,0.000004,5,3,hbr,True,S5_D3 hbr
47219,Practice,38.0,3.328877e-12,0.016632,8.550510e-07,2.505473,0.000002,5,4,hbr,True,S5_D4 hbr
47257,Practice,38.0,1.238761e-11,0.015711,1.725760e-06,2.528980,0.000004,5,6,hbr,True,S5_D6 hbr
47406,Control,38.0,6.778732e-12,0.000317,1.289033e-06,3.961293,0.000005,7,7,hbr,True,S7_D7 hbr


In [None]:
importlib.reload(visualizations)
sub_dir = f'{path}/{group}'
raw_haemo = all_data[0]['raw_haemo']
visualizations.group_cortical_surface_projection(sub_dir, raw_haemo)

Using pyvistaqt 3d backend.

Could not estimate rigid Talairach alignment, using identity matrix


FileNotFoundError: File does not exist: /Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/../../LabResearch/IndependentStudy/Data/direct/fsaverage/surf/lh.pial

# Extract Evoked Amplitude


In [None]:
importlib.reload(analysis_tools)

# df = analysis_tools.extract_average_amplitudes(all_epochs, tmin, tmax)

df = analysis_tools.extract_channel_values(all_epochs)
df

Unnamed: 0,S1_D1 hbo,S1_D2 hbo,S2_D1 hbo,S2_D3 hbo,S3_D3 hbo,S3_D4 hbo,S4_D2 hbo,S4_D4 hbo,S4_D5 hbo,S5_D3 hbo,...,S4_D5 hbr,S5_D3 hbr,S5_D4 hbr,S5_D6 hbr,S6_D4 hbr,S6_D6 hbr,S7_D5 hbr,S7_D7 hbr,S8_D6 hbr,S8_D7 hbr
Practice-1,17.702929,-8.623968,-3.368849,8.721129,16.267715,15.884978,-11.438275,21.056053,8.86011,-2.719491,...,-2.791293,0.792263,-4.348873,-2.820148,-1.480844,-2.428397,-0.518181,-5.524335,-4.202776,-2.059823
Practice-2,11.03518,5.319438,10.583767,-3.949526,-0.629819,7.811384,-6.877663,2.189097,13.309548,3.758954,...,-4.193046,-1.095087,-0.612583,-2.458589,-0.36804,1.991679,-2.29226,-2.053164,12.333841,1.413435
Practice-3,25.229876,11.759789,7.745309,21.207158,14.645354,-4.449754,3.092528,-17.669199,28.78482,0.319342,...,-9.068383,-0.093033,-3.752188,1.779989,1.443567,-2.179939,4.519215,3.632492,15.759772,-1.561754
Practice-4,-4.550028,-5.282907,-2.697666,-3.860762,-8.345165,0.957357,5.960556,-1.663939,5.092744,-4.569828,...,-3.478242,1.511384,5.482388,2.521277,4.195866,1.241599,10.590122,-0.067321,-0.662879,-4.74692
Practice-5,-5.835436,-3.737674,-2.942152,-9.004813,-3.156599,-1.572402,-6.280808,-1.487244,-3.418762,-3.690404,...,2.334946,1.220531,1.30819,4.017965,0.305746,1.223866,2.882331,7.031397,5.167868,12.738443
Practice-6,-5.378223,-3.144947,6.29467,-4.129801,-5.645108,4.675895,-1.165414,3.36469,-2.105232,-13.118751,...,1.437831,4.338778,-1.360516,-1.611406,-2.947104,-0.252415,1.101141,-5.54379,-1.806421,-9.363525
Practice-7,-11.507705,-1.925583,-15.159979,-13.202584,-12.612608,-7.510918,-1.439651,-1.72208,-3.529647,2.527423,...,1.778572,-1.240545,1.829729,3.961041,3.21697,3.381514,3.433613,1.611971,-0.961802,4.945385
Practice-8,8.824151,-0.780372,-2.530528,3.389688,-12.332502,0.137297,-1.131462,-3.224559,2.508262,-1.337473,...,-1.263901,0.656477,5.8994,1.845734,3.301471,3.5103,1.522952,5.792254,-2.350076,1.948929
Practice-9,10.111093,0.252229,5.686241,12.638317,-17.265312,-2.289805,4.980699,1.36223,-2.32973,10.495255,...,1.17394,-5.151429,-2.14125,-2.83822,-14.378463,-6.248634,5.061772,-5.316438,-0.290552,-2.909934
Control-1,13.181988,15.509015,-0.258058,-8.030033,20.753472,3.101221,33.966527,8.026896,12.520304,-11.393447,...,-3.944402,3.319225,0.351815,1.217974,-0.316901,3.936225,-2.986308,-2.347648,8.9601,0.567344


In [None]:
#TODO: This should be fixed to create the DF properly but it's not imperetive to the project.
# more of a nice to have if anything.

# importlib.reload(analysis_tools)
# Returns all measurements taken with each column being a sample taking at a certain time and the rows are a participant hbo or hbr.
# amplitudes = analysis_tools.extract_all_amplitudes(all_epochs, tmin, tmax)
# amplitudes

# Statistical Tests

In [None]:
# Plot the Data
sns.catplot(x="Condition", y="Value", hue="ID", data=df.query("Chroma == 'hbo'"), ci=None, palette="muted", height=4, s=10)
plt.show()

UndefinedVariableError: name 'Chroma' is not defined

In [None]:
# Inflammatory vs Neutral Prompt Analysis

input_data = df.query("Condition in ['Neutral', 'Inflammatory']")
input_data = input_data.query("Chroma in ['hbo']")

model = smf.mixedlm("Value ~ Condition", input_data, groups=input_data["ID"]).fit()
model.summary()

In [None]:
# Control vs Neutral Prompt Analysis

input_data = df.query("Condition in ['Control', 'Neutral']")
input_data = input_data.query("Chroma in ['hbo']")

model = smf.mixedlm("Value ~ Condition", input_data, groups=input_data["ID"]).fit()
model.summary()

In [None]:
# Control vs Inflam

input_data = df.query("Condition in ['Control', 'Inflammatory']")
input_data = input_data.query("Chroma in ['hbo']")

model = smf.mixedlm("Value ~ Condition", input_data, groups=input_data["ID"]).fit()
model.summary()