In [58]:
# Import common libraries
import pandas as pd
import numpy as np
from copy import deepcopy
import importlib


# Import MNE processing
from mne.viz import plot_compare_evokeds
from mne import Epochs, events_from_annotations, set_log_level

# Scikit Learn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Import StatsModels
import statsmodels.formula.api as smf

# Import Plotting Library
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

# Internal Packages
import analysis_tools
import system_file_io
import quality_eval
import dynamic_interval_tools
import glm_analysis

# Reset the cache
importlib.reload(analysis_tools)
importlib.reload(system_file_io)
importlib.reload(dynamic_interval_tools)
importlib.reload(glm_analysis)



<module 'glm_analysis' from '/Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/glm_analysis.py'>

## Notes for Future Features

In [59]:
# TODO: Allowing for custom timestamps to be imported

"""
Apparently the way to do this is: 
    events = np.array([[sample, 0, event_id, timestamp], ...])
where:
    sample: the sample number of the event in the data. This is the position in the data where the event occurs.
    0: this column is usually used as a reserved column in MNE functions, it's not used in this case.
    event_id: an integer that identifies the type of event. This can be used to distinguish different types of triggers, for example, different stimulus conditions, button presses, or other experimental events.
    timestamp: the time in seconds of the event.
"""

"\nApparently the way to do this is: \n    events = np.array([[sample, 0, event_id, timestamp], ...])\nwhere:\n    sample: the sample number of the event in the data. This is the position in the data where the event occurs.\n    0: this column is usually used as a reserved column in MNE functions, it's not used in this case.\n    event_id: an integer that identifies the type of event. This can be used to distinguish different types of triggers, for example, different stimulus conditions, button presses, or other experimental events.\n    timestamp: the time in seconds of the event.\n"

In [60]:
# Tip for using machine learning for exploratory type data analysis

# Use principal component analysis (PCA) or independent component analysis (ICA), 
# to identify patterns in the fNIRS data that are not immediately obvious. 
# This can be used to identify latent neural networks or to identify different sources of signal variation.

# ==========================================================

# I'm curious how we could incorportate these into the data analsyis pipeline

# Anomaly detection
# Unsupervised learning algorithms such as one-class SVM, Autoencoder, and Isolation Forest can be used to identify outliers or abnormal patterns in the data.

# Time-series analysis
# Techniques such as time-series decomposition, ARIMA, and LSTM can be used to analyze the temporal dynamics of the fNIRS data and identify trends or patterns over time.

In [61]:
# Recurring values that we will allow for users to change in the GUI

# Length of the measured interval
interval_length = 15

# Time from trigger point being evaluated
tmin = -1
tmax = 15

# How you would like to rename the numeric triggers from Aurora
trigger_id = {'4': 'Control', '2': 'Neutral', '3': 'Inflam', '1':'Practice'}

# What files would you like to ignore while looping through subjects
ignore = [".DS_Store", "sub-03"]

# File path to the folder containing the BIDS data.
root_dir = '../../LabResearch/IndependentStudy/DataAnalysis'

# Mock data folder to test pulling meta data from folder structure.
path = "../../LabResearch/IndependentStudy/Data"

# Does the study have variable task lengths or is it consistent?
variable_epoch_time = True


## Find Groups and Subjects from Folder Structure

In [62]:
groups, df = system_file_io.import_data_folder(path, ignore)
df

Unnamed: 0,group,sub_name,snirf_path
0,anon,sub-06,../../LabResearch/IndependentStudy/Data/anon/s...
1,anon,sub-07,../../LabResearch/IndependentStudy/Data/anon/s...
2,anon,sub-05,../../LabResearch/IndependentStudy/Data/anon/s...
3,direct,sub-01,../../LabResearch/IndependentStudy/Data/direct...
4,direct,sub-02,../../LabResearch/IndependentStudy/Data/direct...
5,direct,sub-04,../../LabResearch/IndependentStudy/Data/direct...


# Loop through subjects for individual analysis

In [63]:
# TODO: Need to loop through this in order to get both groups
group = 'direct'
paths = df[df.eq(group).any(1)]['snirf_path'].to_numpy()
print(paths)

['../../LabResearch/IndependentStudy/Data/direct/sub-01/nirs/sub-01_task-AnonCom_nirs.snirf'
 '../../LabResearch/IndependentStudy/Data/direct/sub-02/nirs/sub-02_task-AnonCom_nirs.snirf'
 '../../LabResearch/IndependentStudy/Data/direct/sub-04/nirs/sub-04_task-AnonCom_nirs.snirf']


In [64]:
importlib.reload(quality_eval)
# Run a Signal Quality Check

# for path in paths:
    # quality_eval.evaluate_raw_signal(path)
    # quality_eval.evaluate_sci(path)


<module 'quality_eval' from '/Users/nolanbrady/Desktop/fNIRs-data-pipeline/python/quality_eval.py'>

In [None]:
# Works for both block and variable interval lengths
all_epochs, all_data = analysis_tools.aggregate_epochs(paths=paths, trigger_id=trigger_id, variable_epoch_time = variable_epoch_time)


In [71]:
importlib.reload(glm_analysis)
glm_data = glm_analysis.create_design_matrix(all_data)


ValueError: too many values to unpack (expected 3)

# Extract Evoked Amplitude


In [None]:
importlib.reload(analysis_tools)

# df = analysis_tools.extract_average_amplitudes(all_epochs, tmin, tmax)

df = analysis_tools.extract_channel_values(all_epochs)
df

Unnamed: 0,S1_D1 hbo,S1_D2 hbo,S2_D1 hbo,S2_D3 hbo,S3_D3 hbo,S3_D4 hbo,S4_D2 hbo,S4_D4 hbo,S4_D5 hbo,S5_D3 hbo,...,S4_D5 hbr,S5_D3 hbr,S5_D4 hbr,S5_D6 hbr,S6_D4 hbr,S6_D6 hbr,S7_D5 hbr,S7_D7 hbr,S8_D6 hbr,S8_D7 hbr
Practice-1,17.702929,-8.623968,-3.368849,8.721129,16.267715,15.884978,-11.438275,21.056053,8.86011,-2.719491,...,-2.791293,0.792263,-4.348873,-2.820148,-1.480844,-2.428397,-0.518181,-5.524335,-4.202776,-2.059823
Practice-2,11.03518,5.319438,10.583767,-3.949526,-0.629819,7.811384,-6.877663,2.189097,13.309548,3.758954,...,-4.193046,-1.095087,-0.612583,-2.458589,-0.36804,1.991679,-2.29226,-2.053164,12.333841,1.413435
Practice-3,25.229876,11.759789,7.745309,21.207158,14.645354,-4.449754,3.092528,-17.669199,28.78482,0.319342,...,-9.068383,-0.093033,-3.752188,1.779989,1.443567,-2.179939,4.519215,3.632492,15.759772,-1.561754
Practice-4,-4.550028,-5.282907,-2.697666,-3.860762,-8.345165,0.957357,5.960556,-1.663939,5.092744,-4.569828,...,-3.478242,1.511384,5.482388,2.521277,4.195866,1.241599,10.590122,-0.067321,-0.662879,-4.74692
Practice-5,-5.835436,-3.737674,-2.942152,-9.004813,-3.156599,-1.572402,-6.280808,-1.487244,-3.418762,-3.690404,...,2.334946,1.220531,1.30819,4.017965,0.305746,1.223866,2.882331,7.031397,5.167868,12.738443
Practice-6,-5.378223,-3.144947,6.29467,-4.129801,-5.645108,4.675895,-1.165414,3.36469,-2.105232,-13.118751,...,1.437831,4.338778,-1.360516,-1.611406,-2.947104,-0.252415,1.101141,-5.54379,-1.806421,-9.363525
Practice-7,-11.507705,-1.925583,-15.159979,-13.202584,-12.612608,-7.510918,-1.439651,-1.72208,-3.529647,2.527423,...,1.778572,-1.240545,1.829729,3.961041,3.21697,3.381514,3.433613,1.611971,-0.961802,4.945385
Practice-8,8.824151,-0.780372,-2.530528,3.389688,-12.332502,0.137297,-1.131462,-3.224559,2.508262,-1.337473,...,-1.263901,0.656477,5.8994,1.845734,3.301471,3.5103,1.522952,5.792254,-2.350076,1.948929
Practice-9,10.111093,0.252229,5.686241,12.638317,-17.265312,-2.289805,4.980699,1.36223,-2.32973,10.495255,...,1.17394,-5.151429,-2.14125,-2.83822,-14.378463,-6.248634,5.061772,-5.316438,-0.290552,-2.909934
Control-1,13.181988,15.509015,-0.258058,-8.030033,20.753472,3.101221,33.966527,8.026896,12.520304,-11.393447,...,-3.944402,3.319225,0.351815,1.217974,-0.316901,3.936225,-2.986308,-2.347648,8.9601,0.567344


In [None]:
#TODO: This should be fixed to create the DF properly but it's not imperetive to the project.
# more of a nice to have if anything.

# importlib.reload(analysis_tools)
# Returns all measurements taken with each column being a sample taking at a certain time and the rows are a participant hbo or hbr.
# amplitudes = analysis_tools.extract_all_amplitudes(all_epochs, tmin, tmax)
# amplitudes

# Statistical Tests

In [None]:
# Plot the Data
sns.catplot(x="Condition", y="Value", hue="ID", data=df.query("Chroma == 'hbo'"), ci=None, palette="muted", height=4, s=10)
plt.show()

UndefinedVariableError: name 'Chroma' is not defined

In [None]:
# Inflammatory vs Neutral Prompt Analysis

input_data = df.query("Condition in ['Neutral', 'Inflammatory']")
input_data = input_data.query("Chroma in ['hbo']")

model = smf.mixedlm("Value ~ Condition", input_data, groups=input_data["ID"]).fit()
model.summary()

In [None]:
# Control vs Neutral Prompt Analysis

input_data = df.query("Condition in ['Control', 'Neutral']")
input_data = input_data.query("Chroma in ['hbo']")

model = smf.mixedlm("Value ~ Condition", input_data, groups=input_data["ID"]).fit()
model.summary()

In [None]:
# Control vs Inflam

input_data = df.query("Condition in ['Control', 'Inflammatory']")
input_data = input_data.query("Chroma in ['hbo']")

model = smf.mixedlm("Value ~ Condition", input_data, groups=input_data["ID"]).fit()
model.summary()