In [129]:
# sphinx_gallery_thumbnail_number = 2

# Authors: Robert Luke <mail@robertluke.net>
#
# License: BSD (3-clause)

# Import common libraries
from collections import defaultdict
from copy import deepcopy
from itertools import compress
from pprint import pprint

# Import Plotting Library
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import os
import pywt


# Import StatsModels
import statsmodels.formula.api as smf
from mne import Epochs, events_from_annotations, set_log_level
from mne.preprocessing.nirs import (
    beer_lambert_law,
    optical_density,
    scalp_coupling_index,
    temporal_derivative_distribution_repair,
    
)

# Import MNE processing
from mne.viz import plot_compare_evokeds

# Import MNE-BIDS processing
from mne_bids import BIDSPath, read_raw_bids


# Import MNE-NIRS processing
from mne_nirs.channels import get_long_channels, picks_pair_to_idx
from mne_nirs.datasets import fnirs_motor_group
from mne_nirs.signal_enhancement import (enhance_negative_correlation, short_channel_regression)
from mne_nirs.channels import (get_long_channels,
                               get_short_channels,
                               picks_pair_to_idx)

from collections import defaultdict
import numpy as np
from itertools import compress
from sklearn.decomposition import PCA
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict
from mne_bids import BIDSPath
from mne import EpochsArray
from mne.viz import plot_compare_evokeds
from copy import deepcopy
import mne
from statsmodels.formula.api import mixedlm


from scipy.stats import ttest_ind

# Set general parameters
set_log_level("WARNING")  # Don't show info, as it is repetitive for many subjects

# Preprocessing

In [125]:

# cropping the signal before sci calculation
def individual_analysis(bids_path):
    # Read data with annotations in BIDS format
    raw_intensity = read_raw_bids(bids_path=bids_path, verbose=False)
    # check if coordinates of the channels
    
    
    #print(raw_intensity.ch_names)

    
    # Get event timings
    #print("Extracting event timings...")
    Breaks, _ = mne.events_from_annotations(raw_intensity, {'Xstart': 4, 'Xend': 5})
    AllEvents, _ = mne.events_from_annotations(raw_intensity)
    Breaks = Breaks[:, 0] / raw_intensity.info['sfreq']
    LastEvent = AllEvents[-1, 0] / raw_intensity.info['sfreq']
    
    if len(Breaks) % 2 == 0:
        raise ValueError("Breaks array should have an odd number of elements.")
    
    original_duration = raw_intensity.times[-1] - raw_intensity.times[0]
    #print(f"Original duration: {original_duration:.2f} seconds")
    
    # Cropping dataset
    #print("Cropping the dataset...")
    cropped_intensity = raw_intensity.copy().crop(Breaks[0], Breaks[1])
    for j in range(2, len(Breaks) - 1, 2):
        block = raw_intensity.copy().crop(Breaks[j], Breaks[j + 1])
        cropped_intensity.append(block)
    cropped_intensity.append(raw_intensity.copy().crop(Breaks[-1], LastEvent + 15.25))
    
    cropped_duration = cropped_intensity.times[-1] - cropped_intensity.times[0]
    #print(f"Cropped duration: {cropped_duration:.2f} seconds")
    
    if cropped_duration >= original_duration:
        print(f"WARNING: Cropping did not reduce duration!")
    
    raw_intensity_cropped = cropped_intensity.copy()

    

    
    # Remove break annotations
    #print("Removing break annotations for the orginal raw...")
    raw_intensity.annotations.delete(np.where(
        (raw_intensity.annotations.description == 'Xstart') | 
        (raw_intensity.annotations.description == 'Xend') | 
        (raw_intensity.annotations.description == 'BAD boundary') | 
        (raw_intensity.annotations.description == 'EDGE boundary')
    )[0])
    
    print("Removing break annotations for the cropped raw...")
    raw_intensity_cropped.annotations.delete(np.where(
        (raw_intensity_cropped.annotations.description == 'Xstart') | 
        (raw_intensity_cropped.annotations.description == 'Xend') | 
        (raw_intensity_cropped.annotations.description == 'BAD boundary') | 
        (raw_intensity_cropped.annotations.description == 'EDGE boundary')
    )[0]) 
    
    
    
    # Convert signal to optical density and determine bad channels
    raw_od = optical_density(raw_intensity)
    raw_od_cropped = optical_density(raw_intensity_cropped)
    
    # get the total number of short channels
    short_chs = get_short_channels(raw_od)
    tot_number_of_short_channels = len(short_chs.ch_names)
    

    # sci calculated
    sci = scalp_coupling_index(raw_od_cropped, l_freq=0.7, h_freq=1.45)
    #sci = scalp_coupling_index(raw_od_cropped, h_freq=1.35)
    bad_channels= list(compress(raw_od.ch_names, sci < 0.8))
    
    if len(bad_channels) > 55:
        print(f"‚ùå Too many bad channels ({len(bad_channels)}). Excluding subject from analysis.")
        return None, None, None, None, None
    
    raw_od.info["bads"] = bad_channels
    raw_intensity_cropped.info["bads"] = bad_channels
    
    #print(f"Bad channels: {raw_od.info['bads']}")
    # print the number of bad channels
    #print(f"Number of bad channels: {len(raw_od.info['bads'])}")
    
    # Remove bad channels
   
    """ raw_od.drop_channels(bad_channels)
    raw_intensity_cropped.drop_channels(bad_channels) """
    
    raw_od = temporal_derivative_distribution_repair(raw_od)
    raw_od_cropped = temporal_derivative_distribution_repair(raw_od_cropped)

    
     # Get long channels
    long_chs = get_long_channels(raw_od)
    bad_long_chs = long_chs.info["bads"]
    
    """ print(f"Number of all (good and bad) short channels: {tot_number_of_short_channels}")
    print(f"Number of bad long channels: {len(bad_long_chs)}")
    print(f"Number of long and short bad channels: {len(bad_channels)}") """
    # print the number of short bad channels
    len_bad_short_chs = len(bad_channels) - len(bad_long_chs)
    num_good_short_channels = tot_number_of_short_channels - len_bad_short_chs
    # Print diagnostics
    """ print(f"Number of all (good and bad) short channels: {tot_number_of_short_channels}")
    print(f"Number of bad long channels: {len(bad_long_chs)}")
    print(f"Number of bad short channels: {len_bad_short_chs}")
    print(f"‚úÖ Number of good short channels: {num_good_short_channels}") """
    #print(f"Number of bad short channels: {len_bad_short_chs}")
    
    # Determine if there are short channels
    if num_good_short_channels < 4:
        print("‚ùå No short channels found. Skipping the subject.")
        return None, None, None, None, None # Keep the data unchanged
    else:
        #print("Applying short-channel regression.")
        raw_od_corrected = short_channel_regression(raw_od)
        #raw_od_corrected=raw_od.copy()
        # drop the bad channels
        #raw_od_corrected.drop_channels(bad_channels)
        
        # interpolate the bad channels
        #raw_od_corrected.interpolate_bads()
        
    # short-channel regression subtracts a scaled version of the signal obtained from the nearest short channel from the signal obtained from the long channel. 

    raw_haemo = beer_lambert_law(raw_od_corrected, ppf=0.1)
    
    #raw_haemo = get_long_channels(raw_haemo, min_dist=0.02, max_dist=0.04) # max_dist 40mm
    raw_haemo = get_long_channels(raw_haemo, min_dist=0.02) 
    
    
   
    

    # Convert to haemoglobin and filter
    
    # check the ppf
    
    #raw_haemo.plot_psd(average=True, show=True)
    
    """ raw_haemo = raw_haemo.filter(
    l_freq=0.01, h_freq=0.7, method="fir", fir_design="firwin", verbose=False,
    h_trans_bandwidth=0.3, l_trans_bandwidth=0.005) """
    
    # improved filter
    """ raw_haemo = raw_haemo.filter(
    l_freq=0.05, h_freq=0.2, method="fir", fir_design="firwin", verbose=False,
    h_trans_bandwidth=0.01, l_trans_bandwidth=0.01) """
    
    #raw_haemo.plot_psd(average=True, show=True)

    raw_haemo = raw_haemo.filter(l_freq = None, h_freq = 0.2,  
                                 method="iir", iir_params =dict(order=5, ftype='butter'))
     #high-pass
    raw_haemo= raw_haemo.filter(l_freq =  0.05, h_freq = None, method="iir", iir_params =dict(order=5, ftype='butter')) #t0.05 was cutoff in andreas analysis
      
    """ # drop the bad channels
    raw_heamo_good = raw_haemo.copy()
    
    raw_heamo_good.drop_channels(raw_haemo.info['bads']) """
    
    
    #raw_haemo.plot(n_channels=len(raw_haemo.ch_names), duration=5000,show_scrollbars=True )

    # Extract events but ignore those with
    # the word Ends (i.e. drop ExperimentEnds events)
    events, event_dict = events_from_annotations(
        raw_haemo, verbose=False, regexp="^(?![Ends]).*$"
    )
    epochs = Epochs(
        raw_haemo,
        events,
        event_id=event_dict,
        tmin=-5,
        tmax=15,
        reject=dict(hbo=100e-6),
        reject_by_annotation=True,
        proj=True,
        baseline=(None, 0),
        detrend=1,
        preload=True,
        verbose=None,
    )
    """ epochs['Control'].plot()
    epochs['Noise'].plot()
    epochs['Speech'].plot()
     """


    return raw_haemo, epochs, event_dict, raw_od, events

# Epoch Rejection

In [3]:
def reject_epochs(max_data, min_data, condition_name, thresholds, threshold_factor = 2, max_reject_ratio=0.25):
            mean_max = thresholds[condition_name]["mean_max"]
            mean_min = thresholds[condition_name]["mean_min"]
            std_max = thresholds[condition_name]["std_max"]
            std_min = thresholds[condition_name]["std_min"]

            upper_bound_max = mean_max + (threshold_factor * std_max)
            lower_bound_min = mean_min - (threshold_factor * std_min)

            rejection_scores = []  # (epoch_idx, num_bad_channels, total_violation)

            for epoch_idx in range(max_data.shape[0]):
                epoch_max = max_data[epoch_idx, :]
                epoch_min = min_data[epoch_idx, :]

                bad_max = epoch_max > upper_bound_max
                bad_min = epoch_min < lower_bound_min
                bad_channels = bad_max | bad_min

                num_bad_channels = np.sum(bad_channels)

                if num_bad_channels > 0:
                    max_violation = np.sum(epoch_max[bad_max] - upper_bound_max)
                    min_violation = np.sum(lower_bound_min - epoch_min[bad_min])
                    total_violation = max_violation + min_violation

                    rejection_scores.append((epoch_idx, num_bad_channels, total_violation))

            # Sort by: 1) num_bad_channels DESC, 2) total_violation DESC
            sorted_scores = sorted(rejection_scores, key=lambda x: (-x[1], -x[2]))

            max_allowed_rejections = int(max_reject_ratio * max_data.shape[0])
            rejected_epochs = [idx for idx, _, _ in sorted_scores[:max_allowed_rejections]]

            # Remove rejected epochs
            cleaned_max = np.delete(max_data, rejected_epochs, axis=0)
            cleaned_min = np.delete(min_data, rejected_epochs, axis=0)

            # Logging
            """ for idx, chs, dev in sorted_scores[:max_allowed_rejections]:
                print(f"üö´ Rejecting Epoch {idx} in {condition_name} - {chs} bad channels, total deviation {dev:.2e}")
            """
            return cleaned_max, cleaned_min, rejected_epochs

In [96]:
def plot_evoked_rois(evoked_dict, roi_definitions, output_path, title_prefix=""):
    # Number of ROIs (columns) to create
    fig, axes = plt.subplots(1, len(roi_definitions), figsize=(6 * len(roi_definitions), 5))
    if len(roi_definitions) == 1:
        axes = [axes]
    
    # Define colors for each condition
    condition_colors = {
        "Control": "r",  # Red for Control
        "Noise": "g",    # Green for Noise
        "Speech": "b"    # Blue for Speech
    }
    
    # Iterate through each ROI
    for ax, (roi_name, optodes) in zip(axes, roi_definitions.items()):
        # Create a dictionary to store evoked data for the legend
        evoked_for_legend = {}

        # For each condition, plot evoked responses with different colors
        for condition_name, evoked in evoked_dict.items():
            picks = [ch for ch in optodes if ch in evoked.ch_names]
            if not picks:
                print(f"‚ö†Ô∏è No channels found in ROI '{roi_name}' for {condition_name}")
                continue
            
            # Plot the evoked response for the current condition with the corresponding color
            plot_compare_evokeds(
                {condition_name: evoked},
                picks=picks,
                axes=ax,
                combine='mean',
                show=False,
                colors=[condition_colors.get(condition_name, 'k')],  # Default to black if not defined
                ylim=dict(hbo=[-5, 12])
            )

            # Store the evoked for the condition in the dictionary for the legend
            evoked_for_legend[condition_name] = evoked
        
        # Set the title for the ROI plot
        ax.set_title(f"{title_prefix} ROI: {roi_name}")
        
        # Create a legend with conditions
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, title="Conditions")
    
    # Save the plot
    plt.savefig(output_path)
    # Show the plot
    plt.show()

# WAA individual analysis

## ttest_ind approach

In [116]:

# === Setup ===
output_root = "individual_analysis_outputs"
os.makedirs(output_root, exist_ok=True)

bids_root = r"C:\\Datasets\\Test-retest study\\bids_dataset"

# Get subject folders from the dataset
subject_list = sorted([d for d in os.listdir(bids_root) if d.startswith("sub-")])
subject_list = [s.replace("sub-", "") for s in subject_list]  # Extract subject numbers
#subject_list= subject_list[:1]

print("Detected subjects:", subject_list)
id = 0
all_data = pd.DataFrame()
ttest_results = []  # Store t-test results here

# === Start processing ===
for sub in subject_list:
    for ses in range(1, 3):
        bids_path = BIDSPath(
            subject=f"{sub}",
            session=f"{ses:02d}",
            task="auditory",
            datatype="nirs",
            root=bids_root,
            suffix="nirs",
            extension=".snirf",
        )

        raw_haemo, epochs, event_dict, raw_od, events = individual_analysis(bids_path)

        if raw_haemo is None or len(epochs) < 10:
            print(f"‚ö†Ô∏è Skipping Subject {sub}, Session {ses:02d} (insufficient data)")
            continue

        output_dir = os.path.join(output_root, f"sub-{sub}", f"ses-{ses:02d}")
        os.makedirs(output_dir, exist_ok=True)

        # === EPOCH REJECTION ===
        bad_ch = epochs.info['bads']
        epochs.drop_channels(bad_ch)
        epochs_before_cleaning = epochs.copy()

        hbo_data = epochs.copy().pick("hbo")
        data = hbo_data.get_data()
        ev = epochs.events[:, 2]
        index_column = np.arange(0, len(ev)).reshape(-1, 1)
        updated_matrix = np.hstack((index_column, ev.reshape(-1, 1)))

        time_slice = data[:, :, 26:105]
        max_values = np.max(time_slice, axis=2)
        min_values = np.min(time_slice, axis=2)

        control_idx = updated_matrix[updated_matrix[:, 1] == 1][:, 0]
        noise_idx = updated_matrix[updated_matrix[:, 1] == 2][:, 0]
        speech_idx = updated_matrix[updated_matrix[:, 1] == 3][:, 0]

        control_max = max_values[control_idx, :]
        noise_max = max_values[noise_idx, :]
        speech_max = max_values[speech_idx, :]

        control_min = min_values[control_idx, :]
        noise_min = min_values[noise_idx, :]
        speech_min = min_values[speech_idx, :]

        thresholds = {
            "Control": {"mean_max": 4.16e-6, "mean_min": -4.23e-6, "std_max": 2.51e-6, "std_min": 2.32e-6},
            "Noise": {"mean_max": 6.08e-6, "mean_min": -3.61e-6, "std_max": 2.55e-6, "std_min": 1.54e-6},
            "Speech": {"mean_max": 7.16e-6, "mean_min": -3.88e-6, "std_max": 3.16e-6, "std_min": 1.5e-6},
        }

        control_max_cleaned, control_min_cleaned, rejected_control = reject_epochs(control_max, control_min, "Control", thresholds)
        noise_max_cleaned, noise_min_cleaned, rejected_noise = reject_epochs(noise_max, noise_min, "Noise", thresholds)
        speech_max_cleaned, speech_min_cleaned, rejected_speech = reject_epochs(speech_max, speech_min, "Speech", thresholds)

        epochs_cleaned = epochs_before_cleaning.copy()
        epochs_cleaned = epochs_cleaned.drop([idx for idx in rejected_control + rejected_noise + rejected_speech])
        
        # === Plot AFTER cleaning ===
        indiv_evoked_after = defaultdict(list)
        for condition in event_dict:
            indiv_evoked_after[condition] = epochs_cleaned[condition].average()
        
        data_driven_rois = {
            "Noise_ROI": [ "S10_D11 hbo","S5_D3 hbo", "S11_D11 hbo","S10_D9 hbo", "S10_D12 hbo", "S10_D10 hbo" ],
            "Speech_ROI": ["S10_D11 hbo", "S10_D10 hbo",  "S10_D12 hbo", "S11_D12 hbo","S1_D1 hbo","S10_D9 hbo","S8_D7 hbo","S7_D7 hbo", "S8_D8 hbo"],
            "Common_ROI": [ "S10_D11 hbo","S10_D9 hbo", "S10_D12 hbo", "S10_D10 hbo" ],
            "Only_Noise_ROI": ["S5_D3 hbo", "S11_D11 hbo"],
            "Only_Speech_ROI": [ "S11_D12 hbo","S1_D1 hbo", "S8_D7 hbo","S7_D7 hbo", "S8_D8 hbo"],  # fixed comma
        }
        
        predefined_rois = {
            "Left_Auditory": ["S4_D2 hbo", "S4_D3 hbo", "S5_D2 hbo", "S5_D3 hbo", "S5_D4 hbo", "S5_D5 hbo"],
            "Right_Auditory": ["S10_D9 hbo", "S10_D10 hbo", "S10_D11 hbo", "S10_D12 hbo", "S11_D11 hbo", "S11_D12 hbo"],
            "Visual": ["S6_D6 hbo", "S6_D8 hbo", "S7_D6 hbo", "S7_D7 hbo","S8_D7 hbo", "S8_D8 hbo", "S9_D8 hbo", "S7_D8 hbo"],
            "Front": ["S1_D1 hbo", "S2_D1 hbo", "S3_D1 hbo", "S3_D2 hbo","S12_D1 hbo"]
        }

        
        """ plot_evoked_rois(
            evoked_dict=indiv_evoked_after,
            roi_definitions=data_driven_rois,
            output_path=os.path.join(output_dir, "data_driven_rois.png"),
            title_prefix="Data-Driven ROIs"
        )
        
        plot_evoked_rois(
            evoked_dict=indiv_evoked_after,
            roi_definitions=predefined_rois,
            output_path=os.path.join(output_dir, "predefined_rois.png"),
            title_prefix="Predefined ROIs"
        )
           
         """

        # === INDIVIDUAL PER-OPTODE T-TEST ===
        tmin, tmax = 4, 7
        hbo_optodes = [ch for ch in epochs_cleaned.info["ch_names"] if "hbo" in ch]

        for opt in hbo_optodes:
            try:
                epochs_ctrl = epochs_cleaned["Control"].copy().pick(opt).pick("hbo").crop(tmin=tmin, tmax=tmax).get_data()
                epochs_noise = epochs_cleaned["Noise"].copy().pick(opt).pick("hbo").crop(tmin=tmin, tmax=tmax).get_data()
                epochs_speech = epochs_cleaned["Speech"].copy().pick(opt).pick("hbo").crop(tmin=tmin, tmax=tmax).get_data()

                ctrl_vals = epochs_ctrl.max(axis=2).flatten() 
                noise_vals = epochs_noise.max(axis=2).flatten() 
                speech_vals = epochs_speech.max(axis=2).flatten() 

                t_ctrl_vs_noise, p_ctrl_vs_noise = ttest_ind(ctrl_vals, noise_vals)
                t_ctrl_vs_speech, p_ctrl_vs_speech = ttest_ind(ctrl_vals, speech_vals)

                ttest_results.extend([
                    {"Subject": sub, "Session": f"{ses:02d}", "Optode": opt, "Comparison": "Control vs Noise", "t_stat": t_ctrl_vs_noise, "p_value": p_ctrl_vs_noise},
                    {"Subject": sub, "Session": f"{ses:02d}", "Optode": opt, "Comparison": "Control vs Speech", "t_stat": t_ctrl_vs_speech, "p_value": p_ctrl_vs_speech},
                ])
            except Exception as e:
                print(f"‚ùå Failed t-test for {opt} in Subject {sub}, Session {ses:02d}: {e}")

# === Save results ===
df_ttest = pd.DataFrame(ttest_results)
df_ttest.to_csv("per_optode_ttest_results.csv", index=False)

print("\n‚úÖ Done with per-optode t-tests!")


Detected subjects: ['01', '02', '03', '04', '05', '07', '08', '10', '11', '12', '13', '16', '17', '19', '21', '24']
Removing break annotations for the orginal raw...
Removing break annotations for the cropped raw...
Bad channels: ['S4_D14 785', 'S4_D14 830', 'S5_D15 785', 'S5_D15 830', 'S6_D6 785', 'S6_D6 830', 'S6_D8 785', 'S6_D8 830', 'S7_D6 785', 'S7_D6 830', 'S7_D8 785', 'S7_D8 830', 'S8_D17 785', 'S8_D17 830', 'S9_D8 785', 'S9_D8 830']
Number of bad channels: 16
Number of all (good and bad) short channels: 16
Number of bad long channels: 10
Number of bad short channels: 6
‚úÖ Number of good short channels: 10
Removing break annotations for the orginal raw...
Removing break annotations for the cropped raw...
Bad channels: ['S2_D13 785', 'S2_D13 830', 'S7_D6 785', 'S7_D6 830', 'S7_D8 785', 'S7_D8 830', 'S8_D7 785', 'S8_D7 830', 'S8_D17 785', 'S8_D17 830', 'S9_D8 785', 'S9_D8 830', 'S10_D18 785', 'S10_D18 830', 'S12_D20 785', 'S12_D20 830']
Number of bad channels: 16
Number of all (g

In [117]:
import pandas as pd

# Set significance threshold
alpha = 0.05
df_ttest["Significant"] = df_ttest["p_value"] < alpha

# Define your ROIs
roi_definitions = {
    "Noise_ROI": [ "S10_D11 hbo","S5_D3 hbo", "S11_D11 hbo","S10_D9 hbo", "S10_D12 hbo", "S10_D10 hbo" ],
    "Speech_ROI": ["S10_D11 hbo", "S10_D10 hbo",  "S10_D12 hbo", "S11_D12 hbo","S1_D1 hbo","S10_D9 hbo","S8_D7 hbo","S7_D7 hbo", "S8_D8 hbo"],
    "Common_ROI": [ "S10_D11 hbo","S10_D9 hbo", "S10_D12 hbo", "S10_D10 hbo" ],
    "Only_Noise_ROI": ["S5_D3 hbo", "S11_D11 hbo"],
    "Only_Speech_ROI": [ "S11_D12 hbo","S1_D1 hbo", "S8_D7 hbo","S7_D7 hbo", "S8_D8 hbo"],  # fixed comma
    "Left_Auditory": ["S4_D2 hbo", "S4_D3 hbo", "S5_D2 hbo", "S5_D3 hbo", "S5_D4 hbo", "S5_D5 hbo"],
    "Right_Auditory": ["S10_D9 hbo", "S10_D10 hbo", "S10_D11 hbo", "S10_D12 hbo", "S11_D11 hbo", "S11_D12 hbo"],
    "Visual": ["S6_D6 hbo", "S6_D8 hbo", "S7_D6 hbo", "S7_D7 hbo","S8_D7 hbo", "S8_D8 hbo", "S9_D8 hbo", "S7_D8 hbo"],
    "Front": ["S1_D1 hbo", "S2_D1 hbo", "S3_D1 hbo", "S3_D2 hbo","S12_D1 hbo"]
}

# === ROI summary ===
summary = []

grouped = df_ttest[df_ttest['Significant']].groupby(['Subject', 'Session', 'Comparison'])

for (sub, ses, comp), group in grouped:
    sig_optodes = group['Optode'].unique()

    for roi_name, optodes in roi_definitions.items():
        active_optodes = [opt for opt in sig_optodes if opt in optodes]
        count = len(active_optodes)

        summary.append({
            "Subject": sub,
            "Session": ses,
            "Comparison": comp,
            "ROI": roi_name,
            "Num_Significant_Optodes": count,
            "Total_Optodes_in_ROI": len(optodes),
            "Percentage_Active": count / len(optodes) * 100,
            "Significant_Optodes_in_ROI": ", ".join(active_optodes)
        })

df_roi_summary = pd.DataFrame(summary)

# === General summary per subject ===
sig_optode_summary = (
    df_ttest[df_ttest["Significant"]]
    .groupby(["Subject", "Session", "Comparison"])["Optode"]
    .unique()
    .reset_index()
)

sig_optode_summary["Significant_Optodes"] = sig_optode_summary["Optode"].apply(lambda x: ", ".join(x))
sig_optode_summary["Num_Significant_Optodes"] = sig_optode_summary["Optode"].apply(len)
sig_optode_summary = sig_optode_summary.drop(columns=["Optode"])

# Optional: Save to CSV
df_roi_summary.to_csv("roi_activation_summary.csv", index=False)
sig_optode_summary.to_csv("subject_significant_optodes_summary.csv", index=False)

print("‚úÖ ROI summary and optode summary saved.")


‚úÖ ROI summary and optode summary saved.


In [118]:
sig_optode_summary

Unnamed: 0,Subject,Session,Comparison,Significant_Optodes,Num_Significant_Optodes
0,1,1,Control vs Noise,"S5_D3 hbo, S7_D7 hbo, S8_D7 hbo, S10_D11 hbo, ...",5
1,1,1,Control vs Speech,"S8_D7 hbo, S10_D11 hbo, S10_D12 hbo, S11_D11 h...",5
2,1,2,Control vs Noise,S10_D12 hbo,1
3,1,2,Control vs Speech,"S10_D10 hbo, S10_D12 hbo, S11_D11 hbo",3
4,2,1,Control vs Speech,S11_D12 hbo,1
5,2,2,Control vs Noise,"S5_D2 hbo, S5_D3 hbo, S5_D4 hbo, S5_D5 hbo, S1...",11
6,2,2,Control vs Speech,"S4_D3 hbo, S5_D4 hbo, S5_D5 hbo, S10_D9 hbo, S...",10
7,3,2,Control vs Noise,"S6_D8 hbo, S9_D8 hbo, S10_D11 hbo",3
8,3,2,Control vs Speech,"S1_D1 hbo, S8_D7 hbo, S9_D8 hbo, S10_D11 hbo",4
9,4,1,Control vs Noise,S10_D12 hbo,1


## lme model approach

In [None]:

output_root = "individual_analysis_outputs"
os.makedirs(output_root, exist_ok=True)

bids_root = r"C:\\Datasets\\Test-retest study\\bids_dataset"
subject_list = sorted([d for d in os.listdir(bids_root) if d.startswith("sub-")])
subject_list = [s.replace("sub-", "") for s in subject_list]
print("Detected subjects:", subject_list)

results_lme = []

for sub in subject_list:
    for ses in range(1, 3):
        bids_path = BIDSPath(
            subject=sub,
            session=f"{ses:02d}",
            task="auditory",
            datatype="nirs",
            root=bids_root,
            suffix="nirs",
            extension=".snirf",
        )
        print(f"Processing Subject {sub}, Session {ses:02d}")
        raw_haemo, epochs, event_dict, raw_od, events = individual_analysis(bids_path)
        if raw_haemo is None or len(epochs) < 10:
            continue

        # Drop bad channels & clean epochs as before
        bad_ch = epochs.info['bads']
        epochs.drop_channels(bad_ch)
        epochs_cleaned = epochs.copy()  # Assume cleaned

        hbo_optodes = [ch for ch in epochs_cleaned.info["ch_names"] if "hbo" in ch]
        tmin, tmax = 4, 6

        for opt in hbo_optodes:
            try:
                # Prepare list for epoch-level data
                optode_epochs_data = []

                for cond_label, cond_name in [(1, "Control"), (2, "Noise"), (3, "Speech")]:
                    # Pick epochs for condition and optode, crop time window
                    cond_epochs = epochs_cleaned[cond_name].copy().pick(opt).pick("hbo").crop(tmin=tmin, tmax=tmax)
                    activations = cond_epochs.get_data().mean(axis=2).flatten()  # Max activation per epoch

                    # Append each epoch's activation and condition
                    for act in activations:
                        optode_epochs_data.append({
                            "Activation": act,
                            "Condition": cond_name
                        })

                # Create DataFrame for LME
                df_lme = pd.DataFrame(optode_epochs_data)
                df_lme["Condition"] = pd.Categorical(df_lme["Condition"], categories=["Control", "Noise", "Speech"])

                # Fit LME with random intercept per epoch index to capture variability (optional)
                # Here, we don't have explicit epoch ID, so random intercept by subject is trivial.
                # Since within a single subject and session, no grouping variable, use OLS with categorical predictor:
                # But better to include epoch as group for LME, so we create epoch IDs:

                df_lme["EpochID"] = df_lme.index  # Simple unique id per epoch

                model = mixedlm("Activation ~ Condition", df_lme, groups=df_lme["EpochID"])
                result = model.fit()

                # Extract p-values and coefficients for Noise and Speech vs Control
                p_noise = result.pvalues.get("Condition[T.Noise]", np.nan)
                coef_noise = result.params.get("Condition[T.Noise]", np.nan)
                p_speech = result.pvalues.get("Condition[T.Speech]", np.nan)
                coef_speech = result.params.get("Condition[T.Speech]", np.nan)

                # Store results per optode
                results_lme.append({
                    "Subject": sub,
                    "Session": f"{ses:02d}",
                    "Optode": opt,
                    "Comparison": "Control vs Noise",
                    "Effect": coef_noise,
                    "p_value": p_noise,
                })

                results_lme.append({
                    "Subject": sub,
                    "Session": f"{ses:02d}",
                    "Optode": opt,
                    "Comparison": "Control vs Speech",
                    "Effect": coef_speech,
                    "p_value": p_speech,
                })

            except Exception as e:
                print(f"Error in LME for Sub {sub} Ses {ses:02d} Optode {opt}: {e}")
                continue

df_results = pd.DataFrame(results_lme)
df_results.to_csv("lme_per_optode_per_subject_session.csv", index=False)
print("LME per-epoch per-optode analysis done!")


Detected subjects: ['01', '02', '03', '04', '05', '07', '08', '10', '11', '12', '13', '16', '17', '19', '21', '24']
Processing Subject 01, Session 01
Removing break annotations for the cropped raw...
Processing Subject 01, Session 02
Removing break annotations for the cropped raw...
Processing Subject 02, Session 01
Removing break annotations for the cropped raw...
Processing Subject 02, Session 02
Removing break annotations for the cropped raw...
Processing Subject 03, Session 01
Removing break annotations for the cropped raw...
Processing Subject 03, Session 02
Removing break annotations for the cropped raw...
Processing Subject 04, Session 01
Removing break annotations for the cropped raw...
Processing Subject 04, Session 02
Removing break annotations for the cropped raw...
Processing Subject 05, Session 01
Removing break annotations for the cropped raw...
Processing Subject 05, Session 02
Removing break annotations for the cropped raw...
Processing Subject 07, Session 01
Removing b

In [130]:
# Set significance threshold
alpha = 0.05
df_results["Significant"] = df_results["p_value"] < alpha

# === General summary per subject/session/comparison: significant optodes list and count ===
sig_optode_summary = (
    df_results[df_results["Significant"]]
    .groupby(["Subject", "Session", "Comparison"])["Optode"]
    .unique()
    .reset_index()
)

sig_optode_summary["Significant_Optodes"] = sig_optode_summary["Optode"].apply(lambda x: ", ".join(x))
sig_optode_summary["Num_Significant_Optodes"] = sig_optode_summary["Optode"].apply(len)
sig_optode_summary = sig_optode_summary.drop(columns=["Optode"])

sig_optode_summary


Unnamed: 0,Subject,Session,Comparison,Significant_Optodes,Num_Significant_Optodes
0,01,01,Control vs Noise,"S1_D1 hbo, S4_D3 hbo, S5_D2 hbo, S5_D3 hbo, S5...",14
1,01,01,Control vs Speech,"S3_D1 hbo, S4_D3 hbo, S5_D3 hbo, S5_D4 hbo, S7...",10
2,01,02,Control vs Noise,"S1_D1 hbo, S3_D1 hbo, S5_D3 hbo, S5_D4 hbo, S5...",11
3,01,02,Control vs Speech,"S5_D4 hbo, S10_D9 hbo, S10_D10 hbo, S10_D11 hb...",7
4,02,01,Control vs Noise,"S4_D3 hbo, S9_D8 hbo",2
...,...,...,...,...,...
57,21,02,Control vs Speech,"S1_D1 hbo, S5_D3 hbo, S5_D5 hbo, S7_D7 hbo, S1...",8
58,24,01,Control vs Noise,"S6_D8 hbo, S8_D8 hbo",2
59,24,01,Control vs Speech,"S3_D2 hbo, S6_D6 hbo, S8_D8 hbo, S10_D9 hbo, S...",8
60,24,02,Control vs Noise,"S3_D1 hbo, S3_D2 hbo, S4_D2 hbo, S5_D2 hbo, S5...",13


In [133]:
import pandas as pd

# === Significance threshold ===
alpha = 0.05
df_results["Significant"] = df_results["p_value"] < alpha

# === ROI definitions ===
roi_definitions = {
    "Noise_ROI": ["S10_D11 hbo", "S5_D3 hbo", "S11_D11 hbo", "S10_D9 hbo", "S10_D12 hbo", "S10_D10 hbo"],
    "Speech_ROI": ["S10_D11 hbo", "S10_D10 hbo", "S10_D12 hbo", "S11_D12 hbo", "S1_D1 hbo", "S10_D9 hbo", "S8_D7 hbo", "S7_D7 hbo", "S8_D8 hbo"],
    "Common_ROI": ["S10_D11 hbo", "S10_D9 hbo", "S10_D12 hbo", "S10_D10 hbo"],
    "Only_Noise_ROI": ["S5_D3 hbo", "S11_D11 hbo"],
    "Only_Speech_ROI": ["S11_D12 hbo", "S1_D1 hbo", "S8_D7 hbo", "S7_D7 hbo", "S8_D8 hbo"],
    "Left_Auditory": ["S4_D2 hbo", "S4_D3 hbo", "S5_D2 hbo", "S5_D3 hbo", "S5_D4 hbo", "S5_D5 hbo"],
    "Right_Auditory": ["S10_D9 hbo", "S10_D10 hbo", "S10_D11 hbo", "S10_D12 hbo", "S11_D11 hbo", "S11_D12 hbo"],
    "Visual": ["S6_D6 hbo", "S6_D8 hbo", "S7_D6 hbo", "S7_D7 hbo", "S8_D7 hbo", "S8_D8 hbo", "S9_D8 hbo", "S7_D8 hbo"],
    "Front": ["S1_D1 hbo", "S2_D1 hbo", "S3_D1 hbo", "S3_D2 hbo", "S12_D1 hbo"]
}

# === General optode summary ===
sig_optode_summary = (
    df_results[df_results["Significant"]]
    .groupby(["Subject", "Session", "Comparison"])["Optode"]
    .unique()
    .reset_index()
)

sig_optode_summary["Significant_Optodes"] = sig_optode_summary["Optode"].apply(lambda x: ", ".join(sorted(x)))
sig_optode_summary["Num_Significant_Optodes"] = sig_optode_summary["Optode"].apply(len)
sig_optode_summary = sig_optode_summary.drop(columns=["Optode"])

# === ROI summaries for each comparison ===
summary_noise = []
summary_speech = []

grouped = df_results[df_results["Significant"]].groupby(["Subject", "Session", "Comparison"])

for (sub, ses, comp), group in grouped:
    sig_optodes = group["Optode"].unique()

    for roi_name, optodes in roi_definitions.items():
        active_optodes = [opt for opt in sig_optodes if opt in optodes]
        count = len(active_optodes)
        row = {
            "Subject": sub,
            "Session": ses,
            "Comparison": comp,
            "ROI": roi_name,
            "Num_Significant_Optodes": count,
            "Total_Optodes_in_ROI": len(optodes),
            "Percentage_Active": count / len(optodes) * 100,
            "Significant_Optodes_in_ROI": ", ".join(sorted(active_optodes))
        }

        if comp == "Control vs Noise":
            summary_noise.append(row)
        elif comp == "Control vs Speech":
            summary_speech.append(row)

# Convert to DataFrames
df_roi_summary_noise = pd.DataFrame(summary_noise)
df_roi_summary_speech = pd.DataFrame(summary_speech)



In [136]:
df_roi_summary_noise

Unnamed: 0,Subject,Session,Comparison,ROI,Num_Significant_Optodes,Total_Optodes_in_ROI,Percentage_Active,Significant_Optodes_in_ROI
0,01,01,Control vs Noise,Noise_ROI,5,6,83.333333,"S10_D11 hbo, S10_D12 hbo, S10_D9 hbo, S11_D11 ..."
1,01,01,Control vs Noise,Speech_ROI,8,9,88.888889,"S10_D11 hbo, S10_D12 hbo, S10_D9 hbo, S11_D12 ..."
2,01,01,Control vs Noise,Common_ROI,3,4,75.000000,"S10_D11 hbo, S10_D12 hbo, S10_D9 hbo"
3,01,01,Control vs Noise,Only_Noise_ROI,2,2,100.000000,"S11_D11 hbo, S5_D3 hbo"
4,01,01,Control vs Noise,Only_Speech_ROI,5,5,100.000000,"S11_D12 hbo, S1_D1 hbo, S7_D7 hbo, S8_D7 hbo, ..."
...,...,...,...,...,...,...,...,...
265,24,02,Control vs Noise,Only_Speech_ROI,2,5,40.000000,"S11_D12 hbo, S8_D8 hbo"
266,24,02,Control vs Noise,Left_Auditory,4,6,66.666667,"S4_D2 hbo, S5_D2 hbo, S5_D3 hbo, S5_D4 hbo"
267,24,02,Control vs Noise,Right_Auditory,4,6,66.666667,"S10_D10 hbo, S10_D11 hbo, S11_D11 hbo, S11_D12..."
268,24,02,Control vs Noise,Visual,2,8,25.000000,"S6_D8 hbo, S8_D8 hbo"


### Noise

In [137]:
# Choose your input DataFrame (noise or speech)
df_roi_summary = df_roi_summary_noise  # or df_roi_summary_speech

# === Find most activated ROI(s) per subject/session ===
def get_max_rois(group):
    max_percentage = group["Percentage_Active"].max()
    top_rois = group[group["Percentage_Active"] == max_percentage]
    return pd.Series({
        "Most_Activated_ROI": ", ".join(sorted(top_rois["ROI"].unique())),
        "Max_Percentage_Active": max_percentage
    })

df_max_roi_noise = (
    df_roi_summary
    .groupby(["Subject", "Session", "Comparison"])
    .apply(get_max_rois)
    .reset_index()
)

df_max_roi_noise


Unnamed: 0,Subject,Session,Comparison,Most_Activated_ROI,Max_Percentage_Active
0,1,1,Control vs Noise,"Only_Noise_ROI, Only_Speech_ROI",100.0
1,1,2,Control vs Noise,Common_ROI,75.0
2,2,1,Control vs Noise,Left_Auditory,16.666667
3,2,2,Control vs Noise,"Common_ROI, Noise_ROI, Only_Noise_ROI, Right_A...",100.0
4,3,1,Control vs Noise,Only_Noise_ROI,50.0
5,3,2,Control vs Noise,Common_ROI,100.0
6,4,1,Control vs Noise,Only_Noise_ROI,100.0
7,4,2,Control vs Noise,Only_Noise_ROI,50.0
8,5,1,Control vs Noise,"Common_ROI, Visual",25.0
9,5,2,Control vs Noise,Common_ROI,75.0


In [142]:
min_optodes = 1  # Set your desired threshold

# Start collecting results
roi_occurrences = []

# Loop through each ROI
for roi in df_roi_summary_noise['ROI'].unique():  # Or df_max_roi_speech['ROI']
    filtered = df_roi_summary_noise[
        (df_roi_summary_noise['ROI'] == roi) &
        (df_roi_summary_noise['Num_Significant_Optodes'] >= min_optodes)
    ]
    
    sub_ses_list = filtered.apply(lambda row: f"{row['Subject']}-{row['Session']}", axis=1).tolist()
    
    roi_occurrences.append({
        "ROI": roi,
        "Num_Times_Appearing": len(sub_ses_list),
        "Subjects_Sessions": ", ".join(sub_ses_list)
    })

# Create final DataFrame
df_roi_appearance_summary_noise = pd.DataFrame(roi_occurrences).sort_values(by="Num_Times_Appearing", ascending=False).reset_index(drop=True)
df_roi_appearance_summary_noise



Unnamed: 0,ROI,Num_Times_Appearing,Subjects_Sessions
0,Noise_ROI,25,"01-01, 01-02, 02-02, 03-01, 03-02, 04-01, 04-0..."
1,Speech_ROI,25,"01-01, 01-02, 02-02, 03-01, 03-02, 04-01, 05-0..."
2,Left_Auditory,24,"01-01, 01-02, 02-01, 02-02, 03-01, 03-02, 04-0..."
3,Right_Auditory,24,"01-01, 01-02, 02-02, 03-01, 03-02, 04-01, 04-0..."
4,Common_ROI,22,"01-01, 01-02, 02-02, 03-01, 03-02, 04-01, 05-0..."
5,Visual,20,"01-01, 01-02, 02-01, 03-01, 03-02, 04-01, 05-0..."
6,Front,20,"01-01, 01-02, 02-02, 03-02, 04-01, 04-02, 05-0..."
7,Only_Speech_ROI,18,"01-01, 01-02, 02-02, 03-01, 03-02, 04-01, 05-0..."
8,Only_Noise_ROI,14,"01-01, 01-02, 02-02, 03-01, 03-02, 04-01, 04-0..."


In [145]:
# Choose your ROI summary dataframe
df = df_roi_summary_noise  # or df_roi_summary_speech

# Filter rows where no optodes were significantly active
no_activation_rows = df[df["Num_Significant_Optodes"] == 0].copy()

# Create a combined identifier for subject-session
no_activation_rows["Subject_Session"] = no_activation_rows["Subject"] + "-" + no_activation_rows["Session"]

# Group by ROI and aggregate
roi_no_activation_summary = (
    no_activation_rows.groupby("ROI")["Subject_Session"]
    .agg([("Num_No_Activations", "count"), ("Subject_Sessions", lambda x: ", ".join(sorted(x)))])
    .reset_index()
)

# Show or export the result
roi_no_activation_summary
# roi_no_activation_summary.to_csv("roi_no_activation_summary.csv", index=False)


Unnamed: 0,ROI,Num_No_Activations,Subject_Sessions
0,Common_ROI,8,"02-01, 04-02, 12-01, 13-02, 16-01, 19-01, 21-0..."
1,Front,10,"02-01, 03-01, 05-01, 07-01, 11-01, 12-02, 13-0..."
2,Left_Auditory,6,"07-01, 10-02, 16-01, 17-02, 19-01, 24-01"
3,Noise_ROI,5,"02-01, 12-01, 16-01, 19-01, 24-01"
4,Only_Noise_ROI,16,"02-01, 05-01, 07-01, 07-02, 08-01, 10-02, 11-0..."
5,Only_Speech_ROI,12,"02-01, 04-02, 07-01, 07-02, 10-02, 11-01, 12-0..."
6,Right_Auditory,6,"02-01, 12-01, 16-01, 19-01, 21-01, 24-01"
7,Speech_ROI,5,"02-01, 04-02, 12-01, 16-01, 19-01"
8,Visual,10,"02-02, 04-02, 08-02, 11-02, 12-01, 12-02, 13-0..."


In [139]:
from collections import Counter

# Assume df_max_roi is already defined (from the previous step)

# Step 1: Flatten all ROI names into a list
all_rois = df_max_roi_noise["Most_Activated_ROI"].str.split(", ")
flattened_rois = [roi for sublist in all_rois for roi in sublist]

# Step 2: Count occurrences
roi_counts = Counter(flattened_rois)

# Step 3: Convert to DataFrame for sorting and display
df_roi_counts = pd.DataFrame(roi_counts.items(), columns=["ROI", "Count"])
df_roi_counts_noise = df_roi_counts.sort_values(by="Count", ascending=False).reset_index(drop=True)

df_roi_counts_noise


Unnamed: 0,ROI,Count
0,Only_Noise_ROI,10
1,Common_ROI,9
2,Left_Auditory,6
3,Front,5
4,Visual,3
5,Right_Auditory,3
6,Only_Speech_ROI,2
7,Noise_ROI,1


### Speech

In [138]:
# Choose your input DataFrame (noise or speech)
df_roi_summary = df_roi_summary_speech # or df_roi_summary_speech

# === Find most activated ROI(s) per subject/session ===
def get_max_rois(group):
    max_percentage = group["Percentage_Active"].max()
    top_rois = group[group["Percentage_Active"] == max_percentage]
    return pd.Series({
        "Most_Activated_ROI": ", ".join(sorted(top_rois["ROI"].unique())),
        "Max_Percentage_Active": max_percentage
    })

df_max_roi_speech = (
    df_roi_summary
    .groupby(["Subject", "Session", "Comparison"])
    .apply(get_max_rois)
    .reset_index()
)

df_max_roi_speech


Unnamed: 0,Subject,Session,Comparison,Most_Activated_ROI,Max_Percentage_Active
0,1,1,Control vs Speech,Only_Noise_ROI,100.0
1,1,2,Control vs Speech,"Common_ROI, Right_Auditory",100.0
2,2,1,Control vs Speech,Only_Speech_ROI,20.0
3,2,2,Control vs Speech,"Common_ROI, Noise_ROI, Only_Noise_ROI, Right_A...",100.0
4,3,1,Control vs Speech,Front,40.0
5,3,2,Control vs Speech,"Common_ROI, Visual",75.0
6,4,1,Control vs Speech,Left_Auditory,83.333333
7,4,2,Control vs Speech,Left_Auditory,83.333333
8,5,1,Control vs Speech,Common_ROI,100.0
9,5,2,Control vs Speech,Only_Noise_ROI,50.0


In [148]:
min_optodes = 2  # Set your desired threshold

# Start collecting results
roi_occurrences = []

# Loop through each ROI
for roi in df_roi_summary_speech['ROI'].unique():  # Or df_max_roi_speech['ROI']
    filtered = df_roi_summary_speech[
        (df_roi_summary_speech['ROI'] == roi) &
        (df_roi_summary_speech['Num_Significant_Optodes'] >= min_optodes)
    ]
    
    sub_ses_list = filtered.apply(lambda row: f"{row['Subject']}-{row['Session']}", axis=1).tolist()
    
    roi_occurrences.append({
        "ROI": roi,
        "Num_Times_Appearing": len(sub_ses_list),
        "Subjects_Sessions": ", ".join(sub_ses_list)
    })

# Create final DataFrame
df_roi_appearance_summary_speech = pd.DataFrame(roi_occurrences).sort_values(by="Num_Times_Appearing", ascending=False).reset_index(drop=True)
df_roi_appearance_summary_speech



Unnamed: 0,ROI,Num_Times_Appearing,Subjects_Sessions
0,Noise_ROI,20,"01-01, 01-02, 02-02, 03-02, 04-01, 04-02, 05-0..."
1,Speech_ROI,20,"01-01, 01-02, 02-02, 03-02, 04-01, 04-02, 05-0..."
2,Right_Auditory,20,"01-01, 01-02, 02-02, 03-02, 04-01, 04-02, 05-0..."
3,Common_ROI,17,"01-01, 01-02, 02-02, 03-02, 04-01, 04-02, 05-0..."
4,Left_Auditory,16,"01-01, 02-02, 03-02, 04-01, 04-02, 07-02, 08-0..."
5,Visual,14,"01-01, 03-02, 04-01, 04-02, 05-02, 07-02, 10-0..."
6,Front,14,"02-02, 03-01, 04-02, 05-01, 07-02, 08-01, 11-0..."
7,Only_Speech_ROI,13,"01-01, 03-02, 04-02, 05-01, 07-02, 12-01, 12-0..."
8,Only_Noise_ROI,4,"01-01, 02-02, 21-02, 24-02"


In [146]:
# Choose your ROI summary dataframe
df = df_roi_summary_speech  # or df_roi_summary_speech

# Filter rows where no optodes were significantly active
no_activation_rows = df[df["Num_Significant_Optodes"] == 0].copy()

# Create a combined identifier for subject-session
no_activation_rows["Subject_Session"] = no_activation_rows["Subject"] + "-" + no_activation_rows["Session"]

# Group by ROI and aggregate
roi_no_activation_summary_speech = (
    no_activation_rows.groupby("ROI")["Subject_Session"]
    .agg([("Num_No_Activations", "count"), ("Subject_Sessions", lambda x: ", ".join(sorted(x)))])
    .reset_index()
)

# Show or export the result
roi_no_activation_summary_speech
# roi_no_activation_summary.to_csv("roi_no_activation_summary.csv", index=False)


Unnamed: 0,ROI,Num_No_Activations,Subject_Sessions
0,Common_ROI,10,"02-01, 05-02, 07-01, 08-02, 10-01, 10-02, 11-0..."
1,Front,7,"01-02, 02-01, 07-01, 08-02, 10-01, 10-02, 12-02"
2,Left_Auditory,6,"03-01, 05-02, 08-02, 16-01, 19-02, 24-01"
3,Noise_ROI,7,"02-01, 07-01, 10-01, 11-02, 16-01, 17-01, 19-02"
4,Only_Noise_ROI,12,"02-01, 03-01, 03-02, 07-01, 08-01, 10-01, 11-0..."
5,Only_Speech_ROI,9,"03-01, 05-02, 07-01, 08-01, 10-02, 11-02, 13-0..."
6,Right_Auditory,7,"02-01, 07-01, 10-02, 11-02, 16-01, 17-01, 19-02"
7,Speech_ROI,6,"05-02, 07-01, 10-02, 11-02, 17-01, 19-02"
8,Visual,8,"01-02, 02-02, 03-01, 07-01, 10-01, 11-02, 13-0..."


In [140]:
from collections import Counter

# Assume df_max_roi is already defined (from the previous step)

# Step 1: Flatten all ROI names into a list
all_rois = df_max_roi_speech["Most_Activated_ROI"].str.split(", ")
flattened_rois = [roi for sublist in all_rois for roi in sublist]

# Step 2: Count occurrences
roi_counts = Counter(flattened_rois)

# Step 3: Convert to DataFrame for sorting and display
df_roi_counts = pd.DataFrame(roi_counts.items(), columns=["ROI", "Count"])
df_roi_counts_speech = df_roi_counts.sort_values(by="Count", ascending=False).reset_index(drop=True)

df_roi_counts_speech


Unnamed: 0,ROI,Count
0,Only_Noise_ROI,9
1,Common_ROI,9
2,Front,8
3,Left_Auditory,7
4,Only_Speech_ROI,7
5,Right_Auditory,3
6,Noise_ROI,2
7,Visual,2
8,Speech_ROI,1


## lme_model epoch averaging accross ROI
Running a separate LME per optode treats each optode as independent, which ignores potential shared noise and the fact that brain regions (ROIs) act more like functional units

In [None]:
import os
import numpy as np
import pandas as pd
from statsmodels.formula.api import mixedlm
from mne_bids import BIDSPath

# === Define your ROIs ===
roi_definitions = {
    "Noise_ROI": ["S10_D11 hbo", "S5_D3 hbo", "S11_D11 hbo", "S10_D9 hbo", "S10_D12 hbo", "S10_D10 hbo"],
    "Speech_ROI": ["S10_D11 hbo", "S10_D10 hbo", "S10_D12 hbo", "S11_D12 hbo", "S1_D1 hbo", "S10_D9 hbo", "S8_D7 hbo", "S7_D7 hbo", "S8_D8 hbo"],
    "Common_ROI": ["S10_D11 hbo", "S10_D9 hbo", "S10_D12 hbo", "S10_D10 hbo"],
    "Only_Noise_ROI": ["S5_D3 hbo", "S11_D11 hbo"],
    "Only_Speech_ROI": ["S11_D12 hbo", "S1_D1 hbo", "S8_D7 hbo", "S7_D7 hbo", "S8_D8 hbo"],
    "Left_Auditory": ["S4_D2 hbo", "S4_D3 hbo", "S5_D2 hbo", "S5_D3 hbo", "S5_D4 hbo", "S5_D5 hbo"],
    "Right_Auditory": ["S10_D9 hbo", "S10_D10 hbo", "S10_D11 hbo", "S10_D12 hbo", "S11_D11 hbo", "S11_D12 hbo"],
    "Visual": ["S6_D6 hbo", "S6_D8 hbo", "S7_D6 hbo", "S7_D7 hbo", "S8_D7 hbo", "S8_D8 hbo", "S9_D8 hbo", "S7_D8 hbo"],
    "Front": ["S1_D1 hbo", "S2_D1 hbo", "S3_D1 hbo", "S3_D2 hbo", "S12_D1 hbo"]
}

output_root = "individual_analysis_outputs"
os.makedirs(output_root, exist_ok=True)

bids_root = r"C:\\Datasets\\Test-retest study\\bids_dataset"
subject_list = sorted([d for d in os.listdir(bids_root) if d.startswith("sub-")])
subject_list = [s.replace("sub-", "") for s in subject_list]
print("Detected subjects:", subject_list)

results_noise = []
results_speech = []

for sub in subject_list:
    for ses in range(1, 3):
        bids_path = BIDSPath(
            subject=sub,
            session=f"{ses:02d}",
            task="auditory",
            datatype="nirs",
            root=bids_root,
            suffix="nirs",
            extension=".snirf",
        )
        print(f"Processing Subject {sub}, Session {ses:02d}")
        raw_haemo, epochs, event_dict, raw_od, events = individual_analysis(bids_path)
        if raw_haemo is None or len(epochs) < 10:
            continue

        # Drop bad channels & clean epochs as before
        bad_ch = epochs.info['bads']
        epochs.drop_channels(bad_ch)
        epochs_cleaned = epochs.copy()

        tmin, tmax = 4, 6

        for roi_name, optode_list in roi_definitions.items():
            try:
                available_optodes = [opt for opt in optode_list if opt in epochs_cleaned.info["ch_names"]]
                #print(f"ROI: {roi_name}, Available Optodes: {available_optodes}")
                if len(available_optodes) < 2:
                    continue

                roi_epochs_data = []

                for cond_name in ["Control", "Noise", "Speech"]:
                    cond_epochs = epochs_cleaned[cond_name].copy().pick(available_optodes).pick("hbo").crop(tmin=tmin, tmax=tmax)
                    data = cond_epochs.get_data()

                    roi_mean_epochs = data.mean(axis=1).mean(axis=1)  # (n_epochs,)

                    for act in roi_mean_epochs:
                        roi_epochs_data.append({
                            "Activation": act,
                            "Condition": cond_name
                        })

                df_lme = pd.DataFrame(roi_epochs_data)
                df_lme["Condition"] = pd.Categorical(df_lme["Condition"], categories=["Control", "Noise", "Speech"])
                df_lme["EpochID"] = df_lme.index

                model = mixedlm("Activation ~ Condition", df_lme, groups=df_lme["EpochID"])
                result = model.fit()

                # Append results for each comparison separately
                results_noise.append({
                    "Subject": sub,
                    "Session": f"{ses:02d}",
                    "ROI": roi_name,
                    "Comparison": "Control vs Noise",
                    "Effect": result.params.get("Condition[T.Noise]", np.nan),
                    "p_value": result.pvalues.get("Condition[T.Noise]", np.nan),
                })
                results_speech.append({
                    "Subject": sub,
                    "Session": f"{ses:02d}",
                    "ROI": roi_name,
                    "Comparison": "Control vs Speech",
                    "Effect": result.params.get("Condition[T.Speech]", np.nan),
                    "p_value": result.pvalues.get("Condition[T.Speech]", np.nan),
                })

            except Exception as e:
                print(f"Error in LME for Sub {sub} Ses {ses:02d} ROI {roi_name}: {e}")
                continue

# Save the two separate DataFrames
df_results_roi_noise = pd.DataFrame(results_noise)
df_results_roi_speech = pd.DataFrame(results_speech)


Detected subjects: ['01', '02', '03', '04', '05', '07', '08', '10', '11', '12', '13', '16', '17', '19', '21', '24']
Processing Subject 01, Session 01
Removing break annotations for the cropped raw...
ROI: Noise_ROI, Available Optodes: ['S10_D11 hbo', 'S5_D3 hbo', 'S11_D11 hbo', 'S10_D9 hbo', 'S10_D12 hbo', 'S10_D10 hbo']
ROI: Speech_ROI, Available Optodes: ['S10_D11 hbo', 'S10_D10 hbo', 'S10_D12 hbo', 'S11_D12 hbo', 'S1_D1 hbo', 'S10_D9 hbo', 'S8_D7 hbo', 'S7_D7 hbo', 'S8_D8 hbo']
ROI: Common_ROI, Available Optodes: ['S10_D11 hbo', 'S10_D9 hbo', 'S10_D12 hbo', 'S10_D10 hbo']
ROI: Only_Noise_ROI, Available Optodes: ['S5_D3 hbo', 'S11_D11 hbo']
ROI: Only_Speech_ROI, Available Optodes: ['S11_D12 hbo', 'S1_D1 hbo', 'S8_D7 hbo', 'S7_D7 hbo', 'S8_D8 hbo']
ROI: Left_Auditory, Available Optodes: ['S4_D2 hbo', 'S4_D3 hbo', 'S5_D2 hbo', 'S5_D3 hbo', 'S5_D4 hbo', 'S5_D5 hbo']
ROI: Right_Auditory, Available Optodes: ['S10_D9 hbo', 'S10_D10 hbo', 'S10_D11 hbo', 'S10_D12 hbo', 'S11_D11 hbo', 'S11_D1

In [161]:
# Set significance threshold
alpha = 0.05
df_results_roi_noise["Significant"] = df_results_roi_noise["p_value"] < alpha
df_results_roi_speech["Significant"] = df_results_roi_speech["p_value"] < alpha




In [164]:
def summarize_significance(df_results_roi, alpha=0.05):
    df = df_results_roi.copy()
    df["Significant"] = df["p_value"] < alpha

    summary = (
        df.groupby("ROI")["Significant"]
        .agg(
            Num_Significant="sum",
            Num_Not_Significant=lambda x: (~x).sum(),
            Total="count"
        )
        .reset_index()
    )
    summary["Percentage_Significant"] = (summary["Num_Significant"] / summary["Total"]) * 100
    return summary

# Apply to both
summary_roi_noise = summarize_significance(df_results_roi_noise)
summary_roi_speech = summarize_significance(df_results_roi_speech)

print("=== Control vs Noise ===")
print(summary_roi_noise)

print("\n=== Control vs Speech ===")
print(summary_roi_speech)


=== Control vs Noise ===
               ROI  Num_Significant  Num_Not_Significant  Total  \
0       Common_ROI                9                   23     32   
1            Front                5                   27     32   
2    Left_Auditory               10                   22     32   
3        Noise_ROI               10                   22     32   
4   Only_Noise_ROI               10                   19     29   
5  Only_Speech_ROI                7                   25     32   
6   Right_Auditory                8                   24     32   
7       Speech_ROI                9                   23     32   
8           Visual                8                   24     32   

   Percentage_Significant  
0               28.125000  
1               15.625000  
2               31.250000  
3               31.250000  
4               34.482759  
5               21.875000  
6               25.000000  
7               28.125000  
8               25.000000  

=== Control vs Speech =

In [168]:
summary_roi_speech

Unnamed: 0,ROI,Num_Significant,Num_Not_Significant,Total,Percentage_Significant
0,Common_ROI,16,16,32,50.0
1,Front,7,25,32,21.875
2,Left_Auditory,8,24,32,25.0
3,Noise_ROI,18,14,32,56.25
4,Only_Noise_ROI,16,13,29,55.172414
5,Only_Speech_ROI,15,17,32,46.875
6,Right_Auditory,16,16,32,50.0
7,Speech_ROI,17,15,32,53.125
8,Visual,9,23,32,28.125


In [166]:
def count_subject_sessions_with_significant_roi(df_results_roi, alpha=0.05):
    df = df_results_roi.copy()
    df["Significant"] = df["p_value"] < alpha

    # Total subject-session pairs in the DataFrame
    total_pairs = df[["Subject", "Session"]].drop_duplicates()

    # Filter for significant ROI entries
    df_sig = df[df["Significant"]]

    # Subject-session pairs with at least one significant ROI
    sig_pairs = df_sig[["Subject", "Session"]].drop_duplicates()

    # Count
    return {
        "num_significant": len(sig_pairs),
        "num_total": len(total_pairs),
        "percentage": 100 * len(sig_pairs) / len(total_pairs) if len(total_pairs) > 0 else 0,
        "significant_pairs": sig_pairs
    }

# For Noise
results_noise = count_subject_sessions_with_significant_roi(df_results_roi_noise)
print(f"üîä Control vs Noise: {results_noise['num_significant']} out of {results_noise['num_total']} subject-session pairs ({results_noise['percentage']:.1f}%) had at least one significant ROI")

# For Speech
results_speech = count_subject_sessions_with_significant_roi(df_results_roi_speech)
print(f"üó£Ô∏è Control vs Speech: {results_speech['num_significant']} out of {results_speech['num_total']} subject-session pairs ({results_speech['percentage']:.1f}%) had at least one significant ROI")

# Optional: access the IDs
# print(results_noise["significant_pairs"])
# print(results_speech["significant_pairs"])


üîä Control vs Noise: 21 out of 32 subject-session pairs (65.6%) had at least one significant ROI
üó£Ô∏è Control vs Speech: 25 out of 32 subject-session pairs (78.1%) had at least one significant ROI
