In [1]:
# Fixed Effects Parcellation
# Parcellates fixed effects maps with the smorgasbord atlas (includes both cortical and subcortical) 
# and saves the df in smor_parcel_dfs_fixed

# 1. Load each subject's fixed effects map individually
# 2. Parcellate each subject's map (average t-stats within parcels per subject)
# 3. Average the parcel values across subjects

In [2]:
import os
import numpy as np
import pandas as pd
import nibabel as nib
import nilearn
import json
import datetime
import pickle
import seaborn as sns
import gc
import psutil
import math
import scipy.stats as stats
from matplotlib.patches import Patch
from nilearn import plotting
from nilearn.glm.first_level import FirstLevelModel
from nilearn.glm.second_level import SecondLevelModel
from nilearn.glm import threshold_stats_img
from nilearn.image import concat_imgs, mean_img, index_img
from nilearn.reporting import make_glm_report
from nilearn import masking, image
from nilearn import datasets
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
from collections import defaultdict
from nilearn.maskers import NiftiLabelsMasker
from sklearn.utils import Bunch

In [3]:
# general helper functions:
def build_contrast_map_path(base_dir, level, subject, session, task, contrast_name):
    """Build the file path for a contrast map."""
    filename = f'{subject}_{session}_task-{task}_contrast-{contrast_name}_rtmodel-rt_centered_stat-effect-size.nii.gz'
    
    # NOTE: as of 7/6/25 for sub 10 in flanker the format is different: sub-s10_ses-01_run-1_task-flanker_contrast-incongruent-congruent_rtmodel-rt_centered_stat-effect-size.nii.gz
    if (subject == 'sub-s10' and task == 'flanker'):
        filename = f'{subject}_{session}_run-1_task-{task}_contrast-{contrast_name}_rtmodel-rt_centered_stat-effect-size.nii.gz'
    # NOTE: as of 10/1/25 for sub 3 in all tasks the format is different: (also has run-1)
    if (subject == 'sub-s03'):
        filename = f'{subject}_{session}_run-1_task-{task}_contrast-{contrast_name}_rtmodel-rt_centered_stat-effect-size.nii.gz'
        
    return os.path.join(base_dir, level, subject, task, 'indiv_contrasts', filename)

def is_valid_contrast_map(img_path):
    """Check if a contrast map has sufficient variance and no NaN values."""
    try:
        img = nib.load(img_path)
        data = img.get_fdata()
        return np.std(data) > 1e-10 and not np.isnan(data).any()
    except Exception as e:
        print(f"Error validating {img_path}: {e}")
        return False
        
def clean_z_map_data(z_map, task, contrast_name, encounter):
    """Clean z-map data by handling NaN and infinity values."""
    data = z_map.get_fdata()
    if np.isnan(data).any() or np.isinf(data).any():
        data = np.nan_to_num(data)
        z_map = nib.Nifti1Image(data, z_map.affine, z_map.header)
        print(f"Warning: Fixed NaN/Inf values in {task}:{contrast_name}:encounter-{encounter+1}")
    return z_map

def cleanup_memory():
    """
    Clean up memory between batches
    """
    # Force garbage collection
    gc.collect()
    
    # Get memory info
    memory = psutil.virtual_memory()
    print(f"Memory after cleanup: {memory.percent:.1f}% used ({memory.available/(1024**3):.1f}GB available)")
    
def convert_to_regular_dict(d):
    if isinstance(d, defaultdict):
        return {k: convert_to_regular_dict(v) for k, v in d.items()}
    elif isinstance(d, list):
        return [convert_to_regular_dict(i) for i in d]
    else:
        return d

In [5]:
# all tasks and contrasts
TASKS = ["nBack","flanker","directedForgetting","goNogo", "shapeMatching", "stopSignal", "cuedTS", "spatialTS"]
CONTRASTS = {}
CONTRASTS["nBack"] = ["twoBack-oneBack", "match-mismatch","task-baseline","response_time"] # the nback contrasts
CONTRASTS["flanker"] = ["incongruent-congruent", "task-baseline"]
CONTRASTS["directedForgetting"] = ["neg-con", "task-baseline","response_time"]
CONTRASTS["goNogo"] = ["nogo_success-go", "nogo_success","task-baseline","response_time"] # go_rtModel check
CONTRASTS["shapeMatching"] = ["DDD", "DDS", "DNN", "DSD", "main_vars", "SDD", "SNN", "SSS", "task-baseline","response_time"]
CONTRASTS["stopSignal"] = ["go", "stop_failure-go", "stop_failure", "stop_failure-stop_success", "stop_success-go", "stop_success", "stop_success-stop_failure", "task-baseline","response_time"]
CONTRASTS["cuedTS"] = ["cue_switch_cost", "task_switch_cost", "task_switch_cue_switch-task_stay_cue_stay", "task-baseline","response_time"]
CONTRASTS["spatialTS"] = ["cue_switch_cost", "task_switch_cost", "task_switch_cue_switch-task_stay_cue_stay", "task-baseline","response_time"]

# interested in looking at them all now:
requested_task_contrasts = defaultdict(lambda: defaultdict(list))
requested_task_contrasts['nBack'] = CONTRASTS["nBack"]
requested_task_contrasts['flanker'] = CONTRASTS["flanker"]
requested_task_contrasts['directedForgetting'] = CONTRASTS["directedForgetting"]
requested_task_contrasts['goNogo'] = CONTRASTS["goNogo"]
requested_task_contrasts['shapeMatching'] = CONTRASTS["shapeMatching"]
requested_task_contrasts['stopSignal'] = CONTRASTS["stopSignal"]
requested_task_contrasts['cuedTS'] = CONTRASTS["cuedTS"]
requested_task_contrasts['spatialTS'] = CONTRASTS["spatialTS"] 

# compiled_req_contrasts = ["twoBack-oneBack", 'task-baseline', "incongruent-congruent", "neg-con", "nogo_success-go", "main_vars", "stop_failure-go","task_switch_cost"]

encounters = ['01', '02','03','04','05']

# compile all requested contrasts into one list
compiled_req_contrasts = []
for task in TASKS:
    for contrast in requested_task_contrasts[task]:
        if (contrast not in compiled_req_contrasts):
            compiled_req_contrasts.append(contrast)

In [6]:
# smorgasbord stuff
SMORG_PARCELLATED_DIR = 'smor_parcel_dfs'
smor_files = {'mean':f'discovery_parcel_indiv_mean_updated'}
smor_date_updated = '1027'
indices = [1,2,3]

# get smorgasbord atlas
with open(f'{SMORG_PARCELLATED_DIR}/smorgasbord_atlas_files/smorgasbord_atlas.pkl', 'rb') as f:
    smorgasbord_atlas = pickle.load(f)
SMORG_IMG = smorgasbord_atlas.maps
SMORG_DATA = SMORG_IMG.get_fdata()

# Load fixed effect maps

In [7]:
# load the fixed effects maps
# LOADING ALL FIXED EFFECTS:
# Organize by subject, then parcellate each subject individually
# Then average parcel values across subjects (not voxel-wise averaging)
BASE_DIR = '/oak/stanford/groups/russpold/data/network_grant/discovery_BIDS_20250402/derivatives/'
LEVEL = 'output_lev1_mni'
# subjects in the discovery sample
SUBJECTS = ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']
SESSIONS = ['ses-01', 'ses-02', 'ses-03', 'ses-04', 'ses-05', 'ses-06', 'ses-07', 'ses-08', 'ses-09','ses-10']

# Organize fixed effects maps by subject (not averaged)
fe_all_contrast_maps = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))

for task in TASKS:
    for contrast_name in CONTRASTS[task]:
        for subject in SUBJECTS:
            filename = f'{subject}_task-{task}_contrast-{contrast_name}_rtmodel-rt_centered_stat-fixed-effects.nii.gz'
            contrast_map_path = os.path.join(BASE_DIR, LEVEL, subject, task,'fixed_effects',filename)

            if os.path.exists(contrast_map_path):
                try:
                    # Load the map for this subject
                    fe_all_contrast_maps[task][contrast_name][subject] = nib.load(contrast_map_path)
                    print(f"Loaded fixed effects map for {subject}/{task}/{contrast_name}")
                except Exception as e:
                    print(f"Error loading {contrast_map_path}: {e}")
            else:
                print(f"{contrast_map_path} does not exist.")

print(f"\nFixed effects maps organized by subject")
print(f"Structure: task -> contrast -> subject -> NiftiImage")

Loaded fixed effects map for sub-s03/nBack/twoBack-oneBack
Loaded fixed effects map for sub-s10/nBack/twoBack-oneBack
Loaded fixed effects map for sub-s19/nBack/twoBack-oneBack
Loaded fixed effects map for sub-s29/nBack/twoBack-oneBack
Loaded fixed effects map for sub-s43/nBack/twoBack-oneBack
Loaded fixed effects map for sub-s03/nBack/match-mismatch
Loaded fixed effects map for sub-s10/nBack/match-mismatch
Loaded fixed effects map for sub-s19/nBack/match-mismatch
Loaded fixed effects map for sub-s29/nBack/match-mismatch
Loaded fixed effects map for sub-s43/nBack/match-mismatch
Loaded fixed effects map for sub-s03/nBack/task-baseline
Loaded fixed effects map for sub-s10/nBack/task-baseline
Loaded fixed effects map for sub-s19/nBack/task-baseline
Loaded fixed effects map for sub-s29/nBack/task-baseline
Loaded fixed effects map for sub-s43/nBack/task-baseline
Loaded fixed effects map for sub-s03/nBack/response_time
Loaded fixed effects map for sub-s10/nBack/response_time
Loaded fixed eff

# general loading and plotting functions that can apply across all tasks

In [8]:
def standardize_mask(mask_img, dtype=bool):
    """Ensure mask has consistent data type and format"""
    mask_data = mask_img.get_fdata()
    # Convert to binary and specified dtype
    binary_data = (mask_data > 0).astype(dtype)
    return image.new_img_like(mask_img, binary_data)

# Parcellate across all task/contrasts/subjects

In [9]:
# Check structure of loaded fixed effects maps
print("Sample structure check:")
for task in list(TASKS)[:2]:  # Just check first 2 tasks
    for contrast in list(CONTRASTS[task])[:2]:  # Just first 2 contrasts
        if task in fe_all_contrast_maps and contrast in fe_all_contrast_maps[task]:
            subjects_loaded = list(fe_all_contrast_maps[task][contrast].keys())
            print(f"{task}/{contrast}: {len(subjects_loaded)} subjects - {subjects_loaded}")

Sample structure check:
nBack/twoBack-oneBack: 5 subjects - ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']
nBack/match-mismatch: 5 subjects - ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']
flanker/incongruent-congruent: 5 subjects - ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']
flanker/task-baseline: 5 subjects - ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']


In [10]:
req_atlas = "smor"

# Select atlas configuration
if req_atlas == "schafer":
    main_dir = SCHAFER_PARCELLATED_DIR
    main_files = schafer_files
    date_updated = schafer_date_updated
    atlas_obj = schaefer_atlas
elif req_atlas == "smor":
    main_dir = SMORG_PARCELLATED_DIR
    main_files = smor_files
    date_updated = smor_date_updated
    atlas_obj = smorgasbord_atlas
else:
    raise ValueError(f"Unknown atlas: {req_atlas}. Use 'schafer' or 'smor'")

In [11]:
# Check the atlas data
atlas_data = smorgasbord_atlas.maps.get_fdata()
print(f"Atlas data shape: {atlas_data.shape}")
print(f"Unique ROI values: {len(np.unique(atlas_data))}")
print(f"ROI range: {atlas_data.min()} to {atlas_data.max()}")

# Check if number of labels matches number of ROIs
unique_rois = np.unique(atlas_data)
print(f"\nNumber of unique ROIs (excluding 0/background): {len(unique_rois[unique_rois > 0])}")
print(f"Number of labels: {len(smorgasbord_atlas.labels)}")

# Inspect first few labels
print(f"\nFirst 5 labels:")
for i in range(5):
    print(f"  {i}: {smorgasbord_atlas.labels[i]}")

Atlas data shape: (193, 229, 193)
Unique ROI values: 430
ROI range: 0.0 to 606.0

Number of unique ROIs (excluding 0/background): 429
Number of labels: 429

First 5 labels:
  0: 7Networks_LH_Vis_1
  1: 7Networks_LH_Vis_2
  2: 7Networks_LH_Vis_3
  3: 7Networks_LH_Vis_4
  4: 7Networks_LH_Vis_5


In [12]:
# Get actual ROI values
atlas_data = smorgasbord_atlas.maps.get_fdata()
roi_values = np.unique(atlas_data)
roi_values = roi_values[roi_values > 0]  # Remove background (0)

print(f"ROI values range: {roi_values.min()} to {roi_values.max()}")
print(f"First 10 ROI values: {roi_values[:10]}")
print(f"Last 10 ROI values: {roi_values[-10:]}")

# Create a mapping from ROI value to label
roi_to_label = dict(zip(roi_values, smorgasbord_atlas.labels))

# Example: What label corresponds to ROI value 1?
print(f"\nROI value 1 -> {roi_to_label.get(1, 'Not found')}")
print(f"ROI value 606 -> {roi_to_label.get(606, 'Not found')}")

ROI values range: 1.0 to 606.0
First 10 ROI values: [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]
Last 10 ROI values: [517. 518. 519. 521. 601. 602. 603. 604. 605. 606.]

ROI value 1 -> 7Networks_LH_Vis_1
ROI value 606 -> amygdala_basolateral_right


In [14]:
# Add the ROI-to-label mapping to atlas
smorgasbord_atlas.roi_values = roi_values
smorgasbord_atlas.roi_to_label = roi_to_label

In [15]:
# Parcellate each subject's fixed effects map individually,
# then average parcel values across subjects (not voxel-wise averaging)
def save_subset_of_parcels(run_num, atlas='schaefer'):
    """
    Extract parcel-wise activation values from fixed effects maps.
    
    1. Parcellate each subject's fixed effects map individually (average within parcels)
    2. Average the parcel values across subjects
    
    Parameters:
    -----------
    run_num : str/int
        Run identifier for output filename
    atlas : str or Bunch
        Either 'schaefer' or a custom atlas Bunch object (e.g., smorgasbord_atlas)
    """
    parcel_dict = {}
    
    # Select atlas
    if atlas == 'schaefer':
        current_atlas = schaefer_atlas
        atlas_name = 'schaefer400'
    else:
        current_atlas = atlas
        atlas_name = 'smor_parcel'

    # Create masker once (reuse for all subjects)
    masker = NiftiLabelsMasker(
        labels_img=current_atlas.maps,
        standardize=False, 
        memory='nilearn_cache',
        strategy='mean'  # Average activation within each region
    )
    
    # Handle labels (decode if bytes) - do this once
    region_labels = [
        label.decode('utf-8') if isinstance(label, bytes) else label 
        for label in current_atlas.labels
    ]
        
    for curr_task in requested_task_contrasts:
        parcel_dict[curr_task] = {}

        for curr_contrast in requested_task_contrasts[curr_task]:
            print(f"Processing: {curr_task} - {curr_contrast} - Fixed effects")

            try:
                # Step 1: Parcellate each subject's fixed effects map individually
                subject_parcel_values = {}  # subject -> array of parcel values
                
                for subject in SUBJECTS:
                    if subject not in fe_all_contrast_maps[curr_task][curr_contrast]:
                        print(f"  Warning: No fixed effects map for {subject}")
                        continue
                    
                    try:
                        fmri_img = fe_all_contrast_maps[curr_task][curr_contrast][subject]
                        print(f"  Parcellating {subject} | Shape: {fmri_img.shape}")
                        
                        # Extract regional values for this subject
                        regional_values = masker.fit_transform(fmri_img)
                        subject_parcel_values[subject] = regional_values.flatten()
                        
                    except Exception as e:
                        print(f"  Error parcellating {subject}: {str(e)}")
                        continue
                
                if len(subject_parcel_values) == 0:
                    print(f"  No valid subjects for {curr_task}/{curr_contrast}")
                    parcel_dict[curr_task][curr_contrast] = None
                    continue
                else:
                    # save these individual parcellated fixed effect maps:
                    output_dir = 'smor_parcel_dfs_fixed'
                    os.makedirs(output_dir, exist_ok=True)
                    
                    output_file = f'{output_dir}/discovery_parcel_fixedeffects_indiv_updated_1116_{run_num}.pkl'
                    with open(output_file, 'wb') as f:
                        pickle.dump(subject_parcel_values, f)
                    
                    print(f"\nSaved to: {output_file}")
                
                                
                # Step 2: Average parcel values across subjects
                # Stack all subject parcel values into a matrix (subjects x parcels)
                parcel_matrix = np.array([subject_parcel_values[subj] for subj in subject_parcel_values.keys()])
                
                # Average across subjects (axis=0)
                avg_parcel_values = np.mean(parcel_matrix, axis=0)
                std_parcel_values = np.std(parcel_matrix, axis=0)
                n_subjects = parcel_matrix.shape[0]
                
                # Create activation dataframe with averaged values
                activation_df = pd.DataFrame({
                    'region': region_labels,
                    'activation': avg_parcel_values,
                    'activation_std': std_parcel_values,
                    'n_subjects': n_subjects
                })
                
                # Add network information (handle both Schaefer and other atlases)
                activation_df['network'] = activation_df['region'].apply(
                    lambda x: x.split('_')[1] if 'Networks' in x else 'Subcortical'
                )
                
                # Add ROI values if available (for smorgasbord atlas)
                if hasattr(current_atlas, 'roi_values'):
                    activation_df['roi_value'] = current_atlas.roi_values
                
                # Save the activation df
                parcel_dict[curr_task][curr_contrast] = activation_df
                print(f"  Extracted {len(activation_df)} regions from {n_subjects} subjects")
                    
            except KeyError as e:
                print(f"Warning: Data not found - Missing key: {e}")
                parcel_dict[curr_task][curr_contrast] = None
                continue
            except Exception as e:
                print(f"Error processing data: {str(e)}")
                import traceback
                traceback.print_exc()
                parcel_dict[curr_task][curr_contrast] = None
                continue

    # Save to pickle
    output_dir = 'smor_parcel_dfs_fixed'
    os.makedirs(output_dir, exist_ok=True)
    
    output_file = f'{output_dir}/discovery_parcel_fixedeffects_mean_updated_1116_{run_num}.pkl'
    with open(output_file, 'wb') as f:
        pickle.dump(parcel_dict, f)
    
    print(f"\nSaved to: {output_file}")
    
    # Delete from cache
    del parcel_dict
    cleanup_memory()

# With Smorgasbord atlas
# save_subset_of_parcels(subject_list, run_num=1, atlas=smorgasbord_atlas)

In [16]:
save_subset_of_parcels(2, atlas=smorgasbord_atlas)

Processing: nBack - twoBack-oneBack - Fixed effects
  Parcellating sub-s03 | Shape: (97, 115, 97)
  Parcellating sub-s10 | Shape: (97, 115, 97)
  Parcellating sub-s19 | Shape: (97, 115, 97)
  Parcellating sub-s29 | Shape: (97, 115, 97)
  Parcellating sub-s43 | Shape: (97, 115, 97)

Saved to: smor_parcel_dfs_fixed/discovery_parcel_fixedeffects_indiv_updated_1116_2.pkl
  Extracted 429 regions from 5 subjects
Processing: nBack - match-mismatch - Fixed effects
  Parcellating sub-s03 | Shape: (97, 115, 97)
  Parcellating sub-s10 | Shape: (97, 115, 97)
  Parcellating sub-s19 | Shape: (97, 115, 97)
  Parcellating sub-s29 | Shape: (97, 115, 97)
  Parcellating sub-s43 | Shape: (97, 115, 97)

Saved to: smor_parcel_dfs_fixed/discovery_parcel_fixedeffects_indiv_updated_1116_2.pkl
  Extracted 429 regions from 5 subjects
Processing: nBack - task-baseline - Fixed effects
  Parcellating sub-s03 | Shape: (97, 115, 97)
  Parcellating sub-s10 | Shape: (97, 115, 97)
  Parcellating sub-s19 | Shape: (97, 11