In [1]:
# Fixed Effects Parcellation
# Parcellates fixed effects maps with the smorgasbord atlas (includes both cortical and subcortical) 
# and saves the df in smor_parcel_dfs_fixed

# 1. Load each subject's fixed effects map individually (and save this parcellated map)
# 2. Parcellate each subject's map (average t-stats within parcels per subject) and saves this
# 3. Average the parcel values across subjects and saves this

In [2]:
import os
import numpy as np
import pandas as pd
import nibabel as nib
import nilearn
import json
import datetime
import pickle
import seaborn as sns
import gc
import psutil
import math
import scipy.stats as stats
from matplotlib.patches import Patch
from nilearn import plotting
from nilearn.glm.first_level import FirstLevelModel
from nilearn.glm.second_level import SecondLevelModel
from nilearn.glm import threshold_stats_img
from nilearn.image import concat_imgs, mean_img, index_img
from nilearn.reporting import make_glm_report
from nilearn import masking, image
from nilearn import datasets
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
from collections import defaultdict
from nilearn.maskers import NiftiLabelsMasker
from sklearn.utils import Bunch

# Import shared utilities and configuration
# need to do it this way because in a sub-directory (later turn config and utils into part of a package)
from utils import (
    TASKS, CONTRASTS, SUBJECTS, SESSIONS, ENCOUNTERS,
    build_first_level_contrast_map_path, is_valid_contrast_map, clean_z_map_data,
    convert_to_regular_dict, create_smor_atlas,load_smor_atlas, load_schaefer_atlas, cleanup_memory, standardize_mask
)
from config import BASE_DIR, INPUT_LEVEL, OUTPUT_DIRS

In [3]:
encounters = ['01', '02','03','04','05']

# compile all requested contrasts into one list
compiled_req_contrasts = []
for task in TASKS:
    for contrast in CONTRASTS[task]:
        if (contrast not in compiled_req_contrasts):
            compiled_req_contrasts.append(contrast)

In [4]:
# get smorgasbord atlas
# GET ATLASES:
smor_atlas_path = 'processed_data_dfs/smor_parcel_dfs/smorgasbord_atlas_files/smorgasbord_atlas.pkl'
if not os.path.exists(smor_atlas_path):
    print("Smorgasbord atlas not found. Creating new atlas...")
    create_smor_atlas()
else:
    print("atlas already exists")
    smorgasbord_atlas = load_smor_atlas()
SMORG_IMG = smorgasbord_atlas.maps
SMORG_DATA = SMORG_IMG.get_fdata()

atlas already exists
Loading Smorgasbord atlas...
Atlas loaded with 429 regions
Atlas shape: (193, 229, 193)


# Load fixed effect maps

In [5]:
# load the fixed effects maps
# LOADING ALL FIXED EFFECTS:
# Organize by subject, then parcellate each subject individually
# Then average parcel values across subjects (not voxel-wise averaging)

# Organize fixed effects maps by subject (not averaged)
fe_all_contrast_maps = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))

for task in TASKS:
    for contrast_name in CONTRASTS[task]:
        for subject in SUBJECTS:
            filename = f'{subject}_task-{task}_contrast-{contrast_name}_rtmodel-rt_centered_stat-fixed-effects.nii.gz'
            contrast_map_path = os.path.join(BASE_DIR, INPUT_LEVEL, subject, task,'fixed_effects',filename)

            if os.path.exists(contrast_map_path):
                try:
                    # Load the map for this subject
                    fe_all_contrast_maps[task][contrast_name][subject] = nib.load(contrast_map_path)
                    print(f"Loaded fixed effects map for {subject}/{task}/{contrast_name}")
                except Exception as e:
                    print(f"Error loading {contrast_map_path}: {e}")
            else:
                print(f"{contrast_map_path} does not exist.")

print(f"\nFixed effects maps organized by subject")
print(f"Structure: task -> contrast -> subject -> NiftiImage")

Loaded fixed effects map for sub-s03/nBack/twoBack-oneBack
Loaded fixed effects map for sub-s10/nBack/twoBack-oneBack
Loaded fixed effects map for sub-s19/nBack/twoBack-oneBack
Loaded fixed effects map for sub-s29/nBack/twoBack-oneBack
Loaded fixed effects map for sub-s43/nBack/twoBack-oneBack
Loaded fixed effects map for sub-s03/nBack/match-mismatch
Loaded fixed effects map for sub-s10/nBack/match-mismatch
Loaded fixed effects map for sub-s19/nBack/match-mismatch
Loaded fixed effects map for sub-s29/nBack/match-mismatch
Loaded fixed effects map for sub-s43/nBack/match-mismatch
Loaded fixed effects map for sub-s03/nBack/task-baseline
Loaded fixed effects map for sub-s10/nBack/task-baseline
Loaded fixed effects map for sub-s19/nBack/task-baseline
Loaded fixed effects map for sub-s29/nBack/task-baseline
Loaded fixed effects map for sub-s43/nBack/task-baseline
Loaded fixed effects map for sub-s03/nBack/response_time
Loaded fixed effects map for sub-s10/nBack/response_time
Loaded fixed eff

# Parcellate across all task/contrasts/subjects

In [6]:
# Check structure of loaded fixed effects maps
print("Sample structure check:")
for task in list(TASKS)[:2]:  # Just check first 2 tasks
    for contrast in list(CONTRASTS[task])[:2]:  # Just first 2 contrasts
        if task in fe_all_contrast_maps and contrast in fe_all_contrast_maps[task]:
            subjects_loaded = list(fe_all_contrast_maps[task][contrast].keys())
            print(f"{task}/{contrast}: {len(subjects_loaded)} subjects - {subjects_loaded}")

Sample structure check:
nBack/twoBack-oneBack: 5 subjects - ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']
nBack/match-mismatch: 5 subjects - ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']
flanker/incongruent-congruent: 5 subjects - ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']
flanker/task-baseline: 5 subjects - ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']


In [7]:
encounters = ['01', '02','03','04','05']

def save_fixed_subset_of_parcels(run_num, date, atlas='schaefer'):
    """
    Extract parcel-wise activation values from fixed effects maps.
    
    1. Parcellate each subject's fixed effects map individually (average within parcels)
    2. Average the parcel values across subjects
    
    Parameters:
    -----------
    subs_requested : list
        Subject IDs to process
    run_num : str/int
        Run identifier for output filename
    atlas : str
        Either 'schaefer' or 'smor'
    data: str
        Added as processed date
    """
    parcel_dict = {}
    
    # Select atlas
    if atlas == 'schaefer':
        current_atlas = load_schaefer_atlas()
        atlas_name = 'schaefer400'
        print(f'doing {atlas_name}')
    else:
        current_atlas = load_smor_atlas()
        atlas_name = 'smor_parcel'
        print(f'doing {atlas_name}')

    for curr_subj in SUBJECTS:
        # STEP 1: parcellate each subjects task/contrast fixed effect map
        parcel_dict[curr_subj] = {}
        
        for curr_task in CONTRASTS:
            parcel_dict[curr_subj][curr_task] = {}
    
            for curr_contrast in CONTRASTS[curr_task]:
                parcel_dict[curr_subj][curr_task][curr_contrast] = {}
    
                print(f"Processing: {curr_subj} - {curr_task} - {curr_contrast}")
    
                try:
                    fmri_img = fe_all_contrast_maps[curr_task][curr_contrast][curr_subj]
                    print(f"fMRI data loaded | Shape: {fmri_img.shape}")
    
                    # Create the masker and get regional avg activation
                    masker = NiftiLabelsMasker(
                        labels_img=current_atlas.maps,
                        standardize=False, 
                        memory='nilearn_cache',
                        strategy='mean'  # Average activation within each region
                    )
                    
                    # Extract regional values
                    regional_values = masker.fit_transform(fmri_img)
                    
                    # Handle labels (decode if bytes)
                    region_labels = [
                        label.decode('utf-8') if isinstance(label, bytes) else label 
                        for label in current_atlas.labels
                    ]
                    
                    # Create activation dataframe
                    activation_df = pd.DataFrame({
                        'region': region_labels,
                        'activation': regional_values.flatten()
                    })
                    
                    # Add network information (handle both Schaefer and other atlases)
                    activation_df['network'] = activation_df['region'].apply(
                        lambda x: x.split('_')[1] if 'Networks' in x else 'Subcortical'
                    )
                    
                    # Add ROI values if available (for smorgasbord atlas)
                    if hasattr(current_atlas, 'roi_values'):
                        activation_df['roi_value'] = current_atlas.roi_values
                    
                    # Save the activation df
                    parcel_dict[curr_subj][curr_task][curr_contrast] = activation_df
                    print(f"Extracted {len(activation_df)} regions")
                    
                except KeyError as e:
                    print(f"Warning: Data not found - Missing key: {e}")
                    parcel_dict[curr_subj][curr_task][curr_contrast] = None
                    continue
                except Exception as e:
                    print(f"Error processing data: {str(e)}")
                    parcel_dict[curr_subj][curr_task][curr_contrast] = None
                    continue

    # Save to pickle
    output_dir = f'processed_data_dfs/{atlas_name}_dfs_fixed'
    os.makedirs(output_dir, exist_ok=True)
    
    output_file = f'{output_dir}/discovery_parcel_indiv_mean_updated_{date}_{run_num}.pkl'
    with open(output_file, 'wb') as f:
        pickle.dump(parcel_dict, f)
    
    print(f"\nSaved to: {output_file}")

In [8]:
save_fixed_subset_of_parcels(1,'1117', atlas="smor")

Loading Smorgasbord atlas...
Atlas loaded with 429 regions
Atlas shape: (193, 229, 193)
doing smor_parcel
Processing: sub-s03 - nBack - twoBack-oneBack
fMRI data loaded | Shape: (97, 115, 97)
Extracted 429 regions
Processing: sub-s03 - nBack - match-mismatch
fMRI data loaded | Shape: (97, 115, 97)
Extracted 429 regions
Processing: sub-s03 - nBack - task-baseline
fMRI data loaded | Shape: (97, 115, 97)
Extracted 429 regions
Processing: sub-s03 - nBack - response_time
fMRI data loaded | Shape: (97, 115, 97)
Extracted 429 regions
Processing: sub-s03 - flanker - incongruent-congruent
fMRI data loaded | Shape: (97, 115, 97)
Extracted 429 regions
Processing: sub-s03 - flanker - task-baseline
fMRI data loaded | Shape: (97, 115, 97)
Extracted 429 regions
Processing: sub-s03 - directedForgetting - neg-con
fMRI data loaded | Shape: (97, 115, 97)
Extracted 429 regions
Processing: sub-s03 - directedForgetting - task-baseline
fMRI data loaded | Shape: (97, 115, 97)
Extracted 429 regions
Processing:

In [12]:
def average_fixed_subset_of_parcels(run_num, date, atlas='schaefer'):
    # Select atlas
    if atlas == 'schaefer':
        atlas_name = 'schaefer400'
        current_atlas = load_schaefer_atlas()
    else:
        atlas_name = 'smor_parcel'
        current_atlas = load_smor_atlas()

    # Load the saved parcellated fixed effects maps for each subject/task/contrast:
    saved_dir = f'processed_data_dfs/{atlas_name}_dfs_fixed'
    input_file = f'{saved_dir}/discovery_parcel_indiv_mean_updated_{date}_{run_num}.pkl'
    try:
        with open(input_file, 'rb') as f:
            parcel_dict = pickle.load(f)
    except Exception as e:
        print(f"Error loading the file: {e}")
        return

    # Average these parcellated maps for each task/contrast and save
    averaged_parcel_dict = {}
    for task in TASKS:
        averaged_parcel_dict[task] = {}
        for contrast in CONTRASTS[task]:
            valid_subjects = [subj for subj in SUBJECTS if parcel_dict[subj][task][contrast] is not None]
            if not valid_subjects:
                print(f"No valid data for {task} - {contrast}")
                continue
            
            list_parcel_maps = [parcel_dict[subj][task][contrast]['activation'] for subj in valid_subjects]
            parcel_matrix = np.array(list_parcel_maps)

            # Average across subjects (axis=0)
            avg_parcel_values = np.mean(parcel_matrix, axis=0)
            std_parcel_values = np.std(parcel_matrix, axis=0)
            n_subjects = parcel_matrix.shape[0]
    
            # Create activation dataframe with averaged values
            activation_df = pd.DataFrame({
                'region': parcel_dict[valid_subjects[0]][task][contrast]['region'],
                'activation': avg_parcel_values,
                'activation_std': std_parcel_values,
                'n_subjects': n_subjects
            })
            
            # Add network information (handle both Schaefer and other atlases)
            activation_df['network'] = activation_df['region'].apply(
                lambda x: x.split('_')[1] if 'Networks' in x else 'Subcortical'
            )
            
            # Add ROI values if available (for smorgasbord atlas)
            if hasattr(current_atlas, 'roi_values'):
                activation_df['roi_value'] = current_atlas.roi_values
            
            # Save the activation df
            averaged_parcel_dict[task][contrast] = activation_df
            print(f"  Extracted {len(activation_df)} regions from {n_subjects} subjects for {task}|{contrast} averaging")
    
    # Save to pickle
    output_dir = f'processed_data_dfs/{atlas_name}_dfs_fixed'
    os.makedirs(output_dir, exist_ok=True)
    
    output_file = f'{output_dir}/discovery_parcel_averaged_across_{date}_{run_num}.pkl'
    with open(output_file, 'wb') as f:
        pickle.dump(averaged_parcel_dict, f)
    
    print(f"\nSaved to: {output_file}")

    print(averaged_parcel_dict)

In [13]:
average_fixed_subset_of_parcels(1,'1117', atlas="smor")

Loading Smorgasbord atlas...
Atlas loaded with 429 regions
Atlas shape: (193, 229, 193)
  Extracted 429 regions from 5 subjects for nBack|twoBack-oneBack averaging
  Extracted 429 regions from 5 subjects for nBack|match-mismatch averaging
  Extracted 429 regions from 5 subjects for nBack|task-baseline averaging
  Extracted 429 regions from 5 subjects for nBack|response_time averaging
  Extracted 429 regions from 5 subjects for flanker|incongruent-congruent averaging
  Extracted 429 regions from 5 subjects for flanker|task-baseline averaging
  Extracted 429 regions from 5 subjects for directedForgetting|neg-con averaging
  Extracted 429 regions from 5 subjects for directedForgetting|task-baseline averaging
  Extracted 429 regions from 5 subjects for directedForgetting|response_time averaging
  Extracted 429 regions from 5 subjects for goNogo|nogo_success-go averaging
  Extracted 429 regions from 5 subjects for goNogo|nogo_success averaging
  Extracted 429 regions from 5 subjects for goN

In [11]:
print(averaged_parcel_dict)

NameError: name 'averaged_parcel_dict' is not defined