Cleaned up coding pipeline for the discovery sample exploratory analyses. 

1_generate_second_model.ipynb loads in the first level contrast maps for all task/contrasts for each of the 5 discovery sample subjects. Then, it generates second level models (for each task/contrast, it creates 1 second level model for each encounter of the task). For example, since every subject has 5 sessions with a given task--although the specific session numbers are different--the subjects get combined for their first encounter of a given task, then the next model combines them for their second encounter of a given task, and so on. 

These second level models are then saved in the home directory: '/home/users/nklevak/network_data_updated/' under each specific task name folder (i.e. cuedTS, directedForgetting, flanker, goNogo, nBack, shapeMatching, spatialTS, stopSignal) and then the corresponding contrast folder. Filename format is: {taskName}_{contrastName}encounter{encounter_number}. The encounter numbers range from 1-5.

In [2]:
import os
import numpy as np
import pandas as pd
import nibabel as nib
import nilearn
import json
import datetime
import warnings
from nilearn import plotting
from nilearn.glm.first_level import FirstLevelModel
from nilearn.glm.second_level import SecondLevelModel
from nilearn.glm import threshold_stats_img
from nilearn.image import concat_imgs, mean_img, index_img
from nilearn.reporting import make_glm_report
import matplotlib.pyplot as plt
from collections import defaultdict

In [8]:
# where the updated second levels are saved (with the fixed tedana pipeline):
OUTPUT_DIR = '/home/users/nklevak/network_data_second_lev/'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# where the first level contrast maps are stored
BASE_DIR = '/oak/stanford/groups/russpold/data/network_grant/discovery_BIDS_20250402/derivatives/'
LEVEL = 'output_lev1_mni'

# subjects in the discovery sample
SUBJECTS = ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']

In [9]:
# relevant task and contrast and session data
TASKS = ["nBack","flanker","directedForgetting","goNogo", "shapeMatching", "stopSignal", "cuedTS", "spatialTS"]
CONTRASTS = {}
CONTRASTS["nBack"] = ["twoBack-oneBack", "match-mismatch","task-baseline","response_time"] # the nback contrasts
CONTRASTS["flanker"] = ["incongruent-congruent", "task-baseline", "incongruent-congruent","response_time"]
CONTRASTS["directedForgetting"] = ["neg-con", "task-baseline","response_time"]
CONTRASTS["goNogo"] = ["nogo_success-go", "nogo_success","task-baseline","response_time"] # go_rtModel check
CONTRASTS["shapeMatching"] = ["DDD", "DDS", "DNN", "DSD", "main_vars", "SDD", "SNN", "SSS", "task-baseline","response_time"]
CONTRASTS["stopSignal"] = ["go", "stop_failure-go", "stop_failure", "stop_failure-stop_success", "stop_success-go", "stop_success", "stop_success-stop_failure", "task-baseline","response_time"]
CONTRASTS["cuedTS"] = ["cue_switch_cost", "task_switch_cost", "task_switch_cue_switch-task_stay_cue_stay", "task-baseline","response_time"]
CONTRASTS["spatialTS"] = ["cue_switch_cost", "task_switch_cost", "task_switch_cue_switch-task_stay_cue_stay", "task-baseline","response_time"]
SESSIONS = ['ses-01', 'ses-02', 'ses-03', 'ses-04', 'ses-05', 'ses-06', 'ses-07', 'ses-08', 'ses-09','ses-10']

# number of encounters each subject has with a task
max_num_encounters = 5

In [10]:
# helper functions:

def build_contrast_map_path(base_dir, level, subject, session, task, contrast_name):
    """Build the file path for a contrast map."""
    filename = f'{subject}_{session}_task-{task}_contrast-{contrast_name}_rtmodel-rt_centered_stat-effect-size.nii.gz'
    return os.path.join(base_dir, level, subject, task, 'indiv_contrasts', filename)

def is_valid_contrast_map(img_path):
    """Check if a contrast map has sufficient variance and no NaN values."""
    try:
        img = nib.load(img_path)
        data = img.get_fdata()
        return np.std(data) > 1e-10 and not np.isnan(data).any()
    except Exception as e:
        print(f"Error validating {img_path}: {e}")
        return False
        
def clean_z_map_data(z_map, task, contrast_name, encounter):
    """Clean z-map data by handling NaN and infinity values."""
    data = z_map.get_fdata()
    if np.isnan(data).any() or np.isinf(data).any():
        data = np.nan_to_num(data)
        z_map = nib.Nifti1Image(data, z_map.affine, z_map.header)
        print(f"Warning: Fixed NaN/Inf values in {task}:{contrast_name}:encounter-{encounter+1}")
    return z_map

In [11]:
# Load first level contrast map filepaths for all tasks and contrasts
all_contrast_maps = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
encounter_maps = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))

for task in TASKS:
    for contrast_name in CONTRASTS[task]:
        for subject in SUBJECTS:
            overall_encounter_count = 0
            
            for session in SESSIONS:
                contrast_map_path = build_contrast_map_path(BASE_DIR, LEVEL, subject, session, task, contrast_name)
                
                if os.path.exists(contrast_map_path):
                    all_contrast_maps[task][contrast_name][subject].append(contrast_map_path)
                    encounter_maps[task][contrast_name][subject][overall_encounter_count] = contrast_map_path
                    overall_encounter_count += 1

# Sort contrast maps by encounter number (grouping all subjects together)
session_contrast_maps = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
session_design_rows = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

for task in TASKS:
    for contrast_name in CONTRASTS[task]:
        for encounter in range(max_num_encounters):
            for subject in SUBJECTS:
                subject_numeric = np.float64(float(subject[5:]))
                
                if (subject in encounter_maps[task][contrast_name] and 
                    encounter in encounter_maps[task][contrast_name][subject]):
                    
                    map_path = encounter_maps[task][contrast_name][subject][encounter]
                    session_contrast_maps[task][contrast_name][encounter].append(map_path)
                    session_design_rows[task][contrast_name][encounter].append({
                        'subject': subject_numeric,
                        # Add any subject-specific covariates here
                        # 'performance': performance[subject][session]
                    })

# Create all second-level model z-maps (1 per encounter)
secondLevelZmaps = defaultdict(lambda: defaultdict(dict))

for task in TASKS:
    for contrast_name in CONTRASTS[task]:
        for encounter in range(5):  # You can make this configurable
            maps = session_contrast_maps[task][contrast_name][encounter]
            rows = session_design_rows[task][contrast_name][encounter]
            
            # Skip if fewer than minimum required subjects
            if len(maps) < 4:
                print(f"Skipping {task}:{contrast_name}:encounter-{encounter+1} (insufficient data)")
                secondLevelZmaps[task][contrast_name][encounter] = None
                continue
            
            try:
                # Create design matrix
                dm = pd.DataFrame(rows)
                dm['intercept'] = 1  # Add intercept column (group effect)
                
                # Check for design matrix issues
                if dm.shape[0] <= dm.shape[1]:
                    print(f"Warning: More regressors than subjects for {task}:{contrast_name}:encounter-{encounter+1}")
                
                # Filter out invalid maps
                valid_maps = []
                valid_rows = []
                
                for i, map_path in enumerate(maps):
                    if is_valid_contrast_map(map_path):
                        valid_maps.append(nib.load(map_path))
                        valid_rows.append(dm.iloc[i])
                
                if len(valid_maps) < 4:
                    print(f"Skipping {task}:{contrast_name}:encounter-{encounter+1} (insufficient valid maps)")
                    secondLevelZmaps[task][contrast_name][encounter] = None
                    continue
                
                # Recreate design matrix with only valid subjects
                valid_dm = pd.DataFrame(valid_rows).reset_index(drop=True)
                
                # Create and fit second-level model
                second_level_model = SecondLevelModel(smoothing_fwhm=8.0)
                
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=RuntimeWarning)
                    second_level_model.fit(valid_maps, design_matrix=valid_dm)
                    z_map = second_level_model.compute_contrast(
                        second_level_contrast='intercept',
                        output_type='z_score'
                    )
                
                # Clean z-map data
                z_map = clean_z_map_data(z_map, task, contrast_name, encounter)
                
                secondLevelZmaps[task][contrast_name][encounter] = z_map
                print(f"Completed {task}:{contrast_name}:encounter-{encounter+1}")
                
            except Exception as e:
                print(f"Error processing {task}:{contrast_name}:encounter-{encounter+1}: {e}")
                secondLevelZmaps[task][contrast_name][encounter] = None

Completed nBack:twoBack-oneBack:encounter-1
Completed nBack:twoBack-oneBack:encounter-2
Completed nBack:twoBack-oneBack:encounter-3
Completed nBack:twoBack-oneBack:encounter-4
Completed nBack:twoBack-oneBack:encounter-5
Completed nBack:match-mismatch:encounter-1
Completed nBack:match-mismatch:encounter-2
Completed nBack:match-mismatch:encounter-3
Completed nBack:match-mismatch:encounter-4
Completed nBack:match-mismatch:encounter-5
Completed nBack:task-baseline:encounter-1
Completed nBack:task-baseline:encounter-2
Completed nBack:task-baseline:encounter-3
Completed nBack:task-baseline:encounter-4
Completed nBack:task-baseline:encounter-5
Completed nBack:response_time:encounter-1
Completed nBack:response_time:encounter-2
Completed nBack:response_time:encounter-3
Completed nBack:response_time:encounter-4
Completed nBack:response_time:encounter-5
Completed flanker:incongruent-congruent:encounter-1
Completed flanker:incongruent-congruent:encounter-2
Completed flanker:incongruent-congruent:e

In [12]:
# save all of the second level zmaps to the output directory
# Create a function to save zmaps with metadata
def save_zmaps_with_metadata(zmaps_dict, output_dir=OUTPUT_DIR):
    """
    Save zmaps with corresponding metadata json files in BIDS-like format.
    
    Parameters:
    -----------
    zmaps_dict : dict
        Nested dictionary of zmaps organized by task, contrast, and encounter.
    output_dir : str
        Directory where files will be saved.
    """
    # Get current date for metadata
    current_date = datetime.datetime.now().strftime('%Y-%m-%d')

    num_saved = 0
    num_skipped = 0
    for task in zmaps_dict:
        # Create task directory
        task_dir = os.path.join(output_dir, task)
        os.makedirs(task_dir, exist_ok=True)
        
        for contrast_name in zmaps_dict[task]:
            # Create contrast directory
            contrast_dir = os.path.join(task_dir, contrast_name)
            os.makedirs(contrast_dir, exist_ok=True)
            
            for encounter_idx, zmap in zmaps_dict[task][contrast_name].items():
                # Define base filename
                base_filename = f"{task}_{contrast_name}encounter{encounter_idx+1}"
                full_path = os.path.join(contrast_dir, base_filename)

                try:
                    # Save NIfTI file
                    zmap.to_filename(f"{full_path}.nii.gz")
                    num_saved += 1
                    
                    # Create and save metadata
                    metadata = {
                        "TaskName": task,
                        "ContrastName": contrast_name,
                        "EncounterNumber": encounter_idx + 1,
                        "DataType": "z_statistic_map",
                        "AnalysisDate": current_date,
                        "SmoothinFWHM": 8.0,  # From model
                        "ContrastType": "intercept",  # From contrast
                        "OutputType": "z_score"  # From output type
                    }
    
                    # Save metadata to JSON file
                    with open(f"{full_path}.json", 'w') as f:
                        json.dump(metadata, f, indent=4)
                except Exception as e:
                    print(f"Error saving {task}:{contrast_name}:encounter-{encounter_idx+1}: {e}")
                    num_skipped += 1
                
    print(f"Saved {num_saved} zmaps with metadata. Skipped {num_skipped} maps.")
    
# save updated zmaps to folder
save_zmaps_with_metadata(secondLevelZmaps)

Error saving goNogo:nogo_success-go:encounter-5: 'NoneType' object has no attribute 'to_filename'
Error saving goNogo:nogo_success:encounter-5: 'NoneType' object has no attribute 'to_filename'
Error saving goNogo:task-baseline:encounter-5: 'NoneType' object has no attribute 'to_filename'
Error saving goNogo:response_time:encounter-5: 'NoneType' object has no attribute 'to_filename'
Saved 211 zmaps with metadata. Skipped 4 maps.
