In [27]:
# !pip install nibabel nilearn scikit-learn pandas numpy matplotlib
import os
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt
from collections import defaultdict

from nilearn.maskers import NiftiMasker
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score
from scipy.stats import pearsonr

%matplotlib inline

## Load PsychoPy timing data

In [12]:
# Convert start/stop times to scan indices
def time_to_index(times, TR=1.5):
    """
    Map times (in seconds) onto the nearest scan index.

    Parameters
    ----------
    times : scalar or array-like
        Time stamps in seconds.
    TR : float, optional
        Repetition time in seconds (duration of one scan).

    Returns
    -------
    Integer-typed NumPy result holding ``round(times / TR)``; rounding is
    done by ``np.round``, so exact halves go to the nearest even index.
    """
    seconds = np.array(times)
    scan_positions = seconds / TR
    return np.round(scan_positions).astype(int)

In [42]:
def get_fixed_length_tr_range(start_times, TR=1.5, duration_sec=10):
    """
    Turn event onsets into fixed-length windows of TR indices.

    Parameters
    ----------
    start_times : iterable of float
        Event onsets in seconds.
    TR : float, optional
        Repetition time in seconds.
    duration_sec : float, optional
        Window length in seconds; it is converted to a whole number of TRs
        by rounding (e.g. 10 s / 1.5 s = 6.67 -> 7 TRs).

    Returns
    -------
    list of list of int
        One list of consecutive TR indices per onset, each starting at the
        rounded onset index and all of the same length.
    """
    window_len = int(np.round(duration_sec / TR))

    def _window(onset_sec):
        # First TR of the window is the scan nearest to the onset.
        first_tr = int(np.round(onset_sec / TR))
        return list(range(first_tr, first_tr + window_len))

    return [_window(onset_sec) for onset_sec in start_times]
    
def convert_psychopy_time_to_fmri_index(beh_file, subject_id, TR=1.5):
    """
    Convert event onsets from a behavioral CSV into fixed-length TR windows.

    The image shown on each row is read from the column
    ``picture_<subject_id>``; rows where that value is missing are ignored.
    Onsets are read from the ``view.started``, ``recall.started`` and
    ``imagine_task.started`` columns when present and non-missing.

    Parameters
    ----------
    beh_file : str or file-like
        CSV source passed straight to ``pd.read_csv``.
    subject_id : int or str
        Used only to build the picture column name.
    TR : float, optional
        Repetition time in seconds.

    Returns
    -------
    dict
        ``'view'``    : image name -> list of TR indices
        ``'recall'``  : image name -> list of TR indices
        ``'imagine'`` : category ('dog', 'sunflower' or 'unknown') -> list of
                        TR-index lists, one per imagine event
        If an image name occurs on several rows, the last view/recall onset
        for it wins.
    """
    events = pd.read_csv(beh_file)
    picture_col = f'picture_{subject_id}'

    def _onset_to_trs(onset_sec):
        # Single-onset convenience wrapper around the fixed-length helper.
        return get_fixed_length_tr_range([onset_sec], TR)[0]

    # VIEW and RECALL: one fixed-length window per image.
    view_trs = {}
    recall_trs = {}
    per_image_targets = (('view.started', view_trs), ('recall.started', recall_trs))
    for _, trial in events.iterrows():
        stimulus = trial.get(picture_col)
        if pd.isna(stimulus):
            continue
        for onset_col, target in per_image_targets:
            onset = trial.get(onset_col)  # None (treated as missing) if the column is absent
            if pd.notna(onset):
                target[stimulus] = _onset_to_trs(onset)

    # IMAGINE: windows are pooled by category inferred from the image name.
    imagine_blocks = defaultdict(list)
    if 'imagine_task.started' in events.columns:
        imagine_trials = events.loc[events['imagine_task.started'].notna()]
        for _, trial in imagine_trials.iterrows():
            stimulus = trial.get(picture_col)
            if pd.isna(stimulus):
                continue
            lowered = stimulus.lower()
            # 'dog' is checked before 'sunflower'; anything else is 'unknown'.
            category = next(
                (label for label in ('dog', 'sunflower') if label in lowered),
                'unknown',
            )
            imagine_blocks[category].append(_onset_to_trs(trial['imagine_task.started']))

    return {
        'view': view_trs,
        'recall': recall_trs,
        'imagine': dict(imagine_blocks),
    }


In [43]:
# Example usage for subject 1:
# subj_behavior_file maps a subject ID to the path of that subject's behavioral CSV.
subj_behavior_file = {1:"psychopy_data/1_fmri design_2025-03-04_15h31.40.655.csv",
                      # NOTE(review): subject 3 points at a file prefixed "2_" — confirm this is the intended log.
                      3:"psychopy_data/2_fmri design_2025-03-05_15h36.18.659.csv",
                      4:"psychopy_data/4_fmri design_2025-03-05_14h28.37.417.csv"
                      }
# Build the view / recall / imagine TR-index lookups for subject 1 (reads column 'picture_1').
results_sub01 = convert_psychopy_time_to_fmri_index(subj_behavior_file[1], 1, TR=1.5)

In [44]:
# Inspect the view-condition lookup: image name -> list of TR indices.
results_sub01['view']

{'dog1.jpg': [15, 16, 17, 18, 19, 20, 21],
 'dog2.jpg': [32, 33, 34, 35, 36, 37, 38],
 'dog3.jpg': [49, 50, 51, 52, 53, 54, 55],
 'dog4.jpg': [67, 68, 69, 70, 71, 72, 73],
 'dog5.jpg': [99, 100, 101, 102, 103, 104, 105],
 'dog6.jpg': [117, 118, 119, 120, 121, 122, 123],
 'dog7.jpg': [134, 135, 136, 137, 138, 139, 140],
 'dog8.jpg': [151, 152, 153, 154, 155, 156, 157],
 'dog9.jpg': [184, 185, 186, 187, 188, 189, 190],
 'dog10.jpg': [201, 202, 203, 204, 205, 206, 207],
 'dog11.jpg': [219, 220, 221, 222, 223, 224, 225],
 'dog12.jpg': [236, 237, 238, 239, 240, 241, 242],
 'sunflower1.jpg': [269, 270, 271, 272, 273, 274, 275],
 'sunflower2.jpg': [286, 287, 288, 289, 290, 291, 292],
 'sunflower3.jpg': [303, 304, 305, 306, 307, 308, 309],
 'sunflower4.jpg': [321, 322, 323, 324, 325, 326, 327],
 'sunflower5.jpg': [353, 354, 355, 356, 357, 358, 359],
 'sunflower6.jpg': [371, 372, 373, 374, 375, 376, 377],
 'sunflower7.jpg': [388, 389, 390, 391, 392, 393, 394],
 'sunflower8.jpg': [405, 406, 407,

In [45]:
# Inspect the recall-condition lookup: image name -> list of TR indices.
results_sub01['recall']

{'dog1.jpg': [23, 24, 25, 26, 27, 28, 29],
 'dog2.jpg': [41, 42, 43, 44, 45, 46, 47],
 'dog3.jpg': [58, 59, 60, 61, 62, 63, 64],
 'dog4.jpg': [75, 76, 77, 78, 79, 80, 81],
 'dog5.jpg': [108, 109, 110, 111, 112, 113, 114],
 'dog6.jpg': [125, 126, 127, 128, 129, 130, 131],
 'dog7.jpg': [143, 144, 145, 146, 147, 148, 149],
 'dog8.jpg': [160, 161, 162, 163, 164, 165, 166],
 'dog9.jpg': [193, 194, 195, 196, 197, 198, 199],
 'dog10.jpg': [210, 211, 212, 213, 214, 215, 216],
 'dog11.jpg': [227, 228, 229, 230, 231, 232, 233],
 'dog12.jpg': [245, 246, 247, 248, 249, 250, 251],
 'sunflower1.jpg': [277, 278, 279, 280, 281, 282, 283],
 'sunflower2.jpg': [295, 296, 297, 298, 299, 300, 301],
 'sunflower3.jpg': [312, 313, 314, 315, 316, 317, 318],
 'sunflower4.jpg': [329, 330, 331, 332, 333, 334, 335],
 'sunflower5.jpg': [362, 363, 364, 365, 366, 367, 368],
 'sunflower6.jpg': [379, 380, 381, 382, 383, 384, 385],
 'sunflower7.jpg': [397, 398, 399, 400, 401, 402, 403],
 'sunflower8.jpg': [414, 415, 416

In [46]:
# Inspect the imagine-condition lookup: category -> list of TR-index lists (one per imagine event).
results_sub01['imagine']

{'dog': [[89, 90, 91, 92, 93, 94, 95],
  [174, 175, 176, 177, 178, 179, 180],
  [259, 260, 261, 262, 263, 264, 265]],
 'sunflower': [[343, 344, 345, 346, 347, 348, 349],
  [428, 429, 430, 431, 432, 433, 434],
  [512, 513, 514, 515, 516, 517, 518]]}

## Load the fMRI data

In [35]:
# Define data paths
# Root directory that the per-subject paths below are joined onto (absolute path).
data_dir = "/jukebox/hasson/snastase/neu502b-2025/neu502b-fmri/data/bids/derivatives/fmriprep" 
# Per-subject functional sub-directories, relative to data_dir.
subjects = ["sub-01/func/", "sub-03/func/", "sub-04/func/"]  # three subjects listed; update as needed
# Substring used to pick this task's files out of each directory listing.
task_prefix = "imagine"

In [8]:
# Output directory, created up front if it does not exist yet.
# NOTE(review): nothing visible in this part of the notebook writes to it — confirm it is used later.
output_dir = "neural_activity"
os.makedirs(output_dir, exist_ok=True)

def _find_task_file(func_dir, name_fragment):
    """
    Return the path of the first file in ``func_dir`` (in sorted name order)
    whose name contains both ``task_prefix`` and ``name_fragment``, or None.

    Sorting makes the choice reproducible; ``os.listdir`` order is arbitrary.
    """
    for fname in sorted(os.listdir(func_dir)):
        if task_prefix in fname and name_fragment in fname:
            return os.path.join(func_dir, fname)
    return None


def extract_neural_activity(subject, TR=1.5):
    """
    Extract a standardized, high-pass-filtered voxel time series for one subject.

    Files are looked up in ``os.path.join(data_dir, subject)`` and must contain
    ``task_prefix`` in their name:

    * ``*desc-preproc_bold.nii.gz``         - required BOLD image
    * ``*desc-brain_mask.nii.gz``           - optional mask (the masker computes
                                              one if none is found)
    * ``*desc-confounds_timeseries.tsv``    - optional confounds; the six motion
                                              parameters present in the table are
                                              regressed out

    Parameters
    ----------
    subject : str
        Sub-directory of ``data_dir`` holding the functional files,
        e.g. ``"sub-01/func/"``.
    TR : float, optional
        Repetition time in seconds. It is handed to the masker as ``t_r`` so
        the 0.01 Hz high-pass cutoff can be applied to the sampled signal.

    Returns
    -------
    numpy.ndarray or None
        Array of shape (TRs, voxels), or None when no preprocessed BOLD file
        is found for the subject.
    """
    print(f"Processing {subject}...")
    func_dir = os.path.join(data_dir, subject)

    # Find and load the preprocessed BOLD image
    bold_suffix = "desc-preproc_bold.nii.gz"
    bold_file = _find_task_file(func_dir, bold_suffix)
    if not bold_file:
        print(f"No preprocessed BOLD file found for {subject}.")
        return None
    bold_img = nib.load(bold_file)

    # Find the brain mask. Prefer the mask that shares the BOLD file's name
    # stem so that image and mask come from the same output space; otherwise
    # fall back to any mask for this task in the directory.
    mask_suffix = "desc-brain_mask.nii.gz"
    sibling_mask = bold_file.replace(bold_suffix, mask_suffix)
    if os.path.exists(sibling_mask):
        mask_file = sibling_mask
    else:
        mask_file = _find_task_file(func_dir, mask_suffix)

    if mask_file:
        mask_img = nib.load(mask_file)
        print(f"Using brain mask: {mask_file}")
    else:
        print(f"No explicit brain mask found. Will compute one.")
        mask_img = None

    # Confound regressors. The path must include the subject directory
    # (joining onto data_dir alone points at a file that does not exist).
    confound_tsv = _find_task_file(func_dir, "desc-confounds_timeseries.tsv")
    confound_data = None
    if confound_tsv is not None:
        df_conf = pd.read_csv(confound_tsv, sep='\t')
        # pick some columns, e.g., 6 motion parameters
        motion_cols = ['trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y', 'rot_z']
        nuisance_cols = [c for c in motion_cols if c in df_conf.columns]
        if nuisance_cols:
            # Fill gaps from the next row first, then from the previous one;
            # .bfill()/.ffill() replace the deprecated fillna(method=...).
            confound_data = df_conf[nuisance_cols].bfill().ffill().values

    # Extract time series from masked brain voxels
    masker = NiftiMasker(mask_img=mask_img,
                         standardize=True,
                         high_pass=0.01,
                         t_r=TR)
    masker.fit(bold_img)
    time_series = masker.transform(bold_img, confounds=confound_data)

    print(f"Extracted neural activity shape (TRs x voxels): {time_series.shape}")  # (T, N_voxels)
    return time_series

In [37]:
# Run processing for all subjects and keep each subject's time series
# (previously the returned arrays were discarded as soon as they were computed).
# neural_activity maps the subject path -> array returned by extract_neural_activity;
# subjects for which nothing was returned are left out.
neural_activity = {}
for subject in subjects:
    subject_time_series = extract_neural_activity(subject)
    if subject_time_series is not None:
        neural_activity[subject] = subject_time_series

Processing sub-01/func/...
Using brain mask: /jukebox/hasson/snastase/neu502b-2025/neu502b-fmri/data/bids/derivatives/fmriprep/sub-01/func/sub-01_task-imagine_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz




Extracted neural activity shape: (535, 73291)
Processing sub-03/func/...
Using brain mask: /jukebox/hasson/snastase/neu502b-2025/neu502b-fmri/data/bids/derivatives/fmriprep/sub-03/func/sub-03_task-imagine_space-T1w_desc-brain_mask.nii.gz
Extracted neural activity shape: (535, 60764)
Processing sub-04/func/...
Using brain mask: /jukebox/hasson/snastase/neu502b-2025/neu502b-fmri/data/bids/derivatives/fmriprep/sub-04/func/sub-04_task-imagine_space-T1w_desc-brain_mask.nii.gz
Extracted neural activity shape: (535, 56978)


## Multivariate pattern analysis (whole brain analysis)
* Train the binary category classifier on the view condition
* Test it on the recall and imagine conditions (also with cross-validation)

See notebook: fmri-4/fmri-4-mvpa-key.ipynb

Ref handbook: https://brainhack-princeton.github.io/handbook/content_pages/05-02-mvpa.html


## Representational Similarity Analysis (RSA) 

* Holding the category fixed (e.g. dog), measure the similarity between the view, recall, and imagine conditions
* Compare the within-category similarity (within dogs vs. within sunflowers)
* Expect the sunflower category to be more tightly clustered because of its visual similarity
* Produce a correlation matrix

See notebook: fmri-5/fmri-5-rsa-key.ipynb
