In [15]:
import pandas as pd
import os
import glob
import numpy as np

In [16]:
def load_dat_file(filepath: str, delimiter: str = '\t') -> np.ndarray:
    """
    Load a .dat file into a NumPy array.

    Args:
        filepath (str): Path to the .dat file.
        delimiter (str, optional): The delimiter used in the .dat file. 
                                   Default is tab ('\\t').

    Returns:
        np.ndarray: The data from the .dat file as a NumPy array.
    """
    # loadtxt will automatically infer rows/columns based on the file
    data = np.loadtxt(filepath, delimiter=delimiter)
    return data

In [35]:
def compute_fisher_z(signals: np.ndarray) -> np.ndarray:
    """
    Given time series data of shape (n_timepoints, n_regions),
    compute the pairwise Pearson correlation among the columns (i.e., regions),
    then apply the Fisher Z-transform to those correlation values.

    Args:
        signals (np.ndarray): fMRI time-series data of shape (n_timepoints, n_regions),
                              where each column is a region, and each row is a timepoint.

    Returns:
        fisher_z_mat (np.ndarray): (n_regions, n_regions) matrix of
                                   Fisher Z-transformed connectivity.
    """
    # Step 1: Compute Pearson correlation among columns (regions)
    # rowvar=False => treat each column as a variable
    corr_mat = np.corrcoef(signals, rowvar=False)  # shape: (n_regions, n_regions)

    # Step 2: Apply Fisher Z-transform
    # Z = arctanh(r), i.e. 0.5 * ln((1+r)/(1-r))
    # We clamp r to avoid infinity at r=±1
    epsilon = 1e-8
    corr_mat = np.clip(corr_mat, -1 + epsilon, 1 - epsilon)
    fisher_z_mat = np.arctanh(corr_mat)

    return fisher_z_mat

In [25]:
abide_df = pd.read_csv("./Phenotypic_V1_0b.csv", index_col=0)

In [26]:
abide_df.head()

Unnamed: 0,SITE_ID,SUB_ID,FILE_ID,DX_GROUP,DSM_IV_TR,AGE_AT_SCAN,SEX,HANDEDNESS_CATEGORY,HANDEDNESS_SCORES,FIQ,...,WISC_IV_BLK_DSN_SCALED,WISC_IV_PIC_CON_SCALED,WISC_IV_MATRIX_SCALED,WISC_IV_DIGIT_SPAN_SCALED,WISC_IV_LET_NUM_SCALED,WISC_IV_CODING_SCALED,WISC_IV_SYM_SCALED,EYE_STATUS_AT_SCAN,AGE_AT_MPRAGE,BMI
0,CALTECH,51456,Caltech_0051456,1,4,55.4,1,R,,126.0,...,,,,,,,,2,,
1,CALTECH,51457,Caltech_0051457,1,4,22.9,1,Ambi,,107.0,...,,,,,,,,2,,
2,CALTECH,51458,Caltech_0051458,1,1,39.2,1,R,,93.0,...,,,,,,,,2,,
3,CALTECH,51459,Caltech_0051459,1,1,22.8,1,R,,106.0,...,,,,,,,,2,,
4,CALTECH,51460,Caltech_0051460,1,1,34.6,2,Ambi,,133.0,...,,,,,,,,2,,


In [24]:
metadata_df.columns

Index(['SITE_ID', 'SUB_ID', 'FILE_ID', 'DX_GROUP', 'DSM_IV_TR', 'AGE_AT_SCAN',
       'SEX', 'HANDEDNESS_CATEGORY', 'HANDEDNESS_SCORES', 'FIQ', 'VIQ', 'PIQ',
       'FIQ_TEST_TYPE', 'VIQ_TEST_TYPE', 'PIQ_TEST_TYPE',
       'ADI_R_SOCIAL_TOTAL_A', 'ADI_R_VERBAL_TOTAL_BV', 'ADI_RRB_TOTAL_C',
       'ADI_R_ONSET_TOTAL_D', 'ADI_R_RSRCH_RELIABLE', 'ADOS_MODULE',
       'ADOS_TOTAL', 'ADOS_COMM', 'ADOS_SOCIAL', 'ADOS_STEREO_BEHAV',
       'ADOS_RSRCH_RELIABLE', 'ADOS_GOTHAM_SOCAFFECT', 'ADOS_GOTHAM_RRB',
       'ADOS_GOTHAM_TOTAL', 'ADOS_GOTHAM_SEVERITY', 'SRS_VERSION',
       'SRS_RAW_TOTAL', 'SRS_AWARENESS', 'SRS_COGNITION', 'SRS_COMMUNICATION',
       'SRS_MOTIVATION', 'SRS_MANNERISMS', 'SCQ_TOTAL', 'AQ_TOTAL',
       'COMORBIDITY', 'CURRENT_MED_STATUS', 'MEDICATION_NAME',
       'OFF_STIMULANTS_AT_SCAN', 'VINELAND_RECEPTIVE_V_SCALED',
       'VINELAND_EXPRESSIVE_V_SCALED', 'VINELAND_WRITTEN_V_SCALED',
       'VINELAND_COMMUNICATION_STANDARD', 'VINELAND_PERSONAL_V_SCALED',
       'VINELAN

In [18]:
all_dat_file = glob.glob("/blue/ruogu.fang/ryoi360/projects/fmri_vlm/data/ABIDE_parcelled/*")

In [22]:
subj_to_fmri_arr = {os.path.basename(path).replace("_MNI_2mm.dat", ""): load_dat_file(path) for path in all_dat_file}

In [27]:
abide_df["fmri_arr"] = abide_df["FILE_ID"].map(subj_to_fmri_arr)

In [34]:
abide_df["fmri_arr"].iloc[0]

array([[  0.18433658,  -6.07196965,  -6.97500883, ...,  -3.41201421,
          0.9953801 ,  -1.72951851],
       [  0.73146197,  -4.38761729,  -6.44765689, ..., -32.41978836,
          2.26119128,   7.04113906],
       [ -2.6803328 ,  -2.08896616,  -7.50461406, ..., -50.72791344,
          5.01940766,   9.23309426],
       ...,
       [  5.51452779,   3.73930147, -10.3789194 , ...,   6.16789148,
         14.71152254,   1.60544165],
       [  1.57698048,  -1.8572631 , -18.06669439, ...,  -1.40416673,
         18.47535985, -13.83961682],
       [ -0.23791221,  -4.84153936, -13.79866159, ...,  -5.58723652,
         16.31697614, -12.75332682]], shape=(146, 166))

In [30]:
abide_df = abide_df.dropna(subset=["fmri_arr"])

In [36]:
abide_df["corr_matrix"] = abide_df["fmri_arr"].apply(lambda x: compute_fisher_z(x))

  c /= stddev[:, None]
  c /= stddev[None, :]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  abide_df["corr_matrix"] = abide_df["fmri_arr"].apply(lambda x: compute_fisher_z(x))


In [37]:
abide_df["corr_matrix"].iloc[0]

array([[ 9.55691396,  1.02633563,  0.62918206, ..., -0.04302107,
        -0.18306976, -0.14144837],
       [ 1.02633563,  9.55691396,  0.45323908, ..., -0.10637844,
        -0.06027886, -0.09394716],
       [ 0.62918206,  0.45323908,  9.55691396, ..., -0.14061898,
        -0.3746295 , -0.13466304],
       ...,
       [-0.04302107, -0.10637844, -0.14061898, ...,  9.55691396,
         0.31651522,  0.32601488],
       [-0.18306976, -0.06027886, -0.3746295 , ...,  0.31651522,
         9.55691396,  1.09054817],
       [-0.14144837, -0.09394716, -0.13466304, ...,  0.32601488,
         1.09054817,  9.55691396]], shape=(166, 166))

In [6]:
len(df["SUB_ID"].unique())

1112

In [7]:
df["SITE_ID"].unique()

array(['CALTECH', 'CMU', 'KKI', 'LEUVEN_1', 'LEUVEN_2', 'MAX_MUN', 'NYU',
       'OHSU', 'OLIN', 'PITT', 'SBL', 'SDSU', 'STANFORD', 'TRINITY',
       'UCLA_1', 'UCLA_2', 'UM_1', 'UM_2', 'USM', 'YALE'], dtype=object)

In [8]:
df["DX_GROUP"].unique()

array([1, 2])

In [9]:
df.columns

Index(['SITE_ID', 'SUB_ID', 'FILE_ID', 'DX_GROUP', 'DSM_IV_TR', 'AGE_AT_SCAN',
       'SEX', 'HANDEDNESS_CATEGORY', 'HANDEDNESS_SCORES', 'FIQ', 'VIQ', 'PIQ',
       'FIQ_TEST_TYPE', 'VIQ_TEST_TYPE', 'PIQ_TEST_TYPE',
       'ADI_R_SOCIAL_TOTAL_A', 'ADI_R_VERBAL_TOTAL_BV', 'ADI_RRB_TOTAL_C',
       'ADI_R_ONSET_TOTAL_D', 'ADI_R_RSRCH_RELIABLE', 'ADOS_MODULE',
       'ADOS_TOTAL', 'ADOS_COMM', 'ADOS_SOCIAL', 'ADOS_STEREO_BEHAV',
       'ADOS_RSRCH_RELIABLE', 'ADOS_GOTHAM_SOCAFFECT', 'ADOS_GOTHAM_RRB',
       'ADOS_GOTHAM_TOTAL', 'ADOS_GOTHAM_SEVERITY', 'SRS_VERSION',
       'SRS_RAW_TOTAL', 'SRS_AWARENESS', 'SRS_COGNITION', 'SRS_COMMUNICATION',
       'SRS_MOTIVATION', 'SRS_MANNERISMS', 'SCQ_TOTAL', 'AQ_TOTAL',
       'COMORBIDITY', 'CURRENT_MED_STATUS', 'MEDICATION_NAME',
       'OFF_STIMULANTS_AT_SCAN', 'VINELAND_RECEPTIVE_V_SCALED',
       'VINELAND_EXPRESSIVE_V_SCALED', 'VINELAND_WRITTEN_V_SCALED',
       'VINELAND_COMMUNICATION_STANDARD', 'VINELAND_PERSONAL_V_SCALED',
       'VINELAN

In [17]:
preprocessed_metadata_df = pd.read_csv("/blue/ruogu.fang/ryoi360/projects/fmri_vlm/results/2025_02_25_ABIDE_processing/Phenotypic_V1_0b_preprocessed1.csv")

In [21]:
for col in preprocessed_metadata_df.columns:
    print(col)

Unnamed: 0.1
Unnamed: 0
SUB_ID
X
subject
SITE_ID
FILE_ID
DX_GROUP
DSM_IV_TR
AGE_AT_SCAN
SEX
HANDEDNESS_CATEGORY
HANDEDNESS_SCORES
FIQ
VIQ
PIQ
FIQ_TEST_TYPE
VIQ_TEST_TYPE
PIQ_TEST_TYPE
ADI_R_SOCIAL_TOTAL_A
ADI_R_VERBAL_TOTAL_BV
ADI_RRB_TOTAL_C
ADI_R_ONSET_TOTAL_D
ADI_R_RSRCH_RELIABLE
ADOS_MODULE
ADOS_TOTAL
ADOS_COMM
ADOS_SOCIAL
ADOS_STEREO_BEHAV
ADOS_RSRCH_RELIABLE
ADOS_GOTHAM_SOCAFFECT
ADOS_GOTHAM_RRB
ADOS_GOTHAM_TOTAL
ADOS_GOTHAM_SEVERITY
SRS_VERSION
SRS_RAW_TOTAL
SRS_AWARENESS
SRS_COGNITION
SRS_COMMUNICATION
SRS_MOTIVATION
SRS_MANNERISMS
SCQ_TOTAL
AQ_TOTAL
COMORBIDITY
CURRENT_MED_STATUS
MEDICATION_NAME
OFF_STIMULANTS_AT_SCAN
VINELAND_RECEPTIVE_V_SCALED
VINELAND_EXPRESSIVE_V_SCALED
VINELAND_WRITTEN_V_SCALED
VINELAND_COMMUNICATION_STANDARD
VINELAND_PERSONAL_V_SCALED
VINELAND_DOMESTIC_V_SCALED
VINELAND_COMMUNITY_V_SCALED
VINELAND_DAILYLVNG_STANDARD
VINELAND_INTERPERSONAL_V_SCALED
VINELAND_PLAY_V_SCALED
VINELAND_COPING_V_SCALED
VINELAND_SOCIAL_STANDARD
VINELAND_SUM_SCORES
VINELAND_ABC_ST

In [26]:
motion_filtered_df = preprocessed_metadata_df[(preprocessed_metadata_df['func_mean_fd'] <= 0.2) & (preprocessed_metadata_df['func_num_fd'] < 20)]

print(f"Subjects after stricter filtering: {len(motion_filtered_df)}")

Subjects after stricter filtering: 714


In [22]:
len(preprocessed_metadata_df.columns)

106

In [19]:
preprocessed_metadata_df["func_mean_fd"]

0       0.116828
1       0.322092
2       0.127745
3       0.128136
4       0.070143
          ...   
1107    0.116186
1108    0.140171
1109    0.154887
1110    0.048246
1111    0.168913
Name: func_mean_fd, Length: 1112, dtype: float64

In [28]:
preprocessed_metadata_df["func_fwhm"].max()

np.float64(3.7534808758)

In [10]:
import glob

In [11]:
abide_files = glob.glob("/orange/ruogu.fang/ryoi360/ABIDE/*")

In [12]:
len(abide_files)

1035

In [11]:
import numpy as np
import nibabel as nib
from nilearn import plotting
import matplotlib.pyplot as plt

In [13]:
fmri_img = nib.load(abide_files[0])
fmri_data = fmri_img.get_fdata()

In [14]:
fmri_data.shape

(61, 73, 61, 116)

In [15]:
# Get the voxel size from the affine transformation matrix
voxel_size = np.sqrt(np.sum(fmri_img.affine[:3, :3] ** 2, axis=0))
print("Original Voxel Size (mm):", voxel_size)

Original Voxel Size (mm): [3. 3. 3.]


In [None]:
1. Install Required Packages
If you haven't installed Nipype, Nibabel, and NiLearn, do so using:

bash
Copy
Edit
pip install nipype nibabel nilearn numpy scipy


In [None]:
2. Preprocessing Steps in Python
Step 1: Rigid Body Motion Correction
Use SPM's Realign function via Nipype.
Alternatively, use FSL's MCFLIRT.
SPM12 (via Nipype)
python
Copy
Edit
from nipype.interfaces.spm import Realign

realign = Realign()
realign.inputs.in_files = 'subject_func.nii'  # Replace with your file path
realign.inputs.register_to_mean = True
realign.run()
FSL Alternative
python
Copy
Edit
from nipype.interfaces.fsl import MCFLIRT

mcflirt = MCFLIRT()
mcflirt.inputs.in_file = 'subject_func.nii'
mcflirt.inputs.out_file = 'motion_corrected.nii'
mcflirt.run()
Step 2: Slice Timing Correction
Adjusts for differences in slice acquisition time.
Requires TR (repetition time) and slice order.
python
Copy
Edit
from nipype.interfaces.spm import SliceTiming

slice_timing = SliceTiming()
slice_timing.inputs.in_files = 'motion_corrected.nii'
slice_timing.inputs.time_repetition = 2.0  # Set the correct TR
slice_timing.run()
Step 3: Normalization to MNI Space
Warp the functional data into MNI152 template.
Use SPM's Normalize or FSL's FLIRT/FNIRT.
SPM Normalization
python
Copy
Edit
from nipype.interfaces.spm import Normalize12

normalize = Normalize12()
normalize.inputs.image_to_align = 'slice_time_corrected.nii'
normalize.inputs.apply_to_files = ['slice_time_corrected.nii']
normalize.inputs.jobtype = 'estwrite'  # Estimate and apply transformation
normalize.run()
FSL FLIRT Alternative
python
Copy
Edit
from nipype.interfaces.fsl import FLIRT

flirt = FLIRT()
flirt.inputs.in_file = 'slice_time_corrected.nii'
flirt.inputs.reference = '/usr/local/fsl/data/standard/MNI152_T1_2mm_brain.nii.gz'
flirt.inputs.out_file = 'normalized.nii'
flirt.run()
Step 4: Resampling to 3×3×3 mm³
Use NiLearn for resampling.
python
Copy
Edit
from nilearn.image import resample_img
import nibabel as nib

img = nib.load("normalized.nii")

resampled_img = resample_img(img, target_affine=np.diag([3, 3, 3, 1]))
nib.save(resampled_img, "resampled_3mm.nii")
Step 5: Spatial Smoothing (FWHM = 6 mm)
Apply Gaussian smoothing using NiLearn.
python
Copy
Edit
from nilearn.image import smooth_img

smoothed_img = smooth_img("resampled_3mm.nii", fwhm=6)
smoothed_img.to_filename("smoothed.nii")
