In [16]:
import pandas as pd
import numpy as np
import gensim
from gensim.models import Word2Vec
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from nltk.tokenize import word_tokenize
import nltk

# Download NLTK tokenizer if not already installed
# nltk.download('punkt')

In [17]:
# nltk.download('punkt_tab')

In [18]:
# Load Neurosynth terms from the file
file_path = "./data/items.csv"
df = pd.read_csv(file_path)

# The phrases are read from the "name" column; astype(str) converts any
# non-string entries to their string form before they are lower-cased below.
phrases = df["name"].astype(str).tolist()

# Lower-case each phrase and tokenize it into words with NLTK's word_tokenize.
# NOTE(review): presumably requires the NLTK 'punkt' / 'punkt_tab' resources whose
# downloads are commented out in the cells above — confirm they are installed.
tokenized_phrases = [word_tokenize(phrase.lower()) for phrase in phrases]


In [24]:

# Write every value of the "name" column to ./proc/terms.txt, one term per line.
# NOTE(review): open(..., 'w') does not create parent directories, so ./proc must
# already exist; `df` must still be the frame loaded from items.csv when this runs.
with open('./proc/terms.txt', 'w') as f:
    for line in df["name"].to_list():
        f.write(f"{line}\n")

In [19]:
# Train Word2Vec on tokenized phrases
# min_count=1 keeps every token, including words that occur only once.
# NOTE(review): passing `sentences=` to the constructor presumably already trains the
# model, so the explicit train() call below adds 10 further epochs on top — confirm
# this double training is intended.
# NOTE(review): no seed is set and workers=4, so the embeddings (and the clusters
# derived from them) are presumably not reproducible between runs — TODO confirm.
word2vec_model = Word2Vec(sentences=tokenized_phrases, vector_size=100, window=5, min_count=1, workers=4)
word2vec_model.train(tokenized_phrases, total_examples=len(tokenized_phrases), epochs=10)


(15200, 17750)

In [10]:
def get_phrase_vector(phrase, model):
    """
    Embed a tokenized phrase as the mean of its known word vectors.

    Args:
        phrase: Iterable of tokens (e.g. the output of a tokenizer). Tokens that
            are not present in ``model.wv`` are ignored.
        model: Object exposing ``wv`` (supports ``in`` and ``[]`` lookups by token)
            and ``vector_size``.

    Returns:
        np.ndarray: Element-wise mean of the vectors of all known tokens, or a
        zero vector of length ``model.vector_size`` when no token is known.
    """
    known_vectors = [model.wv[token] for token in phrase if token in model.wv]
    if known_vectors:
        return np.mean(known_vectors, axis=0)
    # Nothing in the vocabulary: fall back to an all-zero embedding
    return np.zeros(model.vector_size)

# Convert all phrases into vectors
phrase_vectors = np.array([get_phrase_vector(phrase, word2vec_model) for phrase in tokenized_phrases])


In [11]:
from sklearn.cluster import KMeans

# Standardize the vectors for better clustering
# (each embedding dimension is rescaled independently by StandardScaler).
scaler = StandardScaler()
phrase_vectors_scaled = scaler.fit_transform(phrase_vectors)

# Set number of clusters (adjust based on dataset size)
num_clusters = 30  

# Perform K-Means clustering
# random_state fixes the K-Means initialisation; the input embeddings themselves
# come from the Word2Vec cell above.
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
# NOTE: this adds a "Cluster" column to `df` in place, so the frame loaded from
# items.csv is modified by running this cell.
df["Cluster"] = kmeans.fit_predict(phrase_vectors_scaled)

# Display clustered phrases
df_sorted = df.sort_values("Cluster")
print(df_sorted)


              name  num studies  num activations  Cluster
1142       speaker           78             3519        0
339        disgust          103             3666        0
849            ofc          227             7221        0
346    distraction           85             3098        0
103          avoid          105             3434        0
...            ...          ...              ...      ...
1025    remembered          157             5704       29
223       contexts          223             8606       29
296   default mode          777            26256       29
1159     strategic           88             3077       29
465            ffa           99             3187       29

[1334 rows x 4 columns]


In [12]:
df

Unnamed: 0,name,num studies,num activations,Cluster
0,abilities,291,11638,17
1,ability,1094,38134,23
2,abstract,289,11245,19
3,abuse,91,2625,28
4,acc,558,19397,25
...,...,...,...,...
1329,word recognition,87,2997,22
1330,words,948,38353,10
1331,working,1126,41359,23
1332,working memory,1091,39905,7


In [13]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

# Convert phrases to TF-IDF vectors
vectorizer = TfidfVectorizer(stop_words="english")
# phrases = [word.replace("disease", "").replace("disorder", "").strip() for word in phrases]
# Strip surrounding whitespace; `phrases` is the list built in the term-loading cell.
phrases = [word.strip() for word in phrases]

X = vectorizer.fit_transform(phrases)

# Apply K-means clustering
num_clusters = 30  # Adjust based on expected groups
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
labels = kmeans.fit_predict(X)

# Create DataFrame with cluster labels
# NOTE: this rebinds `df` (and `df_sorted`, `kmeans`, `num_clusters`) to the TF-IDF
# results. The new `df` only has "Phrase" and "Cluster" columns, so earlier cells
# that read df["name"] will fail if re-run after this cell without reloading the CSV.
df = pd.DataFrame({"Phrase": phrases, "Cluster": labels})
df_sorted = df.sort_values(by="Cluster")

print(df_sorted)

                       Phrase  Cluster
499   functional connectivity        0
213              connectivity        0
1268                       v5        1
1154              stimulation        2
701      magnetic stimulation        2
...                       ...      ...
37          alzheimer disease       29
337                disease pd       29
15                         ad       29
336                disease ad       29
335                   disease       29

[1334 rows x 2 columns]


- Get a list of networks
- Get a list of brain regions
- Get a list of diseases

In [14]:
df_sorted.head(n=50)

Unnamed: 0,Phrase,Cluster
499,functional connectivity,0
213,connectivity,0
1268,v5,1
1154,stimulation,2
701,magnetic stimulation,2
1155,stimulation tms,2
249,cortex dorsal,3
248,cortex dmpfc,3
250,cortex dorsolateral,3
614,insular cortex,3


In [23]:
df_sorted.tail(n=50)

Unnamed: 0,Phrase,Cluster
263,cortex pcc,24
267,cortex precuneus,24
240,cortex acc,24
242,cortex anterior,24
846,occipitotemporal cortex,24
243,cortex ba,24
244,cortex bilaterally,24
264,cortex pfc,24
270,cortex supplementary,25
1192,supplementary,25


In [13]:
# NOTE(review): presumably a deliberate stop so that "Run All" halts here and the
# cells below are only executed manually — confirm, and consider removing it (or
# replacing it with an explanatory markdown cell) before sharing the notebook.
raise ValueError()

ValueError: 

In [2]:
def extract_unique_pairs(zmatrix, region_list):
    """
    Collect the strict upper triangle of ``zmatrix`` into a dictionary keyed by
    region-pair labels.

    Every pair of distinct indices (i, j) with i < j contributes exactly one
    entry; the diagonal and the lower triangle are never read.

    Args:
        zmatrix (np.ndarray): An (n x n) connectivity/z-score matrix, indexed
            as ``zmatrix[i, j]``.
        region_list (List[str]): Region names; its length determines n.

    Returns:
        dict: Maps "Region_i-Region_j" (i < j) to ``zmatrix[i, j]``, in row-major
        order of the pairs. If two pairs produce the same label, the later pair
        overwrites the earlier one.
    """
    region_count = len(region_list)
    return {
        f"{region_list[row]}-{region_list[col]}": zmatrix[row, col]
        for row in range(region_count)
        for col in range(row + 1, region_count)
    }

In [3]:
#!/usr/bin/env python3
"""
This script provides utilities for:

1. Converting fMRI volumes into AAL3-based parcellations and saving time-series data (.dat files).
2. Visualizing the AAL3 atlas.
3. Generating example subsequences from fMRI data for modeling or analysis.

Author: [Your Name]
Date: [Optional: YYYY-MM-DD]

Dependencies:
- numpy
- nibabel
- nilearn
- matplotlib
- ipdb (optional, for debugging)
- pandas
- python >= 3.6
"""

import os
import argparse
import glob
import pickle

import numpy as np
import nibabel as nib
from nilearn import plotting
import matplotlib.pyplot as plt

from ipdb import set_trace




In [4]:
def report_significantly_different_connectivity(connectivity_dict, alpha_zscore=1.96, top_n=None):
    """
    Generate a text report of region pairs that have a functional connectivity z-score
    exceeding a fixed threshold in absolute value. Optionally limit the report
    to the highest N magnitude z-scores.

    Example: A threshold of 1.96 roughly corresponds to p < 0.05 in a two-tailed
    test against zero for a standard normal distribution.

    Args:
        connectivity_dict (dict):
            Keys are string labels for region pairs (e.g., "Hippocampus_R-VTA_L"),
            Values are numeric z-scores (float).
        alpha_zscore (float):
            The absolute z-score threshold for significance (default=1.96).
        top_n (int, optional):
            If a positive integer, only the top N region pairs by absolute z-score
            are reported. ``None``, zero or a negative value means "no limit";
            in that case the report does not mention a limit either.

    Returns:
        str: A multi-line text string summarizing region pairs whose |z-score|
             is at least 'alpha_zscore', sorted by descending |z-score|.
             Returns "No data provided." for an empty/None dictionary.
    """
    if not connectivity_dict:
        return "No data provided."

    # Decide once whether a limit applies, so the truncation and the header line
    # can never disagree (previously a negative top_n announced a limit that was
    # not actually applied).
    limit_results = top_n is not None and top_n > 0

    # Keep pairs at or above the threshold, strongest effect first. sorted() is
    # stable, so ties keep their dictionary order.
    significant_results = sorted(
        (
            (region_pair, z)
            for region_pair, z in connectivity_dict.items()
            if abs(z) >= alpha_zscore
        ),
        key=lambda item: abs(item[1]),
        reverse=True,
    )
    if limit_results:
        significant_results = significant_results[:top_n]

    # Build textual report
    report_lines = [
        "Significantly Different Functional Connectivity (Absolute Z-Score)",
        f"Number of region pairs tested: {len(connectivity_dict)}",
        f"Z-score threshold for significance: |z| >= {alpha_zscore:.2f}",
    ]

    if limit_results:
        report_lines.append(f"Limiting to top {top_n} results by absolute z-score.\n")
    else:
        report_lines.append("")

    if significant_results:
        report_lines.append("The following region pairs exceed the threshold:\n")
        for rp, orig_z in significant_results:
            report_lines.append(f"  - {rp}: z = {orig_z:.3f}")
    else:
        report_lines.append("No region pairs exceed the threshold.")

    return "\n".join(report_lines)

In [5]:
import statistics


In [6]:
# def convert_fMRIvols_to_AAL3(data_path, output_path):
#     """
#     Convert 4D fMRI volumes into AAL3 parcellations.

#     This function:
#       1) Scans a given directory (data_path) for .nii.gz files.
#       2) For each matching file, it loads an AAL3 atlas (hardcoded path) and the fMRI data.
#       3) Reshapes the 4D fMRI volume into [n_voxels, n_timepoints], then extracts
#          average time-series data for each AAL3 parcel (1 to 170).
#       4) Saves the resulting time-series as a .dat file in 'output_path'.

#     Args:
#         data_path (str): Directory containing preprocessed fMRI volumes (.nii.gz).
#         output_path (str): Directory where the parcellated time-series (.dat) files are saved.

#     Notes:
#         - The path for the AAL3 atlas is currently hardcoded as:
#             '/orange/ruogu.fang/zeyun.zhao/FSL/bb_FSL/data/standard/AAL/AAL3.nii.gz'
#         - The function is set to skip files unless they match a specific substring
#           ("1000023_20227_2_0_fMRI_in_MNI_space.nii.gz") in this example code; adapt as needed.
#         - Each .dat file is named after the original fMRI filename with '.nii.gz' removed.

#     Returns:
#         None
#     """
#     paths = os.listdir(data_path)
#     print("fMRI data path specified:", data_path)
#     print("Number of fMRI files found:", len(paths))

#     # Hardcoded AAL3 atlas path
#     aal_path = '/orange/ruogu.fang/zeyun.zhao/FSL/bb_FSL/data/standard/AAL/AAL3.nii.gz'
#     print("Atlas file:", aal_path)

#     # Load the atlas
#     try:
#         label_img = nib.load(aal_path)
#         label_data = label_img.get_fdata()
#         label_data = label_data.flatten()  # Flatten into 1D
#         print("Atlas successfully loaded.")
#     except Exception as e:
#         print(f'Error loading AAL3 atlas at {aal_path}: {str(e)}')
#         return

#     # Loop over all files in the directory
#     for f in paths:
#         file_path = os.path.join(data_path, f)

#         # Example filter: proceed only if filename contains a specific substring
#         if "1000023_20227_2_0_fMRI_in_MNI_space.nii.gz" not in f:
#             continue

#         # Process only .nii.gz files
#         if ".nii.gz" in f:
#             print(f'Loading 4D image from {file_path}')
#             try:
#                 dts_img = nib.load(file_path)
#                 dts_data = dts_img.get_fdata()
#                 print("Loaded fMRI data.")
#             except Exception as e:
#                 print(f'Error loading 4D fMRI file "{f}": {str(e)}')
#                 continue

#             try:
#                 print(f"Extracting AAL3 parcels for {f}...")
#                 # Reshape from (X, Y, Z, T) to (T, X*Y*Z)
#                 flattened = dts_data.reshape((-1, dts_data.shape[-1])).T
#                 n_timepoints = flattened.shape[0]

#                 # AAL3 has 170 parcels (labeled 1 through 170)
#                 n_parcels = 170
#                 pmTS = np.zeros((n_timepoints, n_parcels))

#                 # Compute mean signal for each parcel i
#                 # The label_data is also flattened (same shape in spatial dims).
#                 for i in range(1, n_parcels + 1):
#                     parcel_mask = (label_data == i)
#                     y = flattened[:, parcel_mask]
#                     pmTS[:, i - 1] = np.nanmean(y, axis=1)

#                 # Replace NaNs with 0
#                 pmTS[np.isnan(pmTS)] = 0

#                 # Save time series as .dat
#                 save_name = f.split('.nii.gz')[0]
#                 out_file = os.path.join(output_path, f'{save_name}.dat')
#                 print(f"Saving {out_file} with shape {pmTS.shape} (timepoints x parcels).")
#                 np.savetxt(out_file, pmTS, delimiter='\t')

#                 set_trace()  # Debug if needed
#             except Exception as e:
#                 print(f"Error extracting or saving parcels for {f}: {str(e)}")
#         else:
#             print(f"Skipping non-NIfTI file: {f}")


# def show_AAL3(aal_template_path, save_dir):
#     """
#     Visualize each region in the AAL3 atlas by generating separate PNGs.

#     This function:
#       - Loads the AAL3 template.
#       - Iterates over all possible region indices (0 to 169 in this script).
#       - For each region, creates a binary mask image, then uses nilearn.plotting.plot_roi
#         to produce an orthographic display, saved as a PNG.

#     Args:
#         aal_template_path (str): Path to the AAL3 atlas NIfTI file (e.g., AAL3.nii.gz).
#         save_dir (str): Directory where the visualization PNG files are saved.

#     Returns:
#         None
#     """
#     output_image_path = os.path.join(save_dir, "AAL.jpg")

#     try:
#         label_img = nib.load(aal_template_path)
#         label_data = label_img.get_fdata().reshape(91, 109, 91, 1)
#     except Exception as e:
#         print(f"Error loading AAL3 atlas at {aal_template_path}: {str(e)}")
#         return

#     set_trace()  # Debug if needed

#     # AAL3 has 170 regions by default; adjust if needed
#     for roi_index in range(170):
#         # Create a binary mask for the current ROI
#         roi_mask_data = (label_data == roi_index).astype(np.int16)
#         roi_mask_img = nib.Nifti1Image(roi_mask_data, affine=label_img.affine)

#         # Show ROI in an orthographic view
#         display = plotting.plot_roi(
#             roi_mask_img,
#             title=f"ROI Index {roi_index}",
#             display_mode='ortho',
#             colorbar=True
#         )
#         # Save figure
#         output_path = os.path.join(save_dir, f"{roi_index}.png")
#         display.savefig(output_path)
#         display.close()


def generate_subsequences(fmri_data, subsequence_length=200, segment_length=20, num_segments=10):
    """
    Sample random subsequences and segment them for each region of the fMRI data.

    Given an fMRI dataset shaped (timepoints, regions):
      1) For every region, randomly sample a subsequence of length = subsequence_length
         (default 200) from the time dimension. Every valid start index, including the
         last one (T - subsequence_length), can be drawn.
      2) Split that subsequence into consecutive segments of length segment_length.

    Note: a later cell in this notebook defines another `generate_subsequences`
    (non-overlapping sampling); once that cell has run, it shadows this definition.

    Args:
        fmri_data (np.ndarray): fMRI data shaped (T, R), where T=number of timepoints, R=number of regions.
        subsequence_length (int): Length of the randomly sampled subsequence (default=200).
        segment_length (int): Size of each segment (default=20).
        num_segments (int): Number of segments per subsequence (default=10).

    Returns:
        list of list of np.ndarray:
            One entry per region. Each entry is a list of `num_segments` arrays,
            each of shape (segment_length,) when segment_length divides
            subsequence_length evenly.

    Raises:
        ValueError: If the data has fewer than `subsequence_length` timepoints.
        AssertionError: If the split does not produce the expected number of segments.
    """
    num_timesteps, num_regions = fmri_data.shape

    if subsequence_length > num_timesteps:
        raise ValueError(
            f"subsequence_length ({subsequence_length}) exceeds the number of "
            f"timepoints ({num_timesteps})."
        )

    # Largest start index that still leaves room for a full subsequence
    last_start = num_timesteps - subsequence_length

    subsequences = []
    for i in range(num_regions):
        # np.random.randint excludes its upper bound, hence the +1: without it the
        # final valid window is never sampled and T == subsequence_length fails.
        start_idx = np.random.randint(0, last_start + 1)
        # Extract the subsequence for region i
        subsequence = fmri_data[start_idx:start_idx + subsequence_length, i]
        # Split the subsequence into smaller segments
        segments = [subsequence[j:j + segment_length] for j in range(0, subsequence_length, segment_length)]
        # Check we have exactly num_segments segments
        assert len(segments) == num_segments, f"Expected {num_segments} segments, got {len(segments)}"
        subsequences.append(segments)

    return subsequences


# def main():
#     """
#     Main execution flow for testing and demonstration.

#     1) Defines paths for the AAL3 template, an fMRI data directory, and an output directory.
#     2) Optionally calls the show_AAL3() function to visualize the AAL3 atlas.
#     3) Optionally calls the convert_fMRIvols_to_AAL3() function to parcellate .nii.gz data into .dat files.
#     4) Demonstrates how to generate random subsequences from artificially-created fMRI data (490 timepoints, 90 regions).
#     """
#     aal_template_path = '/orange/ruogu.fang/zeyun.zhao/FSL/bb_FSL/data/standard/AAL/AAL3.nii.gz'  
#     output_path = "/orange/ruogu.fang/zeyun.zhao/DATA/UKB_sub/rsfMRI_processed_nii/imgs"
#     fmri_data_path = '/orange/ruogu.fang/zeyun.zhao/DATA/UKB_sub/rsfMRI_processed_nii/Affined'

#     # Uncomment if you want to visualize AAL3 regions
#     # show_AAL3(aal_template_path, output_path)

#     # Uncomment if you want to convert volumes in fmri_data_path
#     # convert_fMRIvols_to_AAL3(fmri_data_path, output_path)

#     # Example usage of generate_subsequences:
#     print("Generating random example subsequences from mock data...")
#     fmri_data = np.random.rand(490, 90)  # (timepoints=490, regions=90)
#     subsequences = generate_subsequences(fmri_data)
#     set_trace()




In [7]:
import os
import numpy as np
import nibabel as nib
from typing import List



In [8]:

# def list_nifti_files(
#     data_path: str,
#     filename_substring: str = None,
#     extension: str = ".nii.gz"
# ) -> List[str]:
#     """
#     Return a list of file paths in `data_path` that match the given extension
#     and optionally contain a specified substring in the filename.
#     """
#     files = []
#     for fname in os.listdir(data_path):
#         if fname.endswith(extension):
#             if filename_substring is None or filename_substring in fname:
#                 files.append(os.path.join(data_path, fname))
#     return files

def list_nifti_files(
    data_path: str,
    filename_substring: str = None,
    extension: str = ".nii.gz"
) -> List[str]:
    """
    Find files below `data_path` (at any depth, including `data_path` itself)
    whose names end with `extension`, optionally keeping only those whose file
    name contains `filename_substring`.

    Args:
        data_path (str): The root directory to search.
        filename_substring (str, optional): When given, a file is kept only if this
            text occurs in its base name (directory names are not considered).
        extension (str, optional): File-name suffix to match (default: '.nii.gz').

    Returns:
        List[str]: Paths of all matching files, in the order glob reports them.
    """
    # '**' together with recursive=True matches zero or more directory levels
    search_pattern = os.path.join(data_path, '**', f'*{extension}')
    candidates = glob.glob(search_pattern, recursive=True)

    if filename_substring is None:
        return candidates

    return [
        path for path in candidates
        if filename_substring in os.path.basename(path)
    ]

def load_atlas(atlas_path: str) -> np.ndarray:
    """
    Read the NIfTI atlas stored at `atlas_path` and return its voxel data as a
    flattened (1-D) NumPy array.

    Args:
        atlas_path (str): Path to the atlas image.

    Returns:
        np.ndarray: 1-D copy of the atlas data as returned by `get_fdata()`.

    Raises:
        IOError: If loading or flattening fails for any reason; the message
            includes the path and the underlying error text.
    """
    try:
        return nib.load(atlas_path).get_fdata().flatten()
    except Exception as e:
        raise IOError(f"Failed to load atlas at {atlas_path}: {str(e)}")

def extract_parcel_timeseries(
    fmri_path: str,
    label_data: np.ndarray,
    n_parcels: int = 170
) -> np.ndarray:
    """
    Compute the mean signal of every atlas parcel at every timepoint.

    The image at `fmri_path` is loaded, its last axis is treated as time and all
    leading axes are collapsed into a single voxel axis. For each label
    1..n_parcels, the voxels where `label_data` equals that label are averaged
    (NaN voxels ignored) per timepoint.

    Args:
        fmri_path (str): Path to the fMRI volume to load.
        label_data (np.ndarray): Flattened atlas labels; must line up one-to-one
            with the flattened voxel axis of the fMRI volume.
        n_parcels (int): Highest label to extract; labels are assumed to be 1..n_parcels.

    Returns:
        np.ndarray: Array of shape (timepoints, n_parcels). Column k holds label k+1.
        Entries that would be NaN (e.g. a label with no voxels) are set to 0.
    """
    volume = nib.load(fmri_path).get_fdata()

    # Collapse all leading axes into one voxel axis, then put time first:
    # (voxels, timepoints) -> (timepoints, voxels)
    voxel_series = volume.reshape((-1, volume.shape[-1])).T

    parcel_series = np.zeros((voxel_series.shape[0], n_parcels))
    for column, label in enumerate(range(1, n_parcels + 1)):
        # Boolean mask over voxels selects the members of this parcel
        members = voxel_series[:, label_data == label]
        parcel_series[:, column] = np.nanmean(members, axis=1)

    # Parcels without any (finite) voxel produce NaN means; report them as 0
    parcel_series[np.isnan(parcel_series)] = 0
    return parcel_series

def save_timeseries(pmTS: np.ndarray, out_file: str):
    """
    Write a time-series array to `out_file` as tab-separated text.

    Args:
        pmTS (np.ndarray): Array to store (e.g. timepoints x parcels).
        out_file (str): Destination path; an existing file is overwritten.
    """
    np.savetxt(fname=out_file, X=pmTS, delimiter='\t')

def convert_fMRIvols_to_parcels(
    data_path: str,
    atlas_path: str,
    filename_substring: str = None,
    n_parcels: int = 170
) -> None:
    """
    Parcellate every matching fMRI volume below `data_path` with the given atlas
    and write each result as a .dat file next to its input file.

    Steps:
      1) Collect .nii.gz files below `data_path` (searched recursively by
         `list_nifti_files`), optionally filtered by `filename_substring`.
      2) Load `atlas_path` as a flattened label array; if that fails, print the
         error and return without processing anything.
      3) For each file, compute the (timepoints x parcels) mean time series for
         labels 1..n_parcels via `extract_parcel_timeseries`.
      4) Save the matrix as "<input name without .nii.gz>.dat" in the input's folder.

    Failures while extracting or saving a single file are printed and do not stop
    the remaining files from being processed.

    Args:
        data_path (str): Root directory containing the fMRI .nii.gz files.
        atlas_path (str): Path to the NIfTI atlas (e.g., AAL3).
        filename_substring (str, optional): Only process files whose name contains this text.
        n_parcels (int, optional): Number of parcels expected in the atlas.

    Returns:
        None
    """
    fmri_files = list_nifti_files(data_path, filename_substring)
    print(f"Found {len(fmri_files)} files in '{data_path}' matching criteria.")

    # Nothing can be parcellated without the atlas: report the problem and stop
    try:
        label_data = load_atlas(atlas_path)
        print(f"Atlas loaded successfully from: {atlas_path}")
    except IOError as e:
        print(str(e))
        return

    for fmri_path in fmri_files:
        # One call yields both the containing folder and the file name
        out_dir, filename = os.path.split(fmri_path)
        print(f"Processing file: {filename}")

        try:
            pmTS = extract_parcel_timeseries(fmri_path, label_data, n_parcels=n_parcels)
        except Exception as e:
            print(f"Error extracting time series for {filename}: {str(e)}")
            continue

        # Output lives beside the input, with the NIfTI suffix swapped for .dat
        out_file = os.path.join(out_dir, filename.replace(".nii.gz", "") + ".dat")

        try:
            save_timeseries(pmTS, out_file)
            print(f"Saved parcellated time series to: {out_file}")
        except Exception as e:
            print(f"Error saving time series for {filename}: {str(e)}")


# Example usage (comment out if placing in a module):
# convert_fMRIvols_to_parcels(
#     data_path="/path/to/fmri/files",
#     atlas_path="/path/to/AAL3.nii.gz",
#     filename_substring="fMRI_in_MNI_space",
#     n_parcels=170
# )

In [9]:
import numpy as np

def generate_subsequences(
    fmri_data: np.ndarray,
    subsequence_length: int = 200,
    segment_length: int = 20,
    num_segments: int = 10
):
    """
    Assign each region its own non-overlapping time window and cut that window
    into fixed-length segments.

    The time axis is divided into back-to-back windows of `subsequence_length`
    timepoints (starting at 0; leftover timepoints at the end are unused). The
    window start indices are shuffled with `np.random.shuffle` and the first R of
    them are handed to regions 0..R-1, so no two regions share a window.

    Args:
        fmri_data (np.ndarray): fMRI data shaped (T, R),
                                where T = number of timepoints,
                                      R = number of regions.
        subsequence_length (int): Length of each window (default=200).
        segment_length (int): Length of each segment within a window (default=20).
        num_segments (int): Expected number of segments per window (default=10).

    Returns:
        list of list of np.ndarray:
            A list of length R; element r is the list of consecutive segments cut
            from region r's window, each of shape (segment_length,) when
            `segment_length` divides `subsequence_length` evenly.

    Raises:
        ValueError: If fewer than R full windows fit into the T timepoints.
        AssertionError: If a window does not split into exactly `num_segments` segments.
    """
    num_timesteps, num_regions = fmri_data.shape

    # How many complete windows fit on the time axis?
    max_nonoverlapping_intervals = num_timesteps // subsequence_length
    if max_nonoverlapping_intervals < num_regions:
        raise ValueError(
            f"Not enough time for {num_regions} non-overlapping subsequences of "
            f"length {subsequence_length} in {num_timesteps} timepoints."
        )

    # Start index of every window, then randomise which region gets which one
    window_starts = list(
        range(0, max_nonoverlapping_intervals * subsequence_length, subsequence_length)
    )
    np.random.shuffle(window_starts)

    # Offsets of the segments inside a window are the same for all regions
    segment_offsets = range(0, subsequence_length, segment_length)

    subsequences = []
    for region_idx, start_idx in enumerate(window_starts[:num_regions]):
        window = fmri_data[start_idx : start_idx + subsequence_length, region_idx]
        segments = [window[offset : offset + segment_length] for offset in segment_offsets]
        assert len(segments) == num_segments, (
            f"Expected {num_segments} segments, got {len(segments)} for region {region_idx}."
        )
        subsequences.append(segments)

    return subsequences

In [10]:
def pairwise_column_correlation(data: np.ndarray) -> np.ndarray:
    """
    Pearson correlation between every pair of columns of `data`.

    Rows are observations (e.g. timepoints) and columns are variables
    (e.g. regions), which is why `rowvar=False` is passed to `np.corrcoef`.

    Args:
        data (np.ndarray): 2D array of shape (T, R) with T observations of
                           R variables.

    Returns:
        np.ndarray: Symmetric (R, R) matrix of correlation coefficients.
    """
    return np.corrcoef(data, rowvar=False)

In [11]:
list_nifti_files("/blue/ruogu.fang/ryoi360/projects/fmri_vlm/data/UKB/brain/AD", filename_substring="MNI")

['/blue/ruogu.fang/ryoi360/projects/fmri_vlm/data/UKB/brain/AD/20227_rsfMRI_NIFTI/rsfmri_unzip/1565751_20227_2_0/fMRI/rfMRI.ica/filtered_func_data_clean_MNI.nii.gz',
 '/blue/ruogu.fang/ryoi360/projects/fmri_vlm/data/UKB/brain/AD/20227_rsfMRI_NIFTI/rsfmri_unzip/1714333_20227_2_0/fMRI/rfMRI.ica/filtered_func_data_clean_MNI.nii.gz']

In [12]:
def load_dat_file(filepath: str, delimiter: str = '\t') -> np.ndarray:
    """
    Read a delimited text (.dat) file into a NumPy array via `np.loadtxt`.

    Args:
        filepath (str): Path to the .dat file.
        delimiter (str, optional): Column separator used in the file.
                                   Default is tab ('\\t').

    Returns:
        np.ndarray: The parsed numeric data; its shape follows the rows and
        columns found in the file.
    """
    return np.loadtxt(filepath, delimiter=delimiter)

# Example usage:
# file_path = 'path/to/your_file.dat'
# data_array = load_dat_file(file_path)
# print("Data shape:", data_array.shape)
# print("First few rows:\n", data_array[:5])

In [13]:
def load_all_subject_dat(root_dir: str, pattern: str = "**/*.dat") -> dict:
    """
    Recursively search `root_dir` for all .dat files, load each file, and store
    them in a dictionary keyed by the subject ID (folder name).

    The key is the name of the folder that directly contains the file. If several
    files share that folder name (several .dat files in one folder, or a layout
    such as '.../<subject>/fMRI/rfMRI.ica/x.dat' where every file's parent folder
    has the same name), they map to the same key: the file that sorts last by
    path is kept and a RuntimeWarning is issued for each replacement.

    Args:
        root_dir (str): The top-level directory to search.
        pattern (str, optional): Glob pattern. Default is '**/*.dat', which
                                 searches all subdirectories for .dat files.

    Returns:
        dict: Keys are subject IDs (the name of the folder containing the file),
              values are the NumPy arrays loaded from the .dat files.
    """
    import warnings  # local import so the cell does not depend on another import cell

    subject_data = {}

    # glob returns paths in arbitrary order; sorting makes both the dictionary
    # order and the outcome of key collisions reproducible.
    all_dat_files = sorted(glob.glob(os.path.join(root_dir, pattern), recursive=True))

    for filepath in all_dat_files:
        # Example: /path/to/root_dir/sub-01/something.dat
        # subject_id would be 'sub-01' if that's the immediate parent folder
        subject_id = os.path.basename(os.path.dirname(filepath))

        if subject_id in subject_data:
            # Previously this overwrote the earlier array without any notice
            warnings.warn(
                f"Multiple .dat files map to subject ID '{subject_id}'; "
                f"'{filepath}' replaces the previously loaded file.",
                RuntimeWarning,
                stacklevel=2,
            )

        subject_data[subject_id] = load_dat_file(filepath)

    return subject_data

In [14]:
all_dat_files = glob.glob("/blue/ruogu.fang/ryoi360/projects/fmri_vlm/data/UKB/brain/*/*/*unzip*/*/fMRI/*/*.dat")

In [15]:
def parse_group_and_subject_id(filepath: str) -> (str, str):
    """
    Parse a path of the form:
        /.../PD/20227_rsfMRI_NIFTI/rsfmri_unzip/1080638_20227_2_0/fMRI/rfMRI.ica/filtered_func_data_clean_MNI.dat
    to extract:
        group = 'PD'
        subject_id = '1080638'

    The components are located relative to the END of the path (the file name),
    so the result does not depend on how deep the data root sits in the file
    system; only the seven trailing components must follow the layout above.

    Args:
        filepath (str): Path to the file (absolute or relative).

    Returns:
        (str, str): A tuple (group, subject_id).

    Raises:
        IndexError: If the path has fewer than seven components.
    """
    # Normalise separators / redundant slashes before splitting into components
    parts = os.path.normpath(filepath).split(os.sep)
    # Counting back from the file name:
    #   parts[-1] 'filtered_func_data_clean_MNI.dat'
    #   parts[-2] 'rfMRI.ica'
    #   parts[-3] 'fMRI'
    #   parts[-4] '1080638_20227_2_0'   <- subject folder
    #   parts[-5] 'rsfmri_unzip'
    #   parts[-6] '20227_rsfMRI_NIFTI'
    #   parts[-7] 'PD'                  <- group
    if len(parts) < 7:
        raise IndexError(
            f"Path '{filepath}' is too short to contain group and subject folders."
        )

    # 1) The group folder sits seven components from the end.
    group = parts[-7]  # e.g., 'PD'

    # 2) The subject folder sits four components from the end,
    #    for instance '1080638_20227_2_0'.
    subject_folder = parts[-4]
    # Extract the subject ID by splitting on underscore and taking the first chunk.
    subject_id = subject_folder.split('_')[0]  # e.g., '1080638'

    return group, subject_id

In [16]:
from collections import defaultdict

In [17]:
# Build one record per subject ID: diagnostic group, parcel time series loaded from
# the .dat file, and the column-wise correlation matrix of that time series.
# A subject ID that occurs in more than one path keeps only the values of the
# last file processed.
# NOTE: the loop variable rebinds `file_path`, the name the term-loading cell uses
# for the items.csv path.
# NOTE(review): the RuntimeWarning lines printed under this cell come from a division
# by the standard deviation inside numpy; presumably some columns are constant
# (zero variance), which would leave NaN entries in `corr_matrix` — confirm.
id_to_dict = defaultdict(dict)
for file_path in all_dat_files:
    group, subject_id = parse_group_and_subject_id(file_path)
    id_to_dict[subject_id]["group"] = group
    id_to_dict[subject_id]["bold_arr"] = load_dat_file(file_path)
    id_to_dict[subject_id]["corr_matrix"] = pairwise_column_correlation(id_to_dict[subject_id]["bold_arr"])

  c /= stddev[:, None]
  c /= stddev[None, :]


In [18]:
fmri_df = pd.DataFrame.from_dict(id_to_dict, orient="index")

In [19]:
fmri_df = fmri_df.reset_index()

In [20]:
# Despite the column name, this flag is True for EVEN subject IDs and False for odd
# ones ("index" holds the subject IDs after reset_index).
fmri_df["id_odd_or_even"] = fmri_df["index"].astype(int).apply(lambda x: x % 2 == 0)

In [21]:
# Baseline cohort = subjects in the "control" group whose ID is even
# ("id_odd_or_even" is True for even IDs).
fmri_df["baseline"] = (fmri_df["group"] == "control") & (fmri_df["id_odd_or_even"] == True)

In [22]:
fmri_df.head()

Unnamed: 0,index,group,bold_arr,corr_matrix,id_odd_or_even,baseline
0,1080638,PD,"[[4939.057823152287, 10958.907044729725, 4983....","[[0.9999999999999999, 0.1857165690564201, 0.25...",True,False
1,3912201,PD,"[[2330.1448758509573, 11109.145230464348, 1853...","[[0.9999999999999998, 0.563850224587867, 0.550...",False,False
2,2624428,PD,"[[3387.5494932498164, 10085.910710540476, 3114...","[[0.9999999999999998, 0.8958240969983267, 0.74...",True,False
3,1013014,control,"[[3195.2888963562787, 14138.177237238591, 4228...","[[1.0, 0.4757426072404447, 0.17334303960399244...",True,True
4,1013317,control,"[[3055.4551621744463, 13380.706479922972, 3604...","[[1.0, 0.8552293538309268, 0.5479695519711071,...",False,False


In [23]:
fmri_df.tail()

Unnamed: 0,index,group,bold_arr,corr_matrix,id_odd_or_even,baseline
20,1003639,control,"[[2275.088215387728, 11884.866670997635, 2076....","[[1.0, 0.8589694541363802, 0.6393752977470762,...",False,False
21,1011903,control,"[[2408.1432084705907, 10454.311263056217, 3041...","[[0.9999999999999999, 0.641602758239139, 0.368...",False,False
22,1007815,control,"[[2685.573042608092, 10096.8285560251, 1459.93...","[[0.9999999999999998, 0.5746605702022838, 0.14...",False,False
23,1565751,AD,"[[1092.377286530055, 12427.607279035157, 1371....","[[1.0, 0.14729506612579832, 0.1840513083309353...",False,False
24,1714333,AD,"[[3449.3159941366903, 12473.739904673344, 4350...","[[1.0, 0.8158525944417093, 0.27754538126297906...",False,False


# Calculating the mean and standard deviation of the baseline (even-ID control) group

In [24]:
def average_connectivity_matrices(fc_matrices, decimals=2):
    """
    Compute the element-wise mean of a collection of functional connectivity
    matrices that all share the same shape.

    Args:
        fc_matrices (List[np.ndarray]): Non-empty list of 2D arrays, each of shape (R, R).
        decimals (int or None): Number of decimal places the result is rounded
            to. Defaults to 2, which reproduces the previous hard-coded
            behaviour. Pass None to get the unrounded mean.

    Returns:
        np.ndarray: A 2D array of shape (R, R), where each element is the mean
                    of the corresponding elements across all matrices in the list.

    Raises:
        ValueError: If fc_matrices is empty, or if the matrices do not all
                    have the same shape (raised by np.stack).
    """
    matrices = list(fc_matrices)
    if not matrices:
        raise ValueError("fc_matrices must contain at least one matrix")

    # Stack along a new leading axis -> shape (N, R, R), N = number of matrices.
    stacked = np.stack(matrices, axis=0)
    # Collapse the N dimension.
    mean_matrix = np.mean(stacked, axis=0)

    if decimals is None:
        return mean_matrix
    return np.round(mean_matrix, decimals)

In [25]:
def std_connectivity_matrices(fc_matrices, decimals=2, ddof=0):
    """
    Compute the element-wise standard deviation of a collection of functional
    connectivity matrices that all share the same shape.

    Args:
        fc_matrices (List[np.ndarray]): Non-empty list of 2D arrays, each of shape (R, R).
        decimals (int or None): Number of decimal places the result is rounded
            to. Defaults to 2, which reproduces the previous hard-coded
            behaviour. Pass None to get the unrounded values; rounding turns
            any std below 0.005 into 0, which matters when the result is used
            as a divisor.
        ddof (int): Delta degrees of freedom forwarded to np.std. The default
            0 gives the population std (previous behaviour); 1 gives the
            sample std.

    Returns:
        np.ndarray: A 2D array of shape (R, R), where each element is the
                    standard deviation of the corresponding elements across
                    all matrices in the list.

    Raises:
        ValueError: If fc_matrices is empty, or if the matrices do not all
                    have the same shape (raised by np.stack).
    """
    matrices = list(fc_matrices)
    if not matrices:
        raise ValueError("fc_matrices must contain at least one matrix")

    # Stack along a new leading axis -> shape (N, R, R), N = number of matrices.
    stacked = np.stack(matrices, axis=0)
    # Standard deviation over the N dimension.
    std_matrix = np.std(stacked, axis=0, ddof=ddof)

    if decimals is None:
        return std_matrix
    return np.round(std_matrix, decimals)

In [26]:
# Element-wise mean correlation matrix over the baseline subjects only.
baseline_mean_corr = average_connectivity_matrices(
    fmri_df.loc[fmri_df["baseline"], "corr_matrix"].tolist()
)

In [27]:
# Element-wise std of the correlation matrices over the baseline subjects only.
baseline_std_corr = std_connectivity_matrices(
    fmri_df.loc[fmri_df["baseline"], "corr_matrix"].tolist()
)

In [28]:
# Display the baseline std matrix.
baseline_std_corr

array([[0.  , 0.16, 0.22, ..., 0.14, 0.06, 0.08],
       [0.16, 0.  , 0.27, ..., 0.15, 0.07, 0.14],
       [0.22, 0.27, 0.  , ..., 0.11, 0.07, 0.13],
       ...,
       [0.14, 0.15, 0.11, ..., 0.  , 0.05, 0.06],
       [0.06, 0.07, 0.07, ..., 0.05, 0.  , 0.06],
       [0.08, 0.14, 0.13, ..., 0.06, 0.06, 0.  ]], shape=(170, 170))

In [29]:
# Display the baseline mean matrix.
baseline_mean_corr

array([[ 1.  ,  0.56,  0.21, ...,  0.01,  0.01,  0.  ],
       [ 0.56,  1.  ,  0.31, ...,  0.01,  0.02, -0.  ],
       [ 0.21,  0.31,  1.  , ..., -0.  , -0.01,  0.03],
       ...,
       [ 0.01,  0.01, -0.  , ...,  1.  ,  0.01, -0.02],
       [ 0.01,  0.02, -0.01, ...,  0.01,  1.  , -0.01],
       [ 0.  , -0.  ,  0.03, ..., -0.02, -0.01,  1.  ]], shape=(170, 170))

In [30]:
def compute_zscore(
    data_matrix: np.ndarray,
    mean_matrix: np.ndarray,
    std_matrix: np.ndarray
) -> np.ndarray:
    """
    Compute z-scores element-wise for 'data_matrix', based on provided
    mean_matrix and std_matrix of the same shape.

    Z = (data - mean) / std

    Args:
        data_matrix (np.ndarray): Array (or array-like) of data values, e.g. shape (M, N).
        mean_matrix (np.ndarray): Array (or array-like) of mean values, same shape as data_matrix.
        std_matrix (np.ndarray): Array (or array-like) of std values, same shape as data_matrix.

    Returns:
        np.ndarray: A z-score matrix of the same shape as data_matrix.
                    Positions where std is 0 (or where any input is NaN)
                    are reported as 0, not NaN. Infinite values are replaced
                    by the largest finite floats (np.nan_to_num defaults).
    """
    data = np.asarray(data_matrix)
    mean = np.asarray(mean_matrix)
    std = np.asarray(std_matrix)

    # Mask zero std as NaN so the division yields NaN instead of +/-inf and a
    # divide-by-zero warning; those NaNs are mapped to 0 below.
    safe_std = np.where(std == 0, np.nan, std)

    z_matrix = (data - mean) / safe_std
    return np.nan_to_num(z_matrix, nan=0)

In [31]:
# Z-score every subject's correlation matrix against the baseline mean/std.
fmri_df["z_score_matrix"] = fmri_df["corr_matrix"].map(
    lambda corr: compute_zscore(corr, baseline_mean_corr, baseline_std_corr)
)

In [32]:
# Preview the frame with the new "z_score_matrix" column.
fmri_df.head()

Unnamed: 0,index,group,bold_arr,corr_matrix,id_odd_or_even,baseline,z_score_matrix
0,1080638,PD,"[[4939.057823152287, 10958.907044729725, 4983....","[[0.9999999999999999, 0.1857165690564201, 0.25...",True,False,"[[0.0, -2.339271443397375, 0.2219054315262979,..."
1,3912201,PD,"[[2330.1448758509573, 11109.145230464348, 1853...","[[0.9999999999999998, 0.563850224587867, 0.550...",False,False,"[[0.0, 0.02406390367416869, 1.547277344451915,..."
2,2624428,PD,"[[3387.5494932498164, 10085.910710540476, 3114...","[[0.9999999999999998, 0.8958240969983267, 0.74...",True,False,"[[0.0, 2.0989006062395412, 2.4432882105716853,..."
3,1013014,control,"[[3195.2888963562787, 14138.177237238591, 4228...","[[1.0, 0.4757426072404447, 0.17334303960399244...",True,True,"[[0.0, -0.5266087047472209, -0.166622547254579..."
4,1013317,control,"[[3055.4551621744463, 13380.706479922972, 3604...","[[1.0, 0.8552293538309268, 0.5479695519711071,...",False,False,"[[0.0, 1.845183461443292, 1.536225236232305, 2..."


In [33]:
# Inspect the z-score matrix of the subject at row position 0.
fmri_df["z_score_matrix"].iloc[0]

array([[ 0.        , -2.33927144,  0.22190543, ..., -0.26877269,
         2.18208227, -1.15748236],
       [-2.33927144,  0.        ,  0.33334668, ...,  0.10839085,
         1.53087697, -0.89155821],
       [ 0.22190543,  0.33334668,  0.        , ..., -0.71028236,
         1.31087912, -0.37850623],
       ...,
       [-0.26877269,  0.10839085, -0.71028236, ...,  0.        ,
         0.62841552, -0.80928803],
       [ 2.18208227,  1.53087697,  1.31087912, ...,  0.62841552,
         0.        ,  0.82958834],
       [-1.15748236, -0.89155821, -0.37850623, ..., -0.80928803,
         0.82958834,  0.        ]], shape=(170, 170))

In [34]:
# Largest z-score in the matrix of the subject at row position 0.
fmri_df["z_score_matrix"].iloc[0].max()

np.float64(3.8446569850719023)

In [35]:
# Smallest z-score in the matrix of the subject at row position 0.
fmri_df["z_score_matrix"].iloc[0].min()

np.float64(-8.066684745724414)

In [36]:
# Tab-separated ROI table; its "nom_l" column is used below to label region pairs.
roi_df = pd.read_table("../../data/UKB/brain/ROI_MNI_V6_vol.txt", sep="\t")

In [37]:
# Preview the ROI table.
roi_df.head()

Unnamed: 0,nom_c,nom_l,color,vol_vox,vol_mm3
0,FAL,Precentral_L,1,3526,28208
1,FAR,Precentral_R,2,3381,27048
2,F1_2L,Frontal_Sup_2_L,3,4870,38960
3,F1_2R,Frontal_Sup_2_R,4,5126,41008
4,F2_2L,Frontal_Mid_2_L,5,4507,36056


In [38]:
# Run extract_unique_pairs on every subject's z-score matrix, passing the ROI
# names from roi_df["nom_l"], and keep the returned dict in a new column.
fmri_df["all_pairs"] = fmri_df["z_score_matrix"].map(
    lambda z_matrix: extract_unique_pairs(z_matrix, roi_df["nom_l"])
)

In [39]:
# Keep only the pairs whose absolute z-score reaches the cut-off of 10.
fmri_df["significant_pairs"] = fmri_df["all_pairs"].map(
    lambda pairs: {name: z for name, z in pairs.items() if abs(z) >= 10}
)

In [40]:
# Count the retained pairs per subject.
fmri_df["number_of_sig_pairs"] = fmri_df["significant_pairs"].map(len)

In [41]:
# Show group membership next to the number and content of significant pairs.
fmri_df[["group", "baseline", "number_of_sig_pairs", "significant_pairs"]]

Unnamed: 0,group,baseline,number_of_sig_pairs,significant_pairs
0,PD,False,0,{}
1,PD,False,0,{}
2,PD,False,8,"{'Frontal_Mid_2_R-VTA_L': 11.577292763518429, ..."
3,control,True,0,{}
4,control,False,1,{'Thal_LGN_L-VTA_L': 10.256517534940155}
5,control,True,0,{}
6,control,False,3,"{'Cuneus_R-Thal_VPL_R': -10.63652602955795, 'P..."
7,control,False,1,{'Supp_Motor_Area_L-Paracentral_Lobule_R': -11...
8,control,True,0,{}
9,control,False,0,{}


In [42]:
# Load the subject metadata table (columns used below: "eid", "all_diagnoses").
# NOTE(review): this is an absolute, machine-specific path — the cell will not
# run elsewhere; consider a configurable data directory.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# from a trusted source.
meta_data_df = pd.read_pickle("/blue/ruogu.fang/ryoi360/projects/fmri_vlm/results/2025_01_21_metadata_eda/proc/trimmed_icd.pkl")

In [43]:
# Number of entries in each subject's "all_diagnoses" collection.
meta_data_df["num_diagnoses"] = meta_data_df["all_diagnoses"].map(len)

In [44]:
# Cast both join keys to str so the merge compares like with like.
for frame, key_column in ((meta_data_df, "eid"), (fmri_df, "index")):
    frame[key_column] = frame[key_column].astype(str)

In [45]:
# Left join: keep every fMRI row and attach metadata where fmri "index" == metadata "eid".
merged_df = fmri_df.merge(
    meta_data_df,
    how="left",
    left_on="index",
    right_on="eid",
)

In [46]:
# Build one text report per subject from its region-pair z-scores
# (|z| threshold 2, at most 20 entries).
merged_df["report"] = merged_df["all_pairs"].map(
    lambda pairs: report_significantly_different_connectivity(
        pairs, alpha_zscore=2, top_n=20
    )
)

In [51]:
# Report of the last non-baseline PD subject.
print(
    merged_df.loc[
        merged_df["group"].eq("PD") & merged_df["baseline"].eq(False),
        "report",
    ].iloc[-1]
)

Significantly Different Functional Connectivity (Absolute Z-Score)
Number of region pairs tested: 14365
Z-score threshold for significance: |z| >= 2.00
Limiting to top 20 results by absolute z-score.

The following region pairs exceed the threshold:

  - Cerebelum_8_R-VTA_L: z = 16.290
  - Amygdala_L-Cerebellum_10_R: z = 15.996
  - Hippocampus_R-VTA_L: z = 13.586
  - Vermis_9-Thal_PuA_R: z = 12.445
  - Frontal_Mid_2_R-VTA_L: z = 11.577
  - ParaHippocampal_R-VTA_L: z = 10.462
  - Cerebelum_7b_L-Thal_PuA_R: z = 10.286
  - Caudate_R-VTA_L: z = 10.055
  - Frontal_Sup_2_L-VTA_L: z = 9.762
  - Vermis_3-VTA_L: z = 9.696
  - Cerebelum_7b_L-Raphe_M: z = 9.518
  - Cerebellum_10_R-Thal_Re_R: z = 9.500
  - Cerebelum_3_L-Cerebellum_10_R: z = 9.114
  - Frontal_Inf_Tri_L-Thal_Re_R: z = 9.037
  - Temporal_Pole_Mid_L-Cerebelum_7b_R: z = 8.978
  - Temporal_Sup_L-VTA_L: z = 8.977
  - Frontal_Inf_Tri_L-Cerebellum_10_R: z = 8.912
  - Cerebelum_7b_L-VTA_L: z = 8.893
  - Frontal_Sup_2_R-VTA_L: z = 8.807
  - 

In [48]:
# Report of the first non-baseline AD subject.
print(
    merged_df.loc[
        merged_df["group"].eq("AD") & merged_df["baseline"].eq(False),
        "report",
    ].iloc[0]
)

Significantly Different Functional Connectivity (Absolute Z-Score)
Number of region pairs tested: 14365
Z-score threshold for significance: |z| >= 2.00
Limiting to top 20 results by absolute z-score.

The following region pairs exceed the threshold:

  - Supp_Motor_Area_L-Paracentral_Lobule_R: z = -13.589
  - Supp_Motor_Area_L-Supp_Motor_Area_R: z = -11.862
  - Supp_Motor_Area_L-Insula_L: z = -9.847
  - Precentral_R-Parietal_Inf_R: z = -7.626
  - SupraMarginal_R-Temporal_Sup_R: z = -7.154
  - Frontal_Sup_Medial_R-Occipital_Inf_L: z = -6.865
  - Insula_L-SupraMarginal_R: z = -6.718
  - Supp_Motor_Area_L-Temporal_Sup_R: z = -6.613
  - Rolandic_Oper_R-Temporal_Sup_R: z = -6.397
  - Thal_PuI_L-Raphe_D: z = 6.300
  - Rolandic_Oper_R-Insula_L: z = -6.019
  - Supp_Motor_Area_R-Temporal_Sup_R: z = -6.002
  - Supp_Motor_Area_L-Postcentral_R: z = -5.952
  - Postcentral_R-SupraMarginal_R: z = -5.830
  - Amygdala_L-Cerebellum_10_R: z = 5.684
  - SupraMarginal_R-Heschl_L: z = -5.463
  - Rolandic_Op

In [49]:
# Report of the first control subject that is not part of the baseline set.
print(
    merged_df.loc[
        merged_df["group"].eq("control") & merged_df["baseline"].eq(False),
        "report",
    ].iloc[0]
)

Significantly Different Functional Connectivity (Absolute Z-Score)
Number of region pairs tested: 14365
Z-score threshold for significance: |z| >= 2.00
Limiting to top 20 results by absolute z-score.

The following region pairs exceed the threshold:

  - Thal_LGN_L-VTA_L: z = 10.257
  - Thal_PuL_L-Red_N_R: z = 9.525
  - Cerebelum_Crus2_R-Thal_PuL_L: z = 8.856
  - Hippocampus_R-VTA_L: z = 8.632
  - Occipital_Inf_L-Thal_PuL_L: z = 8.435
  - Rectus_L-Thal_MDm_L: z = -7.800
  - Frontal_Sup_Medial_R-Red_N_R: z = 7.480
  - Vermis_4_5-Thal_MDm_L: z = -7.438
  - Cingulate_Post_L-Thal_Re_L: z = 7.349
  - Thal_PuA_L-Thal_PuL_L: z = 6.989
  - Hippocampus_L-Cerebelum_Crus2_R: z = 6.856
  - ParaHippocampal_R-VTA_L: z = 6.816
  - Temporal_Sup_L-Thal_MDm_L: z = -6.804
  - Precentral_L-SN_pc_R: z = 6.765
  - SupraMarginal_L-Thal_PuM_R: z = 6.756
  - Hippocampus_L-Red_N_R: z = 6.747
  - Thal_MDm_L-ACC_sub_L: z = -6.739
  - SupraMarginal_L-Thal_MDm_L: z = -6.668
  - Vermis_1_2-Thal_VPL_R: z = 6.659
  - 

In [52]:
# Display the merged frame (fMRI-derived columns plus the joined metadata columns).
# NOTE(review): every row carries full matrices; merged_df.head() would keep the output small.
merged_df

Unnamed: 0,index,group,bold_arr,corr_matrix,id_odd_or_even,baseline,z_score_matrix,all_pairs,significant_pairs,number_of_sig_pairs,...,31-0.0,21022-0.0,all_diagnoses,mental_and_neural_diagnoses,has_any_diagnosis,has_mental_or_neural_diagnosis,has_PD,has_AD,num_diagnoses,report
0,1080638,PD,"[[4939.057823152287, 10958.907044729725, 4983....","[[0.9999999999999999, 0.1857165690564201, 0.25...",True,False,"[[0.0, -2.339271443397375, 0.2219054315262979,...",{'Precentral_L-Precentral_R': -2.3392714433973...,{},0,...,1,64,"[G20, I609, R418]",[G20],True,True,True,False,3,Significantly Different Functional Connectivit...
1,3912201,PD,"[[2330.1448758509573, 11109.145230464348, 1853...","[[0.9999999999999998, 0.563850224587867, 0.550...",False,False,"[[0.0, 0.02406390367416869, 1.547277344451915,...",{'Precentral_L-Precentral_R': 0.02406390367416...,{},0,...,1,61,[G20],[G20],True,True,True,False,1,Significantly Different Functional Connectivit...
2,2624428,PD,"[[3387.5494932498164, 10085.910710540476, 3114...","[[0.9999999999999998, 0.8958240969983267, 0.74...",True,False,"[[0.0, 2.0989006062395412, 2.4432882105716853,...",{'Precentral_L-Precentral_R': 2.09890060623954...,"{'Frontal_Mid_2_R-VTA_L': 11.577292763518429, ...",8,...,1,68,"[D180, G20, H251, I802, K400, K409, R31, R33, ...",[G20],True,True,True,False,11,Significantly Different Functional Connectivit...
3,1013014,control,"[[3195.2888963562787, 14138.177237238591, 4228...","[[1.0, 0.4757426072404447, 0.17334303960399244...",True,True,"[[0.0, -0.5266087047472209, -0.166622547254579...",{'Precentral_L-Precentral_R': -0.5266087047472...,{},0,...,1,48,[],[],False,False,False,False,0,Significantly Different Functional Connectivit...
4,1013317,control,"[[3055.4551621744463, 13380.706479922972, 3604...","[[1.0, 0.8552293538309268, 0.5479695519711071,...",False,False,"[[0.0, 1.845183461443292, 1.536225236232305, 2...",{'Precentral_L-Precentral_R': 1.84518346144329...,{'Thal_LGN_L-VTA_L': 10.256517534940155},1,...,1,55,[],[],False,False,False,False,0,Significantly Different Functional Connectivit...
5,1005844,control,"[[5138.383032736137, 12961.299299426137, 4429....","[[1.0, 0.636907680827599, 0.07168783367147342,...",True,True,"[[0.0, 0.4806730051724932, -0.6286916651296661...",{'Precentral_L-Precentral_R': 0.48067300517249...,{},0,...,1,52,[],[],False,False,False,False,0,Significantly Different Functional Connectivit...
6,1021171,control,"[[2250.007671761837, 11141.483296490675, 2933....","[[0.9999999999999999, 0.5975265005315571, 0.53...",False,False,"[[0.0, 0.2345406283222315, 1.4990994054878704,...",{'Precentral_L-Precentral_R': 0.23454062832223...,"{'Cuneus_R-Thal_VPL_R': -10.63652602955795, 'P...",3,...,1,56,[],[],False,False,False,False,0,Significantly Different Functional Connectivit...
7,1004075,control,"[[1815.7181810868383, 10033.757142777838, 1774...","[[1.0, 0.45769168553165185, -0.032157166489048...",False,False,"[[0.0, -0.6394269654271763, -1.100714393132040...",{'Precentral_L-Precentral_R': -0.6394269654271...,{'Supp_Motor_Area_L-Paracentral_Lobule_R': -11...,1,...,0,41,[],[],False,False,False,False,0,Significantly Different Functional Connectivit...
8,1020838,control,"[[143.7761953903475, 6895.317216073751, 47.464...","[[1.0, 0.22323539304475054, 0.1248864168779933...",True,True,"[[0.0, -2.1047787934703095, -0.386879923281848...",{'Precentral_L-Precentral_R': -2.1047787934703...,{},0,...,0,58,[],[],False,False,False,False,0,Significantly Different Functional Connectivit...
9,1018093,control,"[[1222.4892274756291, 11833.579472660418, 2501...","[[0.9999999999999998, 0.18114942314698107, -0....",False,False,"[[0.0, -2.3678161053313684, -1.298724437662137...",{'Precentral_L-Precentral_R': -2.3678161053313...,{},0,...,0,59,[],[],False,False,False,False,0,Significantly Different Functional Connectivit...
