In [1]:
import sys
import os
from pathlib import Path

# Locate the repository root and make it the working directory.
#
# The notebook is expected to be started from a sub-folder of the repo, so the
# root is normally the parent of the current working directory. Because this
# cell calls os.chdir(), blindly taking `.parent` would climb one level higher
# on every re-run of the cell. To keep the cell idempotent we first check
# whether the current directory already looks like the repo root, i.e. it
# contains the top-level folders this notebook reads from.
notebook_path = Path().resolve()
repo_markers = ('data', 'Mixed_Effects_Models', 'MIND_Network')
already_at_root = all((notebook_path / marker).is_dir() for marker in repo_markers)
file_dir = notebook_path if already_at_root else notebook_path.parent

# Set working directory to the repo root
os.chdir(file_dir)
print("Working directory set to:", Path.cwd())

# Make repo-local modules importable without piling up duplicate entries
# when the cell is executed more than once.
if str(file_dir) not in sys.path:
    sys.path.append(str(file_dir))

import pandas as pd
import numpy as np
import nibabel as nib
from netneurotools import  stats 
from neuromaps.nulls.spins import get_parcel_centroids 

# Folder holding the input data, located directly under the repo root
data_dir = file_dir.joinpath('data')

Working directory set to: /Users/melinatsotras/Desktop/submission


In [None]:
# --- Inputs ---------------------------------------------------------------
# Region-wise age effects from the mixed-effects models. The 'Unnamed: 0'
# column (presumably a row index written along with the CSV) is discarded.
regional_age_effects = pd.read_csv(
    f'{file_dir}/Mixed_Effects_Models/regionwise_age_effects_MixedLM.csv'
)
regional_age_effects = regional_age_effects.drop(columns='Unnamed: 0')

# Cell abundance per region; the 'D99' column is renamed to 'region' and
# becomes the index so it can be matched against the other tables.
cell_abundance = pd.read_csv(f'{file_dir}/data/level_2_cell_abundance.csv')
cell_abundance = cell_abundance.rename(columns={'D99': 'region'}).set_index('region')

# Total similarity strength per region, indexed by region.
total_similarity_strength = pd.read_csv(
    f'{file_dir}/MIND_Network/total_similarity_strength.csv'
).set_index('region')

# --- Combine into one region-indexed table --------------------------------
# The right join keeps exactly the regions that have an age effect; the
# t-value is stored as 'age_effects'. The similarity column is aligned on the
# region index, and rows end up sorted by region label.
aligned_data = (
    cell_abundance
    .merge(regional_age_effects[['region', 't_value']], how='right', on='region')
    .rename(columns={'t_value': 'age_effects'})
    .set_index('region')
    .assign(total_similarity_strength=total_similarity_strength['total_similarity_strength'])
    .sort_index()
)

# --- Final aligned dataset ---
aligned_data



Unnamed: 0_level_0,ASC,EC,MG,OLG,OPC,VLMC,L2,L2_3,L2_3_4,L3_4,...,L6,LAMP5,PVALB,PV_CHC,RELN,SST,VIP,VIP_RELN,age_effects,total_similarity_strength
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,5278.325964,18.075779,251.224446,6761.221388,834.675989,995.809783,4756.866855,10951.553780,3682.748156,11863.853313,...,6438.177027,922.180149,3530.884979,258.190866,1846.827700,4631.538372,1311.753585,1075.126790,-9.800735,0.035900
3,4509.771664,10.696771,159.122329,6187.902606,771.877549,666.460473,6191.564020,13839.344739,3398.986054,11508.152829,...,5246.360434,955.569268,3259.121739,217.084703,1974.603038,4392.061549,1347.114991,1143.838558,2.002599,0.041058
5,5157.211016,70.473636,332.000864,2799.653176,1106.630399,1456.732092,7260.175052,13310.293612,4623.085869,9342.578483,...,5345.391204,889.073629,3052.636092,260.163668,2306.792973,4749.023322,1429.783063,1017.893435,-7.145680,0.036988
7,8757.939734,36.591316,1036.753944,8541.527797,1604.687016,562.953179,9114.813504,5922.869778,3845.868363,5162.402268,...,11779.197529,1001.645673,3201.952838,171.307791,1856.540106,4799.160377,1568.063003,1071.078255,-1.977216,0.040741
8,4827.494387,19.755874,414.384254,4664.344169,934.703504,586.569133,5319.850414,13491.905353,4481.262901,11016.892823,...,5294.721363,918.722250,3136.953012,242.079904,1782.869483,4718.587874,1382.367105,933.685452,-7.254415,0.041991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,6496.837928,23.255814,410.692989,2374.395627,1465.934796,1746.846876,8828.230708,9076.678254,4532.996059,8750.950650,...,6334.459522,1005.633423,4029.048216,210.803113,2656.283992,4896.819676,1351.383776,1016.624192,-3.157773,0.041251
194,8994.352674,63.572791,1126.972201,3704.166086,2441.970051,7263.693790,20062.893945,5711.671446,5911.730934,4031.559088,...,5515.599146,1470.994325,2922.738735,391.646701,3285.740847,4225.378035,1870.129609,1300.623322,-1.634197,0.047327
195,7421.327515,74.705530,388.217297,5662.626851,1143.174033,1149.276690,8054.070522,9419.945146,4898.631808,6838.490589,...,5675.010750,1036.453491,3596.689600,197.043498,2649.394040,5167.829508,1410.422964,1621.920141,1.322742,0.033406
218,6682.867089,3.466685,428.467938,6162.354814,1301.687554,247.796252,8645.256779,13164.881648,4697.309985,9851.024931,...,5084.132326,870.632783,3488.278689,168.062494,2115.191768,4752.021555,1144.429174,949.630162,-5.382336,0.042458


## Hungarian Nulls for CCA

In [None]:


# Spherical surface files, one per hemisphere
lhsphere = f'{data_dir}/gifti/CIVET_macaque-alpha-0.2/sphere_left_iso.surf.gii'
rhsphere = f'{data_dir}/gifti/CIVET_macaque-alpha-0.2/sphere_right_iso.surf.gii'

# Parcellation (atlas label) files, one per hemisphere
parcellation_left = f'{data_dir}/gifti/CIVET_macaque-alpha-0.2/D99_atlas_rsl_sym_left.label.gii'
parcellation_right = f'{data_dir}/gifti/CIVET_macaque-alpha-0.2/D99_atlas_rsl_sym_right.label.gii'

# Label data of the left-hemisphere parcellation; used later to recover the
# list of region labels.
new_parc = nib.load(parcellation_left).agg_data()

# Parcel centroids on the spheres for both hemispheres (left first, then
# right), computed with the geodesic method. `hemiid` is returned alongside
# and identifies the hemisphere each centroid belongs to.
# The 0 parcel is automatically excluded. See here:
# https://github.com/netneurolab/neuromaps/blob/main/neuromaps/nulls/spins.py#L50
centroids, hemiid = get_parcel_centroids(
    [lhsphere, rhsphere],
    parcellation=[parcellation_left, parcellation_right],
    method='geodesic',
)


In [None]:

# Region labels of the parcellation. get_parcel_centroids() skips the
# background label 0, so it is removed here too (by value, not by position).
# NOTE: labels are taken from the left-hemisphere file only; this assumes the
# right hemisphere carries the same labels in the same order — TODO confirm.
parcel_labels = np.unique(new_parc)
parcel_labels = parcel_labels[parcel_labels != 0]
n_parcels = len(parcel_labels)

# `centroids` stacks the left-hemisphere parcels first and the right-hemisphere
# parcels second. Derive the split point from the label count instead of
# hard-coding it, and fail loudly if the two do not agree.
if centroids.shape[0] != 2 * n_parcels:
    raise ValueError(
        f'Expected {2 * n_parcels} centroids (2 hemispheres x {n_parcels} parcels), '
        f'got {centroids.shape[0]}'
    )

# One row per region: its label and its (x, y, z) centroid in each hemisphere
parcel_centroids_df = pd.DataFrame({
    'region': parcel_labels,
    'centroid_l': list(map(tuple, centroids[:n_parcels])),
    'centroid_r': list(map(tuple, centroids[n_parcels:])),
})

# Keep only regions that are present in `aligned_data`
# (this removes region 70, which is not included in MINDs).
parcel_centroids_df = parcel_centroids_df[parcel_centroids_df.region.isin(aligned_data.index)]
n_kept = len(parcel_centroids_df)

# The spins generated below are applied to `aligned_data` by position, so the
# remaining parcels must match its rows one-to-one and in the same order.
if not np.array_equal(parcel_centroids_df.region.to_numpy(), aligned_data.index.to_numpy()):
    raise ValueError(
        'Regions kept from the parcellation do not match aligned_data.index '
        '(different set or order); spin nulls would be misaligned with the data'
    )

# Hemisphere ids for the filtered parcels: 0 = left block, 1 = right block
hemiid = [0] * n_kept + [1] * n_kept

# Centroid tuples back to (n_kept, 3) coordinate arrays
cen_l = np.array(parcel_centroids_df.centroid_l.tolist())
cen_r = np.array(parcel_centroids_df.centroid_r.tolist())

# Stack left- and right-hemisphere centroids in the order described by `hemiid`
cens = np.concatenate([cen_l, cen_r])

# Spin-based null resampling indices using the Hungarian method:
# - `cens`: spatial locations of the parcels
# - `hemiid`: hemisphere of each parcel (left vs. right)
# - `n_rotate=5000`: number of rotations (permutations) to generate
# - `seed=13`: fixed seed so the nulls are reproducible
spins = stats.gen_spinsamples(cens, hemiid, n_rotate=5000, method='hungarian', seed=13)

In [None]:

# Observed regional maps as plain arrays, in the row order of `aligned_data`
age_effects_npy = aligned_data['age_effects'].to_numpy()
total_similarity_strength_npy = aligned_data['total_similarity_strength'].to_numpy()

# We only use the spins of the left hemisphere due to the nature of the data
# (LH only). The left-hemisphere parcels occupy the first half of the rows.
# NOTE(review): this assumes the left-hemisphere rows only contain indices of
# left-hemisphere parcels, so they can index the LH-only arrays — confirm
# against the gen_spinsamples documentation.
lh_spins = spins[:len(spins)//2]

# The spins are applied by position: one spin row per data row is required,
# otherwise the nulls would be silently misaligned with the regions.
if lh_spins.shape[0] != len(aligned_data):
    raise ValueError(
        f'Spin rows ({lh_spins.shape[0]}) do not match the number of regions '
        f'in aligned_data ({len(aligned_data)})'
    )

# Make sure the output folder exists before writing
Path(f'{file_dir}/CCA/input').mkdir(parents=True, exist_ok=True)

# Spin the data by indexing with the spin samples; each column is one null map.
# NOTE: np.savetxt writes whitespace-delimited text by default even though the
# files are named .csv; the format is left unchanged here so that existing
# readers of these files keep working.
hungarian_spins_age_effects = age_effects_npy[lh_spins]
np.savetxt(f'{file_dir}/CCA/input/hungarian_5k_nulls_age_effects.csv', hungarian_spins_age_effects)

hungarian_spins_similarity_strength = total_similarity_strength_npy[lh_spins]
np.savetxt(f'{file_dir}/CCA/input/hungarian_5k_nulls_similarity_strength.csv', hungarian_spins_similarity_strength)