In [None]:
import sys
import os
from pathlib import Path

# For Jupyter or interactive use — use current working directory as script base
notebook_path = Path().resolve()

# Assume notebook is in a subfolder of the repo — go up one level
file_dir = notebook_path.parent

# Set working directory to the repo root
os.chdir(file_dir)
print("Working directory set to:", Path.cwd())

sys.path.append(str(file_dir))

import pandas as pd
import numpy as np
import nibabel as nib
from scipy.stats.mstats import pearsonr
from neuromaps.stats import compare_images
from neuromaps.nulls.spins import get_parcel_centroids
from netneurotools import  stats  

data_dir = f'{file_dir}/data'
demographics = pd.read_csv(f'{data_dir}/demographics_v2.csv')


In [None]:
# Initialize an empty dictionary to store MIND data for each subject
MIND_dict = {}

# Convert the 'subject' column in demographics dataframe to a list
subs = demographics.subject.to_list()

# Iterate through each subject to process their MIND data
for i, subject in enumerate(subs):
    print(subject)  # Print the current subject ID
    
    # Read the subject's MIND data CSV, rename the column for the region, and set the region as the index
    temp = pd.read_csv(f'{file_dir}/MIND_Network/MIND_output/{subject}_MIND_sa_vol_mc_gc_sd_ct_ratio.csv').rename(columns={'Unnamed: 0':'region'}).set_index('region')
    
    # Add the processed MIND data to the MIND_dict dictionary using subject ID as the key
    MIND_dict[subject] = temp
    
    # For the first subject, set 'avg_MIND' as the MIND data; otherwise, accumulate the MIND data
    if i == 0:
        avg_MIND = temp
    else:
        avg_MIND = avg_MIND + temp

# After processing all subjects, calculate the average MIND data
avg_MIND = avg_MIND / len(subs)

# Get the regions (columns) from one of the subject's MIND data
regions = MIND_dict[subject].columns

In [4]:
# Get unique D99 regions except for region 0 and 70, since these are excluded from MINDs
D99 = pd.DataFrame({'regions':np.unique(nib.load(f'{data_dir}/gifti/CIVET_macaque-alpha-0.2/D99_atlas_rsl_sym_left.label.gii').agg_data())[1:]})
D99 = D99[D99.regions !=70]

# Create lists of region names for the left and right hemisphere regions by appending '_l' and '_r' to each region name in D99
ll = [str(x)+'_l' for x in D99.regions]
rr = [str(x)+'_r' for x in D99.regions]

# Extract the average MIND data for the left and right hemisphere regions
np.fill_diagonal(avg_MIND.values, np.nan)

# Calculate  total similarity strength per region
similarity_strength_both_hemis = avg_MIND.mean(axis = 1)

# average left and right hemisphere vectors from total similarity strength
L = similarity_strength_both_hemis.loc[ll]
R = similarity_strength_both_hemis.loc[rr]
L.index = L.index.str.replace('_l', '').astype(int)
R.index = R.index.str.replace('_r', '').astype(int)

# Calculate the total similarity strength for each region by averaging the left and right hemisphere values
similarity_strength =(L+R)/2

# Save the total similarity strength to a CSV file
pd.DataFrame(similarity_strength).rename(columns={0:'total_similarity_strength'}).to_csv(f'{file_dir}/MIND_Network/total_similarity_strength.csv')

similarity_strength

region
2      0.035900
3      0.041058
5      0.036988
7      0.040741
8      0.041991
         ...   
188    0.041251
194    0.047327
195    0.033406
218    0.042458
224    0.040663
Length: 140, dtype: float64

In [9]:
# Load the cell density fractions dataset from a CSV file into a pandas DataFrame
cell_dataset = pd.read_csv(f'{data_dir}/d99_cell_abundance.csv')

# Filter out rows where the 'D99' column has a value of 70 and set the 'D99' column as the index of the DataFrame
cell_dataset = cell_dataset[cell_dataset.D99 != 70].set_index('D99')

# Display the resulting cell dataset
cell_dataset.sort_index(inplace=True)
cell_dataset

Unnamed: 0_level_0,ASC,EC,MG,OLG,OPC,VLMC,L2,L2_3,L2_3_4,L3_4,...,L4_5_6,L5_6,L6,LAMP5,PVALB,PV_CHC,RELN,SST,VIP,VIP_RELN
D99,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.052783,0.000181,0.002512,0.067612,0.008347,0.009958,0.047569,0.109516,0.036827,0.118639,...,0.083930,0.062244,0.064382,0.009222,0.035309,0.002582,0.018468,0.046315,0.013118,0.010751
3,0.045098,0.000107,0.001591,0.061879,0.007719,0.006665,0.061916,0.138393,0.033990,0.115082,...,0.070874,0.049222,0.052464,0.009556,0.032591,0.002171,0.019746,0.043921,0.013471,0.011438
5,0.051572,0.000705,0.003320,0.027997,0.011066,0.014567,0.072602,0.133103,0.046231,0.093426,...,0.079994,0.043355,0.053454,0.008891,0.030526,0.002602,0.023068,0.047490,0.014298,0.010179
7,0.087579,0.000366,0.010368,0.085415,0.016047,0.005630,0.091148,0.059229,0.038459,0.051624,...,0.111480,0.073866,0.117792,0.010016,0.032020,0.001713,0.018565,0.047992,0.015681,0.010711
8,0.048275,0.000198,0.004144,0.046643,0.009347,0.005866,0.053199,0.134919,0.044813,0.110169,...,0.074724,0.048506,0.052947,0.009187,0.031370,0.002421,0.017829,0.047186,0.013824,0.009337
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,0.064968,0.000233,0.004107,0.023744,0.014659,0.017468,0.088282,0.090767,0.045330,0.087510,...,0.103105,0.049505,0.063345,0.010056,0.040290,0.002108,0.026563,0.048968,0.013514,0.010166
194,0.089944,0.000636,0.011270,0.037042,0.024420,0.072637,0.200629,0.057117,0.059117,0.040316,...,0.082089,0.027984,0.055156,0.014710,0.029227,0.003916,0.032857,0.042254,0.018701,0.013006
195,0.074213,0.000747,0.003882,0.056626,0.011432,0.011493,0.080541,0.094199,0.048986,0.068385,...,0.087086,0.054409,0.056750,0.010365,0.035967,0.001970,0.026494,0.051678,0.014104,0.016219
218,0.066829,0.000035,0.004285,0.061624,0.013017,0.002478,0.086453,0.131649,0.046973,0.098510,...,0.073834,0.049153,0.050841,0.008706,0.034883,0.001681,0.021152,0.047520,0.011444,0.009496


### Univariate Cell Correlations

In [7]:


# Define the file path to the left hemisphere parcellation (brain atlas)
parcellation_left = f'{data_dir}/gifti/CIVET_macaque-alpha-0.2/D99_atlas_rsl_sym_left.label.gii'
parcellation_right = f'{data_dir}/gifti/CIVET_macaque-alpha-0.2/D99_atlas_rsl_sym_right.label.gii'

# Load the parcellation file, which contains labeled regions of interest (ROIs) in the left hemisphere
new_parc = nib.load(parcellation_left).agg_data()

# Define file paths to the left and right hemisphere spherical surface files
lhsphere = f'{data_dir}/gifti/CIVET_macaque-alpha-0.2/sphere_left_iso.surf.gii'
rhsphere = f'{data_dir}/gifti/CIVET_macaque-alpha-0.2/sphere_right_iso.surf.gii'

# Compute the centroids (geometric centers) of the brain parcels for both hemispheres
# - `lhsphere` and `rhsphere` define the spherical representations of the left and right hemispheres.
# - The `method='geodesic'` argument specifies that centroids should be calculated based on geodesic distances.
centroids, hemiid = get_parcel_centroids(
    [lhsphere, rhsphere],  # Spherical surface representations of both hemispheres
    parcellation=[parcellation_left, parcellation_right],  
    method='geodesic'  # Compute centroids using geodesic distances
)

# 0 parcel is automatically excluded. See here: https://github.com/netneurolab/neuromaps/blob/main/neuromaps/nulls/spins.py#L50


In [None]:

# Create a DataFrame to store the centroid coordinates for each brain region
# - `region`: Contains unique parcel (region) labels, excluding the first value (often background or unassigned label)
# - `centroid`: Stores (x, y, z) coordinates for the first 141 centroids
parcel_centroids_df = pd.DataFrame({
    'region': np.unique(new_parc)[1:],  # Exclude background label (assumed to be first entry)
    'centroid_l': [(x, y, z) for (x, y, z) in centroids[:141, :]],
    'centroid_r': [(x, y, z) for (x, y, z) in centroids[141:, :]]})

# Filter `parcel_centroids_df` to include only regions present in `aligned_data.index`
# - Ensures that only relevant brain regions (those available in `aligned_data`) are kept
# This will remove region 70, which is not included in MINDs
parcel_centroids_df = parcel_centroids_df[parcel_centroids_df.region.isin(cell_dataset.index)]

# centroids are aligned based on left and right hemisphere regions after filtering
display(parcel_centroids_df.head(5))



Unnamed: 0,region,centroid_l,centroid_r
0,2,"(0.792261, 0.592609, 0.14539)","(-0.792261, 0.592609, 0.14539)"
1,3,"(0.17709, 0.897491, -0.403916)","(-0.17709, 0.897491, -0.403916)"
2,5,"(0.51799, 0.690912, -0.504309)","(-0.51799, 0.690912, -0.504309)"
3,7,"(-0.320857, 0.732878, -0.599951)","(0.320857, 0.732878, -0.599951)"
4,8,"(0.399609, 0.710326, -0.579439)","(-0.399609, 0.710326, -0.579439)"


In [None]:
# Adjust hemiid to match number of regions left after filtering
hemiid = [0]*140 + [1]*140 

# Convert the centroid tuples back into a NumPy array for use in spin tests
# - Each row represents the (x, y, z) coordinates of a centroid for a parcel/brain region
cen_l = np.array([np.array([x, y, z]) for x, y, z in parcel_centroids_df.centroid_l])
cen_r = np.array([np.array([x, y, z]) for x, y, z in parcel_centroids_df.centroid_r])


# - Ensures that the array contains centroids for both hemispheres
cens = np.concatenate([cen_l, cen_r])

# Generate spin-based null distributions using the Hungarian method
# - `gen_spinsamples()` creates spatially constrained permutations of data
# - `centroids`: Provides the spatial locations of regions
# - `hemiid`: Ensures rotations respect hemispheric constraints (left vs. right)
# - `n_rotate = 5000`: Specifies the number of rotations (permutations) to generate
# - `method='hungarian'`: Uses the Hungarian algorithm to optimize rotations
spins = stats.gen_spinsamples(cens, hemiid, n_rotate=5000, method='hungarian', seed=27)

In [None]:
# Read and process MS MixedLM regionwise results, dropping the unnecessary column and setting 'region' as the index
age_effects = pd.read_csv(f'{file_dir}/Mixed_Effects_Models/regionwise_age_effects_MixedLM.csv') \
    .drop('Unnamed: 0', axis=1).set_index('region')

# Prepare the 'ua_df' dataframe by copying the 'cell_dataset', renaming 'D99' to 'region', and setting 'region' as the index
ua_df = cell_dataset.copy().reset_index().rename(columns={'D99': 'region'}).set_index('region')

# Add columns for 'age_effects', and 'similarity_strength' to 'ua_df' to align all data by region ID
ua_df['age_effects'] = age_effects['t_value']
ua_df['similarity_strength'] = similarity_strength


Unnamed: 0_level_0,ASC,EC,MG,OLG,OPC,VLMC,L2,L2_3,L2_3_4,L3_4,...,L6,LAMP5,PVALB,PV_CHC,RELN,SST,VIP,VIP_RELN,age_effects,similarity_strength
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,5278.325964,18.075779,251.224446,6761.221388,834.675989,995.809783,4756.866855,10951.553780,3682.748156,11863.853313,...,6438.177027,922.180149,3530.884979,258.190866,1846.827700,4631.538372,1311.753585,1075.126790,-9.800735,0.035900
3,4509.771664,10.696771,159.122329,6187.902606,771.877549,666.460473,6191.564020,13839.344739,3398.986054,11508.152829,...,5246.360434,955.569268,3259.121739,217.084703,1974.603038,4392.061549,1347.114991,1143.838558,2.002599,0.041058
5,5157.211016,70.473636,332.000864,2799.653176,1106.630399,1456.732092,7260.175052,13310.293612,4623.085869,9342.578483,...,5345.391204,889.073629,3052.636092,260.163668,2306.792973,4749.023322,1429.783063,1017.893435,-7.145680,0.036988
7,8757.939734,36.591316,1036.753944,8541.527797,1604.687016,562.953179,9114.813504,5922.869778,3845.868363,5162.402268,...,11779.197529,1001.645673,3201.952838,171.307791,1856.540106,4799.160377,1568.063003,1071.078255,-1.977216,0.040741
8,4827.494387,19.755874,414.384254,4664.344169,934.703504,586.569133,5319.850414,13491.905353,4481.262901,11016.892823,...,5294.721363,918.722250,3136.953012,242.079904,1782.869483,4718.587874,1382.367105,933.685452,-7.254415,0.041991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,6496.837928,23.255814,410.692989,2374.395627,1465.934796,1746.846876,8828.230708,9076.678254,4532.996059,8750.950650,...,6334.459522,1005.633423,4029.048216,210.803113,2656.283992,4896.819676,1351.383776,1016.624192,-3.157773,0.041251
194,8994.352674,63.572791,1126.972201,3704.166086,2441.970051,7263.693790,20062.893945,5711.671446,5911.730934,4031.559088,...,5515.599146,1470.994325,2922.738735,391.646701,3285.740847,4225.378035,1870.129609,1300.623322,-1.634197,0.047327
195,7421.327515,74.705530,388.217297,5662.626851,1143.174033,1149.276690,8054.070522,9419.945146,4898.631808,6838.490589,...,5675.010750,1036.453491,3596.689600,197.043498,2649.394040,5167.829508,1410.422964,1621.920141,1.322742,0.033406
218,6682.867089,3.466685,428.467938,6162.354814,1301.687554,247.796252,8645.256779,13164.881648,4697.309985,9851.024931,...,5084.132326,870.632783,3488.278689,168.062494,2115.191768,4752.021555,1144.429174,949.630162,-5.382336,0.042458


In [None]:
# Extract the first 140 rows (for LH only) of the spins array to create null maps
# spin tests are performed on each hemisphere symmetrically, so this is okay
null_maps = spins[:140,:]

ua_df = ua_df.sort_index()
# This reorders each measure for each permutation in null_maps
null_maps_tss = np.array([[ua_df['similarity_strength'].to_list()[idx] for idx in row] for row in null_maps])
null_maps_ae = np.array([[ua_df['age_effects'].to_list()[idx] for idx in row] for row in null_maps])

# Initialize a dictionary to store results for each cell type and its associated correlations
ua = {'cell_type':[], 'age_effects_r': [], 'age_effects_p':[],'similarity_strength_r':[], 'similarity_strength_p':[]}

# Loop through each cell type in the dataset
for cell in cell_dataset.columns:
    ua['cell_type'] += [cell]  # Store the current cell type
    
    # Compare the cell data to age_effects using Pearson correlation and null maps for significance testing
    p = compare_images(ua_df[cell].to_list(), ua_df['age_effects'].to_list(), metric='pearsonr', nulls=null_maps_ae)
    ua['age_effects_r'] += [p[0]]  # Store Pearson correlation coefficient
    ua['age_effects_p'] += [p[1]]  # Store p-value for the correlation
    
    # Compare the cell data to similarity_strength using Pearson correlation and null maps for significance testing
    r = compare_images(ua_df[cell].to_list(), ua_df['similarity_strength'].to_list(), metric='pearsonr', nulls=null_maps_tss)
    ua['similarity_strength_r'] += [r[0]]  # Store Pearson correlation coefficient
    ua['similarity_strength_p'] += [r[1]]  # Store p-value for the correlation

# Convert the results dictionary to a DataFrame
ua = pd.DataFrame(ua)

# Sort the DataFrame by age_effects (Pearson correlation coefficient with age_effects)
ua = ua.sort_values('age_effects_r')


ua

Unnamed: 0,cell_type,age_effects_r,age_effects_p,similarity_strength_r,similarity_strength_p
18,PV_CHC,-0.388342,0.006799,-0.075189,0.484303
19,RELN,-0.229154,0.096981,-0.160015,0.124575
4,OPC,-0.170974,0.233953,-0.044199,0.671866
0,ASC,-0.140397,0.340932,-0.054561,0.59768
21,VIP,-0.135943,0.359528,0.004051,0.967407
14,L5_6,-0.127617,0.393521,-0.238555,0.025795
20,SST,-0.114627,0.45131,-0.089415,0.402719
16,LAMP5,-0.093863,0.55009,-0.002376,0.982204
12,L4_5,-0.085055,0.591682,-0.277187,0.011398
2,MG,-0.073824,0.643871,-0.011596,0.916217


In [14]:
# Display the sorted DataFrame
ua.to_csv(f'{file_dir}/Univariate_Associations/univariate_results.csv')

### Supplementary -  Left and Right hemisphere MINDs are highly correlated.

In [None]:
# Supplementary -  left and right are significantly correlated (ll - left hemisphere regions, rr - right hemisphere regions)
AVG_LEFT = avg_MIND.loc[ll, ll]
AVG_RIGHT = avg_MIND.loc[rr, rr]

# Clean up the column and index names by removing the '_l' and '_r' suffixes, converting them to integers
AVG_LEFT.columns = AVG_LEFT.columns.str.replace('_l', '').astype(int)
AVG_LEFT.index = AVG_LEFT.index.str.replace('_l', '').astype(int)

AVG_RIGHT.columns = AVG_RIGHT.columns.str.replace('_r', '').astype(int)
AVG_RIGHT.index = AVG_RIGHT.index.str.replace('_r', '').astype(int)

# Sort rows and columns to align regions between hemispheres
AVG_LEFT = AVG_LEFT.sort_index(axis=0).sort_index(axis=1)
AVG_RIGHT = AVG_RIGHT.sort_index(axis=0).sort_index(axis=1)


# Set the diagonal values to NaN as they represent self-connections and are not needed for this analysis

# Extract the lower triangular values below the diagonal for both hemispheres
lower_tri_left = AVG_LEFT.where(np.tril(np.ones(AVG_LEFT.shape), k=-1).astype(bool)).stack()
lower_tri_right = AVG_RIGHT.where(np.tril(np.ones(AVG_RIGHT.shape), k=-1).astype(bool)).stack()

# Flatten the lower triangular values
flattened_left = lower_tri_left.values
flattened_right = lower_tri_right.values

print(pearsonr(flattened_left, flattened_right))



PearsonRResult(statistic=np.float64(0.9821666766041887), pvalue=np.float64(0.0))
