In [1]:
import sys
import os
from pathlib import Path

# For Jupyter or interactive use — use current working directory as script base
notebook_path = Path().resolve()

# Assume notebook is in a subfolder of the repo — go up one level
file_dir = notebook_path.parent

# Set working directory to the repo root
os.chdir(file_dir)
print("Working directory set to:", Path.cwd())

sys.path.append(str(file_dir))

import pandas as pd
import numpy as np

data_dir =file_dir / 'data'

Working directory set to: /Users/melinatsotras/Desktop/submission


### Load MINDs and create average MIND

In [2]:
demographics = pd.read_csv(f'{data_dir}/demographics_v2.csv')


In [3]:
MIND_dict = {}
subs = demographics.subject.to_list()
for subject in subs:
    print(subject)
    temp = pd.read_csv(f'{file_dir}/MIND_Network/MIND_output/{subject}_MIND_sa_vol_mc_gc_sd_ct_ratio.csv').rename(columns={'Unnamed: 0':'region'}).set_index('region')

    MIND_dict[subject] = temp
    
regions = MIND_dict[subject].columns

33002
34884
35241
35253
35580
36657
37693
39437
39599
39834
40706
41083
41372
41645
41759
44444
49134
47979
48184
48233
49041
49050
49078
49112
49196
49225
42019
41807
41716
41854
41651
41547
41519
41463
41458
40033
39066
37972
36824
36464
36291
34847
34110
33111
32479
32287
37169
36831
36374
36422
36574
36352
35949
35671
35545
35306
34839
33825
33734
33278
33199
33882
32924
29949


### Functional Network Dict

In [4]:


# Derive unique region IDs by removing the hemisphere suffix (_l or _r)
region_ids = [region[:-2] for region in regions[:len(regions) // 2]]

# Load the D99-to-Yeo network lookup table
lookup_path = data_dir / "d99_to_yeo_network_labels.csv"
d99_to_yeo_df = pd.read_csv(lookup_path).drop(columns=["Unnamed: 0"])
d99_to_yeo_df = d99_to_yeo_df[["D99", "D99_abbr", "yeo_label"]]

# Filter to only region IDs present in your dataset
d99_to_yeo_df = d99_to_yeo_df[d99_to_yeo_df["D99"].astype(str).isin(region_ids)]


### Regressions

In [5]:
# Initialize a DataFrame to hold mean strength values for each region and subject
similarity_strength_df = pd.DataFrame(0, columns=MIND_dict.keys(), index=regions)

# Calculate mean strength per region, excluding self-connections, for each subject
for subject_id, mind_matrix in MIND_dict.items():
    # Convert subject's MIND matrix to NumPy array for processing
    mind_array = mind_matrix.to_numpy()

    # Exclude self-connections by setting diagonal elements to NaN
    np.fill_diagonal(mind_array, np.nan)

    # Align DataFrame index with the MIND matrix index
    similarity_strength_df = similarity_strength_df.loc[mind_matrix.index]

    # Compute the mean strength per region (ignoring NaNs) and assign to the subject's column
    similarity_strength_df[subject_id] = np.nanmean(mind_array, axis=1)

# Sort subjects by age according to the demographics DataFrame
sorted_subjects = demographics.sort_values('age', ascending=True)['subject'].tolist()

# Reorder columns in mean_strength_df to match sorted subjects
similarity_strength_df = similarity_strength_df[sorted_subjects]
similarity_strength_df.head()

Unnamed: 0_level_0,49134,49112,48233,49050,49041,49225,47979,49196,48184,49078,...,34884,33825,33734,35253,33278,33199,33002,33882,32924,29949
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
98_l,0.037579,0.045268,0.04354,0.044835,0.045403,0.042909,0.047107,0.044726,0.046429,0.041821,...,0.035206,0.038683,0.037095,0.03436,0.038332,0.037964,0.03577,0.034819,0.036845,0.037424
131_l,0.043345,0.044077,0.043966,0.045701,0.042725,0.04584,0.046356,0.046835,0.045068,0.044611,...,0.042384,0.044377,0.045502,0.046089,0.043932,0.04304,0.041062,0.041368,0.042839,0.04252
151_l,0.040349,0.041796,0.041397,0.045166,0.041955,0.040565,0.044,0.048063,0.043198,0.04017,...,0.040865,0.042362,0.040478,0.041041,0.042135,0.044056,0.039866,0.042617,0.042806,0.040674
20_l,0.044335,0.042298,0.046145,0.045524,0.045982,0.044193,0.047768,0.047463,0.048421,0.042623,...,0.045119,0.048712,0.046159,0.044038,0.044999,0.043182,0.043712,0.044607,0.04327,0.047394
129_l,0.044357,0.042913,0.044928,0.041707,0.042515,0.04209,0.043721,0.041919,0.043459,0.04073,...,0.03932,0.042705,0.03957,0.039322,0.038778,0.041162,0.03985,0.039004,0.040045,0.040081


### Create Dataframe to Used for lme in R

In [6]:
# Fetch demographic data for merging

n_subjects = len(similarity_strength_df.columns)  # Number of subjects

region_list = []
subject_list = []
hemi_list = []
value_list = []

# Iterate over unique regions and gather hemisphere-specific values
for region in region_ids:
    # Add region id twice per subject (left and right hemisphere)
    region_list.extend([int(region)] * 2 * n_subjects)
    
    # Repeat subjects twice (once for each hemisphere)
    subject_list.extend(list(similarity_strength_df.columns) * 2)
    
    # Hemisphere labels: left for first n_subjects, right for next n_subjects
    hemi_list.extend(['left'] * n_subjects + ['right'] * n_subjects)
    
    # Extract left and right hemisphere values for the current region
    left_values = similarity_strength_df.loc[f'{region}_l'].to_numpy().flatten()
    right_values = similarity_strength_df.loc[f'{region}_r'].to_numpy().flatten()
    
    # Combine left and right hemisphere values
    value_list.extend(np.concatenate([left_values, right_values]))

# Create a tidy DataFrame suitable for analysis or modeling
data = pd.DataFrame({
    'subject': subject_list,
    'value': value_list,
    'hemi': hemi_list,
    'region': region_list
})

# Merge with demographics (age, sex)
data = data.merge(demographics, on='subject', how='left')

# Merge with Yeo network labels (make sure d99_to_yeo_df is loaded and contains these columns)
data = data.merge(
    d99_to_yeo_df[['D99', 'yeo_label']].rename(columns={'D99': 'region'}),
    on='region',
    how='left'
)

# save the resulting dataset
file_name = f"{file_dir}/MIND_Network/similarity_strength_subject_data.csv"
#data.to_csv(file_name)

# Optional: demean values by Yeo label
#data['value_demeaned'] = data['value'] - data.groupby('yeo_label')['value'].transform('mean')
