In [1]:
import sys
import os
from pathlib import Path

# Path setup: locate the repo root relative to the notebook and make it the
# working directory.
#
# Resolve the notebook's directory only once per kernel session. This cell
# changes the working directory below, so re-resolving Path() on a re-run would
# climb one more level up the tree every time the cell is executed.
if "notebook_path" not in globals():
    notebook_path = Path().resolve()

# Assume notebook is in a subfolder of the repo — go up one level
file_dir = notebook_path.parent

# Set working directory to the repo root
os.chdir(file_dir)
print("Working directory set to:", Path.cwd())

# Put the repo root on the import path, without stacking up duplicate
# entries when the cell is re-run
if str(file_dir) not in sys.path:
    sys.path.append(str(file_dir))

import pandas as pd
import numpy as np

# Input tables are read from <repo root>/data
data_dir = file_dir / 'data'

Working directory set to: /Users/melinatsotras/Desktop/submission


### Load MINDs and create average MIND

In [2]:
# Load the demographics table; other cells read its 'subject' and 'age' columns
demographics_csv = f'{data_dir}/demographics_v2.csv'
demographics = pd.read_csv(demographics_csv)


In [None]:
# Load one MIND matrix per subject listed in the demographics table,
# keyed by subject ID.
subs = demographics.subject.to_list()
MIND_dict = {}
for subject in subs:
    mind_csv = f'{file_dir}/MIND_Network/MIND_output/{subject}_MIND_sa_vol_mc_gc_sd_ct_ratio.csv'
    subject_matrix = pd.read_csv(mind_csv)

    # The CSV's first (unnamed) column holds the row labels; use it as the index
    subject_matrix = subject_matrix.rename(columns={'Unnamed: 0': 'region'})
    MIND_dict[subject] = subject_matrix.set_index('region')

# Region labels are taken from the columns of the last subject loaded
# NOTE(review): assumes every subject's matrix has the same columns — confirm
regions = MIND_dict[subject].columns

### Functional Network Dict

In [4]:


# Build the list of hemisphere-independent region IDs: take the first half of
# the region labels and strip their last two characters (the _l / _r suffix).
# NOTE(review): assumes the first half of `regions` covers one hemisphere — confirm
n_per_hemisphere = len(regions) // 2
region_ids = [label[:-2] for label in regions[:n_per_hemisphere]]

# Read the D99-to-Yeo network lookup table and keep only the columns used here
lookup_path = data_dir / "d99_to_yeo_network_labels.csv"
yeo_lookup = pd.read_csv(lookup_path).drop(columns=["Unnamed: 0"])
yeo_lookup = yeo_lookup[["D99", "D99_abbr", "yeo_label"]]

# Keep only lookup rows whose D99 ID (compared as a string) is in the dataset
in_dataset = yeo_lookup["D99"].astype(str).isin(region_ids)
d99_to_yeo_df = yeo_lookup[in_dataset]


### Regressions

In [None]:
# Mean similarity strength per region (rows) and subject (columns).
# Initialized with zeros; every subject column is overwritten in the loop below.
similarity_strength_df = pd.DataFrame(0, columns=MIND_dict.keys(), index=regions)

# Calculate mean strength per region, excluding self-connections, for each subject
for subject_id, mind_matrix in MIND_dict.items():
    # Work on a private float copy of the subject's matrix. DataFrame.to_numpy()
    # may return a view of the frame's data, in which case filling the diagonal
    # would either write NaNs into the matrices stored in MIND_dict or fail on a
    # read-only array (pandas Copy-on-Write).
    mind_array = mind_matrix.to_numpy(dtype=float, copy=True)

    # Exclude self-connections by setting diagonal elements to NaN
    np.fill_diagonal(mind_array, np.nan)

    # Reorder rows to this subject's index so the positional assignment below
    # lines up with the matrix rows
    similarity_strength_df = similarity_strength_df.loc[mind_matrix.index]

    # Compute the mean strength per region (ignoring NaNs) and assign to the subject's column
    similarity_strength_df[subject_id] = np.nanmean(mind_array, axis=1)

# Sort subjects by age according to the demographics DataFrame
sorted_subjects = demographics.sort_values('age', ascending=True)['subject'].tolist()

# Reorder columns in similarity_strength_df to match sorted subjects
similarity_strength_df = similarity_strength_df[sorted_subjects]
similarity_strength_df.head()

### Create DataFrame to Use for LME in R

In [6]:
# Reshape the region-by-subject strength table into long format: one row per
# (region, hemisphere, subject), then attach demographics and Yeo labels.

subject_ids = list(similarity_strength_df.columns)
n_subjects = len(subject_ids)  # Number of subjects

region_list, subject_list, hemi_list, value_list = [], [], [], []

# For every region, emit all left-hemisphere rows first, then all right-hemisphere rows
for region in region_ids:
    hemi_rows = {'left': f'{region}_l', 'right': f'{region}_r'}
    for hemi_name, row_label in hemi_rows.items():
        # One value per subject for this region/hemisphere
        hemi_values = similarity_strength_df.loc[row_label].to_numpy().flatten()

        region_list += [int(region)] * n_subjects
        subject_list += subject_ids
        hemi_list += [hemi_name] * n_subjects
        value_list.extend(hemi_values)

# Assemble the tidy table
data = pd.DataFrame({
    'subject': subject_list,
    'value': value_list,
    'hemi': hemi_list,
    'region': region_list
})

# Attach the demographics columns to every row via the subject ID
data = data.merge(demographics, on='subject', how='left')

# Attach the Yeo network label for each region (D99 ID is the join key)
yeo_labels = d99_to_yeo_df[['D99', 'yeo_label']].rename(columns={'D99': 'region'})
data = data.merge(yeo_labels, on='region', how='left')

# Output location for the resulting dataset (the write itself is disabled)
file_name = f"{file_dir}/MIND_Network/similarity_strength_subject_data.csv"
#data.to_csv(file_name)

# Optional: demean values by Yeo label
#data['value_demeaned'] = data['value'] - data.groupby('yeo_label')['value'].transform('mean')
