## Pipeline Script 


Inputs: Group CEST and nmap data output from pyGluCEST as well as demographic data from _________. 
Outputs: Compiled dataframes with GluCEST and nmap data. Trimmed based on number of people with sufficient data.

    Trimmed subject-wise dfs: e.g., cestmat (outpath + 'trimmed_cestmat' + dataset + atlas + '.csv')
    Long form dfs: e.g., long_df (outpath + 'longform_grpdf' + dataset + '_' + atlas + '.csv')
         Also have version with standard nmap values
    Mean dfs: e.g., grouped_df (outpath + 'means_' + dataset + '_' + atlas + '.csv')


### Import Packages

In [43]:
import os
import glob
import numpy as np
import pandas as pd
#import network_fcon as fc
import scipy as sp
from scipy.stats import pearsonr
from scipy.stats import linregress
import seaborn as sns
import matplotlib.pyplot as plt
import re
from nilearn.datasets import fetch_atlas_schaefer_2018

### Define paths and variables

In [44]:
# Set variables
dataset = 'longglucest_outputmeasures2'
atlas = 'atl-Cammoun2012_res-500'
nmaps = ["NMDA", "mGluR5", "GABA"]
maps = ["cest", "NMDA", "mGluR5", "GABA"]
normalize_cest = True

# Set paths
inpath = "/Users/pecsok/Desktop/ImageData/PMACS_remote/data/nmaps/" + dataset
outpath = "/Users/pecsok/Desktop/ImageData/PMACS_remote/data/nmaps/analyses/" + atlas
os.makedirs(os.path.join(outpath), exist_ok=True)

# Read in data
cestmat = pd.read_csv(inpath + "/all_subs_GluCEST_" + atlas + "_UNI.csv", sep=',')
NMDAmat = pd.read_csv(inpath + "/all_subs_NMDA_normalized_" + atlas + "_UNI.csv", sep=',')
mGluR5mat = pd.read_csv(inpath + "/all_subs_mGluR5_normalized_" + atlas + "_UNI.csv", sep=',')
GABAmat = pd.read_csv(inpath + "/all_subs_GABA_normalized_" + atlas + "_UNI.csv", sep=',')

# Set indices and correct column names
cestmat.set_index('Subject', inplace = True)
NMDAmat.set_index('Subject', inplace = True)
GABAmat.set_index('Subject', inplace = True)
mGluR5mat.set_index('Subject', inplace = True)
dfs = [cestmat, NMDAmat, mGluR5mat, GABAmat]

# Load in standardized nmap data for alternative approach.
receptor_df = pd.read_csv("/Users/pecsok/projects/Neuromaps/pecsok_pfns/neuromaps/results/receptor_data_scale1000_17.csv", sep=',')
receptor_df = pd.read_csv("/Users/pecsok/projects/Neuromaps/pecsok_pfns/neuromaps/results/receptor_data_cammoun2012_scale500.csv", sep=',')


In [89]:
#pd.set_option('display.max_rows', None)
#pd.set_option('display.max_columns', None)
#print(NMDAmat)

## Trim Data

In [45]:
# ID parcels with < 20 voxels* 
for i, col in enumerate(cestmat.columns):
    if 'NZcount' in col:
        # Set mean col to nan
        mean_col = cestmat.columns[i - 1]
        sigma_col = cestmat.columns[i + 1]
        cestmat[mean_col] = np.where(cestmat[col] < 20, np.nan, cestmat[mean_col])
        cestmat[sigma_col] = np.where(cestmat[col] < 20, np.nan, cestmat[sigma_col])
        cestmat[col] = np.where(cestmat[col] < 20, np.nan, cestmat[col])       
columns = cestmat.columns[cestmat.notnull().sum() > len(cestmat)*.75]
print(cestmat.shape)

# Trim all dfs based on column filter
cestmat= cestmat[columns]
NMDAmat= NMDAmat[columns]
GABAmat= GABAmat[columns]
mGluR5mat= mGluR5mat[columns]
print(cestmat.shape)

# ID subjects missing >65% of remaining GluCEST parcels
sparse_subjs = cestmat[cestmat.isna().sum(axis=1) > cestmat.shape[1] * 0.65].index

# Trim all dfs based on row filter
cestmat = cestmat.drop(index=sparse_subjs)
NMDAmat = NMDAmat.drop(index=sparse_subjs)
GABAmat = GABAmat.drop(index=sparse_subjs)
mGluR5mat = mGluR5mat.drop(index=sparse_subjs)
print(cestmat.shape)

#for df in dfs: Fix, put this back into a loop later
#    df = df[columns]
#    print(df.shape)

# Temporary: Remove mysterious zeros in nmap dataframes
dfs = [NMDAmat, mGluR5mat, GABAmat]
for i in range(len(dfs)):
    df = dfs[i]
    df.replace(0, np.nan, inplace=True)

# Save trimmed dfs
cestmat.to_csv(outpath + '/trimmed_cestmat' + dataset + atlas + '.csv', index=True)
NMDAmat.to_csv(outpath + '/trimmed_NMDAmat' + dataset + atlas + '.csv', index=True)
GABAmat.to_csv(outpath + '/trimmed_GABAmat' + dataset + atlas + '.csv', index=True)
mGluR5mat.to_csv(outpath + '/trimmed_mGluR5mat' + dataset + atlas + '.csv', index=True)

(182, 725)
(182, 167)
(176, 167)


In [46]:
cestmat

Unnamed: 0_level_0,Unnamed: 0,frontalpole_1R NZMean,frontalpole_1R NZcount,frontalpole_1R NZSigma,superiorfrontal_40R NZMean,superiorfrontal_40R NZcount,superiorfrontal_40R NZSigma,superiorfrontal_7R NZMean,superiorfrontal_7R NZcount,superiorfrontal_7R NZSigma,...,cuneus_3R NZMean,cuneus_3R NZcount,cuneus_3R NZSigma,cuneus_5R NZMean,cuneus_5R NZcount,cuneus_5R NZSigma,precuneus_15R NZMean,precuneus_15R NZcount,precuneus_15R NZSigma,group
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100522_12003,0,5.533474,32.0,2.188992,4.299698,49.0,2.442113,5.721321,45.0,2.229686,...,,,,,,,,,,TD/NC
100522_12371,1,5.459877,28.0,2.447069,5.136139,39.0,2.566857,6.545109,48.0,2.199931,...,,,,,,,,,,TD/NC
100522_12783,2,4.864008,31.0,1.728823,6.269497,51.0,2.130490,6.568427,62.0,1.743864,...,,,,,,,,,,TD/NC
102041_12037,3,7.210485,69.0,2.443378,7.443285,66.0,1.365854,7.582340,60.0,1.850772,...,7.258384,41.0,2.249775,8.277832,30.0,1.703806,,,,PRO/CHR
102041_12500,4,9.612453,71.0,2.522940,7.381907,63.0,1.860883,8.027369,56.0,1.492858,...,9.191903,42.0,1.569125,9.379838,34.0,1.157970,,,,PRO/CHR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96902_11903,177,8.020560,73.0,2.340148,9.015884,80.0,2.237241,8.667854,36.0,1.383506,...,11.154245,49.0,1.649121,12.150836,56.0,1.191361,10.632643,65.0,1.911237,TD/NC
96902_12440,178,5.263019,48.0,2.596715,6.919213,111.0,2.271600,8.576091,63.0,1.747012,...,,,,9.116145,28.0,2.211376,,,,TD/NC
96902_12788,179,6.758405,78.0,1.654970,6.837509,72.0,1.420031,6.936294,33.0,1.310823,...,,,,11.343265,40.0,1.563066,,,,TD/NC
98370_12558,180,7.516272,36.0,2.894000,5.205563,88.0,2.071508,5.207922,66.0,1.804130,...,5.980300,35.0,1.459202,7.319676,54.0,1.661533,7.258878,60.0,1.873145,TD/NC


## Normalize GluCEST

In [47]:
# Step 1: Select columns that contain 'NZMean'
if normalize_cest:
    nzmean_columns = [col for col in cestmat.columns if 'NZMean' in col]
    
    # Step 2: Calculate mean and std deviation for each subject (row-wise) across selected columns
    cestmat['Subject_Avg_NZMean'] = cestmat[nzmean_columns].mean(axis=1)
    cestmat['Subject_Std_NZMean'] = cestmat[nzmean_columns].std(axis=1)
    
    # Step 3: Calculate z-scores for all selected columns at once and store them in a new dataframe
    zscore_df = (cestmat[nzmean_columns].sub(cestmat['Subject_Avg_NZMean'], axis=0)
                 .div(cestmat['Subject_Std_NZMean'], axis=0))

    # Step 4: Concatenate the z-scores dataframe to the original cestmat dataframe
    cestmat = pd.concat([cestmat['group'], zscore_df], axis=1)
    cestmat.to_csv(outpath + '/grp_df_means_std_normalized_' + dataset + '_' + atlas + '.csv', index=False)


  cestmat['Subject_Avg_NZMean'] = cestmat[nzmean_columns].mean(axis=1)
  cestmat['Subject_Std_NZMean'] = cestmat[nzmean_columns].std(axis=1)


In [34]:
cestmat.head()

Unnamed: 0_level_0,group,frontalpole_1R NZMean,superiorfrontal_40R NZMean,superiorfrontal_7R NZMean,superiorfrontal_23R NZMean,superiorfrontal_10R NZMean,superiorfrontal_39R NZMean,superiorfrontal_29R NZMean,superiorfrontal_1R NZMean,superiorfrontal_30R NZMean,...,paracentral_11R NZMean,precuneus_7R NZMean,precuneus_14R NZMean,precuneus_11R NZMean,precuneus_8R NZMean,precuneus_18R NZMean,precuneus_13R NZMean,cuneus_3R NZMean,cuneus_5R NZMean,precuneus_15R NZMean
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100522_12003,TD/NC,-1.583104,-2.546943,-1.436356,-0.680306,-1.333544,-1.450471,-0.444078,-1.041088,0.015964,...,,,,,,,,,,
100522_12371,TD/NC,-2.03701,-2.303801,-1.142677,-0.048309,-1.07948,-1.16212,0.123848,-0.143814,0.039272,...,,,,,,,,,,
100522_12783,TD/NC,-2.373719,-1.22431,-0.979845,0.381669,-0.317736,-1.247852,-0.334146,0.411618,-0.65401,...,,,,,,,,,,
102041_12037,PRO/CHR,-0.710716,-0.513366,-0.395485,-0.290701,-0.324551,-0.98506,-1.177559,-0.420007,-0.529846,...,0.806698,,0.123622,0.239443,,-0.813841,-0.393435,-0.670111,0.194101,
102041_12500,PRO/CHR,2.116556,-0.105547,0.537472,0.030525,-0.086434,0.188051,-0.733269,0.197125,-0.710615,...,-0.988681,,0.280858,-0.397086,-0.430869,0.929336,-0.504479,1.697598,1.884821,


## Make classic grp_df

In [48]:
# First, add datatype to column names so can be distinguished later.
cestmat2=cestmat.copy()
NMDAmat2=NMDAmat.copy()
GABAmat2=GABAmat.copy()
mGluR5mat2=mGluR5mat.copy()
cestmat2.columns = [f"GluCEST_{col}" if "NZ" in col else col for col in cestmat2.columns]
NMDAmat2.columns = [f"NMDA_{col}" if "NZ" in col else col for col in NMDAmat2.columns]
GABAmat2.columns = [f"GABA_{col}" if "NZ" in col else col for col in GABAmat2.columns]
mGluR5mat2.columns = [f"mGluR5_{col}" if "NZ" in col else col for col in mGluR5mat2.columns]

# Align dataframes by "Subject" index and concatenate along columns
grp_df = cestmat2.join(NMDAmat2.filter(like='NZ'), how='left')
grp_df = grp_df.join(GABAmat2.filter(like='NZ'), how='left')
grp_df = grp_df.join(mGluR5mat2.filter(like='NZ'), how='left')

# Save grpd_df
grp_df.to_csv(outpath + '/grp_df_' + dataset + atlas + '.csv', index=True)
#print(cestmat)

In [96]:
#print(NMDAmat)

## Make longform group df

In [49]:
# Make longform group df
# Get list of parcel names
parcels = cestmat.filter(like="NZMean").columns.tolist()

# Melt cestmat to get Glu data in long format
cestlong = cestmat.reset_index().melt(id_vars='Subject', value_vars=parcels, 
                                      var_name='Parcel', value_name='GluCEST')

# Melt nmap data. Fix!! turn into loop later.
NMDAlong = NMDAmat.reset_index().melt(id_vars='Subject', value_vars=parcels, 
                                       var_name='Parcel', value_name='NMDA')
GABAlong = GABAmat.reset_index().melt(id_vars='Subject', value_vars=parcels, 
                                       var_name='Parcel', value_name='GABA')
mGluR5long = mGluR5mat.reset_index().melt(id_vars='Subject', value_vars=parcels, 
                                       var_name='Parcel', value_name='mGluR5')

# Merge the long-form dataframes based on Subject and Parcel
long_df = pd.merge(cestlong, NMDAlong,  on=['Subject', 'Parcel'])
long_df = pd.merge(long_df, GABAlong,  on=['Subject', 'Parcel'])
long_df = pd.merge(long_df, mGluR5long,  on=['Subject', 'Parcel'])

# Add diagnostic group
diag_df = cestmat['group']
long_df = pd.merge(long_df, diag_df, on='Subject')
long_df['hstatus'] = np.where(long_df['group'].isin(['TD/NC']), 'HC', 'PSY')
#print(long_df)

# Save longformdf
long_df.to_csv(outpath + '/longform_grpdf_' + dataset + atlas + '.csv', index=True)

### Make mean group dfs by diagnosis

In [50]:
# Make mean group dfs by diagnosis
grouped_subj = long_df.groupby(['Parcel', 'hstatus']).agg(
    CEST_avg=('GluCEST', 'mean'),
    NMDA=('NMDA', 'mean'),
    mGluR5=('mGluR5', 'mean'),
    GABA=('GABA', 'mean')
).reset_index()
print(grouped_subj)
# Save
grouped_subj.to_csv(outpath + '/means_subjectnmaps_' + dataset + '_' + atlas + '.csv', index=False)

                                Parcel hstatus  CEST_avg      NMDA    mGluR5  \
0                accumbensareaR NZMean      HC -0.237595  0.682994  0.451078   
1                accumbensareaR NZMean     PSY -0.269562  0.643160  0.535665   
2                    brainstemL NZMean      HC  0.092807  0.489323 -0.977112   
3                    brainstemL NZMean     PSY  0.136885  0.463831 -0.996127   
4    caudalanteriorcingulate_5R NZMean      HC  0.158525 -1.047351  0.278927   
..                                 ...     ...       ...       ...       ...   
105          superiorfrontal_8R NZMean     PSY -0.792765  0.242487  0.391164   
106          superiorfrontal_9R NZMean      HC -0.145166 -0.110998  0.021254   
107          superiorfrontal_9R NZMean     PSY -0.360681 -0.306964 -0.112934   
108             thalamusproperR NZMean      HC -0.571699  1.020895 -0.213864   
109             thalamusproperR NZMean     PSY -0.405138  0.900669 -0.290407   

         GABA  
0    0.321929  
1    0.

# REPEAT USING STANDARD NMAPS

In [51]:
"""
from netneurotools.datasets import fetch_cammoun2012

# Import and add parcel labels to standard receptor_df
if atlas == 'schaefer':
    schaefer = fetch_atlas_schaefer_2018(n_rois=1000, yeo_networks=17)
    labels = schaefer.labels
    labels = [label.decode('utf-8') for label in labels]
    receptor_df.index = labels
    receptor_df.index.name = 'Parcel'

if atlas == 'cammoun2012':
"""

receptor_df.rename(columns={'GABAa': 'GABA'}, inplace=True)
print(receptor_df)   

# Chop up receptor_df by map
NMDAmat = receptor_df[["Parcel","NMDA"]]
GABAmat = receptor_df[["Parcel","GABA"]]
mGluR5mat = receptor_df[["Parcel","mGluR5"]]

#print(NMDAmat)

                        Parcel      NMDA    mGluR5      GABA
0      lateralorbitofrontal_9R -0.020868  0.164621 -0.150228
1     lateralorbitofrontal_11R  0.366013 -0.085390  0.170483
2      lateralorbitofrontal_5R  1.171849  1.015446  1.317157
3      lateralorbitofrontal_6R  0.715088 -0.055909 -0.026400
4      lateralorbitofrontal_7R -0.125005  0.105069  0.344259
...                        ...       ...       ...       ...
1010                 pallidumL  1.184228 -2.200435 -3.248923
1011            accumbensareaL  0.632115  0.061453 -0.237722
1012              hippocampusL  0.855395 -1.009223 -1.039651
1013                 amygdalaL  0.109529 -0.782412 -0.971487
1014                brainstemL -0.798938 -4.706040 -5.108990

[1015 rows x 4 columns]


### Make classic grp_df

In [52]:
# Transpose receptor maps.
nmda = NMDAmat.T
gaba = GABAmat.T
mglur5 = mGluR5mat.T

print(nmda)
# Keep only parcels contained in cestmat
cestmat_regions = [col.replace(' NZMean', '') for col in cestmat.columns if ' NZMean' in col]
nmda_filtered = nmda[[col for col in nmda.columns if col in cestmat_regions]]
gaba_filtered = gaba[[col for col in gaba.columns if col in cestmat_regions]]
mglur5_filtered = mglur5[[col for col in mglur5.columns if col in cestmat_regions]]

# Filtered columns
nmda_filtered.columns = [f"NMDA_{col}" for col in nmda_filtered.columns]
gaba_filtered.columns = [f"GABA_{col}" for col in gaba_filtered.columns]
mglur5_filtered.columns = [f"mGluR5_{col}" for col in mglur5_filtered.columns]

# Repeat values for length of cestmat
nmda_repeated = pd.concat([nmda_filtered] * len(cestmat), ignore_index=True)
gaba_repeated = pd.concat([gaba_filtered] * len(cestmat), ignore_index=True)
mglur5_repeated = pd.concat([mglur5_filtered] * len(cestmat), ignore_index=True)

print(nmda_repeated)
# Concatenate
grp_df_std = pd.concat([cestmat2, nmda_repeated, gaba_repeated, mglur5_repeated], axis=1)

# Save grp_df_std
grp_df_std.to_csv(outpath + '/grp_df_std' + dataset + atlas + '.csv', index=True)
print(grp_df_std.head())

                           0                         1     \
Parcel  lateralorbitofrontal_9R  lateralorbitofrontal_11R   
NMDA                  -0.020868                  0.366013   

                           2                        3     \
Parcel  lateralorbitofrontal_5R  lateralorbitofrontal_6R   
NMDA                   1.171849                 0.715088   

                           4                         5     \
Parcel  lateralorbitofrontal_7R  lateralorbitofrontal_10R   
NMDA                  -0.125005                  0.039044   

                           6                         7     \
Parcel  lateralorbitofrontal_4R  lateralorbitofrontal_17R   
NMDA                   1.248462                   0.37993   

                           8                         9     ...       1005  \
Parcel  lateralorbitofrontal_8R  lateralorbitofrontal_15R  ...  insula_3L   
NMDA                  -0.529395                  0.130049  ...  -0.166495   

              1006             1007

### Make Longform df

In [59]:
# Make longform df using standardized nmaps values

# Get list of parcel names
parcels = cestmat.filter(like="NZMean").columns.tolist()

# Keep relevant columns from long_df and rename parcels
longdf_cest = long_df[["Subject","Parcel","GluCEST","group","hstatus"]]
longdf_cest = longdf_cest.replace(' NZMean', '', regex=True)

#longdf_cest["Parcel"] = longdf_cest["Parcel"].str.replace(' NZMean', '', regex=False)

# Convert receptor_df from wide to long format for merging
nmda_long = NMDAmat.reset_index().melt(id_vars='Parcel', var_name='Receptor1', value_name='NMDA_standard')
gaba_long = GABAmat.reset_index().melt(id_vars='Parcel', var_name='Receptor2', value_name='GABA_standard')
mglur5_long = mGluR5mat.reset_index().melt(id_vars='Parcel', var_name='Receptor3', value_name='mGluR5_standard')

long_df_std = pd.merge(longdf_cest, NMDAmat, on=["Parcel"])
long_df_std = pd.merge(long_df_std, GABAmat, on = ["Parcel"])
long_df_std = pd.merge(long_df_std, mGluR5mat, on = ["Parcel"])

# Save the longform dataframe to a CSV
long_df_std.to_csv(outpath + '/longform_grpdf_std_' + dataset + '_' + atlas + '.csv', index=False)


### Make mean df by diagnosis

In [61]:
# Make mean group dfs by diagnosis
# Standard nmap data
grouped_std = long_df_std.groupby(['Parcel', 'hstatus']).agg(
    CESTavg=('GluCEST', 'mean'),
    NMDA=('NMDA', 'mean'),
    mGluR5=('mGluR5', 'mean'),
    GABA=('GABA', 'mean')
).reset_index()
print(grouped_std)
grouped_std.to_csv(outpath + '/means_std_' + dataset + '_' + atlas + '.csv', index=False)

                         Parcel hstatus   CESTavg      NMDA    mGluR5  \
0                accumbensareaR      HC -0.237595  0.213972 -0.084778   
1                accumbensareaR     PSY -0.269562  0.213972 -0.084778   
2                    brainstemL      HC  0.092807 -0.798938 -4.706040   
3                    brainstemL     PSY  0.136885 -0.798938 -4.706040   
4    caudalanteriorcingulate_5R      HC  0.158525 -0.026330  1.229935   
..                          ...     ...       ...       ...       ...   
105          superiorfrontal_8R     PSY -0.792765 -0.331275 -0.129873   
106          superiorfrontal_9R      HC -0.145166 -0.293822 -0.660154   
107          superiorfrontal_9R     PSY -0.360681 -0.293822 -0.660154   
108             thalamusproperR      HC -0.571699  1.573297 -2.083970   
109             thalamusproperR     PSY -0.405138  1.573297 -2.083970   

         GABA  
0   -0.713525  
1   -0.713525  
2   -5.108990  
3   -5.108990  
4   -0.473436  
..        ...  
105 -0.1089

### Data Imputation

0

In [94]:
# Now, for the long_dfs, impute data based on average across participants for that parcel
# Subject-wise data
merged_df = pd.merge(long_df, grouped_subj[['Parcel', 'hstatus', 'CEST_avg', 'NMDA', 'GABA', 'mGluR5']], on=['Parcel', 'hstatus'], how='left')
#print(merged_df)
merged_df['GluCEST'] = merged_df['GluCEST'].fillna(merged_df['CEST_avg'])
merged_df['NMDA'] = merged_df['NMDA_x'].fillna(merged_df['NMDA_y'])
merged_df['GABA'] = merged_df['GABA_x'].fillna(merged_df['GABA_y'])
merged_df['mGluR5'] = merged_df['mGluR5_x'].fillna(merged_df['mGluR5_y'])
#print(merged_df)
imputed_df = merged_df.drop(columns=['CEST_avg', 'NMDA_x', 'NMDA_y', 'GABA_x', 'GABA_y','mGluR5_x', 'mGluR5_y'])

# Standard data
# First, rename parcels
long_df_std["Parcel"] = long_df_std["Parcel"].str.replace(' NZMean', '', regex=False)
grouped_subj_std = grouped_subj.copy()
grouped_subj_std["Parcel"] = grouped_subj_std["Parcel"].str.replace(' NZMean', '', regex=False)
#print(long_df_std)
merged_df_std = pd.merge(long_df_std, grouped_subj_std[['Parcel', 'hstatus', 'CEST_avg']], on=['Parcel', 'hstatus'], how='left')
merged_df_std['GluCEST'] = merged_df_std ['GluCEST'].fillna(merged_df_std ['CEST_avg'])
#print(merged_df_std)
imputed_df_std = merged_df_std.drop(columns=['CEST_avg'])

imputed_df.to_csv(outpath + '/imputed_long_df_' + dataset + '_' + atlas + '.csv', index=False)
imputed_df_std.to_csv(outpath + '/imputed_long_df_standardnmaps_' + dataset + '_' + atlas + '.csv', index=False)

### Normalize GluCEST values

In [12]:
# Make mean group dfs by diagnosis
# Standard nmap data
grouped_std = long_df_std.groupby(['Parcel', 'hstatus']).agg(
    GluCEST_avg=('CEST', 'mean'),
    NMDA_avg=('NMDA_standard', 'mean'),
    mGluR5_avg=('mGluR5_standard', 'mean'),
    GABA_avg=('GABA_standard', 'mean')
).reset_index()
#print(grouped_std)
grouped_std.to_csv(outpath + '/means_std_normalized_cest_' + dataset + '_' + atlas + '.csv', index=False)

In [13]:
# Step 1: Select columns that contain 'NZMean'
nzmean_columns = [col for col in cestmat.columns if 'NZMean' in col]

# Step 2: Calculate mean and std deviation for each subject (row-wise) across selected columns
cestmat['Subject_Avg_NZMean'] = cestmat[nzmean_columns].mean(axis=1)
cestmat['Subject_Std_NZMean'] = cestmat[nzmean_columns].std(axis=1)
print(cestmat)

# Step 3: Calculate z-scores for all selected columns at once and store them in a new dataframe
zscore_df = (cestmat[nzmean_columns].sub(cestmat['Subject_Avg_NZMean'], axis=0)
             .div(cestmat['Subject_Std_NZMean'], axis=0))

# Rename z-score columns
zscore_df.columns = [col + '_Zscore' for col in zscore_df.columns]
#print(zscore_df.size)
#print(zscore_df)
# Step 4: Concatenate the z-scores dataframe to the original cestmat dataframe
zcestmat = pd.concat([cestmat['group'], zscore_df], axis=1)

zcestmat.to_csv(outpath + '/grp_df_means_std_normalized_' + dataset + '_' + atlas + '.csv', index=False)
print(zcestmat)


  cestmat['Subject_Avg_NZMean'] = cestmat[nzmean_columns].mean(axis=1)
  cestmat['Subject_Std_NZMean'] = cestmat[nzmean_columns].std(axis=1)


              Unnamed: 0    group  17Networks_RH_VisCent_Striate_2 NZMean  \
Subject                                                                     
100522_12003           0    TD/NC                                     NaN   
100522_12371           1    TD/NC                                     NaN   
100522_12783           2    TD/NC                                     NaN   
102041_12037           3  PRO/CHR                                6.896060   
102041_12500           4  PRO/CHR                                9.101267   
...                  ...      ...                                     ...   
96902_11903          171    TD/NC                               10.768049   
96902_12440          172    TD/NC                                     NaN   
96902_12788          173    TD/NC                                     NaN   
98370_12558          174    TD/NC                                8.194758   
98370_12952          175    TD/NC                                7.623180   

In [None]:
cestmat2=cestmat.copy()
NMDAmat2=NMDAmat.copy()
GABAmat2=GABAmat.copy()
mGluR5mat2=mGluR5mat.copy()
cestmat2.columns = [f"GluCEST_{col}" if "NZ" in col else col for col in cestmat2.columns]
NMDAmat2.columns = [f"NMDA_{col}" if "NZ" in col else col for col in NMDAmat2.columns]
GABAmat2.columns = [f"GABA_{col}" if "NZ" in col else col for col in GABAmat2.columns]
mGluR5mat2.columns = [f"mGluR5_{col}" if "NZ" in col else col for col in mGluR5mat2.columns]

# Align dataframes by "Subject" index and concatenate along columns
grp_df = cestmat2.join(NMDAmat2.filter(like='NZ'), how='left')
grp_df = grp_df.join(GABAmat2.filter(like='NZ'), how='left')
grp_df = grp_df.join(mGluR5mat2.filter(like='NZ'), how='left')

# Save grp_df
grp_df.to_csv(outpath + '/grp_df_' + dataset + atlas + '.csv', index=True)

In [None]:
# Start with Loop through subjects. 
# Find path to 

I have parcel-wise glucest values and I want to normalize based on that subject's average glucest value.
Loop through rows of df
subj= the index of that row. 
Get average glucest value from path that includes subj in the path name

# Transpose receptor maps.
nmda = NMDAmat.T
gaba = GABAmat.T
mglur5 = mGluR5mat.T

# Keep only parcels contained in cestmat
cestmat_regions = [col.replace(' NZMean', '') for col in cestmat.columns if ' NZMean' in col]
nmda_filtered = nmda[[col for col in nmda.columns if col in cestmat_regions]]
gaba_filtered = gaba[[col for col in gaba.columns if col in cestmat_regions]]
mglur5_filtered = mglur5[[col for col in mglur5.columns if col in cestmat_regions]]

# Filtered columns
nmda_filtered.columns = [f"NMDA_{col}" for col in nmda_filtered.columns]
gaba_filtered.columns = [f"GABA_{col}" for col in gaba_filtered.columns]
mglur5_filtered.columns = [f"mGluR5_{col}" for col in mglur5_filtered.columns]

# Repeat values for length of cestmat
nmda_repeated = pd.concat([nmda_filtered] * len(cestmat), ignore_index=True)
gaba_repeated = pd.concat([gaba_filtered] * len(cestmat), ignore_index=True)
mglur5_repeated = pd.concat([mglur5_filtered] * len(cestmat), ignore_index=True)

# Concatenate
grp_df_std = pd.concat([cestmat2, nmda_repeated, gaba_repeated, mglur5_repeated], axis=1)

# Save grp_df_std
grp_df_std.to_csv(outpath + '/grp_df_std' + dataset + atlas + '.csv', index=True)
print(grp_df_std)