## GEE Model

Inputs: the long-format output DataFrame produced by `1_atlas_pipeline2`.

### Import Packages

In [4]:
import os
import glob
import numpy as np
import pandas as pd
#import network_fcon as fc
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
import re
import statsmodels
import statsmodels.api as sm
from statsmodels.genmod.generalized_estimating_equations import GEE
from statsmodels.genmod.cov_struct import Exchangeable


In [None]:
### Define Paths and Variables

In [22]:
# Analysis identifiers; both are embedded in the input file name below.
dataset = 'longglucest_outputmeasures2'
atlas = 'Schaefer2018_1000Parcels_17Networks'

# Receptor maps, and the same list with the CEST measure prepended.
nmaps = ["NMDA", "mGluR5", "GABA"]
maps = ["cest", *nmaps]

# Input and output currently resolve to the same per-atlas analysis folder.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this directory exists.
analysis_root = "/Users/pecsok/Desktop/ImageData/PMACS_remote/data/nmaps/analyses/"
inpath = analysis_root + atlas
outpath = analysis_root + atlas

# Load the long-format table (one row per Subject x Parcel, per the displayed
# output) and show it as the cell result.
long_df = pd.read_csv(
    f"{inpath}/imputed_long_df_standardnmaps_{dataset}_{atlas}.csv",
    sep=',',
)
long_df

Unnamed: 0,Subject,Parcel,GluCEST,group,hstatus,NMDA_standard,GABA_standard,mGluR5_standard
0,100522_12003,17Networks_RH_VisCent_Striate_2,9.532144,TD/NC,HC,0.946837,2.481264,-0.377612
1,100522_12371,17Networks_RH_VisCent_Striate_2,9.532144,TD/NC,HC,0.946837,2.481264,-0.377612
2,100522_12783,17Networks_RH_VisCent_Striate_2,9.532144,TD/NC,HC,0.946837,2.481264,-0.377612
3,102041_12037,17Networks_RH_VisCent_Striate_2,6.896060,PRO/CHR,PSY,0.946837,2.481264,-0.377612
4,102041_12500,17Networks_RH_VisCent_Striate_2,9.101267,PRO/CHR,PSY,0.946837,2.481264,-0.377612
...,...,...,...,...,...,...,...,...
11519,96902_11903,17Networks_RH_DefaultA_pCunPCC_12,8.674353,TD/NC,HC,1.035005,0.225411,0.970607
11520,96902_12440,17Networks_RH_DefaultA_pCunPCC_12,8.276765,TD/NC,HC,1.035005,0.225411,0.970607
11521,96902_12788,17Networks_RH_DefaultA_pCunPCC_12,8.276765,TD/NC,HC,1.035005,0.225411,0.970607
11522,98370_12558,17Networks_RH_DefaultA_pCunPCC_12,8.809414,TD/NC,HC,1.035005,0.225411,0.970607


In [23]:
# Address NaN issue: count the missing values per parcel for each measure.
def _count_missing(values):
    """Return the number of NaN entries in one group's column."""
    return values.isna().sum()


# Output column name -> source column whose NaNs are counted.
nan_columns = {
    'GluCEST_nans': 'GluCEST',
    'NMDA_nans': 'NMDA_standard',
    'mGluR5_nans': 'mGluR5_standard',
    'GABA_nans': 'GABA_standard',
}

# Named aggregation: one (source column, aggregator) pair per output column.
per_parcel = long_df.groupby('Parcel')
nan_counts = per_parcel.agg(
    **{out_name: (source, _count_missing) for out_name, source in nan_columns.items()}
).reset_index()
nan_counts
#nan_counts.to_csv("~/Desktop/GluCEST_nancounts.csv", index=False)




Unnamed: 0,Parcel,GluCEST_nans,NMDA_nans,mGluR5_nans,GABA_nans
0,17Networks_RH_ContA_Cingm_1,0,0,0,0
1,17Networks_RH_ContA_Cingm_2,0,0,0,0
2,17Networks_RH_ContB_PFCmp_1,0,0,0,0
3,17Networks_RH_ContB_PFCmp_2,0,0,0,0
4,17Networks_RH_ContB_PFCmp_3,0,0,0,0
...,...,...,...,...,...
62,17Networks_RH_VisPeri_ExStrSup_3,0,0,0,0
63,17Networks_RH_VisPeri_ExStrSup_4,0,0,0,0
64,17Networks_RH_VisPeri_ExStrSup_7,0,0,0,0
65,17Networks_RH_VisPeri_StriCal_3,0,0,0,0


In [24]:
# Run GEE: regress GluCEST on the standardized NMDA map, clustering by subject.

# Predictor (independent variable): standardized NMDA receptor map value.
# Response (dependent variable): GluCEST, the glutamate measure.
# (The earlier comments had these two roles swapped relative to the code.)
# add_constant prepends the intercept column, reported as `const` in the summary.
X = sm.add_constant(long_df[['NMDA_standard']])
y = long_df['GluCEST']

# Cluster label for each row: observations sharing a Subject value form one cluster.
# NOTE(review): Subject values look like "<id>_<session>" in the displayed data;
# if one person contributes several sessions, each is treated as an independent
# cluster here. Confirm this is intended.
groups = long_df['Subject']

# Exchangeable working correlation: every pair of parcels within a subject is
# assumed to share one common correlation. No family is passed, so the
# statsmodels default is used (the summary reports Gaussian).
model = GEE(y, X, groups=groups, cov_struct=Exchangeable())

# Fit the GEE model
results = model.fit()

# Output the results summary
print(results.summary())

                               GEE Regression Results                              
Dep. Variable:                     GluCEST   No. Observations:                11524
Model:                                 GEE   No. clusters:                      172
Method:                        Generalized   Min. cluster size:                  67
                      Estimating Equations   Max. cluster size:                  67
Family:                           Gaussian   Mean cluster size:                67.0
Dependence structure:         Exchangeable   Num. iterations:                     2
Date:                     Tue, 01 Oct 2024   Scale:                           1.948
Covariance type:                    robust   Time:                         17:16:38
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const             7.6776      0.034    224.361      0.000       7.611       7.74