## Pipeline Script 


Inputs: Group-level GluCEST and nmap data output by pyGluCEST, as well as demographic data from _________ (TODO: fill in the source of the demographic data).
Outputs: Compiled dataframes with GluCEST and nmap data, trimmed based on the number of people with sufficient data.

    Trimmed subject-wise dfs: e.g., cestmat (outpath + 'trimmed_cestmat' + dataset + atlas + '.csv')
    Long form dfs: e.g., long_df (outpath + 'longform_grpdf' + dataset + '_' + atlas + '.csv')
         Also have version with standard nmap values
    Mean dfs: e.g., grouped_df (outpath + 'means_' + dataset + '_' + atlas + '.csv')


### Import Packages

In [2]:
import os
import glob
import numpy as np
import pandas as pd
#import network_fcon as fc
import scipy as sp
from scipy.stats import pearsonr
from scipy.stats import linregress
import seaborn as sns
import matplotlib.pyplot as plt
import re
from nilearn.datasets import fetch_atlas_schaefer_2018
import netneurotools 
from netneurotools.datasets import fetch_cammoun2012

### Define paths and variables

In [5]:
# --- Analysis configuration -------------------------------------------------
dataset = 'longglucest_outputmeasures2'
atlas = 'atl-Cammoun2012_res-500'
scale = 'scale500'  # rows of the atlas label table are filtered to this scale below
maps = ["cest", "NMDA", "mGluR5", "GABA"]
nmaps = ["NMDA", "mGluR5", "GABA"]
normalize_cest = True

# --- Paths ------------------------------------------------------------------
# og_path and outpath are the same dataset directory; inpath is its
# atlas-specific subdirectory.
og_path = f"/Users/pecsok/Desktop/ImageData/PMACS_remote/data/nmaps/{dataset}"
inpath = f"{og_path}/{atlas}"
outpath = og_path

# --- Load tables ------------------------------------------------------------
# Subject-level table (comma-separated); provides the 'Subject' column used as
# its index at the end of this cell.
og_cestmat = pd.read_csv(
    f"{og_path}/all_subs_GluCEST_Schaefer2018_1000Parcels_17Networks_UNI.csv",
    sep=',',
)
# Per-ROI measure tables for the selected atlas (tab-separated).
cestmat = pd.read_csv(f"{inpath}/GluCEST-ROI-{atlas}-Measures_UNI.tsv", sep='\t')
NMDAmat, mGluR5mat, GABAmat = (
    pd.read_csv(f"{inpath}/{receptor}_normalized-ROI-{atlas}-Measures_UNI.tsv", sep='\t')
    for receptor in ("NMDA", "mGluR5", "GABA")
)

# --- Atlas labels -----------------------------------------------------------
cam = fetch_cammoun2012()
info = pd.read_csv(cam['info'], sep=',')
info = info.loc[info['scale'] == scale]  # keep only the rows for the chosen scale
print(info)

# Index the subject-level table by subject ID so rows can be looked up by subject.
og_cestmat = og_cestmat.set_index('Subject')

#print(cestmat)

        id                    label hemisphere  structure     scale  \
909      1   lateralorbitofrontal_9          R     cortex  scale500   
910      2  lateralorbitofrontal_11          R     cortex  scale500   
911      3   lateralorbitofrontal_5          R     cortex  scale500   
912      4   lateralorbitofrontal_6          R     cortex  scale500   
913      5   lateralorbitofrontal_7          R     cortex  scale500   
...    ...                      ...        ...        ...       ...   
1919  1011                 pallidum          L  subcortex  scale500   
1920  1012            accumbensarea          L  subcortex  scale500   
1921  1013              hippocampus          L  subcortex  scale500   
1922  1014                 amygdala          L  subcortex  scale500   
1923  1015                brainstem          L  subcortex  scale500   

          yeo_7                von_economo  
909      limbic  primary/secondary sensory  
910      limbic  primary/secondary sensory  
911      lim

In [6]:
measures = ["NZMean", "NZcount", "NZSigma"]
dfs = [cestmat, NMDAmat, GABAmat, mGluR5mat]

# The lookups below do not depend on which frame is being processed, so they
# are built once here instead of being recomputed for every column of every frame.

# Subject ID -> group label (assumes a 'group' column exists in og_cestmat,
# which is indexed by subject ID).
group_by_subject = og_cestmat['group'].to_dict()

# Atlas region id -> "<label><hemisphere>", e.g. "lateralorbitofrontal_9R".
# drop_duplicates keeps the first row per id, matching a first-match lookup.
roi_names = {
    roi.id: f"{roi.label}{roi.hemisphere}"
    for roi in info[['id', 'label', 'hemisphere']]
    .drop_duplicates(subset='id')
    .itertuples(index=False)
}

# "<measure>_<id>" -> "<label><hemisphere> <measure>" for every region listed
# in `info` (rather than a hard-coded 1..1015 id range).
column_renames = {
    f"{measure}_{roi_id}": f"{roi_name} {measure}"
    for measure in measures
    for roi_id, roi_name in roi_names.items()
}

for df in dfs:
    # Fix subject name: keep the 4th '/'-separated component of the path.
    # Values without a '/' are left alone so that re-running this cell on
    # already-fixed frames does not raise IndexError.
    df['Subject'] = df['Subject'].apply(lambda x: x.split('/')[3] if '/' in x else x)

    # Add subject group; subjects absent from og_cestmat get None.
    # Built positionally against df.index, so it does not rely on the frame
    # having a default 0..n-1 index.
    df['group'] = pd.Series(
        [group_by_subject.get(subject) for subject in df['Subject']],
        index=df.index,
        dtype=object,
    )

    # Rename columns in a single pass. rename() ignores mapping keys that are
    # not present in the frame, so columns missing from a given table are
    # simply skipped. inplace=True is kept deliberately: cestmat/NMDAmat/etc.
    # must be mutated because later cells use those names directly.
    df.rename(columns=column_renames, inplace=True)

# dfs[1] is the same object as NMDAmat; printing both confirms the in-place
# changes are visible through either name.
print(NMDAmat)
print(dfs[1])

          Subject  lateralorbitofrontal_15R NZMean  \
0    100522_12003                              0.0   
1    100522_12371                              0.0   
2    100522_12783                              0.0   
3    102041_12037                              NaN   
4    102041_12500                              NaN   
..            ...                              ...   
177   96902_11903                              NaN   
178   96902_12440                              NaN   
179   96902_12788                              NaN   
180   98370_12558                              NaN   
181   98370_12952                              NaN   

     lateralorbitofrontal_15R NZcount  lateralorbitofrontal_15R NZSigma  \
0                                 0.0                               0.0   
1                                 0.0                               0.0   
2                                 0.0                               0.0   
3                                 NaN              

In [7]:
# Write the four tables to `outpath` as comma-separated files.
# to_csv's default index handling is left as-is, so the row index is written
# as the first column of each file.
cestmat.to_csv(f"{outpath}/all_subs_GluCEST_{atlas}_UNI.csv", sep=',')
for receptor_name, receptor_df in (("NMDA", NMDAmat), ("GABA", GABAmat), ("mGluR5", mGluR5mat)):
    receptor_df.to_csv(f"{outpath}/all_subs_{receptor_name}_normalized_{atlas}_UNI.csv", sep=',')