## Compile outputs

This notebook concatenates the pyGluCEST output TSVs for the 7-network parcellation into a single group-level table.

### Import Packages

In [184]:
import os
import glob
import numpy as np
import pandas as pd
import scipy as sp
from scipy.stats import pearsonr
from scipy.stats import linregress
import seaborn as sns
import matplotlib.pyplot as plt
import re
import shutil

# Set paths and variables
# Root directory of the CEST data; each entry of `batches` is appended to it.
cestpath = "/Users/pecsok/Desktop/ImageData/PMACS_remote/data/cest/"

# Output folders (relative to cestpath), one per processing batch.
batches = [
    'output_measures/UNI',
    'analysis_batch2/output_measures/UNI',
    'analysis_batch3/output_measures/UNI',
    'march_analysis/output_measures/UNI',
]

# Network labels; each one appears in a measures file name and gets its own
# group of columns in the group table.
networks = [
    "Cont",
    "Default",
    "DorsAttn",
    "Vis",
    "SalVentAttn",
    "SomMot",
    "Limbic",
]

fieldstrength = '3T'

# Import data
# Read the subject list, discard rows that have no BBLID, and store both ID
# columns as integers.
subjlist = (
    pd.read_csv("~/Desktop/ImageData/PMACS_remote/data/subject_list_031124.csv", sep=',')
    .dropna(subset=['BBLID'])
    .astype({'BBLID': int, 'SCANID_CEST': int})
)


# Make group df
# Each network contributes three columns (<network>_Mean, <network>_Count,
# <network>_Sigma), in the order the networks are listed.
allcols = []
for network in networks:
    print(network)
    colnames = [network + "_" + stat for stat in ("Mean", "Count", "Sigma")]
    allcols.extend(colnames)

# Empty table: three identifier columns followed by the per-network columns.
id_columns = ["BBLID", "Session", "fMRI Field Strength"]
grp_df = pd.DataFrame(columns=id_columns + allcols)



Cont
Default
DorsAttn
Vis
SalVentAttn
SomMot
Limbic


In [185]:

# Loop through all subjs
#
# For every row of subjlist, look in each batch directory for a folder whose
# path contains "<BBLID>_<SCANID_CEST>". Every matching folder appends one row
# to grp_df, which is then filled with the Mean / count / Sigma values read
# from the first data row of that folder's per-network measures TSVs.

# List each batch's subject folders once. The listing does not depend on the
# subject, so the directories do not need to be rescanned for every row of
# subjlist; the context manager also closes the directory handle promptly.
batch_subfolders = {}
for batch in batches:
    with os.scandir(cestpath + batch) as entries:
        batch_subfolders[batch] = [f.path for f in entries if f.is_dir()]

for index, row in subjlist.iterrows():
    bblid = str(row['BBLID'])
    session = str(row['SCANID_CEST'])
    fieldstrength = str(row['fMRI Field Strength'])
    cestid = bblid + "_" + session
    print(cestid)
    # BBLID 88760 is printed above but deliberately not extracted.
    if bblid == '88760':
        continue
    # Every batch is searched (there is no early exit), so a subject whose
    # folder exists in several batches gets several rows in grp_df.
    for batch in batches:
        for subfolder in batch_subfolders[batch]:
            # Substring match of the ID against the full folder path.
            if cestid not in subfolder:
                continue
            # Add a row to group df: the three identifiers, NaN everywhere else.
            ids = [bblid, session, fieldstrength]
            grp_df.loc[len(grp_df)] = ids + [float('nan')] * (len(grp_df.columns) - 3)
            # Values are written to every row that carries this session ID.
            # grp_df does not gain rows inside the network loop, so the mask
            # can be computed once here.
            session_rows = grp_df['Session'] == session
            for network in networks:
                cest_pattern = subfolder + "/" + cestid + "-2d-GluCEST-s100_7-" + network + "-measures_UNI.tsv"
                cestfiles = glob.glob(cest_pattern)
                if not cestfiles:
                    # Leave this network's columns as NaN and say which file is
                    # missing, instead of failing with a bare IndexError.
                    print("WARNING: no measures file found: " + cest_pattern)
                    continue
                cestfile = cestfiles[0]
                cestmat = pd.read_csv(cestfile, sep='\t')
                if cestmat.empty:
                    # No first data row to read from.
                    print("WARNING: measures file has no data rows: " + cestfile)
                    continue
                # (keyword searched for in the TSV column name, destination column in grp_df),
                # tried in this order; the first keyword that matches with a non-NaN value wins.
                targets = [
                    ("Mean", network + "_Mean"),
                    ("count", network + "_Count"),
                    ("Sigma", network + "_Sigma"),
                ]
                # Distinct loop names so the outer iterrows() index is not overwritten.
                for measure_name, value in cestmat.loc[0, :].items():
                    for keyword, grp_col in targets:
                        # Check the keyword first: np.isnan is only evaluated for
                        # columns whose name matches, and NaN values never
                        # overwrite what is already stored.
                        if keyword in measure_name and not np.isnan(value):
                            grp_df.loc[session_rows, grp_col] = cestmat.at[0, measure_name]
                            break
print(grp_df)

20303_12234
90217_12230
88608_12108
21874_12094
94288_12092
94703_12082
93757_12015
96902_11903
20792_11887
20325_11852
88760_11846
125073_11814
115783_11788
118864_11783
116354_11774
111720_11766
91422_11753
20642_11261
20645_11260
20543_11259
112126_11157
106057_11122
89095_11100
97994_11114
19981_11106
96659_11096
91962_11090
92211_10981
85743_10944
93292_10938
94276_10927
125511_10906
90281_10902
20011_10888
121085_10851
19970_10827
105979_10791
93274_10765
132179_10760
80557_10738
83835_10706
120217_10722
93734_10694
119791_10705
132641_10692
106880_10699
102041_10675
121407_10688
90077_10962
20082_11821
20754_12200
20871_11917
20902_12766
20903_12159
92089_12089
95257_12041
98370_12558
127065_12752
127935_12101
20916_12762
21118_12784
22744_12533
93242_12442
94378_11833
96465_12069
100522_12003
112807_11890
114738_11706
117847_12740
126176_12780
126532_12582
128079_11934
128259_12837
128865_12325
130438_11999
131384_12198
132869_12109
135085_11812
135277_12808
117397_10686
87646_

In [187]:
# Drop rows that are exact duplicates and renumber the index from 0,
# then write the table to CSV (index included) and show it.
grp_df = grp_df.drop_duplicates(ignore_index=True)
grp_df.to_csv('cestdata_032324.csv', index=True)
print(grp_df)


      BBLID Session fMRI Field Strength  Cont_Mean  Cont_Count  Cont_Sigma  \
0     20303   12234                  3T   8.419253      1575.0    1.667800   
1     90217   12230                  3T   8.000094      1608.0    1.767681   
2     88608   12108                  3T   8.441115      1517.0    1.790544   
3     21874   12094                  3T   7.696566      1231.0    1.932075   
4     94288   12092                  3T   8.445856      1447.0    2.301475   
5     94703   12082                  3T   7.728024      1496.0    1.887663   
6     93757   12015                  3T   8.059349      1020.0    1.880428   
7     96902   11903                  3T   8.740338      1253.0    1.878775   
8     20792   11887                  3T   6.749906      1079.0    2.215491   
9     20325   11852                  3T   7.907324      1476.0    1.926387   
10   125073   11814                  3T   7.453772       331.0    1.593587   
11   115783   11788                  3T   7.888304       977.0  

In [171]:
# Figuring out missing BBLIDs
# (BBLIDs listed in subjlist that have no row in grp_df)

grp_df = grp_df.drop_duplicates().reset_index(drop=True)
pd.set_option('display.max_rows', None)

# astype() returns a new object rather than converting in place, so keep its
# result: grp_df2 is a copy of grp_df whose BBLID column is guaranteed to be str.
grp_df2 = grp_df.assign(BBLID=grp_df['BBLID'].astype(str))
found_bblids = set(grp_df2['BBLID'])

missing_bblids = []
var = 0  # number of subjlist rows whose BBLID is present in grp_df

# Iterate over the BBLID values themselves. str(subjlist['BBLID']) is the
# printed representation of the whole column, and looping over that string
# yields single characters rather than IDs.
for bblid in subjlist['BBLID'].astype(str):
    # Exact comparison: a substring test would report an ID as found whenever
    # it happens to be contained in a longer ID.
    if bblid in found_bblids:
        var = var + 1
    else:
        missing_bblids.append(bblid)

#print(missing_bblids)


In [172]:
# List the BBLIDs from subjlist that have no row in grp_df.
missing_bblids = []

# astype() returns a new object rather than converting in place, so keep its
# result: grp_df2 is a copy of grp_df whose BBLID column is guaranteed to be str.
grp_df2 = grp_df.assign(BBLID=grp_df['BBLID'].astype(str))
found_bblids = set(grp_df2['BBLID'])

# Initialise the counter here so this cell does not depend on (or keep adding
# to) a value left behind by another cell.
var = 0  # number of subjlist rows whose BBLID is present in grp_df

for bblid in subjlist['BBLID'].astype(str):
    # Exact comparison: a substring test would report an ID as found whenever
    # it happens to be contained in a longer ID.
    if bblid in found_bblids:
        var = var + 1
    else:
        # One entry per subjlist row, so a BBLID listed twice appears twice.
        missing_bblids.append(bblid)

print(missing_bblids)
    
    

['88760', '96659', '100522', '117847', '96659']


In [29]:
Missing BBLIDs:
    '88760',
    '96659', 
    '100522', 
    '117847',
    '117397', 
    '87646', 
    '96659', 
    '81725', 
    '87225', 
    '90877', 
    '92155', 
    '139272'