## Pipeline Script 
This script 

### Import Packages

In [59]:
import os
import glob
import numpy as np
import pandas as pd
import scipy as sp
from scipy.stats import pearsonr
from scipy.stats import linregress
import seaborn as sns
import matplotlib.pyplot as plt
import re
import shutil

### Define paths and variables

In [65]:
# Set paths
fcpath = "/Users/pecsok/Desktop/ImageData/PMACS_remote/data/fmri/postprocessed/3T/xcp_d"
outpath = "~/Desktop/ImageData/PMACS_remote/analysis/postprocessing/"
clinpath = "~/Desktop/ImageData/PMACS_remote/data/clinical"
cestpath = "~/Desktop/ImageData/PMACS_remote/data/cest"

# Choose what to analyse
networks = ["Cont", "Default", "DorsAttn", "Vis", "SalVentAttn", "SomMot", "Limbic"] 
btwn_ntwks = ["Cont_btwn", "Default_btwn", "DorsAttn_btwn", "Vis_btwn", "SalVentAttn_btwn", "SomMot_btwn", "Limbic_btwn"] 
CESTnetworks = ["avgCEST_Cont", "ctCEST_Cont", "avgCEST_Default", "ctCEST_Default","avgCEST_SomMot", "ctCEST_SomMot", "avgCEST_DorsAttn", "ctCEST_DorsAttn", "avgCEST_Vis", "ctCEST_Vis","avgCEST_Limbic", "ctCEST_Limbic",  "avgCEST_SalVentAttn","ctCEST_SalVentAttn"]
CNB_scores = ["tap_tot","volt_cr","volt_rtcr"]
CNB_valids = ["tap_valid","volt_valid","volt_valid"] 
diag_scores = ["hstatus","age"]
demo_scores = ["sex", "race","ethnic","dateDiff"]
diag_details = ["axis1_desc1", "axis1_desc2", "axis1_desc3","axis1_desc4","axis1_desc5", "axis1_desc6"]

# Make dataframe based on metrics of interest
grp_df = pd.DataFrame(columns = ["BBLID"] + ["Session"] + ["SCANID_CEST"] + demo_scores + networks + btwn_ntwks + CESTnetworks + CNB_scores + diag_scores)
diag_df = pd.DataFrame(columns = ["BBLID"] + ["Session"] + ["SCANID_CEST"] + diag_scores + diag_details)
print(grp_df)

# Initialize empty lists and vars
bblids = []
sesids = []
gluseses = []

# Import group dataframes and set indices
subjlist = pd.read_csv("~/Desktop/ImageData/PMACS_remote/data/subject_list_031124.csv", sep=',') 
subjlist = subjlist.dropna(subset=['BBLID'])
subjlist['BBLID'] = subjlist['BBLID'].astype(int)
subjlist['SCANID_CEST'] = subjlist['SCANID_CEST'].astype(int)
cnbmat = pd.read_csv(clinpath + "/maggie_datarequest_fullcnb-2.csv", sep=',') 
diagmat = pd.read_csv(clinpath + "/maggie_datarequest_diagnosis-2.csv", sep=',')
demomat = pd.read_csv(clinpath + "/maggie_datarequest_demographics-2.csv", sep=',')
cestmat = pd.read_csv(cestpath + "/compiled_outputs/compiled_cortical_UNI.csv", sep=',')
# cestmat = pd.read_csv(clinpath + "/demographics.csv", sep='\t') add grp CEST map here
cnbmat.set_index('bblid', inplace = True)
diagmat.set_index('bblid', inplace = True)
demomat.set_index('bblid', inplace = True)
cestmat.set_index('Subject', inplace = True)

# Set up renaming dictionary for CEST df
schaefer_indices = pd.read_csv('~/Desktop/ImageData/PMACS_remote/github/glucest-rsfmri/Schaefer2018_100Parcels_17Networks_order_FSLMNI152_2mm.Centroid_RAS.csv', sep=',') # Load the CSV with the mapping of numbers to labels
schaefer_dict = dict(zip(schaefer_indices['ROI Label'], schaefer_indices['ROI Name']))

#print(subjlist)

Empty DataFrame
Columns: [BBLID, Session, SCANID_CEST, sex, race, ethnic, dateDiff, Cont, Default, DorsAttn, Vis, SalVentAttn, SomMot, Limbic, Cont_btwn, Default_btwn, DorsAttn_btwn, Vis_btwn, SalVentAttn_btwn, SomMot_btwn, Limbic_btwn, avgCEST_Cont, ctCEST_Cont, avgCEST_Default, ctCEST_Default, avgCEST_SomMot, ctCEST_SomMot, avgCEST_DorsAttn, ctCEST_DorsAttn, avgCEST_Vis, ctCEST_Vis, avgCEST_Limbic, ctCEST_Limbic, avgCEST_SalVentAttn, ctCEST_SalVentAttn, tap_tot, volt_cr, volt_rtcr, hstatus, age]
Index: []

[0 rows x 40 columns]


### Choose which modules to run

In [66]:
runfcon = True
runCNB = True
rundiag = True
rundemo = True
runcest = True
run_grpanalysis = True

## Stage 1: Create Group Data Frame
### Import data, loop through subjects, and establish file paths


FIX THIS ERROR:
/var/folders/ls/hy_z7hgd4_13km3h7j84vqh40000gp/T/ipykernel_77945/3898733492.py:72: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'PSY' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == diag_score] = diagnosis

In [46]:
#print(cestmat)
cestid = 102041_10675
col_cestmat = 'Cont' + ' NZMean'
#cest_avg = cestmat.loc[cestmat.index.str.contains(str(cestid)), cestmat.columns == col_cestmat]

#network_fc = fcmat.loc[fcmat.index.str.contains(network), fcmat.columns[fcmat.columns.str.contains(network)]]

cest_avg = cestmat.loc[cestmat.index == str(cestid), cestmat.columns == col_cestmat]

print(cest_avg)


            ##################################################################################################
            if runcest and bblid != "88760": 
                #print("Processing " + bblid + "'s CEST data'")
                # Extract Glu Session ID
                if bblid in subjlist['BBLID'].astype(str).values:
                    gluses = subjlist.loc[subjlist['BBLID'].astype(str) == bblid, 'SCANID_CEST'].values[0].astype(str) #.
                    cestid = bblid + "_" + gluses
                    print(cestid)
                    # Import data
                    for network in networks:
                        col_name = "avgCEST_" + network # for grp_df
                        ct_name = "ctCEST_" + network # for grp_df
                        if str(cestid) in cestmat.index:
                            #print("yes")
                            col_cestmat = network + " NZMean"
                            ct_cestmat = network + " NZcount"
                            cest_avg = cestmat.loc[cestmat.index == str(cestid), cestmat.columns == col_cestmat]
                            #print(cest_avg)
                            cest_ct = cestmat.loc[cestmat.index.astype(str) == cestid, cestmat.columns == ct_cestmat]
                            #print(cest_ct)
                            grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == col_name] = cest_avg 
                            grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == ct_name] = cest_ct 





col_name = "avgCEST_" + network # for grp_df
ct_name = "ctCEST_" + network # for grp_df
if str(cestid) in cestmat.index:    
    print(yes)
    cest_avgs = cestmat[col_name]
    cest_avg = cest_avgs[str(cestid)]
    print(cest_avg)

    
    diagnosis = diagnoses[int(bblid)]
    grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == diag_score] = diagnosis 
    diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_score] = diagnosis 


Empty DataFrame
Columns: [Cont NZMean]
Index: []


In [67]:
# Generates list of all file names
folder_names = [folder for folder in glob.glob(os.path.join(fcpath, "*")) if os.path.isdir(folder)]
#subje

# Loop through subjects
for subj_path in folder_names: # loop through all rows of the spreadsheet
    if "sub" in subj_path:
        # Extract bblid id:
        bblid = subj_path.split('-')[1]
        print("Processing subject " + bblid)
        # Extract session id: 
        items = os.listdir(subj_path)
        ses_folder = [item for item in items if item.startswith("ses")]
        ses = ses_folder[0].split('-')[1]
        ses_path = os.path.join(fcpath, subj_path, ses_folder[0]) # full path to session
        if bblid in subjlist['BBLID'].astype(str).values:
            gluses = subjlist.loc[subjlist['BBLID'].astype(str) == bblid, 'SCANID_CEST'].values[0].astype(str) #.
         # Add to running list of IDs grp analysis later:
        bblids.append(bblid)
        sesids.append(ses)
        gluseses.append(gluses)
        # Start new row in grp_df for this subject:
        ids = [bblid, ses, gluses]  # Values for the first two columns
        grp_df.loc[len(grp_df)] = ids + [float('nan')] * (len(grp_df.columns) - len(ids))
        diag_df.loc[len(diag_df)] = ids + [float('nan')] * (len(diag_df.columns) - len(ids))   

        # Run a subset of subjs or exclude specific subjs.
        if bblid != "20902" and bblid != "93242"  and bblid != "20754" and bblid != "127065":
            ##################################################################################################
            ## FC
            ##################################################################################################
            if runfcon:
                os.path.join(fcpath, "sub-" + bblid, "ses-" + ses)
                ses_path = os.path.join(fcpath, subj_path, ses_folder[0]) # full path to session
                fcmat_glob = f"{ses_path}/func/*Schaefer117_measure-pearsoncorrelation_conmat.tsv"
                if os.path.isfile(glob.glob(fcmat_glob)[0]):
                    fcmat = pd.read_csv(glob.glob(fcmat_glob)[0], sep='\t') # read in fcmat
                    fcmat.set_index('Node', inplace = True)
                    # Loop through the networks
                    for i in range(len(networks)):
                        network = networks[i]
                        btwn_ntwk = btwn_ntwks[i]
                        print("Running " + network + " fcon")
                        # Select rows and columns corresponding to the network
                        # Add language to select top half of cormat
                        mask = np.triu(np.ones_like(fcmat, dtype=bool))
                        fcmat = fcmat.mask(mask)
                        network_fc = fcmat.loc[fcmat.index.str.contains(network), fcmat.columns[fcmat.columns.str.contains(network)]]
                        
                        # Calculate avg network fc and add value to proper column in grp_df
                        grp_df.loc[len(grp_df)-1, network] = np.nanmean(network_fc.values)

                        # Select rows and columns for within- and between-network connectivity:
                        network_fc_btwn = fcmat.loc[fcmat.index.str.contains(network), ]
                        # Calculate avg network fc and add value to proper column in grp_df
                        grp_df.loc[len(grp_df)-1, btwn_ntwk] = np.nanmean(network_fc_btwn.values)
                                           
            ##################################################################################################
            ## CNB
            ##################################################################################################
            if runCNB:
                # Loop through the CNB scores
                for i in range(len(CNB_scores)):
                    CNB_score = CNB_scores[i]
                    CNB_valid = CNB_valids[i]
                    # Select score of interest & validity of that score
                    scores = cnbmat[CNB_score]
                    if int(bblid) in scores.index:
                        score = scores[int(bblid)]
                        valids = cnbmat[CNB_valid]
                        valid = str(valids[int(bblid)])
                        # If score was valid, add to grp_df
                        if 'V' in valid: 
                            grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == CNB_score] = score 
            ##################################################################################################
            ## Diagnosis
            ##################################################################################################
            if rundiag:
                # Add hstatus
                for i in range(len(diag_scores)):
                    diag_score = diag_scores[i]
                    # Select score of interest and add to grp_df
                    diagnoses = diagmat[diag_score]
                    if int(bblid) in diagnoses.index:
                        diagnosis = diagnoses[int(bblid)]
                        grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == diag_score] = diagnosis 
                        diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_score] = diagnosis 
                    else:
                        diagnosis = "Unknown"
                        grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == diag_score] = diagnosis 
                        diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_score] = diagnosis
     
                # Loop through diag scores
                for i in range(len(diag_details)):
                    diag_detail = diag_details[i]
                    # Select score of interest and add to grp_df
                    comorbidities = diagmat[diag_detail]
                    if int(bblid) in comorbidities.index:
                        comorbidity = comorbidities[int(bblid)]
                       # print(comorbidity)
                        diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_detail] = comorbidity 
                    else:
                        comorbidity = "Unknown"
                        diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_detail] = comorbidity
            ##################################################################################################
            ## Demographics
            ##################################################################################################
            if rundemo:
                # Loop through the CNB scores
                for i in range(len(demo_scores)):
                    demo_score = demo_scores[i]
                    # Select metric of interest
                    scores = demomat[demo_score]
                    if int(bblid) in scores.index:
                        score = scores[int(bblid)]
                        # Add to grp_df
                        grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == demo_score] = score 
            ##################################################################################################
            ## CEST
            ##################################################################################################
            if runcest and bblid != "88760": 
                #print("Processing " + bblid + "'s CEST data'")
                # Extract Glu Session ID
                if bblid in subjlist['BBLID'].astype(str).values:
                    gluses = subjlist.loc[subjlist['BBLID'].astype(str) == bblid, 'SCANID_CEST'].values[0].astype(str) #.
                    cestid = bblid + "_" + gluses
                    print(cestid)
                    # Import data
                    for network in networks:
                        col_name = "avgCEST_" + network # for grp_df
                        ct_name = "ctCEST_" + network # for grp_df
                        col_cestmat = network + " NZMean"
                        ct_cestmat = network + " NZcount"
                        if str(cestid) in cestmat.index:    
                            #print("yes")
                            cest_avgs = cestmat[col_cestmat]
                            cest_avg = cest_avgs[str(cestid)]
                            #print(cest_avg)
                            cest_cts = cestmat[ct_cestmat]
                            cest_ct = cest_avgs[str(cestid)]
                            grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == col_name] = cest_avg 
                            grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == ct_name] = cest_ct                            


                        """              
                # Add hstatus
                for i in range(len(diag_scores)):
                    diag_score = diag_scores[i]
                    # Select score of interest and add to grp_df
                    diagnoses = diagmat[diag_score]
                    if int(bblid) in diagnoses.index:
                        diagnosis = diagnoses[int(bblid)]
                        grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == diag_score] = diagnosis 
                        diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_score] = diagnosis 
                    else:
                        diagnosis = "Unknown"
                        grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == diag_score] = diagnosis 
                        diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_score] = diagnosis
                        
                                        for index, value in enumerate(cestmat.loc[0,:]):
                                         if "Mean" in cestmat.columns[index] and not np.isnan(value):
                                            # cestmat.at[0, cestmat.columns[index]] = float(value) * float(cestmat.iloc[0, index + 1])
                                            means.append(cestmat.at[0, cestmat.columns[index]])
                                            counts.append(cestmat.at[0, cestmat.columns[index + 1]])
                                    if sum(counts) == 0:
                                        grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == col_name] = "NaN"
                                        grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == col_name] = "NaN"
                                    else:
                                        #if not pd.isnull(grp_df.loc[len(grp_df)-1, network]):
                                        grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == col_name] = sum(means) # / sum(counts)
                                        grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == ct_name] = sum(counts) # / sum(counts)
                        
                        for cestpath in cestpaths:
                            cest_pattern = cestpath + cestid + "/" + cestid + "-2d-GluCEST-s100_7-" + network + "-measures_UNI.tsv"
                            #print(cest_pattern)
                            cestfile = glob.glob(cest_pattern)
                            for file in cestfile:
                                if os.path.isfile(file):
                                    #print(cestid  + cestpath + "  \n")
                                    cestmat = pd.read_csv(file, sep='\t') 
                                    means = [] 
                                    counts = []
                                    col_name = "avgCEST_" + network # for grp_df
                                    ct_name = "ctCEST_" + network # for grp_df
                                    for index, value in enumerate(cestmat.loc[0,:]):
                                         if "Mean" in cestmat.columns[index] and not np.isnan(value):
                                            # cestmat.at[0, cestmat.columns[index]] = float(value) * float(cestmat.iloc[0, index + 1])
                                            means.append(cestmat.at[0, cestmat.columns[index]])
                                            counts.append(cestmat.at[0, cestmat.columns[index + 1]])
                                    if sum(counts) == 0:
                                        grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == col_name] = "NaN"
                                        grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == col_name] = "NaN"
                                    else:
                                        #if not pd.isnull(grp_df.loc[len(grp_df)-1, network]):
                                        grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == col_name] = sum(means) # / sum(counts)
                                        grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == ct_name] = sum(counts) # / sum(counts)
"""
print(grp_df)
# sum_of_mean_columns now contains the sum of values in columns with "Mean" in the column name.

Processing subject 117847
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
117847_12740
Processing subject 20645


  grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == diag_score] = diagnosis
  diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_score] = diagnosis
  diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_detail] = comorbidity


Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
20645_11260
Processing subject 128865
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
128865_12325
Processing subject 120217


  diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_detail] = comorbidity
  diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_detail] = comorbidity
  diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_detail] = comorbidity


Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
120217_10722
Processing subject 21118
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
21118_12784
Processing subject 125511
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
125511_10906
Processing subject 105176
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
Processing subject 132179
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
132179_10760
Processing subject 15305
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Run

  grp_df.loc[grp_df['BBLID'].astype(str) == bblid, grp_df.columns == diag_score] = diagnosis
  diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_score] = diagnosis
  diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_detail] = comorbidity
  diag_df.loc[diag_df['BBLID'].astype(str) == bblid, diag_df.columns == diag_detail] = comorbidity


Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
135277_12808
Processing subject 117397
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
117397_10686
Processing subject 93274
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
93274_10765
Processing subject 20902
Processing subject 128259
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
128259_12837
Processing subject 93242
Processing subject 106880
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
106880_10699
Processing subject 131384
Running Cont fcon
Running Default fcon
Run

  grp_df.loc[len(grp_df)-1, network] = np.nanmean(network_fc.values)


Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
19981_11106
Processing subject 93734
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
93734_10694
Processing subject 132641
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
132641_10692
Processing subject 88608
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
88608_12108
Processing subject 93757
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
93757_12015
Processing subject 97994
Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn 

  grp_df.loc[len(grp_df)-1, network] = np.nanmean(network_fc.values)


Running Cont fcon
Running Default fcon
Running DorsAttn fcon
Running Vis fcon
Running SalVentAttn fcon
Running SomMot fcon
Running Limbic fcon
20082_11821
     BBLID  Session SCANID_CEST  sex  race  ethnic  dateDiff      Cont  \
0   117847    12564       12740  1.0   1.0     2.0      18.0  0.083760   
1    20645    11274       11260  1.0   2.0     2.0       0.0  0.266631   
2   128865    12165       12325  1.0   1.0     2.0      49.0  0.080022   
3   120217    10702       10722  2.0   1.0     2.0       8.0  0.170671   
4    21118    12258       12784  2.0   1.0     2.0       0.0  0.155164   
..     ...      ...         ...  ...   ...     ...       ...       ...   
87  127935  motive2       12101  1.0   2.0     2.0      35.0  0.127090   
88   94703    11928       12082  2.0   2.0     2.0      13.0  0.124214   
89   91335     9346       12082  1.0   2.0     2.0     113.0  0.138340   
90   89095     9972       11100  2.0   1.0     2.0       0.0  0.169108   
91   20082  motive2       11821

## Stage 2: Group Comparisons and Regressions

In [63]:
print(grp_df)

     BBLID  Session SCANID_CEST  sex  race  ethnic  dateDiff      Cont  \
0   117847    12564       12740  1.0   1.0     2.0      18.0  0.083760   
1    20645    11274       11260  1.0   2.0     2.0       0.0  0.266631   
2   128865    12165       12325  1.0   1.0     2.0      49.0  0.080022   
3   120217    10702       10722  2.0   1.0     2.0       8.0  0.170671   
4    21118    12258       12784  2.0   1.0     2.0       0.0  0.155164   
..     ...      ...         ...  ...   ...     ...       ...       ...   
87  127935  motive2       12101  1.0   2.0     2.0      35.0  0.127090   
88   94703    11928       12082  2.0   2.0     2.0      13.0  0.124214   
89   91335     9346       12082  1.0   2.0     2.0     113.0  0.138340   
90   89095     9972       11100  2.0   1.0     2.0       0.0  0.169108   
91   20082  motive2       11821  2.0   1.0     2.0      18.0  0.033836   

     Default  DorsAttn  ...  ctCEST_Vis  avgCEST_Limbic  ctCEST_Limbic  \
0   0.181050  0.321492  ...         N

In [68]:
print(grp_df['avgCEST_SomMot'].isna().sum())

#print(subjlist.loc[subjlist['BBLID'] == 91962, subjlist.columns == 'SCANID_CEST'])
#print(grp_df.loc[grp_df['BBLID'] == 88608, grp_df.columns == 'avgCEST_SomMot'])
#print(grp_df.loc[grp_df['BBLID'] == 91422, grp_df.columns == 'avgCEST_SomMot'])
print(subjlist)

20
      BBLID                  PROTOCOL_CEST DOSCAN_CEST  SCANID_CEST  lagtime  \
0     20303           843329 - LongGluCEST      6/2/22        12234    -85.0   
1     90217           843329 - LongGluCEST     5/19/22        12230     90.0   
2     88608           843329 - LongGluCEST     3/22/22        12108     55.0   
3     21874           843329 - LongGluCEST      3/8/22        12094     42.0   
4     94288           843329 - LongGluCEST      3/4/22        12092    126.0   
..      ...                            ...         ...          ...      ...   
115   90877  825940 - GluCEST in Psychosis  08/17/2018        10907      NaN   
116   92155  825940 - GluCEST in Psychosis  11/09/2018        11022      NaN   
117   93274  825940 - GluCEST in Psychosis  04/19/2018        10765      NaN   
118  112126  825940 - GluCEST in Psychosis  02/06/2019        11157      NaN   
119  139272  825940 - GluCEST in Psychosis  03/29/2018        10739      NaN   

    DOSCAN_rs SCANID_rs             

In [15]:
print(grp_df)


     BBLID  Session SCANID_CEST  sex    age  race  ethnic  dateDiff      Cont  \
0   117847    12564       12740  NaN    NaN   NaN     NaN       NaN  0.141025   
1    20645    11274       11260  1.0  19.84   2.0     2.0       0.0  0.312466   
2   128865    12165       12325  NaN    NaN   NaN     NaN       NaN  0.137521   
3   120217    10702       10722  2.0  24.77   1.0     2.0       8.0  0.222504   
4    21118    12258       12784  NaN    NaN   NaN     NaN       NaN  0.207966   
..     ...      ...         ...  ...    ...   ...     ...       ...       ...   
87  127935  motive2       12101  NaN    NaN   NaN     NaN       NaN  0.181647   
88   94703    11928       12082  2.0  21.94   2.0     2.0      13.0  0.178950   
89   91335     9346       12082  1.0  19.77   2.0     2.0     113.0  0.192194   
90   89095     9972       11100  2.0  24.62   1.0     2.0       0.0  0.221038   
91   20082  motive2       11821  2.0  20.28   1.0     2.0      18.0  0.094221   

     Default  ...  avgCEST_

In [64]:
grp_df.to_csv('grp_df_3T.csv', index=True)
diag_df.to_csv('diag_df_3T.csv', index=True)

In [13]:
#grp_df = pd.read_csv('012624_grp_df_3T.csv')
pd.set_option('display.max_rows', None)


### Data Trimming

In [14]:
print(grp_df)

     BBLID  Session  sex  age  race  ethnic  dateDiff  Cont  Default  \
0   117847    12564  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
1    20645    11274  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
2   128865    12165  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
3   120217    10702  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
4    21118    12258  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
5   125511    10565  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
6   105176     9362  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
7   132179    10780  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
8    15305     8635  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
9    17621     9270  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
10   98831    10084  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
11   20916    12577  NaN  NaN   NaN     NaN       NaN   NaN      NaN   
12  116019    10078  NaN  NaN   NaN     NaN       NaN   NaN     

In [86]:
#CLUNKIER COMPREHENSIVE VERSION:




In [87]:
#BBS

In [34]:
!jupyter nbconvert --to html motor_pipeline.ipynb --output motor_pipeline_3T.html

[NbConvertApp] Converting notebook motor_pipeline.ipynb to html
[NbConvertApp] Writing 1146801 bytes to motor_pipeline_3T.html
