## Pipeline Script 
This script 

### Import Packages

In [167]:
import os
import glob
import numpy as np
import pandas as pd
import network_fcon as fc
from scipy.stats import pearsonr

### Define paths and variables

In [168]:
# Set paths
inpath = "/Users/pecsok/Desktop/Maggie/Graduate School/Project1_GluCEST_rsfMRI/xcp_output/xcp_d_test/" 
outpath = "~/Desktop/ImageData/PMACS_remote/analysis/postprocessing"
clinpath = "~/Desktop/ImageData/PMACS_remote/data/clinical"
# Choose what to analyse
networks = ["SalVentAttn", "VisCent"] 
CNB_scores = ["tap_tot", "er40_cr"]
CNB_valid = ["tap_valid", "er40_valid"]
clinical_scores = ["BDI", "PROMIS-Anx"]
# Make dataframe based on metrics of interest
columns = ["BBLID"] + ["Session"] + networks
fcon_df = pd.DataFrame(columns=columns)
clin_df = pd.DataFrame(columns=["BBLID", "Session"] + CNB_scores + clinical_scores)
grp_df = pd.DataFrame(columns=columns + CNB_scores + clinical_scores)
print(grp_df)
# Initialize empty lists and vars
bblids = []
sesids = []

# Import group dataframes and set indices
cnbmat = pd.read_csv(clinpath + "/cnb.csv", sep=',') 
diagmat = pd.read_csv(clinpath + "/diagnosis.csv", sep=',')
demomat = pd.read_csv(clinpath + "/demographics.csv", sep=',')
cnbmat.set_index('bblid', inplace = True)
diagmat.set_index('bblid', inplace = True)
demomat.set_index('bblid', inplace = True)

#### Troubleshooting: Initialize empty matrix for fcmats # FIX if desired:
num_matrices = 100  # You can change this to the desired number
matrix_shape = (700, 701)
index = pd.MultiIndex.from_product([range(num_matrices), range(matrix_shape[0]), range(matrix_shape[1])], names=['Matrix', 'Row', 'Column'])
fcmats = pd.DataFrame(data, index=index, columns=['Value'])

Empty DataFrame
Columns: [BBLID, Session, SalVentAttn, VisCent, tap_tot, er40_cr, BDI, PROMIS-Anx]
Index: []


### Choose which modules to run

In [169]:
runfcon = False
runALFF = True
runCNB = True
runCEST = True
runClinical = True

### Import data, loop through subjects, and establish file paths

In [173]:
# Generate list of subjects glob.glob(inpath + 'sub-*) # Generates list of all file names
folder_names = [folder for folder in glob.glob(os.path.join(inpath, "*")) if os.path.isdir(folder)]

for subj_path in folder_names:
    # Extract bblid id:
    bblid = subj_path.split('-')[1]
    print("Processing subject " + bblid)
    # Extract session id: 
    items = os.listdir(subj_path)
    ses_folder = [item for item in items if item.startswith("ses")]
    ses = ses_folder[0].split('-')[1]
    ses_path = os.path.join(inpath, subj_path, ses_folder[0]) # full path to session
    # Add to running list of IDs grp analysis later:
    bblids.append(bblid)
    sesids.append(ses)
    # Start new row in grp_df for this subject:
    ids = [bblid, ses]  # Values for the first two columns
    grp_df.loc[len(grp_df)] = ids + [float('nan')] * (len(grp_df.columns) - len(ids))
    
    ##################################################################################################
    ## FC
    ##################################################################################################
    if runfcon:
    #    fc.subj_fcon(ses_path, bblid, ses, grp_df, networks) Troubleshoot function
        fcmat_glob = f"{ses_path}/func/*Schaefer717_measure-pearsoncorrelation_conmat.tsv"
        fcmat = pd.read_csv(glob.glob(fcmat_glob)[0], sep='\t') # read in fcmat
        fcmat.set_index('Node', inplace = True)
        # Loop through the networks
        for network in networks:
            # Select rows and columns corresponding to the network
            network_fc = fcmat.loc[fcmat.index.str.contains(network), fcmat.columns[fcmat.columns.str.contains(network)]]
            # Calculate avg network fc and add value to proper column in grp_df
            grp_df.loc[len(grp_df)-1, network] = network_fc.values.mean()


    ##################################################################################################
    ## CNB
    ##################################################################################################
    if runCNB:
        # Loop through the CNB scores
        for i in range(len(CNB_scores)):
            CNB_score = CNB_scores[i]
            print(CNB_score)
            # Select score of interest
            score = cnbmat.at[float(bblid), CNB_score[i]]
            print(score)
            #score2 = cnbmat.loc[cnbmat.index == float(bblid), cnbmat.columns == CNB_score]
            #print(score2)
            valid = cnbmat.loc[cnbmat.index == float(bblid), cnbmat.columns == CNB_valid[i]]
            print(valid)
            # If score was valid, add to grp_df
            if valid.values[0][0] == 'V': # FIX some items have "CV" or "V1". convert this to a str.contains statement
                #print("yes")
                grp_df.loc[grp_df['BBLID'] == bblid, grp_df.columns == CNB_score] = float(score)
                
                
                
    ##################################################################################################
    ## Clinical
    ##################################################################################################

    
    ##################################################################################################
    ## Clinical
    ##################################################################################################


Processing subject 18199
tap_tot


KeyError: 't'

### Network FCON 
Call fcon function and calculate network-level connectivity measures for subject and add  subject data to group file for future group analysis

In [None]:
        path_to_fcmat = f"{subj_path}/func/*Schaefer717_measure-pearsoncorrelation_conmat.tsv"
              fcmat = pd.read_csv(path_to_fcmat, sep='\t')
              # Add to 3D matrix for group analysis later
              fcmats = pd.concat([fcmats, fcmat], axis=2)
              # Calculate avg within-network connectivity for the current subject
              avg_fcon = subj_fcon(fcmat, networks_of_interest) 
              temp_df = pd.DataFrame(bblid, columns=["BBLID"])
              temp_df["Session"] = ses
              temp_df[networks_of_interest] = avg_fcon
              # Append the subject's data as a new row to the DataFrame
              grp_df = grp_df.append(temp_df, ignore_index=True)
                    
          
          
          if runalff:
              path_to_fcmat = f"{subj_path}/func/*Schaefer717_measure-pearsoncorrelation_conmat.tsv"
              fcmat = pd.read_csv(path_to_fcmat, sep='\t')
              # Add to 3D matrix for group analysis later
              fcmats = pd.concat([fcmats, fcmat], axis=2)
              # Calculate avg within-network connectivity for the current subject
              avg_fcon = subj_fcon(fcmat, networks_of_interest) 
              temp_df = pd.DataFrame(bblid, columns=["BBLID"])
              temp_df["Session"] = ses
              temp_df[networks_of_interest] = avg_fcon
              # Append the subject's data as a new row to the DataFrame
              grp_df = grp_df.append(temp_df, ignore_index=True)      
          
          if runcnb:
              
              