In [1]:
import os
import fnmatch
from multiprocessing import Pool, cpu_count
import numpy as np
import pandas as pd

In [2]:
repo = os.getcwd()
# Subject folders: entries of the working directory that are directories and
# whose name matches the fnmatch pattern 'GroupID'.
# NOTE(review): the pattern has no wildcard, so presumably it is a placeholder
# for the real subject-ID pattern -- confirm before running.
subjDirs = [
    entry
    for entry in os.listdir(repo)
    if os.path.isdir(entry) and fnmatch.fnmatch(entry, 'GroupID')
]

In [3]:
def zeropad(n,zeros=3):
    "Left-pad str(n) with '0' until it is at least `zeros` characters wide. Example: zeropad(7,3) == '007'"
    # rjust never truncates, so values already wider than `zeros` come back unchanged.
    return str(n).rjust(zeros, "0")

In [4]:
## merging script
def _load_table(path):
    """Read a whitespace-delimited, three-column numeric file as a 2-D array.

    np.genfromtxt collapses a one-row file to a 1-D array; np.atleast_2d
    restores the (rows, 3) layout so column slicing always works.
    Raises ValueError when the file does not have exactly three columns.
    """
    table = np.atleast_2d(np.genfromtxt(path, delimiter=None))
    if table.shape[1] != 3:
        raise ValueError('%s: expected 3 columns, found %d' % (path, table.shape[1]))
    return table


def _valid_roi(ids, n_roi):
    """Boolean mask of the entries that are whole numbers in 1..n_roi."""
    ids = np.asarray(ids, dtype=float)
    # NaN fails every comparison, so unreadable IDs are masked out as well.
    return (ids == np.floor(ids)) & (ids >= 1) & (ids <= n_roi)


def _comembership(mod, n_roi):
    """Return the n_roi x n_roi 0/1 matrix of shared module membership.

    mod holds one row per node: (node ID, module, flow).  Entry (i, j) is 1
    when nodes i+1 and j+1 are both listed in mod and carry the same module
    label.  A node that is not listed keeps NaN as its label; NaN never
    compares equal, so its whole row and column (diagonal included) stay 0.
    Raises ValueError if a node ID is listed more than once.
    """
    listed = _valid_roi(mod[:, 0], n_roi)
    idx = mod[listed, 0].astype(int) - 1
    if np.unique(idx).size != idx.size:
        raise ValueError('node IDs must be unique within a module partition')
    labels = np.full(n_roi, np.nan)
    labels[idx] = mod[listed, 1]
    return (labels[:, None] == labels[None, :]).astype(float)


def processInput(subj, n_roi=333, min_module_size=5):
    """Build and save the consensus module matrix for one subject.

    The matrix starts as the co-membership of the partition stored in
    subj/modOut/clink_thr150.clu (threshold 15%) and is then revised while
    walking the thresholds 149 down to 10 (0.149 to 0.010 in steps of 0.001).
    At each threshold, entry (X, Y) is overwritten with that threshold's
    co-membership value when
      * X and Y both belong to modules with at least min_module_size nodes, and
      * the link (X, Y) is listed in subj/clink_thr<thr>.net.
    The result is written to consMats/consMat_<subj>.csv, space-delimited;
    the consMats directory is created if it does not exist.

    Parameters
    ----------
    subj : str
        Subject directory, relative to the current working directory.
    n_roi : int
        Number of nodes; node IDs are expected to run from 1 to n_roi.
    min_module_size : int
        Smallest module (in nodes) whose members may update the matrix.

    Returns None.  Raises ValueError for files that do not have three columns
    or that list a node ID twice.
    """
    mod_dir = subj + '/modOut/'

    ## set the consensus matrix (thresholded at 15%)
    consMat = _comembership(_load_table(mod_dir + 'clink_thr150.clu'), n_roi)

    ## iteratively update the consensus matrix from threshold of 0.15 to 0.01 by 0.001
    for thr in reversed(range(10, 150)):
        stem = 'clink_thr' + str(zeropad(thr))

        # module partition (node ID, module, flow) and co-membership at this threshold
        mod = _load_table(mod_dir + stem + '.clu')
        consMat2 = _comembership(mod, n_roi)

        # nodes whose module holds at least min_module_size nodes
        labels, counts = np.unique(mod[:, 1], return_counts=True)
        in_big_module = np.isin(mod[:, 1], labels[counts >= min_module_size])
        surv_ids = mod[in_big_module, 0]
        survived = np.zeros(n_roi, dtype=bool)
        survived[surv_ids[_valid_roi(surv_ids, n_roi)].astype(int) - 1] = True

        # links that survived at this threshold (node_X, node_Y, connectivity)
        conn = _load_table(subj + '/' + stem + '.net')
        usable = _valid_roi(conn[:, 0], n_roi) & _valid_roi(conn[:, 1], n_roi)
        linked = np.zeros((n_roi, n_roi), dtype=bool)
        linked[conn[usable, 0].astype(int) - 1, conn[usable, 1].astype(int) - 1] = True

        # Only the (node_X, node_Y) orientation listed in the file is updated.
        # NOTE(review): if the .net file lists each undirected link once, the
        # consensus matrix will not stay symmetric -- confirm this is intended.
        update = linked & survived[:, None] & survived[None, :]
        consMat[update] = consMat2[update]

    # exist_ok keeps parallel workers from failing when another worker has
    # already created the output directory.
    os.makedirs('consMats', exist_ok=True)
    np.savetxt('consMats/consMat_' + subj + '.csv', consMat, delimiter=' ')

In [None]:
## Parallel processing
if __name__ == '__main__':
    # One task per subject directory.  20 is the worker ceiling used by this
    # notebook (the original note records a machine maximum of 128 cores);
    # never start more workers than there are cores or subjects, and keep at
    # least one because Pool rejects a worker count of 0.
    n_workers = max(1, min(20, cpu_count(), len(subjDirs)))

    # The with-block shuts the worker processes down once map() has returned,
    # also when a worker raises, so no processes are left behind.
    with Pool(processes=n_workers) as pool:
        mp_results = pool.map(processInput, subjDirs)