In [10]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pyreadstat
import seaborn as sns
from scipy import stats
import os
import shutil
import glob

from src.configs import configs

# Handle to matplotlib's rainbow colormap; none of the cells shown here reference it.
cmap = plt.cm.rainbow
import h5py

In [2]:
# Raise the notebook display limits so wide / long frames are truncated later.
for option_name, option_value in (
    ('display.max_rows', 100),
    ('display.max_columns', 300),
):
    pd.set_option(option_name, option_value)

In [5]:
# Directory whose entries are listed below and treated as per-subject folders.
halfpipe_dir = '{}/EPOC/halfpipe_patients_only'.format(configs.PROJECT_ROOT)

In [None]:
# Every entry of halfpipe_dir is treated as one subject folder; the alphabetical
# order fixed here is the subject order used by all later cells.
subj_folders = sorted(os.listdir(halfpipe_dir))
print(subj_folders)

In [7]:
# All-subject edge mask: starts as 1 (= keep) in every cell of a 263 x 263
# frame; the loop in the next cell zeroes the entries that must be dropped.
mask = pd.DataFrame(np.ones((263, 263)))

In [None]:
# Narrow the all-subject mask: an entry keeps its value only if it lies strictly
# above the diagonal and is not NaN in every subject's correlation matrix.
cnt = 0

for subj in subj_folders:

    tsv_name = f'{subj}_task-rest_feature-corrMatrixAroma_atlas-brainnetomeCombinedDseg_desc-correlation_matrix.tsv'
    tsv_path = os.path.join(halfpipe_dir, subj, 'func', 'task-rest', tsv_name)

    # header-less, tab-separated correlation matrix of this subject
    rs_mat = pd.read_csv(tsv_path, sep="\t", header=None)

    # True strictly above the diagonal, False on the diagonal and below it
    upper_triangle = np.triu(np.ones(rs_mat.shape, dtype=bool), k=1)

    # blank the diagonal and lower triangle, then flag the entries that still hold a value
    df_mat = rs_mat.where(upper_triangle)
    subj_mask = df_mat.notna()

    # every entry this subject lacks is set to 0 in the shared mask
    mask = mask.where(subj_mask, 0)

    print(f'subject {subj} done')

    cnt += 1

In [9]:
# Keep an independent 0/1 snapshot of the mask under a second name before the
# next cell replaces the zeros of `mask` with NaN.
df_mask = mask.copy(deep=True)

In [10]:
# Vector form of the mask: a one-column frame with one row per retained entry.
#
# `mask` is rebound instead of modified with inplace=True, so the cell does not
# alter an object in place behind the reader's back.
mask = mask.replace(0, np.nan)

# The NaN entries are dropped explicitly: the legacy DataFrame.stack() removes
# them implicitly, but the newer implementation (future_stack=True) keeps them,
# which would silently turn this into a vector over the full matrix.
mask_vec = mask.stack().dropna().reset_index(drop=True).to_frame()

In [11]:
# Cast the 0/1 entries to booleans so the frame can be used as a boolean selection mask.
df_mask = df_mask.astype(bool)

In [12]:
# Empty frame that is meant to hold one row of masked correlation values per subject.
df_corr=pd.DataFrame()

In [13]:
# Sanity check: the frame is still empty at this point.
df_corr.shape

(0, 0)

In [None]:
# Build the subject x edge table: each subject contributes one row holding the
# correlation values at the entries kept by df_mask, read row by row.
#
# The rows are collected in a list and converted to a frame once. Growing
# df_corr with pd.concat inside the loop re-copies all earlier rows on every
# iteration and appends duplicate rows whenever this cell is re-run.
edge_selector = df_mask.to_numpy(dtype=bool)
subject_rows = []

for subj in subj_folders:

    file_name = subj + '_task-rest_feature-corrMatrixAroma_atlas-brainnetomeCombinedDseg_desc-correlation_matrix.tsv'
    file_path = os.path.join(halfpipe_dir, subj, 'func', 'task-rest', file_name)

    # load correlation matrix
    rs_mat = pd.read_csv(file_path, sep="\t", header=None)

    # Boolean indexing returns the kept entries in row-major order (the same
    # order where + stack produced) and always yields the same number of values,
    # independent of how DataFrame.stack() treats NaN in the installed pandas.
    # A matrix whose shape differs from the mask raises an IndexError here
    # instead of being silently re-aligned.
    subject_rows.append(rs_mat.to_numpy()[edge_selector])

    print('subject {} done'.format(subj))

# One row per subject, integer column labels 0..n_edges-1; an empty subject
# list yields an empty frame.
df_corr = pd.DataFrame(subject_rows)

# Number of processed subjects: the value the per-iteration counter used to reach.
cnt = len(subject_rows)

In [15]:
# Shape check: one row per subject, one column per entry retained by the mask.
df_corr.shape

(93, 30628)

In [None]:
# Peek at the first rows of the subject x edge table.
df_corr.head()

In [17]:
# One-column frame ('subject') with the subject folder names, in subj_folders order.
subj_codes = pd.DataFrame(subj_folders, columns=['subject'])

In [20]:
# Convert each folder name into a subject code made of three character slices
# joined by underscores: name[4:8] + '_' + name[8] + '_' + name[9:13].
#
# The codes are derived from the untouched subj_folders list rather than from
# the current contents of subj_codes, so re-running this cell cannot slice
# already-converted codes a second time.
folder_names = pd.DataFrame(subj_folders, columns=['subject'])['subject']
subj_codes = (
    folder_names.str[4:8] + '_' + folder_names.str[8] + '_' + folder_names.str[9:13]
).to_frame(name='subject')

In [None]:
# Display the subject codes.
subj_codes

In [22]:
# Put the subject code column in front of the edge values; the two frames are
# matched row by row through their index.
df_corr_codes = pd.concat([subj_codes, df_corr], axis='columns')

In [None]:
# Peek at the first rows of the table with the subject code column attached.
df_corr_codes.head()

### Save .csv with subjects that have outcome and demographic data

In [None]:
# keep only subjects that have demographic information:
# read the demographic table and collect its distinct 'Code' values, sorted
epoc_dem = pd.read_csv(f'{configs.PROJECT_ROOT}/EPOC/csv_files/EPOC_fMRI_pat_80.csv', sep=",")
unique_codes = np.sort(epoc_dem['Code'].unique())
print(unique_codes.shape)
# plain Python list of the sorted codes, used for the membership test further down
pat_labels_epoc = unique_codes.tolist()
print(pat_labels_epoc)

In [None]:
# Peek at the first rows of the demographic table.
epoc_dem.head()

In [28]:
# Boolean row selector: True where the value in the first column (the subject
# code) appears among the codes of the demographic table.
has_demographics = df_corr_codes.iloc[:, 0].isin(pat_labels_epoc)
df_fMRI_pat = df_corr_codes.loc[has_demographics]
print(df_fMRI_pat.shape)

(80, 30629)


In [29]:
# Write the filtered table (subject code + edge values) to disk without the row index.
out_csv = f'{configs.PROJECT_ROOT}/EPOC/csv_files/MRI_csv_files/rsfMRI_80pat.csv'
df_fMRI_pat.to_csv(out_csv, index=False)