In [24]:
from nilearn import datasets
from preprocess_data import Reader
import os
import shutil
import sys
import pandas as pd
import numpy as np
import deepdish as dd
import warnings
import os.path as osp
from pathlib import Path

In [2]:
def str2bool(v):
    """Convert a command-line style yes/no value into a ``bool``.

    Parameters
    ----------
    v : bool or str
        A real boolean (returned unchanged) or a string compared
        case-insensitively against the accepted spellings below.

    Returns
    -------
    bool
        ``True`` for 'yes', 'true', 't', 'y', '1';
        ``False`` for 'no', 'false', 'f', 'n', '0'.

    Raises
    ------
    argparse.ArgumentTypeError
        If ``v`` is a string that matches none of the accepted spellings.
    """
    # Imported here because the notebook's import cell does not provide
    # argparse; without it the error path below raised NameError instead.
    import argparse

    if isinstance(v, bool):
        return v

    token = v.lower()
    if token in ('yes', 'true', 't', 'y', '1'):
        return True
    if token in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')

#### Fetch Data

In [None]:
# Machine-specific data root; it is handed to nilearn as `data_dir` below.
root_folder = '/home/ch225256/Data'
# NOTE(review): presumably the directory nilearn populates for the options
# chosen below (cpac / band-pass filtered / no global signal regression) — confirm.
data_folder = os.path.join(root_folder, 'ABIDE_pcp/cpac/filt_noglobal/')
# exist_ok=True replaces the separate existence check, so re-running the cell
# is safe and there is no gap between checking and creating.
os.makedirs(data_folder, exist_ok=True)

pipeline = 'cpac'
atlas = 'cc200'
download = True
id_file_path = 'subject_IDs.txt'

# Files to fetch (passed to nilearn as `derivatives`)
files = ['rois_' + atlas]

# Download database files
if download:
    abide = datasets.fetch_abide_pcp(
        data_dir=root_folder,
        pipeline=pipeline,
        band_pass_filtering=True,
        global_signal_regression=False,
        derivatives=files,
        quality_checked=False,
    )

#### Per-subject folders and connectivity matrices

In [None]:
# Machine-specific data root shared with the download step.
root_folder = '/home/ch225256/Data'
id_file_path = 'subject_IDs.txt'
reader = Reader(root_folder, id_file_path)
data_folder = os.path.join(root_folder, 'ABIDE_pcp/cpac/filt_noglobal/')

pipeline = 'cpac'
atlas = 'cc200'
files = ['rois_' + atlas]
# Derivative name -> file-name suffix used when moving files. Only the entries
# listed in `files` are looked up in this cell.
filemapping = {'func_preproc': 'func_preproc.nii.gz',
               files[0]: files[0] + '.1D'}

# Build the ID-list path from the root and file name defined above instead of
# repeating the absolute path; it resolves to the same location.
subject_ids_file = os.path.join(root_folder, id_file_path)
subject_IDs = np.genfromtxt(subject_ids_file, dtype=str)

# Create a folder for each subject and move that subject's files into it
subject_files = reader.fetch_filenames(subject_IDs, files[0], atlas)
for s, fname in zip(subject_IDs, subject_files):
    subject_folder = os.path.join(data_folder, s)
    # Idempotent creation; also creates missing parent directories.
    os.makedirs(subject_folder, exist_ok=True)

    # Everything in the file name that precedes the derivative name
    base = fname.split(files[0])[0]

    # Move each subject file to the subject folder unless it is already there.
    # NOTE(review): `base + filemapping[fl]` is used exactly as returned by
    # fetch_filenames; if those names are relative, the move depends on the
    # current working directory — confirm against Reader.
    for fl in files:
        source_name = base + filemapping[fl]
        if not os.path.exists(os.path.join(subject_folder, source_name)):
            shutil.move(source_name, subject_folder)

time_series = reader.get_timeseries(subject_IDs, atlas)

# Compute and save connectivity matrices
for kind in ('correlation', 'partial correlation'):
    reader.subject_connectivity(time_series, subject_IDs, atlas, kind)

#### Correlation matrix generation

In [2]:

# Machine-specific data root.
root_path = '/home/ch225256/Data'
data_folder = os.path.join(root_path, 'ABIDE_pcp/cpac/filt_noglobal/')
id_file_path = 'subject_IDs.txt'

# Atlas for network construction (node definition)
atlas = 'cc200'

# Run parameters. 'seed' is recorded here but not consumed anywhere in this cell.
params = {
    'seed': 123,     # seed for random initialization
    'atlas': atlas,  # Atlas for network construction
}

reader = Reader(root_path, id_file_path)

# Get subject IDs and class labels. The ID-list path is built from the root and
# file name above rather than repeating the absolute path.
subject_ids_file = os.path.join(root_path, id_file_path)
subject_IDs = np.genfromtxt(subject_ids_file, dtype=str)
labels = reader.get_subject_score(subject_IDs, score='DX_GROUP')

# Number of subjects and classes for binary classification
num_classes = 2
num_subjects = len(subject_IDs)
params['n_subjects'] = num_subjects

# Class-label containers.
# 1 is autism, 2 is control
y_data = np.zeros([num_subjects, num_classes])  # n x 2, one-hot
y = np.zeros([num_subjects, 1])                 # n x 1, raw group code

# Fill both label arrays; each label is looked up and converted once.
for i, subject in enumerate(subject_IDs):
    group = int(labels[subject])
    y_data[i, group - 1] = 1
    y[i] = group

# Load the per-subject connectivity networks.
# (The original notebook annotated both results as (1035, 200, 200) — not verified here.)
fea_corr = reader.get_networks(subject_IDs, iter_no='', kind='correlation', atlas_name=atlas)
fea_pcorr = reader.get_networks(subject_IDs, iter_no='', kind='partial correlation', atlas_name=atlas)

# One .h5 file per subject under <data_folder>/raw.
raw_folder = os.path.join(data_folder, 'raw')
os.makedirs(raw_folder, exist_ok=True)  # idempotent, no separate existence check
for i, subject in enumerate(subject_IDs):
    record = {
        'corr': fea_corr[i],
        'pcorr': fea_pcorr[i],
        'label': (y[i] - 1),  # shift the 1/2 group code to 0/1; stays a length-1 array
    }
    dd.io.save(os.path.join(raw_folder, subject + '.h5'), record)

#### Final Data Generation

In [26]:
# Machine-specific data root.
root_path = '/home/ch225256/Data'
data_dir = os.path.join(root_path, 'ABIDE_pcp/cpac/filt_noglobal/raw')
# Directory holding one sub-folder per subject with its .1D time-series file.
# The misspelled variable name (and the matching 'timeseires' key written at the
# end) are kept unchanged so anything that already reads them keeps working.
timeseires = os.path.join(root_path, 'ABIDE_pcp/cpac/filt_noglobal/')

meta_path = os.path.join(root_path, "ABIDE_pcp/Phenotypic_V1_0b_preprocessed1.csv")
meta_file = pd.read_csv(meta_path, header=0)

# subject id -> acquisition site
id2site = meta_file.set_index("subject")["SITE_ID"].to_dict()

# Filtering thresholds, named here instead of being repeated inline.
IQ_MIN, IQ_MAX = 20, 200   # subjects with any IQ score outside this range are dropped
N_TIMEPOINTS = 100         # minimum series length; longer series are cut to this

times = []
piq = []
viq = []
fiq = []
labels = []
pcorrs = []
corrs = []
site_list = []

# os.listdir returns entries in arbitrary order; sorting makes the order of
# samples in the saved dataset reproducible across runs and machines.
for f in sorted(os.listdir(data_dir)):
    if not osp.isfile(osp.join(data_dir, f)):
        continue

    # File names are expected to be "<subject id>.h5"
    fname = f.split('.')[0]
    subject_id = int(fname)
    site = id2site[subject_id]

    # Select the subject's phenotypic row(s) once, then read the three IQ scores.
    subject_rows = meta_file.loc[meta_file['subject'] == subject_id]
    fiq_ = subject_rows['FIQ'].values[0]
    viq_ = subject_rows['VIQ'].values[0]
    piq_ = subject_rows['PIQ'].values[0]

    # Skip the subject if any score is missing or outside [IQ_MIN, IQ_MAX].
    if any(np.isnan(score) or score > IQ_MAX or score < IQ_MIN
           for score in (fiq_, viq_, piq_)):
        continue

    # First (in sorted order) file ending in "1D" inside the subject folder.
    subject_dir = osp.join(timeseires, fname)
    roi_files = sorted(name for name in os.listdir(subject_dir) if name.endswith("1D"))
    roi_file = roi_files[0]

    # Transposed so that axis 1 is the one checked and truncated below.
    series = np.loadtxt(osp.join(subject_dir, roi_file), skiprows=0).T

    if series.shape[1] < N_TIMEPOINTS:
        continue

    stored = dd.io.load(osp.join(data_dir, f))

    # Positive infinities are replaced by 0; -inf and NaN are left untouched.
    pcorr = stored['pcorr'][()]
    pcorr[pcorr == float('inf')] = 0

    att = stored['corr'][()]
    att[att == float('inf')] = 0

    label = stored['label']

    times.append(series[:, :N_TIMEPOINTS])
    labels.append(label[0])
    fiq.append(fiq_)
    viq.append(viq_)
    piq.append(piq_)
    corrs.append(att)
    pcorrs.append(pcorr)
    site_list.append(site)

# np.save stores the dict as a pickled object array, so it has to be read back
# with np.load(..., allow_pickle=True).item().
dataset = {
    'timeseires': np.array(times),
    "label": np.array(labels),
    "fiq": np.array(fiq),
    "viq": np.array(viq),
    "piq": np.array(piq),
    "corr": np.array(corrs),
    "pcorr": np.array(pcorrs),
    'site': np.array(site_list),
}
np.save(Path(root_path)/'ABIDE_pcp/abide.npy', dataset)