### *This notebook demonstrates every step of the data processing for the GCN. To save time, all of its output data have already been uploaded to the cloud, so it does not need to be run during the educational session.* 

In [8]:
import os
from nilearn import datasets
from nilearn.input_data import NiftiMasker
import pandas as pd
import glob
import pathlib
import numpy as np
import csv

# Fetching Haxby dataset

In [2]:
# Fetch the Haxby dataset for subject `sub_no` into `data_dir`.
sub_no = 4
data_dir = os.path.join('..', 'data')
haxby_ds = datasets.fetch_haxby(
    subjects=[sub_no],
    fetch_stimuli=True,
    data_dir=data_dir,
)

# Only one subject was requested, so take the first entry of each file list.
func_file = haxby_ds.func[0]
mask_vt_file = haxby_ds.mask_vt[0]

# Label table: a space-separated text file; its 'labels' column is the target.
labels = pd.read_csv(haxby_ds.session_target[0], sep=" ")
y = labels['labels']
categories = y.unique()

# Apply the mask to the functional image, with standardization enabled.
# X has one row per entry of y (this is checked before the data are written).
masker = NiftiMasker(mask_img=mask_vt_file, standardize=True)
X = masker.fit_transform(func_file)

# Data paths

In [3]:
# Output folders, one per processing stage. The trailing '/' is preserved so
# the path values stay exactly as before for any code that appends file names
# to them by plain string concatenation.
proc_path = os.path.join(data_dir, 'haxby_proc/')
concat_path = os.path.join(data_dir, 'haxby_concat/')
conn_path = os.path.join(data_dir, 'haxby_connectomes/')
split_path = os.path.join(data_dir, 'haxby_split_win/')

# `exist_ok=True` creates a missing folder and is a no-op for an existing one,
# which avoids the check-then-create race of `os.path.exists` + `os.makedirs`.
for stage_dir in (proc_path, concat_path, conn_path, split_path):
    os.makedirs(stage_dir, exist_ok=True)

# Empty the two folders that are regenerated below, so that files left over
# from a previous run cannot be mixed with the new ones.
for stage_dir in (concat_path, split_path):
    for stale_path in glob.glob(os.path.join(stage_dir, '*')):
        # os.remove() raises on directories; leave any sub-folder untouched.
        if os.path.isdir(stale_path) and not os.path.islink(stale_path):
            continue
        os.remove(stale_path)

# Data processing

In [4]:
# Group the rows of X by label and save one array per label as
# '<label>_concat_fMRI.npy' in `concat_path`.
old_dirContents = os.listdir(concat_path)
print(old_dirContents)

if len(old_dirContents) > 1:
    # Same guard as before: only write when the folder holds at most one entry.
    print('Folder is Not Empty')
elif len(X) != len(y):
    # Previously this case silently wrote nothing, leaving later cells to run
    # on an empty folder; fail loudly instead.
    raise ValueError(
        'X has {} rows but y has {} labels; they must match'.format(len(X), len(y))
    )
else:
    # Positional array of labels, independent of the index carried by `y`.
    label_values = np.asarray(y)

    # One boolean-mask selection and one save per label. Row order within a
    # label is the original order of X, i.e. the same array that appending the
    # rows one at a time would build, but without re-loading and re-writing
    # the file for every single row (and without `allow_pickle=True`).
    # NOTE: an existing file for a label is overwritten rather than extended.
    for label in pd.unique(label_values):
        concat_file_name = concat_path + '{}_concat_fMRI.npy'.format(label)
        concat_bold_files = X[label_values == label]
        np.save(concat_file_name, concat_bold_files)

[]


In [5]:
# Write the list of categories to 'phenotypic_data.tsv': a 'label' header row
# followed by one row per category.
# `newline=''` is what the csv module requires of files handed to csv.writer;
# without it, platforms that translate '\n' emit '\r\r\n' line endings.
with open(os.path.join(concat_path, 'phenotypic_data.tsv'), 'wt', newline='') as pheno_out:
    tsv_writer = csv.writer(pheno_out, delimiter='\t')
    tsv_writer.writerow(['label'])
    tsv_writer.writerows([category] for category in categories)

# Time windows

In [6]:
# Number of consecutive rows of each array that go into one saved window.
window_length = 1

# Inputs: the category list and the per-label arrays found in `concat_path`.
pheno_file = os.path.join(concat_path, 'phenotypic_data.tsv')
# os.path.join avoids the doubled separator that `concat_path + '/*.npy'`
# produces when `concat_path` already ends with '/'.
processed_bold_files = sorted(glob.glob(os.path.join(concat_path, '*.npy')))

# Outputs: a name template for the window files (filled with a source name and
# a zero-padded 4-digit window index) and the CSV that lists their labels.
out_file = os.path.join(split_path, '{}_{:04d}.npy')
out_csv = os.path.join(split_path, 'labels.csv')

## Split fMRI data
We now split the BOLD input files into windows of the desired length, and create a CSV file that contains the label for each split file.

In [7]:
# Cut every per-label array into windows of `window_length` rows, save each
# window to its own .npy file, and record (label id, file name) for each one.

# Integer id written to the labels CSV for each category name.
dic_labels = {'rest':0,'face':1,'chair':2,'scissors':3,'shoe':4,'scrambledpix':5,'house':6,'cat':7,'bottle':8}

# Rows are collected in a list and turned into a DataFrame once at the end:
# `DataFrame.append` was removed in pandas 2.0 and copied the frame on every call.
label_records = []

for proc_bold in processed_bold_files:

    ts_data = np.load(proc_bold)
    ts_duration = len(ts_data)

    # The category name is the part of the file name before the first '_'.
    ts_filename = os.path.basename(proc_bold)
    ts_label = ts_filename.split('_', 1)[0]
    valid_label = dic_labels[ts_label]

    # Number of complete windows; trailing rows that do not fill one are dropped.
    n_splits = ts_duration // window_length
    if n_splits == 0:
        # Fewer rows than one window: np.split() rejects 0 sections, so skip.
        continue
    ts_data = ts_data[:n_splits * window_length, :]

    for j, split_ts in enumerate(np.split(ts_data, n_splits)):
        # `ts_filename` still carries its '.npy' suffix; it is passed unchanged
        # so that the generated file names stay the same as before.
        ts_output_file_name = out_file.format(ts_filename, j)

        # Swap the two axes so that the saved array has the window rows as columns.
        np.save(ts_output_file_name, np.swapaxes(split_ts, 0, 1))
        label_records.append(
            {'label': valid_label, 'filename': os.path.basename(ts_output_file_name)}
        )

label_df = pd.DataFrame(label_records, columns=['label', 'filename'])
label_df.to_csv(out_csv, index=False)