In [1]:
import scipy.io
import pandas as pd
import numpy as np
import os
import pickle
from tqdm import tqdm
from sys import getsizeof

In [2]:
def data_maker(subject_path, window_size=50, overlap=0):
    df = None
    for session_path in os.listdir(subject_path):
        mat_path = os.path.join(subject_path, session_path, 'atlas_rfMRI.mat')
        mat = scipy.io.loadmat(mat_path) 
        data = np.array(mat['ts'])[19:,:]
        labels = mat['labels'][19:]
        _df = pd.DataFrame(data=data.T, columns=labels)
        df = pd.concat([df, _df])
    df.reset_index(drop=True, inplace=True)
    
    
    num_windows = df.shape[0]//(window_size - overlap)
    initial_indexes = [i * window_size for i in np.arange(num_windows)]
    corr_res = []
    for init_window in initial_indexes:
        part_df = df[init_window:init_window + window_size]
        _corr = np.corrcoef(df.T)
        _flat_corr = []
        for i in range(_corr.shape[0]):
            for j in range(i+1, _corr.shape[1]):
                _flat_corr.append(_corr[i, j])
        corr_res.append(_flat_corr)
        
    return np.array(corr_res)

In [3]:
df_behav = pd.read_csv('behavioural_data.csv')

In [6]:
path_read = '/home/matin/school/Amir_Omidvarnia/HCP_data'
path_write = '/home/matin/school/Amir_Omidvarnia/gender_data'

for subject_path in tqdm(os.listdir(path_read)):
    gender = 1 if df_behav[df_behav['Subject'] == int(subject_path)]['Gender'].values[0] == 'M' else 0
    
    subject_read_path = os.path.join(path_read, subject_path)
    data = data_maker(subject_read_path, 50)
    if data.shape[0] == 96:
        res = {}
        res['data'] = data
        res['target'] = gender
        subject_write_path = os.path.join(path_write, subject_path)
        file_to_write = open(subject_write_path + '.pkl', "wb")
        pickle.dump(res, file_to_write)
    else:
        print(subject_path + ' was rejected!', data.shape[0])

 88%|████████▊ | 174/198 [15:49<02:07,  5.31s/it]

140420 was rejected! 94


100%|██████████| 198/198 [17:54<00:00,  5.43s/it]


In [8]:
path = '/home/matin/school/Amir_Omidvarnia/gender_data'

names_train = {}
names_test = {}
for i, j in enumerate(os.listdir(path)):
    if i <180:
        names_train[i] = j
    else:
        names_test[i-180] = j
    
save_path = '/home/matin/school/Amir_Omidvarnia/data_idx2name_train'
file_to_write = open(save_path + '.pkl', "wb")
pickle.dump(names_train, file_to_write)

print(names_test)
save_path2 = '/home/matin/school/Amir_Omidvarnia/data_idx2name_test'
file_to_write2 = open(save_path2 + '.pkl', "wb")
pickle.dump(names_test, file_to_write2)

{0: '103414.pkl', 1: '106016.pkl', 2: '101309.pkl', 3: '115017.pkl', 4: '117122.pkl', 5: '105014.pkl', 6: '137633.pkl', 7: '114318.pkl', 8: '124220.pkl', 9: '100408.pkl', 10: '140117.pkl', 11: '108828.pkl', 12: '144226.pkl', 13: '128127.pkl', 14: '102715.pkl', 15: '119126.pkl', 16: '130821.pkl'}
