In [1]:
import scipy.io
import pandas as pd
import numpy as np
import os
import pickle
from tqdm import tqdm
from sys import getsizeof

In [2]:
def data_maker(subject_path, window_size=50, overlap=0):
    df = None
    for session_path in os.listdir(subject_path):
        mat_path = os.path.join(subject_path, session_path, 'atlas_rfMRI.mat')
        mat = scipy.io.loadmat(mat_path) 
        data = np.array(mat['ts'])[19:,:]
        labels = mat['labels'][19:]
        _df = pd.DataFrame(data=data.T, columns=labels)
        df = pd.concat([df, _df])
    df.reset_index(drop=True, inplace=True)
    
    
    num_windows = df.shape[0]//(window_size - overlap)
    initial_indexes = [i * window_size for i in np.arange(num_windows)]
    corr_res = []
    for init_window in initial_indexes:
        part_df = df[init_window:init_window + window_size]
        _corr = np.corrcoef(df.T)
        _flat_corr = []
        for i in range(_corr.shape[0]):
            for j in range(i+1, _corr.shape[1]):
                _flat_corr.append(_corr[i, j])
        corr_res.append(_flat_corr)
        
    return np.array(corr_res)
    'CardSort_Unad',
    'ProcSpeed_AgeAd',


In [3]:
excluded = ['CardSort_Unad', 'ProcSpeed_AgeAd']

behav_measures = [
    'Subject',
    'Gender',
    'Flanker_Unadj',
    'WM_Task_Acc',
    'PMAT24_A_CR',
    'MMSE_Score',
    'PSQI_AmtSleep',
    'PicSeq_Unadj',
    'ReadEng_Unadj',
    'PicVocab_Unadj',
    'DDisc_AUC_200',
    'DDisc_AUC_40K',
    'VSPLOT_CRTE',
    'SCPT_SEN',
    'SCPT_SPEC',
    'IWRD_TOT',
    'IWRD_RTC',
    'ER40ANG',
    'ER40HAP',
    'ER40FEAR',
    'ER40SAD',
    'ER40NOE',
    'AngAffect_Unadj',
    'AngAggr_Unadj',
    'AngHostil_Unadj',
    'FearAffect_Unadj',
    'FearSomat_Unadj',
    'Sadness_Unadj',
    'LifeSatisf_Unadj',
    'PosAffect_Unadj',
    'Friendship_Unadj',
    'Loneliness_Unadj',
    'PercHostil_Unadj',
    'PercReject_Unadj',
    'PercStress_Unadj',
    'EmotSupp_Unadj',
    'InstruSupp_Unadj',
    'SelfEff_Unadj',
    'Emotion_Task_Acc',
    'Language_Task_Acc',
    'Relational_Task_Acc',
    'Social_Task_Perc_NLR',
    'Social_Task_Perc_Random',
    'Social_Task_Perc_TOM',
    'Social_Task_Perc_Unsure',
    'NEOFAC_A',
    'NEOFAC_C',
    'NEOFAC_E',
    'NEOFAC_N',
    'NEOFAC_O']

In [4]:
df_behav = pd.read_csv('behavioural_data.csv')

df_behav_measures = df_behav[behav_measures]
df_behav_measures.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_behav_measures.dropna(inplace=True)


In [6]:
path_read = '/home/matin/school/Amir_Omidvarnia/HCP_data'
path_write = '/home/matin/school/Amir_Omidvarnia/prepared_data/'

for subject_path in tqdm(os.listdir(path_read)):
    if int(subject_path) in df_behav_measures['Subject'].values:
        
        gender = 1 if df_behav_measures[df_behav_measures['Subject'] == int(subject_path)]['Gender'].values[0] == 'M' else 0

        subject_read_path = os.path.join(path_read, subject_path)
        data = data_maker(subject_read_path, 50)
        if data.shape[0] == 96:
            res = {}
            res['data'] = data
            res['gender'] = gender
            for measure in behav_measures:
                res[measure] = df_behav_measures[df_behav_measures['Subject'] == int(subject_path)][measure].values[0]

            subject_write_path = os.path.join(path_write, subject_path)
            file_to_write = open(subject_write_path + '.pkl', "wb")
            pickle.dump(res, file_to_write)
        else:
            print(subject_path + ' was rejected!', data.shape[0])
    else:
        print(subject_path + ' was rejected!')

 85%|████████▌ | 17/20 [02:14<00:28,  9.54s/it]

103010 was rejected!


100%|██████████| 20/20 [02:33<00:00,  7.66s/it]


In [8]:
path = '/home/matin/school/Amir_Omidvarnia/prepared_data/'

names_train = {}
names_test = {}
for i, j in enumerate(os.listdir(path)):
    if i <198:
        names_train[i] = j
    else:
        names_test[i-198] = j
    
save_path = '/home/matin/school/Amir_Omidvarnia/data_idx2name_train'
file_to_write = open(save_path + '.pkl', "wb")
pickle.dump(names_train, file_to_write)

print(names_test)
save_path2 = '/home/matin/school/Amir_Omidvarnia/data_idx2name_test'
file_to_write2 = open(save_path2 + '.pkl', "wb")
pickle.dump(names_test, file_to_write2)

{}
