# Generate BIDS behavioural data and meta-information

In [10]:
import numpy as np
import pandas as pd
import json
import os
from collections import OrderedDict
from scipy import io
RAW_DIR =  '/home/lukehearne/qm89_scratch/LSTBIDS/0_behaviour/behavioural_data/' # raw behav
BIDS_DIR = '/home/lukehearne/qm89_scratch/LSTBIDS/LST_bids/' # i.e., the output dir
SUBJ_LIST = np.arange(1,66,1)

#--------#
# LST ---#
#--------#
print('LST:')
# 3 16 32 44 are no good in the LST.
bad_subjs = np.array((3,16,32,44))
for subj in np.delete(SUBJ_LIST,bad_subjs-1):
    #print('\tSubj:',subj)
    file = 'Sub_'+str(subj)+'_fMRI_LST_results.mat'
    data = io.loadmat(RAW_DIR+file)
    #print('\t',data.keys())

    fMRI_info = data['fMRI_out']
    runs = np.arange(1,4,1) #there are 3 runs in the task
    for i in runs:
        #print('\t\tRun:',i)
        #index by run
        rundata = fMRI_info[fMRI_info[:,0]==i,:]

        # onset
        df = pd.DataFrame(data=rundata[:,2],columns=['onset'])

        # add the duration
        df['duration'] = 5

        # trial type
        df['trial_type'] = rundata[:,1]

        # put in the actual condition names
        df['trial_type'].replace(to_replace=1, value='Binary',inplace=True)
        df['trial_type'].replace(to_replace=2, value='Ternary',inplace=True)
        df['trial_type'].replace(to_replace=3, value='Quaternary',inplace=True)
        df['trial_type'].replace(to_replace=4, value='Null',inplace=True)

        # add the response time
        rt = rundata[:,6]
        rt[rt>2] = float('NaN')
        df['response_time'] = rt

        # add the accuracy
        df['accuracy'] = rundata[:,5]

        # we also want to add motor and confidence information
        df['motor_onset'] = rundata[:,3]
        df['motor_duration'] = 2

        df['confidence_onset'] = rundata[:,4]
        df['confidence_duration'] = 2

        #sort by the onset
        df.sort_values('onset', inplace=True)

        # print the dataframe to check our working  
        #print(df.head(20))

        # save data
        subj_label = str(subj).zfill(2)
        subj_path = BIDS_DIR + 'sub-' + subj_label + '/func/'
        if not os.path.exists(subj_path):
            os.makedirs(subj_path)
        filename = (subj_path + 'sub-' + subj_label + '_task-LST_run-' 
                    + str(i) +'_events.tsv')
        #print(filename)
        df.to_csv(filename, index=False, sep='\t',na_rep='n/a')
        
#--------#
# APM ---#
#--------#
print('APM:')
set1_answers = np.array((8, 4, 5, 1, 2, 5, 6, 3, 7, 8, 7, 6))
set2_answers = np.array((5, 1, 7, 4, 3, 1, 6, 1, 8, 4, 5, 6, 2, 1,
                         2, 4, 6, 7, 3, 8, 8, 7, 6, 3, 7, 2, 7, 5,
                         6, 5, 4, 8, 5, 1, 3, 2))
APM_set1_accuracy = np.zeros((len(SUBJ_LIST)))
APM_set2_accuracy = np.zeros((len(SUBJ_LIST)))
subj_labels = []
for i,subj in enumerate(SUBJ_LIST):
    #print('\tSubj:',subj)
    subj_labels.append('sub-'+str(subj).zfill(2))
    #set 1
    file = 'Sub_'+str(subj)+'_APM_Set1_results.mat'
    data = io.loadmat(RAW_DIR+file)
    resp = data['resp'][0]
    APM_set1_accuracy[i] = np.sum(set1_answers==resp)
    
    #set 2
    file = 'Sub_'+str(subj)+'_APM_Set2_results.mat'
    data = io.loadmat(RAW_DIR+file)
    resp = data['resp'][0]
    if len(resp) > 36:
        resp = resp[0:36]
    APM_set2_accuracy[i] = np.sum(set2_answers==resp)

# save data
if not os.path.exists(BIDS_DIR + 'phenotype/'):
    os.makedirs(BIDS_DIR + 'phenotype/')
tsv_filename = BIDS_DIR + 'phenotype/APM.tsv'
df = pd.DataFrame(index=subj_labels)
df['APM_set1_accuracy'] = APM_set1_accuracy
df['APM_set2_accuracy'] = APM_set2_accuracy
df.to_csv(tsv_filename, sep='\t', index_label='participant_id')

# create json
# meta information
meta = {}
mtm = {'Description': 'The Ravens Advanced Progressive Matrices'}
mtm['TermURL'] = 'https://link.springer.com/referenceworkentry/10.1007/978-3-319-28099-8_69-1'
meta['MeasurementToolMetadata'] = mtm
# column information
meta['APM_set1_accuracy'] = {}
meta['APM_set1_accuracy']['Description'] = 'Total accuracy for set 1 of the APM'
meta['APM_set2_accuracy'] = {}
meta['APM_set2_accuracy']['Description'] = 'Total accuracy for set 2 of the APM'

json_filename = BIDS_DIR + 'phenotype/APM.json'
with open(json_filename, 'w') as jsonFile:
    json.dump(meta, jsonFile, indent=4)

#-------#
# VS ---#
#-------#  
print('VS:')
n_trials = 240
n_trials_cond = np.int(n_trials/3)
VS_acc_avg = np.zeros((len(SUBJ_LIST),3))
VS_rt_avg = np.zeros((len(SUBJ_LIST),3))

for i,subj in enumerate(SUBJ_LIST):
    try:
        #print('Subj:',subj)
        file = 'Sub_'+str(subj)+'_VS_results.mat'
        data = io.loadmat(RAW_DIR+file)

        trial_order = data['param'][0][0][2][0].copy()
        trial_order_sort = np.argsort(trial_order)

        #acc
        acc = data['rec'][0][0][2][0]
        acc = acc[trial_order_sort]
        acc = np.reshape(acc,(n_trials_cond,3),order='F')
        VS_acc_avg[i,:] = np.mean(acc,axis=0)

        #rt
        rt = data['rec'][0][0][0][0].copy()
        rt = rt[trial_order_sort]
        rt = np.reshape(rt,(n_trials_cond,3),order='F')
        rt[acc==0] = float('NaN')
        VS_rt_avg[i,:] = np.nanmean(rt,axis=0)
    except:
        VS_acc_avg[i,:] = float('NaN')
        VS_rt_avg[i,:] = float('NaN')

# save data
if not os.path.exists(BIDS_DIR + 'phenotype/'):
    os.makedirs(BIDS_DIR + 'phenotype/')
tsv_filename = BIDS_DIR + 'phenotype/VisualSearch.tsv'
df = pd.DataFrame(index=subj_labels)
df['VS_acc_avg_set8'] = VS_acc_avg[:,0]
df['VS_acc_avg_set16'] = VS_acc_avg[:,1]
df['VS_acc_avg_set24'] = VS_acc_avg[:,2]
df['VS_rt_avg_set8'] = VS_rt_avg[:,0]
df['VS_rt_avg_set16'] = VS_rt_avg[:,1]
df['VS_rt_avg_set24'] = VS_rt_avg[:,2]
df.to_csv(tsv_filename, sep='\t', index_label='participant_id',na_rep='n/a')

# create json
# meta information
meta = {}
mtm = {'Description': 'Visual Search Task - 43 participants completed a conjunction visual search task in which they were instructed to report the orientation of a target letter “L” (rotated 90° leftward or rightward) among “T” distractors in set sizes of 8, 16, or 24 items (80 trials each, 240 total). The search cost was defined as the increase in reaction time between the smallest and largest set sizes. This task was chosen as a “low reasoning” counterpart to the Ravens Progressive Matrices to demonstrate the specificity of brain–behavior correlations, as described in detail in the Results.'}
mtm['TermURL'] = 'n/a'
meta['MeasurementToolMetadata'] = mtm
# column information
meta['VS_acc_avg_set8'] = {}
meta['VS_acc_avg_set8']['Description'] = 'Average visual search accuracy for sets of 8 stimuli'
meta['VS_acc_avg_set16'] = {}
meta['VS_acc_avg_set16']['Description'] = 'Average visual search accuracy for sets of 16 stimuli'
meta['VS_acc_avg_set24'] = {}
meta['VS_acc_avg_set24']['Description'] = 'Average visual search accuracy for sets of 24 stimuli'
meta['VS_rt_avg_set8'] = {}
meta['VS_rt_avg_set8']['Description'] = 'Average reaction time (correct trials only) for sets of 8 stimuli'
meta['VS_rt_avg_set16'] = {}
meta['VS_rt_avg_set16']['Description'] = 'Average reaction time (correct trials only) for sets of 8 stimuli'
meta['VS_rt_avg_set24'] = {}
meta['VS_rt_avg_set24']['Description'] = 'Average reaction time (correct trials only) for sets of 8 stimuli'

json_filename = BIDS_DIR + 'phenotype/VisualSearch.json'
with open(json_filename, 'w') as jsonFile:
    json.dump(meta, jsonFile, indent=4)
    
#------------------------#
# Additional fg tests ---#
#------------------------#
print('fg tests:')
df_fg = pd.read_csv(RAW_DIR + 'fluid_tests_fMRI.csv')

# form boards
tsv_filename = BIDS_DIR + 'phenotype/FormBoards.tsv'
df = pd.DataFrame(index=subj_labels)
df['Total_correct'] = df_fg.loc[0:64,'FB-correct'].values
df['Total_incorrect'] = df_fg.loc[0:64,'FB-incorrect'].values
df.to_csv(tsv_filename, sep='\t', index_label='participant_id',na_rep='n/a')
# create json
# meta information
meta = {}
mtm = {'Description': 'The Minnesota Paper Form Board Test'}
mtm['TermURL'] = 'https://en.wikipedia.org/wiki/Minnesota_Paper_Form_Board_Test'
meta['MeasurementToolMetadata'] = mtm
# column information
meta['Total_correct'] = {}
meta['Total_correct']['Description'] = 'Total number of correct items (total score is often correct - incorrect, participants can choose not to answer items)'
meta['Total_incorrect'] = {}
meta['Total_incorrect']['Description'] = 'Total number of incorrect items'

json_filename = BIDS_DIR + 'phenotype/FormBoards.json'
with open(json_filename, 'w') as jsonFile:
    json.dump(meta, jsonFile, indent=4)
    
# paper-folding
tsv_filename = BIDS_DIR + 'phenotype/PaperFolding.tsv'
df = pd.DataFrame(index=subj_labels)
df['Total_correct'] = df_fg.loc[0:64,'PF-correct'].values
df['Total_incorrect'] = df_fg.loc[0:64,'PF-incorrect'].values
df.to_csv(tsv_filename, sep='\t', index_label='participant_id',na_rep='n/a')
# create json
# meta information
meta = {}
mtm = {'Description': 'Paper folding test of fluid intelligence'}
mtm['TermURL'] = 'n/a'
meta['MeasurementToolMetadata'] = mtm
# column information
meta['Total_correct'] = {}
meta['Total_correct']['Description'] = 'Total number of correct items (total score is often correct - incorrect, participants can choose not to answer items)'
meta['Total_incorrect'] = {}
meta['Total_incorrect']['Description'] = 'Total number of incorrect items'

json_filename = BIDS_DIR + 'phenotype/PaperFolding.json'
with open(json_filename, 'w') as jsonFile:
    json.dump(meta, jsonFile, indent=4)
    
#----------------------------#
# Create participants.tsv ---#
#----------------------------#
print('Participants.tsv:')
df_demo = pd.read_csv(RAW_DIR + 'Project_Summary_DEIDENTIFIED.csv')
age = df_demo.loc[0:64,'Age'].values
sex = df_demo.loc[0:64,'Gender (M=1, F=2)']
sex[sex==1] = 'M'
sex[sex==2] = 'F'

# write df_out
df = pd.DataFrame({'age': age, 'sex': sex.values}, index=subj_labels)
df.to_csv(BIDS_DIR+'participants.tsv', sep='\t', index_label='participant_id',na_rep='n/a')

#-----------------------#
# Extra demographics ---#
#-----------------------#
tsv_filename = BIDS_DIR + 'phenotype/Extended_demographics.tsv'
df = pd.DataFrame(index=subj_labels)
df['Years_of_education'] = df_demo.loc[0:64,'Years of Education'].values
df['Sudoku_experience'] = df_demo.loc[0:64,'Sudoku Score (1-5)'].values
df['Motivation_rating'] = df_demo.loc[0:64,'Motivation'].values
df['Sleep_rating'] = df_demo.loc[0:64,'Sleep Report (0=no, 1=mild, 2=yes)'].values
df.to_csv(tsv_filename, sep='\t', index_label='participant_id',na_rep='n/a')

# create json
# meta information
meta = {}
mtm = {'Description': 'Extended demographic information'}
mtm['TermURL'] = 'n/a'
meta['MeasurementToolMetadata'] = mtm
# column information
meta['Years_of_education'] = {}
meta['Years_of_education']['Description'] = 'Total number of education in years including counting from primary school'
meta['Sudoku_experience'] = {}
meta['Sudoku_experience']['Description'] = 'Participant rating of sudoku experience from 1 to 5, 1 being low (never played sudoku), to 5 (daily sudoku use)'
meta['Motivation_rating'] = {}
meta['Motivation_rating']['Description'] = '"How motivated were you to complete the task?" where 1 = not motivated at all, 5 = very motivated'
meta['Sleep_rating'] = {}
meta['Sleep_rating']['Description'] = 'Did the participant fall asleep in the scan? 0 = no, 1 = maybe, 2 = yes'

json_filename = BIDS_DIR + 'phenotype/Extended_demographics.json'
with open(json_filename, 'w') as jsonFile:
    json.dump(meta, jsonFile, indent=4)
    
#---------------------#
# Data description ---#
#---------------------#
print('Dataset description:')
# create the dataset description - basically verbatim from the BIDS standard
data= OrderedDict()
#General fields, shared with MRI BIDS and MEG BIDS:
#Required fields:
#name of the dataset
data['Name'] = 'LST'

#The version of the BIDS standard that was used
data['BIDSVersion'] = '1.2.0'

#Recommended fields:
#what license is this dataset distributed under? The use of license name abbreviations is suggested for specifying a license. A list of common licenses with suggested abbreviations can be found in appendix III.
data['License'] = 'PD'

#List of individuals who contributed to the creation/curation of the dataset
data['Authors'] = ['Luke Hearne','Luca Cocchi','Jason Mattingley']

#who should be acknowledged in helping to collect the data
data['Acknowledgements'] = 'This work was supported by the Australian Research Council (ARC) Special Research Initiatives Science of Learning Research Centre (SR120300015), and the ARC Centre of Excellence for Integrative Brain Function (ARC Centre Grant CE140100007). J.B.M. was supported by an ARC Australian Laureate Fellowship (FL110100103). L.C. was supported by a National Health and Medical Research Council (NHMRC) grant (APP1099082). A.Z. was supported by an NHMRC Career Development Fellowship (GNT1047648). L.J.H. was supported by an Australian Postgraduate Award. We thank Oscar Jacoby and Zoie Nott for data-collection assistance, Tong Wu for assistance with imaging analysis, and Dr. Kieran OBrien, Associate Professor Markus Barth, and Dr. Steffen Bollmann for performing the sequence optimizations.'

#Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset
data['HowToAcknowledge'] = 'Please cite: Hearne, L. J., Cocchi, L., Zalesky, A., & Mattingley, J. B. (2017). Reconfiguration of brain network architectures between resting-state and complexity-dependent cognitive reasoning. Journal of Neuroscience, 37(35), 8399-8411.'

#sources of funding (grant numbers)
data['Funding'] = ['See Acknowledgements']

#a list of references to publication that contain information on the dataset, or links.
data['ReferencesAndLinks'] = ['https://doi.org/10.1523/JNEUROSCI.0485-17.2017']

#the Document Object Identifier of the dataset (not the corresponding paper).
data['DatasetDOI'] = ''

dataset_json_folder = BIDS_DIR
dataset_json_name = dataset_json_folder+'/'+'dataset_description.json'

with open(dataset_json_name, 'w') as ff:
    json.dump(data, ff,sort_keys=False, indent=4)

LST:
APM:
VS:
fg tests:
Participants.tsv:
Dataset description:


In [22]:
#append dcm2nii json files with task names
for i,subj in enumerate(SUBJ_LIST):
    subj_label = str(subj).zfill(2)
    #LST
    for run in [1,2,3]:
        filename = BIDS_DIR + 'sub-' + subj_label + '/func/sub-' + subj_label + '_task-LST_run-' + str(run) + '_bold.json'
        if not os.path.exists(filename):
            print(subj_label)
            print('\t LST-',str(run),'json file not found')
            continue
        a_dict = {'TaskName': 'LST'}
        with open(filename) as f:
            data = json.load(f)
        data.update(a_dict)
        with open(filename,'w') as f:
            json.dump(data,f)

    # rest
    for run in [1,2]:
        filename = BIDS_DIR + 'sub-' + subj_label + '/func/sub-' + subj_label + '_task-rest_run-' + str(run) + '_bold.json'
        if not os.path.exists(filename):
            print(subj_label)
            print('\t rest-',str(run),'json file not found')
            continue
        a_dict = {'TaskName': 'rest'}
        with open(filename) as f:
            data = json.load(f)
        data.update(a_dict)
        with open(filename,'w') as f:
            json.dump(data,f)

01
	 rest- 1 json file not found
01
	 rest- 2 json file not found
02
	 LST- 1 json file not found
04
	 LST- 2 json file not found
04
	 rest- 1 json file not found
04
	 rest- 2 json file not found
06
	 LST- 1 json file not found
06
	 LST- 2 json file not found
06
	 LST- 3 json file not found
06
	 rest- 1 json file not found
07
	 LST- 1 json file not found
08
	 LST- 2 json file not found
08
	 rest- 2 json file not found
09
	 LST- 1 json file not found
09
	 LST- 2 json file not found
09
	 rest- 2 json file not found
12
	 LST- 1 json file not found
16
	 LST- 1 json file not found
16
	 LST- 2 json file not found
16
	 LST- 3 json file not found
16
	 rest- 1 json file not found
16
	 rest- 2 json file not found
19
	 LST- 1 json file not found
21
	 LST- 2 json file not found
22
	 LST- 1 json file not found
23
	 LST- 1 json file not found
24
	 LST- 1 json file not found
26
	 LST- 2 json file not found
26
	 rest- 1 json file not found
26
	 rest- 2 json file not found
27
	 rest- 2 json file not fo