In [None]:
#This script scores raw .log files from the EF version of the fractal n-back task and converts them to BIDS
#Inputs:
    #directory of raw task files (naming convention: bblid_scanid/bblid_scanid-frac2B_1.00_no1B2.log)
    #task-specific .xml template file, used as the key to score trials 
#Outputs:
    #BIDS-valid events.tsv file
    #BIDS-valid json sidecar
    #Summary CSV with scoring information (including dprime) for all participants 

#adapted from Zizu

In [2]:
import glob
import json
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
from scipy.stats import norm
import math

In [4]:
#load the template .xml that acts as the answer key for scoring the task; edit the path if needed
#a copy can be found on github, or on saturn
root = ET.parse('/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs/msmri522_2vs0_back.xml').getroot() 
stim = root[5]  #the stimulus score entries are the children of the element at index 5
scorelabel = list(stim)  #one entry per child element, in document order

In [5]:
#root_dir: directory where the T1 task data is found (change file path for appropriate user)
root_dir = '/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/SCANNER_TASK/'
#task_dir: directory under root_dir where individual subject folders are found
task_dir = 'T1_rename_2022_CORRECT'
#collect every path two levels below any folder whose name starts with task_dir
#(there is no '/' between task_dir and the first '*', so the first wildcard extends the folder name)
all_files1 = glob.glob('{}{}*/*/*'.format(root_dir, task_dir))

In [7]:
#root_dir: directory where the T2 task data is found (change file path for appropriate user)
root_dir = '/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/SCANNER_TASK/'
#task_dir: directory under root_dir where individual subject folders are found
task_dir = 'SCANNER_TASK_T2'
#collect every path two levels below any folder whose name starts with task_dir
#(there is no '/' between task_dir and the first '*', so the first wildcard extends the folder name)
all_files2 = glob.glob('{}{}*/*/*'.format(root_dir, task_dir))
#quick sanity check: show the first few matches
print(all_files2[:3])

['/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/SCANNER_TASK/SCANNER_TASK_T2/20961_12245/20961_12245.session', '/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/SCANNER_TASK/SCANNER_TASK_T2/20961_12245/20961_12245-frac2B_1.00_no1B.log', '/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/SCANNER_TASK/SCANNER_TASK_T2/20976_11873/20976_11873-frac2B_1.00_no1B.log']


In [8]:
# TO CONVERT ALL FILES IN A DIRECTORY:
#read in each .log, convert to a BIDS-style events .tsv + accompanying .json sidecar
#bblid and scanid of each file will print out as they are converted

#change output folder as necessary
#NOTE(review): this loop reads all_files2 (the T2 glob) but writes into the T1 folder - confirm this is intended
out_dir = '/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/SCANNER_TASK/BIDS_2022/T1/'
events_stem = 'sub-{0}_ses-{1}_task-fracback_acq-singleband_bold_events'

#column layout applied to the raw .log table (it is read without a header row)
log_columns = ['Subject','Trial','EventType','Code','Time','TTime','Uncertainty0','Duration','Uncertainty1',
               'ReqTime','ReqDur','StimType','PairIndex']
numeric_columns = ['Trial', 'Time', 'TTime', 'Duration', 'ReqTime', 'Uncertainty0', 'Uncertainty1', 'PairIndex']

#answer key from the template: for each condition, a list of [expected result, trial index]
#this does not depend on the subject file, so it is built once (0BACK first, then 2BACK)
expected_by_task = {'0BACK': [], '2BACK': []}
for s in scorelabel:
    category = s.get('category')
    if category in expected_by_task:
        expected_by_task[category].append([s.get('expected'), s.get('index')])

#the .json sidecar is identical for every file; its top-level keys must match the .tsv column names
sidecar={
    "Trial_Type": {
        "Description": "Indicator of type of action that is expected",
        "Levels": {
            "0BACK": "0back trial, participant responds to target picture",
            "2BACK": "2back trial, participant responds if target picture is the same as picture shown 2 trials previously",
        }
    }, 
    "Results":{
        "Description": "Expected result of each trial",
        "Levels":{
            "NR": "No response, participant not expected to respond",
            "Match":"Target is a match based on task rules, participant expected to repond",
        }
    },

    "Onset": {
        "Description": "Onset (in seconds) of the event measured from the beginning of the acquisition of the first volume in the corresponding task imaging data file",
    }, 

    "Duration":{
        "Description":"Duration of the event (measured from onset) in seconds",
    },

    "Response_Time":{
        "Description":"Time for a participant to make respond to each trial"
    },
    "Score":{
        "Description":"Outcome of each trial.", 
        "Levels":{
            "false_positive":"no response expected, response detected",
            "true_negative":"no response expected, no response detected",
            "true_positive":"response expected, response detected",
            "false_negative":"response expected, no response detected",
        },
    },
}

for f in all_files2:
    if '.log' not in f:
        continue
    try:
        #the parent folder is named <bblid>_<scanid>
        folder_parts = f.split('/')[-2].split('_')
        bblid = folder_parts[0]
        scanid = folder_parts[1]

        bb = pd.read_csv(f, skiprows=3, sep='\t', header=None)
        bb.columns = log_columns
        #drop the first two rows of the table; copy so the assignment below does not write into a slice
        bb = bb.iloc[2:].copy()
        bb[numeric_columns] = bb[numeric_columns].apply(pd.to_numeric)

        allback = []
        for task_name, template in expected_by_task.items():
            for expected, trial_index in template:
                #builtin int() replaces np.int, which was removed in NumPy 1.24
                last_trial = int(trial_index)
                #logged events whose Trial number lies in [last_trial-2, last_trial]
                #(kept as an inner merge on all columns, exactly as the rows were selected before)
                a1 = bb[bb['Trial'] >= last_trial - 2]
                a2 = bb[bb['Trial'] <= last_trial]
                aa = np.array(pd.merge(a1, a2, how='inner')['TTime'].to_list())
                if len(aa) > 6:
                    if aa[0] > 0:
                        #presumably TTime is logged in units of 0.1 ms, so /10 gives ms - TODO confirm
                        response = aa[0] / 10
                    else:
                        #position (within every other event: 0, 2, 4, ...) of the first non-zero TTime
                        res = next((k for k, t in enumerate(aa[::2]) if t), None)
                        if res is None:
                            raise ValueError('no non-zero TTime found for trial index {}'.format(trial_index))
                        #take the event just before that position and add 800 per step
                        #(800 presumably is the 0.8 s stimulus spacing in ms - TODO confirm)
                        response = aa[2 * res - 1] / 10 + (res - 1) * 800
                else:
                    #6 or fewer logged events in the window is recorded as "no response"
                    response = 0
                allback.append([task_name, trial_index, expected, response])

        #now we convert into BIDS-style headers
        #NOTE(review): BIDS events files expect lowercase 'onset' and 'duration' as the first two
        #columns; the names/order below are left as they were - confirm before validating
        dfallback = pd.DataFrame(allback)
        dfallback.columns = ['task','Index','Results','ResponseTime']
        dfallback['Index'] = dfallback['Index'].astype(int)
        dfallback['Onset'] = 0.8 * dfallback['Index']
        dfallback['Duration'] = 3*0.8
        dfallback['ResponseTime'] = dfallback['ResponseTime'].astype(int)  #truncates to whole units
        dfallback['Response_Time'] = dfallback['ResponseTime']/1000
        dfallback = dfallback.drop(columns=['Index', 'ResponseTime'])
        dfallback = dfallback.rename(columns={'task': 'Trial_Type'})

        #next, score the task
        score = []
        for expected, rt in zip(dfallback['Results'], dfallback['Response_Time']):
            if expected == 'NR':
                #no response expected: any non-zero response time counts as a response
                score.append('false_positive' if rt != 0 else 'true_negative')
            elif expected == 'Match' and 0 < rt <= 2.4:
                #response expected, response detected
                score.append('true_positive')
            elif expected == 'Match' and (rt > 2.4 or rt == 0):
                #response expected, no response detected (or detected too late)
                score.append('false_negative')
            else:
                raise ValueError('cannot score trial: expected={!r}, response time={!r}'.format(expected, rt))
        dfallback['Score'] = score

        print(bblid,scanid)
        out_stem = out_dir + events_stem.format(bblid, scanid)
        dfallback.to_csv(out_stem + '.tsv', sep = '\t', index=False)
        with open(out_stem + '.json', 'w') as fp:
            json.dump(sidecar, fp)
    except Exception as err:
        #report which file failed and why, then keep going with the remaining files
        print("exception in {}: {!r}".format(f, err))


20961 12245
20976 11873


In [25]:
#TO CONVERT ONE FILE:
#replace with the scan ID of the session to convert (matched as a substring of the path)
target_scanid = '12245'
one_file = [f for f in all_files2 if target_scanid in f]

#change output folder as necessary
out_dir = '/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/SCANNER_TASK/BIDS_2022/T2/'
events_stem = 'sub-{0}_ses-{1}_task-fracback_acq-singleband_bold_events'

#column layout applied to the raw .log table (it is read without a header row)
log_columns = ['Subject','Trial','EventType','Code','Time','TTime','Uncertainty0','Duration','Uncertainty1',
               'ReqTime','ReqDur','StimType','PairIndex']
numeric_columns = ['Trial', 'Time', 'TTime', 'Duration', 'ReqTime', 'Uncertainty0', 'Uncertainty1', 'PairIndex']

#answer key from the template: for each condition, a list of [expected result, trial index]
#(0BACK first, then 2BACK)
expected_by_task = {'0BACK': [], '2BACK': []}
for s in scorelabel:
    category = s.get('category')
    if category in expected_by_task:
        expected_by_task[category].append([s.get('expected'), s.get('index')])

#the .json sidecar is identical for every file; its top-level keys must match the .tsv column names
sidecar={
    "Trial_Type": {
        "Description": "Indicator of type of action that is expected",
        "Levels": {
            "0BACK": "0back trial, participant responds to target picture",
            "2BACK": "2back trial, participant responds if target picture is the same as picture shown 2 trials previously",
        }
    }, 
    "Results":{
        "Description": "Expected result of each trial",
        "Levels":{
            "NR": "No response, participant not expected to respond",
            "Match":"Target is a match based on task rules, participant expected to repond",
        }
    },

    "Onset": {
        "Description": "Onset (in seconds) of the event measured from the beginning of the acquisition of the first volume in the corresponding task imaging data file",
    }, 

    "Duration":{
        "Description":"Duration of the event (measured from onset) in seconds",
    },

    "Response_Time":{
        "Description":"Time for a participant to make respond to each trial"
    },
    "Score":{
        "Description":"Outcome of each trial.", 
        "Levels":{
            "false_positive":"no response expected, response detected",
            "true_negative":"no response expected, no response detected",
            "true_positive":"response expected, response detected",
            "false_negative":"response expected, no response detected",
        },
    },
}

#read in each matching .log, convert to a BIDS-style events .tsv + accompanying .json sidecar
#bblid and scanid of each file will print out as they are converted
#(errors are not caught here, so a failing file stops the cell with a traceback)
for f in one_file:
    if '.log' not in f:
        continue
    #the parent folder is named <bblid>_<scanid>
    folder_parts = f.split('/')[-2].split('_')
    bblid = folder_parts[0]
    scanid = folder_parts[1]

    bb = pd.read_csv(f, skiprows=3, sep='\t', header=None)
    bb.columns = log_columns
    #drop the first two rows of the table; copy so the assignment below does not write into a slice
    bb = bb.iloc[2:].copy()
    bb[numeric_columns] = bb[numeric_columns].apply(pd.to_numeric)

    allback = []
    for task_name, template in expected_by_task.items():
        for expected, trial_index in template:
            #builtin int() replaces np.int, which was removed in NumPy 1.24
            last_trial = int(trial_index)
            #logged events whose Trial number lies in [last_trial-2, last_trial]
            #(kept as an inner merge on all columns, exactly as the rows were selected before)
            a1 = bb[bb['Trial'] >= last_trial - 2]
            a2 = bb[bb['Trial'] <= last_trial]
            aa = np.array(pd.merge(a1, a2, how='inner')['TTime'].to_list())
            if len(aa) > 6:
                if aa[0] > 0:
                    #presumably TTime is logged in units of 0.1 ms, so /10 gives ms - TODO confirm
                    response = aa[0] / 10
                else:
                    #position (within every other event: 0, 2, 4, ...) of the first non-zero TTime
                    res = next((k for k, t in enumerate(aa[::2]) if t), None)
                    if res is None:
                        raise ValueError('no non-zero TTime found for trial index {}'.format(trial_index))
                    #take the event just before that position and add 800 per step
                    #(800 presumably is the 0.8 s stimulus spacing in ms - TODO confirm)
                    response = aa[2 * res - 1] / 10 + (res - 1) * 800
            else:
                #6 or fewer logged events in the window is recorded as "no response"
                response = 0
            allback.append([task_name, trial_index, expected, response])

    #now we convert into BIDS-style headers
    #NOTE(review): BIDS events files expect lowercase 'onset' and 'duration' as the first two
    #columns; the names/order below are left as they were - confirm before validating
    dfallback = pd.DataFrame(allback)
    dfallback.columns = ['task','Index','Results','ResponseTime']
    dfallback['Index'] = dfallback['Index'].astype(int)
    dfallback['Onset'] = 0.8 * dfallback['Index']
    dfallback['Duration'] = 3*0.8
    dfallback['ResponseTime'] = dfallback['ResponseTime'].astype(int)  #truncates to whole units
    dfallback['Response_Time'] = dfallback['ResponseTime']/1000
    dfallback = dfallback.drop(columns=['Index', 'ResponseTime'])
    dfallback = dfallback.rename(columns={'task': 'Trial_Type'})

    #next, score the task
    score = []
    for expected, rt in zip(dfallback['Results'], dfallback['Response_Time']):
        if expected == 'NR':
            #no response expected: any non-zero response time counts as a response
            score.append('false_positive' if rt != 0 else 'true_negative')
        elif expected == 'Match' and 0 < rt <= 2.4:
            #response expected, response detected
            score.append('true_positive')
        elif expected == 'Match' and (rt > 2.4 or rt == 0):
            #response expected, no response detected (or detected too late)
            score.append('false_negative')
        else:
            raise ValueError('cannot score trial: expected={!r}, response time={!r}'.format(expected, rt))
    dfallback['Score'] = score

    print(bblid,scanid)
    out_stem = out_dir + events_stem.format(bblid, scanid)
    dfallback.to_csv(out_stem + '.tsv', sep = '\t', index=False)
    with open(out_stem + '.json', 'w') as fp:
        json.dump(sidecar, fp)


20961 12245


In [24]:
#defines function to calculate dprime for all subs 
Z = norm.ppf  #inverse of the standard normal CDF (z-transform of a rate)

def SDT(hits, misses, fas, crs):
    """Return the d-prime sensitivity measure for one set of trial counts.

    Parameters
    ----------
    hits : number of true positives
    misses : number of false negatives
    fas : number of false alarms (false positives)
    crs : number of correct rejections (true negatives)

    Returns
    -------
    Z(hit rate) - Z(false alarm rate). A rate of exactly 0 or 1 is replaced by
    0.5/n or 1 - 0.5/n (n = number of trials behind that rate) so the result
    stays finite. NaN is returned when there are no signal trials
    (hits + misses == 0) or no noise trials (fas + crs == 0), because d-prime
    is undefined in that case.
    """
    n_signal = hits + misses
    n_noise = fas + crs
    if n_signal == 0 or n_noise == 0:
        #previously this divided by zero
        return float('nan')

    # Floors and ceilings are replaced by half a hit / half a false alarm
    half_hit = 0.5 / n_signal
    half_fa = 0.5 / n_noise

    # Calculate hit_rate and avoid d' infinity
    hit_rate = hits / n_signal
    if hit_rate == 1:
        hit_rate = 1 - half_hit
    if hit_rate == 0:
        hit_rate = half_hit

    # Calculate false alarm rate and avoid d' infinity
    fa_rate = fas / n_noise
    if fa_rate == 1:
        fa_rate = 1 - half_fa
    if fa_rate == 0:
        fa_rate = half_fa

    # Return d' only
    return Z(hit_rate) - Z(fa_rate)

In [25]:
#bids_dir: folder (relative to root_dir) that the T1 BIDS events files were saved to
bids_dir = 'BIDS_2022/T1'
#list everything one level below any folder whose path starts with root_dir + bids_dir
all_files_t1 = glob.glob('{}{}*/*'.format(root_dir, bids_dir))

In [26]:
#bids_dir: folder (relative to root_dir) that the T2 BIDS events files were saved to
bids_dir = 'BIDS_2022/T2'
#list everything one level below any folder whose path starts with root_dir + bids_dir
all_files_t2 = glob.glob('{}{}*/*'.format(root_dir, bids_dir))

In [None]:
#loop through each .tsv and calculate summary scores for each trial outcome per condition
#ie: false pos, false neg, true pos, true neg for 0Back, 2Back, and combined (AllBack)
conditions = ('0BACK', '2BACK')
outcomes = ('false_positive', 'false_negative', 'true_positive', 'true_negative')
#denominators for the hit / false alarm rates: # targets and # foils per condition
n_targets = 15
n_foils = 45

all_subs=[]
for f in all_files_t1:
    if '.tsv' not in f:
        continue
    #file name looks like sub-<bblid>_ses-<scanid>_task-...; keep the part after 'sub-'
    bblid = f.split('/')[-1].split('_')[0].split('-')[-1]
    df = pd.read_csv(f, sep='\t')

    #count rows that contain both a condition label and an outcome label (in any column)
    counts = {(cond, outcome): 0 for cond in conditions for outcome in outcomes}
    for val in df.values:
        for cond in conditions:
            if cond not in val:
                continue
            for outcome in outcomes:
                if outcome in val:
                    counts[(cond, outcome)] += 1
    back0fp = counts[('0BACK', 'false_positive')]
    back0fn = counts[('0BACK', 'false_negative')]
    back0tp = counts[('0BACK', 'true_positive')]
    back0tn = counts[('0BACK', 'true_negative')]
    back2fp = counts[('2BACK', 'false_positive')]
    back2fn = counts[('2BACK', 'false_negative')]
    back2tp = counts[('2BACK', 'true_positive')]
    back2tn = counts[('2BACK', 'true_negative')]

    #key order below fixes the column order of the summary CSV
    summary = {
        'bblid': bblid,
        '0BackFalsePositive': back0fp, '0BackFalseNegative': back0fn,
        '0BackTruePositive': back0tp, '0BackTrueNegative': back0tn,
        '2BackFalsePositive': back2fp, '2BackFalseNegative': back2fn,
        '2BackTruePositive': back2tp, '2BackTrueNegative': back2tn,
        'AllBackTruePositive': back0tp + back2tp, 'AllBackTrueNegative': back0tn + back2tn,
        'AllBackFalsePositive': back0fp + back2fp, 'AllBackFalseNegative': back0fn + back2fn,
        #all correct + incorrect across conditions
        '0BackAllCorrect': back0tp + back0tn, '0BackAllIncorrect': back0fp + back0fn,
        '2BackAllCorrect': back2tp + back2tn, '2BackAllIncorrect': back2fp + back2fn,
        'AllBackAllCorrect': back2tp + back2tn + back0tp + back0tn,
        'AllBackAllIncorrect': back0fp + back0fn + back2fp + back2fn,
        #hit/false alarm rate across conditions (ie: true pos/ # targets, false pos/ # foils)
        '0BackHitRate': back0tp / n_targets, '0BackFalseAlarmRate': back0fp / n_foils,
        '2BackHitRate': back2tp / n_targets, '2BackFalseAlarmRate': back2fp / n_foils,
        'AllBackHitRate': (back0tp + back2tp) / (2 * n_targets),
        'AllBackFalseAlarmRate': (back0fp + back2fp) / (2 * n_foils),
        #dprime for each condition, using the SDT function defined in an earlier cell
        '0BackDprime': SDT(back0tp, back0fn, back0fp, back0tn),
        '2BackDprime': SDT(back2tp, back2fn, back2fp, back2tn),
        'AllBackDprime': SDT(back0tp + back2tp, back0fn + back2fn, back0fp + back2fp, back0tn + back2tn),
    }
    all_subs.append(summary)

#stop here rather than writing an empty (or stale, left over from another cell) table
if not all_subs:
    raise RuntimeError('no .tsv files found in all_files_t1; nothing to summarise')
#build the table once, after the loop
all_subs_df = pd.DataFrame(all_subs)
display(all_subs_df)
#change file paths as necessary
#NOTE(review): the T2 summary cell writes to this same path, so running both keeps only the later one
all_subs_df.to_csv('/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/summary.csv', sep = ',', index=False)

In [27]:
#and for T2
#loop through each .tsv and calculate summary scores for each trial outcome per condition
#ie: false pos, false neg, true pos, true neg for 0Back, 2Back, and combined (AllBack)
conditions = ('0BACK', '2BACK')
outcomes = ('false_positive', 'false_negative', 'true_positive', 'true_negative')
#denominators for the hit / false alarm rates: # targets and # foils per condition
n_targets = 15
n_foils = 45

all_subs=[]
for f in all_files_t2:
    if '.tsv' not in f:
        continue
    #file name looks like sub-<bblid>_ses-<scanid>_task-...; keep the part after 'sub-'
    bblid = f.split('/')[-1].split('_')[0].split('-')[-1]
    df = pd.read_csv(f, sep='\t')

    #count rows that contain both a condition label and an outcome label (in any column)
    counts = {(cond, outcome): 0 for cond in conditions for outcome in outcomes}
    for val in df.values:
        for cond in conditions:
            if cond not in val:
                continue
            for outcome in outcomes:
                if outcome in val:
                    counts[(cond, outcome)] += 1
    back0fp = counts[('0BACK', 'false_positive')]
    back0fn = counts[('0BACK', 'false_negative')]
    back0tp = counts[('0BACK', 'true_positive')]
    back0tn = counts[('0BACK', 'true_negative')]
    back2fp = counts[('2BACK', 'false_positive')]
    back2fn = counts[('2BACK', 'false_negative')]
    back2tp = counts[('2BACK', 'true_positive')]
    back2tn = counts[('2BACK', 'true_negative')]

    #key order below fixes the column order of the summary CSV
    summary_2 = {
        'bblid': bblid,
        '0BackFalsePositive': back0fp, '0BackFalseNegative': back0fn,
        '0BackTruePositive': back0tp, '0BackTrueNegative': back0tn,
        '2BackFalsePositive': back2fp, '2BackFalseNegative': back2fn,
        '2BackTruePositive': back2tp, '2BackTrueNegative': back2tn,
        'AllBackTruePositive': back0tp + back2tp, 'AllBackTrueNegative': back0tn + back2tn,
        'AllBackFalsePositive': back0fp + back2fp, 'AllBackFalseNegative': back0fn + back2fn,
        #all correct + incorrect across conditions
        '0BackAllCorrect': back0tp + back0tn, '0BackAllIncorrect': back0fp + back0fn,
        '2BackAllCorrect': back2tp + back2tn, '2BackAllIncorrect': back2fp + back2fn,
        'AllBackAllCorrect': back2tp + back2tn + back0tp + back0tn,
        'AllBackAllIncorrect': back0fp + back0fn + back2fp + back2fn,
        #hit/false alarm rate across conditions (ie: true pos/ # targets, false pos/ # foils)
        '0BackHitRate': back0tp / n_targets, '0BackFalseAlarmRate': back0fp / n_foils,
        '2BackHitRate': back2tp / n_targets, '2BackFalseAlarmRate': back2fp / n_foils,
        'AllBackHitRate': (back0tp + back2tp) / (2 * n_targets),
        'AllBackFalseAlarmRate': (back0fp + back2fp) / (2 * n_foils),
        #dprime for each condition, using the SDT function defined in an earlier cell
        '0BackDprime': SDT(back0tp, back0fn, back0fp, back0tn),
        '2BackDprime': SDT(back2tp, back2fn, back2fp, back2tn),
        'AllBackDprime': SDT(back0tp + back2tp, back0fn + back2fn, back0fp + back2fp, back0tn + back2tn),
    }
    all_subs.append(summary_2)

#stop here rather than writing an empty table or one left over from the T1 cell
if not all_subs:
    raise RuntimeError('no .tsv files found in all_files_t2; nothing to summarise')
#build the table once, after the loop
all_subs_df = pd.DataFrame(all_subs)
#change file paths as necessary
#NOTE(review): the T1 summary cell writes to this same path, so running both keeps only the later one
all_subs_df.to_csv('/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/summary.csv', sep = ',', index=False)