In [1]:
import os
import pandas as pd
import glob
import re

In [2]:
os.getcwd()

'/net/vast-storage.ib.cluster/scratch/scratch/Tue/dclb/mmc/code/preprocessing/events_file_population'

In [15]:
def make_events_file(MID_stim_locations, subj, run): 
    dfs = []
    for loc in MID_stim_locations:
        trial_type = loc.split("/")[-1].split(".")[0]
        if trial_type == 'NeuHit' and subj == 'HC013' and run == '2': #because this file doesn't have any values
            continue
        df = pd.read_csv(loc, sep='\t+',header = None,engine='python')
        df.drop(df[df[0] <= 0].index, inplace=True) #because these files have a 0 for the onset (col0) since this event didn't happen we need to ignore that row 
        numrows_df = df.shape[0]
        df[2]=[trial_type for val in range(0,numrows_df)]
        dfs.append(df)
    all_events = pd.concat(dfs, ignore_index=True)
    all_events.columns = ['onset', 'duration', 'trial_type']
    all_events.sort_values(by='onset', inplace=True, ascending=True)
    
    all_events.duration = all_events.duration.astype(float)

    #set duration to 2000ms for Cue stimuli and to (next cue stimulus - onset of hit/miss stimulus)
    for i in range(all_events.shape[0]):
        if 'Cue' in all_events.iat[i,2]:
            all_events.iat[i,1] = 2.0
        elif i < (all_events.shape[0]-1): #.shape[0] gets rows
            dur = all_events.iat[i+1,0] - all_events.iat[i,0]
            all_events.iat[i,1] = dur
        else:
            dur = 317.4 - all_events.iat[i,0]
            all_events.iat[i,1] = dur
    return all_events



In [18]:
events_file_locations = (glob.glob("../../../sub-*/ses-*/func/*mid*run-*events.tsv"))

#empty df 
empty_df = pd.DataFrame({'onset' : [],'duration' : [],'trial_type' : []})

#pull from nifti_to_behavioral_run_match.tsv
#make sure we don't read in the missing files (marked by 'none')
nifti_to_behavioral_run_match = pd.read_csv("nifti_to_behavioral_run_match.tsv", sep = '\t')

events_to_run_match_dict = dict(zip(nifti_to_behavioral_run_match['nifti_filename'], nifti_to_behavioral_run_match['run']))
events_to_run_match_dict = {key.split('bold')[0]+'events.tsv': value for key, value in events_to_run_match_dict.items() if key != 'none'}


for file_path in events_file_locations:
    
    split_path = file_path.split('/')
    grp, num = re.split('(\d+)', split_path[3].split("-")[-1])[0:2]
    ses = split_path[4].split('-')[-1]
    run_old = split_path[-1].split('_')[-2].split('-')[-1]
    file = split_path[-1]
    
    #only focus on mid events.tsv files (ignoring other bold events.tsv files)
    if 'mid' in file: 
        if file in events_to_run_match_dict.keys():
            #need to not just select the events.tsv but also make sure they get matched with the right run number
            #thus we get the run matching our events.tsv from the dictionary we created from nifti_to_behavioral_run_match.tsv
            run = str(events_to_run_match_dict[file])
            #get all the behavioral files based on the run number pulled from nifti_to_behavioral_run_match.tsv
            MID_stim_locations = (glob.glob(f"../../../sourcedata/behavioral/{grp}_{num}/{ses}/mid_R{run}/*.txt")) 
            all_events = make_events_file(MID_stim_locations,grp+num,run)
            all_events.to_csv(file_path, index = False, header=True, sep='\t', float_format='%.3f')
        #will set all mid files of runs we're not considering and all mid files with moco to an empty file with only a header
        else:
            empty_df.to_csv(file_path, index = False, header=True, sep='\t')
            
            
            

In [3]:
nifti_to_behavioral_run_match = pd.read_csv("nifti_to_behavioral_run_match.tsv", sep = '\t')
nifti_to_behavioral_run_match.shape

(929, 8)

In [4]:
# #don't need this anymore! Leaving here for reference for now :) 
# #note that I am first doing this just for one subject, one run, and one session, and will then loop later once this works! 
# MJ_Go = pd.read_csv ("../../sourcedata/behavioral/HC_009/baseline/SST_R1/MJ_Go.txt", sep = '\t',header = None)
# MJ_SuccStop = pd.read_csv ("../../sourcedata/behavioral/HC_009/baseline/SST_R1/MJ_SuccStop.txt", sep = '\t',header = None)
# MJ_UnsuccStop = pd.read_csv ("../../sourcedata/behavioral/HC_009/baseline/SST_R1/MJ_UnsuccStop.txt", sep = '\t',header = None)
# N_Go = pd.read_csv ("../../sourcedata/behavioral/HC_009/baseline/SST_R1/N_Go.txt", sep = '\t',header = None)
# N_SuccStop = pd.read_csv ("../../sourcedata/behavioral/HC_009/baseline/SST_R1/N_SuccStop.txt", sep = '\t',header = None)
# N_UnsuccStop = pd.read_csv ("../../sourcedata/behavioral/HC_009/baseline/SST_R1/N_UnsuccStop.txt", sep = '\t',header = None)
# grp, num = re.split('(\d+)', 'sub-HC006'.split("-")[-1])[0:2]

In [113]:
# subj_grp="MM"
# subj_num="340"
# session="baseline"
# run_num="2"
# MID_stim_locations_indiv = (glob.glob("../../sourcedata/behavioral/{}_{}/{}/mid_R{}/*.txt".format(subj_grp,subj_num,session,run_num))) 


In [138]:
# dfs = []
# for loc in MID_stim_locations_indiv:
#     trial_type = loc.split("/")[-1].split(".")[0]
#    # if trial_type == 'NeuHit':
#    #     continue
#     df = pd.read_csv (loc, sep='\t+',header = None,engine='python')
#     df.drop(df[df[0] <= 0].index, inplace=True)
#     numrows_df = df.shape[0]
#     df[2]=[trial_type for val in range(0,numrows_df)]
#     dfs.append(df)
# all_events = pd.concat(dfs, ignore_index=True)
# all_events.set_axis(['onset', 'duration', 'trial_type'], axis=1,inplace=True)
# all_events.sort_values(by='onset', inplace=True, ascending=True)

# all_events.duration = all_events.duration.astype(float)

# for i in range(all_events.shape[0]):
#     if 'Cue' in all_events.iat[i,2]:
#         all_events.iat[i,1] = 2.0
#     elif i < 95:
#         dur = all_events.iat[i+1,0] - all_events.iat[i,0]
#         all_events.iat[i,1] = dur
#     else:
#         dur = 317.4 - all_events.iat[i,0]
#         all_events.iat[i,1] = dur
        
# display(all_events)
# print(all_events.shape[0])

In [7]:
# #writing to all applicable events.tsv files (moco and unco) for a particular grp, num, ses, run 

# events_files_indiv_locations = (glob.glob("../../sub-{}{}/ses-{}/func/*sst*run-0{}*events.tsv".format(subj_grp,subj_num,session,run)))
# for file in events_files_indiv_locations:
#     all_events.to_csv(file, index = False, header=True, sep='\t')

In [8]:
# subj_nums_locations = (glob.glob("../../sub-*"))
# subj_nums = list((re.split('(\d+)', num.split("/")[-1].split("-")[-1])[0:2] for num in subj_nums_locations))
# sessions = ['baseline','1year']
# runs = ['1','2','3']

In [9]:
# for grp, num, ses, run in subj_nums:
    

In [10]:
# ses_locations = (glob.glob("../../sub-{}{}/ses-*".format('HC','009')))
# print(ses_locations)

In [6]:
#OLD:

# def make_events_file(MID_stim_locations, subj, run): 
#     dfs = []
#     for loc in MID_stim_locations:
#         trial_type = loc.split("/")[-1].split(".")[0]
#         if trial_type == 'NeuHit' and subj == 'HC013' and run == '2': #because this file doesn't have any values
#             continue
#         df = pd.read_csv(loc, sep='\t+',header = None,engine='python')
#         print(df[df[0] <= 0])
#         df.drop(df[df[0] <= 0].index, inplace=True) #because these files have a 0 for the onset (col0) since this event didn't happen we need to ignore that row 
#         numrows_df = df.shape[0]
#         df[2]=[trial_type for val in range(0,numrows_df)]
#         dfs.append(df)
#     all_events = pd.concat(dfs, ignore_index=True)
#     all_events.set_axis(['onset', 'duration', 'trial_type'], axis=1,inplace=True)
#     all_events.sort_values(by='onset', inplace=True, ascending=True)
    
#     all_events.duration = all_events.duration.astype(float)

#     for i in range(all_events.shape[0]):
#         if 'Cue' in all_events.iat[i,2]:
#             all_events.iat[i,1] = 2.0
#         elif i < (all_events.shape[0]-1):
#             dur = all_events.iat[i+1,0] - all_events.iat[i,0]
#             all_events.iat[i,1] = dur
#         else:
#             dur = 317.4 - all_events.iat[i,0]
#             all_events.iat[i,1] = dur
#     return all_events


In [None]:
#OLD:

# events_file_locations = (glob.glob("../../../sub-*/ses-*/func/*mid*run-*events.tsv"))

# events_dict={}

# for events_loc in events_file_locations:
#     split_loc = events_loc.split('/')
#     grp, num = re.split('(\d+)', split_loc[3].split("-")[-1])[0:2]
#     ses = split_loc[4].split('-')[-1]
#     run = split_loc[-1].split('_')[-2].split('-')[-1] 
#     if run != '3': #because Jodi's lab says to ignore run 3 (doesn't have behavioral files anyways)
#         if (grp+num+ses+run not in events_dict.keys()):
#             MID_stim_locations = (glob.glob(f"../../../sourcedata/behavioral/{grp}_{num}/{ses}/mid_R{run}/*.txt")) 
#             all_events = make_events_file(MID_stim_locations,grp+num,run)
#             events_dict[grp+num+ses+run]=all_events
#         else:
#             all_events = events_dict[grp+num+ses+run]
#         all_events.to_csv(events_loc, index = False, header=True, sep='\t', float_format='%.3f')
        

In [53]:
all_events[0]

KeyError: 0

In [5]:
split_loc = '../../../sub-HC006/ses-baseline/func/sub-HC006_ses-baseline_task-mid_rec-moco_run-1_events.tsv'.split('/')
#re.split('(\d+)', split_loc[3].split("-")[-1])[0:2]
#split_loc[4].split('-')[-1]
#split_loc[-1].split('_')[-2].split('-')[-1]
grp, num = re.split('(\d+)', split_loc[3].split("-")[-1])[0:2]
num


'006'