In [None]:
import os
import pandas as pd
import glob
import re
import matplotlib.pyplot as plt


In [None]:
def make_events_file(MID_stim_locations, subj, run, run_end=317.4, cue_duration=2.0):
    """Merge a run's per-trial-type timing files into one events table.

    Each file is read as a header-less table whose columns are separated by
    one or more tabs; the first column is used as the onset and the file is
    expected to have exactly two columns (onset, duration). The trial type is
    the file's base name up to its first ".". Rows whose onset is <= 0 are
    dropped, because a 0 onset marks an event that did not happen.

    After sorting by onset, every duration is recomputed (the durations stored
    in the files are discarded):

    * rows whose trial type contains "Cue" get ``cue_duration``;
    * any other row lasts until the onset of the next row;
    * if the last row is not a cue, it lasts until ``run_end``.

    Parameters
    ----------
    MID_stim_locations : iterable of str
        Paths of the timing files, one per trial type.
    subj : str
        Subject label, e.g. 'HC013'. Only used for the known-bad-file skip.
    run : str
        Run label, e.g. '2'. Only used for the known-bad-file skip.
    run_end : float, optional
        Time used as the end of the final non-cue event (default 317.4, the
        value previously hard-coded here).
    cue_duration : float, optional
        Duration assigned to cue rows (default 2.0; the original note calls
        this 2000ms, so onsets are presumably in seconds).

    Returns
    -------
    list
        ``[all_events, durs]``: the events DataFrame with columns
        'onset', 'duration', 'trial_type' sorted by onset, and the list of
        recomputed durations of the non-cue rows in onset order.

    Raises
    ------
    ValueError
        If no timing file is left to read for this subject/run.
    """
    frames = []
    for loc in MID_stim_locations:
        trial_type = os.path.basename(loc).split(".")[0]
        # Known-bad input: this particular file doesn't have any values.
        if trial_type == 'NeuHit' and subj == 'HC013' and run == '2':
            continue
        df = pd.read_csv(loc, sep='\t+', header=None, engine='python')
        # Keep every row except those with onset <= 0 (event did not happen).
        df = df[~(df[0] <= 0)].copy()
        df[2] = trial_type
        frames.append(df)

    if not frames:
        # pd.concat would raise an uninformative ValueError here; say what is missing.
        raise ValueError(
            f"No stimulus timing files to read for subject {subj!r}, run {run!r}"
        )

    all_events = pd.concat(frames, ignore_index=True)
    all_events.columns = ['onset', 'duration', 'trial_type']
    # Stable sort so rows with identical onsets keep a reproducible order.
    all_events = all_events.sort_values(by='onset', ascending=True, kind='mergesort')

    onsets = all_events['onset'].tolist()
    trial_types = all_events['trial_type'].tolist()
    # The "next onset" of the final row is the end of the run.
    next_onsets = onsets[1:] + [run_end]

    durations = []
    durs = []
    for onset, next_onset, trial_type in zip(onsets, next_onsets, trial_types):
        if 'Cue' in trial_type:
            durations.append(cue_duration)
        else:
            dur = next_onset - onset
            durations.append(dur)
            durs.append(dur)

    # Assign by the sorted frame's own index so values land positionally.
    all_events['duration'] = pd.Series(durations, index=all_events.index, dtype=float)

    return [all_events, durs]


In [None]:
# Every MID events.tsv under the subject/session folders. Sorted so the files are
# processed (and durs_all is filled) in a reproducible order.
events_file_locations = sorted(glob.glob("../../../sub-*/ses-*/func/*mid*run-*events.tsv"))

# Header-only table, written to MID events files that have no matching behavioral run.
empty_df = pd.DataFrame({'onset' : [],'duration' : [],'trial_type' : []})

# Pull from nifti_to_behavioral_run_match.tsv.
# Make sure we don't read in the missing files (marked by 'none').
nifti_to_behavioral_run_match = pd.read_csv("nifti_to_behavioral_run_match.tsv", sep = '\t')

# Map "<nifti name up to 'bold'>events.tsv" -> behavioral run.
events_to_run_match_dict = dict(zip(nifti_to_behavioral_run_match['nifti_filename'], nifti_to_behavioral_run_match['run']))
events_to_run_match_dict = {key.split('bold')[0]+'events.tsv': value for key, value in events_to_run_match_dict.items() if key != 'none'}


# Recomputed non-cue durations collected over all processed files (for the optional histogram below).
durs_all = []

for file_path in events_file_locations:

    # Index from the end of the path so the parsing does not depend on how many
    # leading "../" components the glob pattern has:
    #   .../sub-<grp><num>/ses-<ses>/func/<file>
    split_path = file_path.split('/')
    file = split_path[-1]
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    grp, num = re.split(r'(\d+)', split_path[-4].split("-")[-1])[0:2]
    ses = split_path[-3].split('-')[-1]
    # Run label taken from the file name; not used below in this cell.
    run_old = split_path[-1].split('_')[-2].split('-')[-1]

    # Only focus on mid events.tsv files (ignoring other bold events.tsv files).
    if 'mid' not in file:
        continue

    if file in events_to_run_match_dict:
        # Need to not just select the events.tsv but also make sure it gets matched with the right run number,
        # thus we get the run matching our events.tsv from the dictionary built from nifti_to_behavioral_run_match.tsv.
        run = str(events_to_run_match_dict[file])
        # Get all the behavioral files based on the run number pulled from nifti_to_behavioral_run_match.tsv.
        MID_stim_locations = (glob.glob(f"../../../sourcedata/behavioral/{grp}_{num}/{ses}/mid_R{run}/*.txt"))
        all_events, durs = make_events_file(MID_stim_locations,grp+num,run)
        durs_all+=durs

        # Overwrites the existing events.tsv in place.
        all_events.to_csv(file_path, index = False, header=True, sep='\t', float_format='%.3f')
    else:
        # Will set all mid files of runs we're not considering and all mid files with moco
        # to an empty file with only a header.
        empty_df.to_csv(file_path, index = False, header=True, sep='\t')


# Uncomment to plot the distribution of feedback durations.
#plt.hist(durs_all, bins=50, edgecolor='black')
#plt.show()