In [1]:
import os
import sys
import argparse
import pandas as pd
import numpy as np
from glob import glob


In [14]:
task_id = 'ToneLearning'

project_dir = os.path.abspath('/bgfs/bchandrasekaran/krs228/data/FLT/')
behav_dir   = os.path.join(project_dir, 'sourcedata', 'behav_files', 'CHA-IBR/')
bids_dir    = os.path.join(project_dir, 'data_bids_noIntendedFor')
#project_dir = os.path.join('/Users/krs228', 'data', 'FLT')
#behav_dir = os.path.join('/Users/krs228/','OneDrive - University of Pittsburgh/','CHA-IBR/')

# bids task names
bids_task_list = ['tonecat', 'stgrid']

#subject_list = ['FLT01', 'FLT04', 'FLT06', 'FLT07',  
                 # 
#                'FLT08', 'FLT09', 'FLT10', 'FLT11', 'FLT12', 'FLT13'] # 
subject_list = ['FLT02', 'FLT03', 'FLT05', ]

for subject_id in subject_list:
    print(subject_id)

    file_list = sorted(glob(behav_dir + '/*%s*/sub-%s*.csv'%(task_id, subject_id)))
    #file_list = [sorted(glob(behav_dir + '/*%s*/sub-%s*.csv'%(task_id, subject_id)))[0]]
    print('file list: ', file_list)
    
    # define initial BOLD acquisition time before task begins during silent gap
    first_acq = 2



    ''' ToneLearning task '''
    if 'ToneLearning' in task_id:
        # in this task, stimuli start 0.5 s after the silent gap starts
        stim_delay = 0.5

        # define the time before the first stimulus starts
        first_stim_delay = first_acq + stim_delay
        
        run_i = 1
        for rx, filename in enumerate(file_list):
            #try:
            print('converting ', filename)
            fpath = os.path.join(behav_dir, filename)
            df = pd.read_csv(fpath)

            # create a temp dataframe of only trials where sounds were presented
            trial_df = df[df.corrAns>0]

            if len(trial_df)<30:
                print('too few trials – incomplete run. Skipping')
            else:
                ''' Stimulus dataframe '''
                # set up stimulus dataframe
                stim_df = pd.DataFrame(columns=['onset', 
                                                'duration', 
                                                'trial_type',
                                                'stim_file'])

                # define onset time (relative to the first stimulus presentation)
                stim_df.onset = trial_df['sound_1.started'] - (trial_df['sound_1.started'].iloc[0]-first_stim_delay)

                # define duration
                # stim_df.duration = trial_df['sound_1.stopped'].astype(np.float16) - trial_df['sound_1.started'].astype(np.float16)
                stim_df.duration = 0.3

                # define stimulus type (based on sound file – HARDCODED)
                stim_df.trial_type = 'sound_'+trial_df.soundfile.str[8:14]
                '''
                stim_df.trial_type[trial_df.soundfile=='stimuli/di1-aN_48000Hz_pol2_S15filt.wav'] = 'di1-aN'
                '''

                # define stimulus soundfile
                stim_df.stim_file = trial_df.soundfile

                ''' Response dataframe '''
                # set up response dataframe
                resp_df = pd.DataFrame(columns=['onset', 
                                                'duration',
                                                'response_time', 
                                                'correct_key',
                                                'trial_type'])

                # define onset time (relative to the first stimulus presentation)
                resp_df.onset = trial_df['sound_1.started'] + trial_df['key_resp.rt']  - (trial_df['sound_1.started'].iloc[0]-first_stim_delay)

                # define duration (arbitrary)
                resp_df.duration = 0.5

                resp_df.response_time = trial_df['key_resp.rt']        
                resp_df.correct_key = trial_df['corrAns']
                resp_df.trial_type = 'resp_'+trial_df['key_resp.keys']

                ''' Feedback dataframe '''
                # set up feedback dataframe
                fb_df = pd.DataFrame(columns=['onset',
                                                'duration', 
                                                'trial_type'])        

                # define onset time (relative to the first stimulus presentation)
                fb_df.onset = trial_df['text_2.started'] - (trial_df['sound_1.started'].iloc[0]-first_stim_delay)

                # feedback is visible from the onset of text_2 to the onset of jitter_cross_post_fb
                fb_df.duration = trial_df['jitter_cross_post_fb.started'] - trial_df['text_2.started']

                # define feedback presented
                fb_df['trial_type'] = np.where(trial_df['key_resp.corr']==1, 'fb_correct', 
                                                (np.where(trial_df.corrAns==0, 'none', 'fb_wrong')))

                ''' combine all three dataframes '''
                bids_df = pd.concat([stim_df, resp_df, fb_df], 
                                    axis=0, join='outer', ignore_index=True)
                bids_df.sort_values(by=['onset'], ignore_index=True,
                                    inplace=True)

                # save to output path
                out_fpath = os.path.join(bids_dir,
                                         'sub-%s'%subject_id, 'func',
                                         'sub-%s_task-%s_run-%02d_events.tsv'%(subject_id, bids_task_list[0], run_i))

                bids_df.to_csv(out_fpath, sep='\t')
                print('saved output to ', out_fpath)
                run_i += 1
           # except:
           #     print('could not process this csv file')
           #     print(df.head)
           #     pass

FLT02
file list:  ['/bgfs/bchandrasekaran/krs228/data/FLT/sourcedata/behav_files/CHA-IBR/01-ToneLearning/sub-FLT02_tone_learning_16-tone_jitter-fb_fmri_2022_Mar_28_1901.csv', '/bgfs/bchandrasekaran/krs228/data/FLT/sourcedata/behav_files/CHA-IBR/01-ToneLearning/sub-FLT02_tone_learning_16-tone_jitter-fb_fmri_2022_Mar_28_1913.csv', '/bgfs/bchandrasekaran/krs228/data/FLT/sourcedata/behav_files/CHA-IBR/01-ToneLearning/sub-FLT02_tone_learning_16-tone_jitter-fb_fmri_2022_Mar_28_1922.csv', '/bgfs/bchandrasekaran/krs228/data/FLT/sourcedata/behav_files/CHA-IBR/01-ToneLearning/sub-FLT02_tone_learning_16-tone_jitter-fb_fmri_2022_Mar_28_1931.csv', '/bgfs/bchandrasekaran/krs228/data/FLT/sourcedata/behav_files/CHA-IBR/01-ToneLearning/sub-FLT02_tone_learning_16-tone_jitter-fb_fmri_2022_Mar_28_1941.csv', '/bgfs/bchandrasekaran/krs228/data/FLT/sourcedata/behav_files/CHA-IBR/01-ToneLearning/sub-FLT02_tone_learning_16-tone_jitter-fb_fmri_2022_Mar_28_1950.csv']
converting  /bgfs/bchandrasekaran/krs228/data

In [15]:
bids_df

Unnamed: 0,onset,duration,trial_type,stim_file,response_time,correct_key
0,2.500000,0.300000,sound_di1-iN,stimuli/di1-iN_48000Hz_pol2_S15filt.wav,,
1,3.622022,0.100000,resp_7,,1.122022,7.0
2,6.079869,0.749823,fb_correct,,,
3,8.494255,0.300000,sound_di4-hN,stimuli/di4-hN_48000Hz_pol2_S15filt.wav,,
4,9.211985,0.100000,resp_2,,0.717730,2.0
...,...,...,...,...,...,...
139,351.160549,0.100000,resp_1,,1.352762,1.0
140,353.188481,0.765435,fb_correct,,,
141,355.819401,0.300000,sound_di3-iN,stimuli/di3-iN_48000Hz_pol2_S15filt.wav,,
142,357.060995,0.100000,resp_1,,1.241594,1.0


In [16]:
bids_df.trial_type.str.split('_', expand=True)[0]

0      sound
1       resp
2         fb
3      sound
4       resp
       ...  
139     resp
140       fb
141    sound
142     resp
143       fb
Name: 0, Length: 144, dtype: object

In [2]:
task_id = 'STgrid'
project_dir = os.path.abspath('/bgfs/bchandrasekaran/krs228/data/FLT/')
behav_dir   = os.path.join(project_dir, 'sourcedata', 'behav_files', 'CHA-IBR/')
bids_dir    = os.path.join(project_dir, 'data_bids_noIntendedFor')
#project_dir = os.path.join('/Users/krs228', 'data', 'FLT')
#behav_dir = os.path.join('/Users/krs228/','OneDrive - University of Pittsburgh/','CHA-IBR/')

# bids task names
bids_task_list = ['tonecat', 'stgrid']

subject_list = [
                #'FLT01', 'FLT04', 'FLT06', 'FLT07',  
                #'FLT02', 'FLT03', 'FLT05', # 
                #'FLT08', 'FLT09', 'FLT10', 'FLT11', 'FLT12', 'FLT13',
                'FLT14', 'FLT15'] # 

for subject_id in subject_list:
    print(subject_id)

    file_list = sorted(glob(behav_dir + '/*%s*/sub-%s*.csv'%(task_id, subject_id)))
    print('file list: ', file_list)
    
    # define initial BOLD acquisition time before task begins during silent gap
    first_acq = 2


    if 'STgrid' in task_id:
        stim_delay = 0.4

        # define the time before the first stimulus starts
        first_stim_delay = first_acq + stim_delay

        for rx, filename in enumerate(file_list):
            print('converting ', filename)
            fpath = os.path.join(behav_dir, filename)
            df = pd.read_csv(fpath)
            
            if len(df) < 30:
                print('too few trials. skipping')
            else:
                # define output path
                out_fpath = os.path.join(project_dir, 'data_bids_noIntendedFor', 
                                         'sub-%s'%subject_id, 'func',
                                         'sub-%s_task-%s_run-%02d_events.tsv'%(subject_id, bids_task_list[1], rx+1))

                # set up dataframe
                bids_df = pd.DataFrame(columns=['onset', 'duration', 'trial_type',
                                                'response_time', 'stim_file'])

                bids_df.onset = df['sound_stimulus.started']-(df['sound_stimulus.started'][1]-first_stim_delay)
                bids_df.duration[df['sound_stimulus.started']>0] = 1.0

                #bids_df.trial_type[df['sound_stimulus.started'] > 0]   = 'sound'
                #bids_df.trial_type[df.soundFile == 'stimuli/null.wav'] = 'silent'
                for sx, stim in enumerate(df.soundFile):
                    if isinstance(stim, str):
                        if 'S15' in stim:
                            bids_df.trial_type[sx] = '_'.join(stim.split('_')[2:4]) 
                        elif 'null' in stim:
                            bids_df.trial_type[sx] = 'null'

                # define response time (minus stim delay)
                bids_df.response_time = df['key_resp.rt'] - stim_delay

                bids_df.stim_file = df.soundFile

                # drop the first row if it's not a stimulus
                try:
                    # is an error if index has been removed
                    pd.isna(bids_df.stim_file[0]) 
                    bids_df.drop(axis=0, index=0, inplace=True)
                except:
                    pass

                print(bids_df)

                # save to output path
                #bids_df.to_csv(out_fpath, sep='\t')
                print('saved output to ', out_fpath)


FLT14
file list:  ['/bgfs/bchandrasekaran/krs228/data/FLT/sourcedata/behav_files/CHA-IBR/02-STgrid/sub-FLT14_ST_grid-4x4_task-vis_2022_Sep_21_1812.csv', '/bgfs/bchandrasekaran/krs228/data/FLT/sourcedata/behav_files/CHA-IBR/02-STgrid/sub-FLT14_ST_grid-4x4_task-vis_2022_Sep_21_1823.csv']
converting  /bgfs/bchandrasekaran/krs228/data/FLT/sourcedata/behav_files/CHA-IBR/02-STgrid/sub-FLT14_ST_grid-4x4_task-vis_2022_Sep_21_1812.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids_df.duration[df['sound_stimulus.started']>0] = 1.0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids_df.trial_type[sx] = '_'.join(stim.split('_')[2:4])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids_df.trial_type[sx] = 'null'


         onset duration trial_type  response_time  \
1     2.400000      1.0     stim_8            NaN   
2     6.379690      1.0     stim_8       0.169499   
3    10.375692      1.0     stim_8            NaN   
4    14.388463      1.0     stim_8            NaN   
5    18.384924      1.0     stim_8            NaN   
..         ...      ...        ...            ...   
121        NaN      NaN        NaN            NaN   
122        NaN      NaN        NaN            NaN   
123        NaN      NaN        NaN            NaN   
124        NaN      NaN        NaN            NaN   
125        NaN      NaN        NaN            NaN   

                                           stim_file  
1    stimuli/2022-2-8_GenTestGrid_stim_8_S15filt.wav  
2    stimuli/2022-2-8_GenTestGrid_stim_8_S15filt.wav  
3    stimuli/2022-2-8_GenTestGrid_stim_8_S15filt.wav  
4    stimuli/2022-2-8_GenTestGrid_stim_8_S15filt.wav  
5    stimuli/2022-2-8_GenTestGrid_stim_8_S15filt.wav  
..                               

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids_df.duration[df['sound_stimulus.started']>0] = 1.0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids_df.trial_type[sx] = '_'.join(stim.split('_')[2:4])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids_df.trial_type[sx] = 'null'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids_df.duration[df['soun

         onset duration trial_type  response_time  \
1     2.400000      1.0    stim_10            NaN   
2     6.395868      1.0    stim_10            NaN   
3    10.392190      1.0    stim_10            NaN   
4    14.405143      1.0    stim_10       0.045987   
5    18.401590      1.0    stim_10            NaN   
..         ...      ...        ...            ...   
121        NaN      NaN        NaN            NaN   
122        NaN      NaN        NaN            NaN   
123        NaN      NaN        NaN            NaN   
124        NaN      NaN        NaN            NaN   
125        NaN      NaN        NaN            NaN   

                                            stim_file  
1    stimuli/2022-2-8_GenTestGrid_stim_10_S15filt.wav  
2    stimuli/2022-2-8_GenTestGrid_stim_10_S15filt.wav  
3    stimuli/2022-2-8_GenTestGrid_stim_10_S15filt.wav  
4    stimuli/2022-2-8_GenTestGrid_stim_10_S15filt.wav  
5    stimuli/2022-2-8_GenTestGrid_stim_10_S15filt.wav  
..                         

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids_df.duration[df['sound_stimulus.started']>0] = 1.0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids_df.trial_type[sx] = 'null'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids_df.trial_type[sx] = '_'.join(stim.split('_')[2:4])


In [20]:
df.head(20)

Unnamed: 0,soundFile,visual_stim,blocks.thisRepN,blocks.thisTrialN,blocks.thisN,blocks.thisIndex,trials.thisRepN,trials.thisTrialN,trials.thisN,trials.thisIndex,...,fixation_cross_2.started,fixation_cross_2.stopped,participant,session,run,date,expName,psychopyVersion,frameRate,Unnamed: 39
0,,,,,,,,,,,...,,,sub-FLT15,ses-001,run-02,2022_Sep_28_1152,ST_grid-4x4_task-vis,2021.2.3,59.989694,
1,stimuli/null.wav,+,0.0,0.0,0.0,19.0,0.0,0.0,0.0,4.0,...,,,sub-FLT15,ses-001,run-02,2022_Sep_28_1152,ST_grid-4x4_task-vis,2021.2.3,59.989694,
2,stimuli/null.wav,+,0.0,0.0,0.0,19.0,0.0,1.0,1.0,3.0,...,,,sub-FLT15,ses-001,run-02,2022_Sep_28_1152,ST_grid-4x4_task-vis,2021.2.3,59.989694,
3,stimuli/null.wav,+,0.0,0.0,0.0,19.0,0.0,2.0,2.0,2.0,...,,,sub-FLT15,ses-001,run-02,2022_Sep_28_1152,ST_grid-4x4_task-vis,2021.2.3,59.989694,
4,stimuli/null.wav,x,0.0,0.0,0.0,19.0,0.0,3.0,3.0,0.0,...,,,sub-FLT15,ses-001,run-02,2022_Sep_28_1152,ST_grid-4x4_task-vis,2021.2.3,59.989694,
5,stimuli/null.wav,+,0.0,0.0,0.0,19.0,0.0,4.0,4.0,1.0,...,,,sub-FLT15,ses-001,run-02,2022_Sep_28_1152,ST_grid-4x4_task-vis,2021.2.3,59.989694,
6,stimuli/null.wav,,0.0,0.0,0.0,19.0,,,,,...,,,sub-FLT15,ses-001,run-02,2022_Sep_28_1152,ST_grid-4x4_task-vis,2021.2.3,59.989694,
7,stimuli/2022-2-8_GenTestGrid_stim_14_S15filt.wav,+,0.0,1.0,1.0,13.0,0.0,0.0,0.0,1.0,...,,,sub-FLT15,ses-001,run-02,2022_Sep_28_1152,ST_grid-4x4_task-vis,2021.2.3,59.989694,
8,stimuli/2022-2-8_GenTestGrid_stim_14_S15filt.wav,x,0.0,1.0,1.0,13.0,0.0,1.0,1.0,0.0,...,,,sub-FLT15,ses-001,run-02,2022_Sep_28_1152,ST_grid-4x4_task-vis,2021.2.3,59.989694,
9,stimuli/2022-2-8_GenTestGrid_stim_14_S15filt.wav,+,0.0,1.0,1.0,13.0,0.0,2.0,2.0,2.0,...,,,sub-FLT15,ses-001,run-02,2022_Sep_28_1152,ST_grid-4x4_task-vis,2021.2.3,59.989694,


In [15]:
bids_df.head(20)

Unnamed: 0,onset,duration,trial_type,response_time,stim_file
1,2.4,1.0,,,stimuli/null.wav
2,6.396365,1.0,,,stimuli/null.wav
3,10.392615,1.0,,,stimuli/null.wav
4,14.405394,1.0,,0.229535,stimuli/null.wav
5,18.401831,1.0,,,stimuli/null.wav
6,,,,,stimuli/null.wav
7,22.397909,1.0,stim_14,,stimuli/2022-2-8_GenTestGrid_stim_14_S15filt.wav
8,26.394266,1.0,stim_14,0.210388,stimuli/2022-2-8_GenTestGrid_stim_14_S15filt.wav
9,30.3906,1.0,stim_14,,stimuli/2022-2-8_GenTestGrid_stim_14_S15filt.wav
10,34.403484,1.0,stim_14,,stimuli/2022-2-8_GenTestGrid_stim_14_S15filt.wav


In [36]:
first_df = bids_df.groupby('trial_type', sort=False, as_index=False).first()

first_df.drop(first_df[first_df.trial_type=='null'].index, inplace=True)

first_df.duration = 20

In [38]:
first_df.trial_type = 'sound'

In [39]:
first_df

Unnamed: 0,trial_type,onset,duration,response_time,stim_file
1,sound,22.397909,20,0.210388,stimuli/2022-2-8_GenTestGrid_stim_14_S15filt.wav
2,sound,62.39402,20,0.147137,stimuli/2022-2-8_GenTestGrid_stim_1_S15filt.wav
3,sound,82.391925,20,,stimuli/2022-2-8_GenTestGrid_stim_2_S15filt.wav
4,sound,102.390007,20,0.555418,stimuli/2022-2-8_GenTestGrid_stim_3_S15filt.wav
5,sound,122.404412,20,-0.019619,stimuli/2022-2-8_GenTestGrid_stim_4_S15filt.wav
6,sound,142.40247,20,0.149665,stimuli/2022-2-8_GenTestGrid_stim_5_S15filt.wav
7,sound,162.400312,20,0.026889,stimuli/2022-2-8_GenTestGrid_stim_6_S15filt.wav
8,sound,182.39836,20,0.077583,stimuli/2022-2-8_GenTestGrid_stim_15_S15filt.wav
9,sound,202.39633,20,-0.216713,stimuli/2022-2-8_GenTestGrid_stim_8_S15filt.wav
10,sound,222.394116,20,-0.022231,stimuli/2022-2-8_GenTestGrid_stim_11_S15filt.wav
