# Teaching task event files
Natalia Vélez, March 2022

In [1]:
import os,sys,warnings
import pandas as pd
import numpy as np
from os.path import join as opj
from sklearn.preprocessing import scale

sys.path.append('..')
from utils import str_extract, int_extract, gsearch, read_json, write_json

sys.path.append('../2_behavioral')
import teaching_models as teach

Loading teaching problems
{'A': [[0, 0, 1, 1, 0, 0],
       [0, 1, 1, 1, 1, 0],
       [1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1],
       [0, 1, 1, 1, 1, 0],
       [0, 0, 1, 1, 0, 0]],
 'B': [[1, 1, 1, 0, 0, 0],
       [1, 1, 1, 0, 0, 0],
       [1, 1, 1, 1, 0, 0],
       [0, 0, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]],
 'C': [[0, 0, 0, 1, 1, 1],
       [0, 0, 0, 1, 1, 1],
       [0, 0, 1, 1, 1, 1],
       [0, 0, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]],
 'D': [[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 1, 1, 0, 0],
       [1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1]]}
Participants excluded from analysis:
[3, 17]
Cleaning up human behavioral data
    subject  run  block_idx  ex_idx first_movement   start  problem  \
0         1    1          0       0         [0, 1]  [0, 0]       22   
1         1    1          0       1         [0, 1]  [0, 0]       22   
3         1    1          1   

Load valid participants:

In [2]:
# Read the numeric IDs of the participants kept for analysis and turn each one
# into a zero-padded subject label (e.g. 1 -> 'sub-01').
valid_ids = np.loadtxt('../1_preprocessing/outputs/valid_participants.txt')
valid_participants = [f'sub-{int(subject_id):02d}' for subject_id in valid_ids]
print(valid_participants)

['sub-01', 'sub-02', 'sub-04', 'sub-05', 'sub-06', 'sub-07', 'sub-08', 'sub-09', 'sub-10', 'sub-11', 'sub-12', 'sub-13', 'sub-14', 'sub-15', 'sub-16', 'sub-18', 'sub-19', 'sub-20', 'sub-21', 'sub-22', 'sub-23', 'sub-24', 'sub-25', 'sub-26', 'sub-27', 'sub-28', 'sub-29', 'sub-30']


## Vanilla event files (for top-level folder)

Find behavioral files:

In [3]:
data_dir = '../../'

# Gather every behavioral JSON file in sorted order, leaving out practice runs
original_files = sorted(
    path
    for path in gsearch(data_dir, 'behavioral', '*.json')
    if 'run-practice' not in path
)

print(f'Found {len(original_files)} behavioral files')
print(*original_files[:10], sep='\n')

Found 299 behavioral files
../../behavioral/sub-01_task-teaching_run-01_behavioral_1628798255.json
../../behavioral/sub-01_task-teaching_run-02_behavioral_1628798680.json
../../behavioral/sub-01_task-teaching_run-03_behavioral_1628799162.json
../../behavioral/sub-01_task-teaching_run-04_behavioral_1628800012.json
../../behavioral/sub-01_task-teaching_run-05_behavioral_1628800423.json
../../behavioral/sub-01_task-teaching_run-06_behavioral_1628800796.json
../../behavioral/sub-01_task-teaching_run-07_behavioral_1628801177.json
../../behavioral/sub-01_task-teaching_run-08_behavioral_1628801584.json
../../behavioral/sub-01_task-teaching_run-09_behavioral_1628802076.json
../../behavioral/sub-01_task-teaching_run-10_behavioral_1628802432.json


Helper function: Transforms raw data into BIDS-compliant events files

In [4]:
def data_wrangling(f):
    """Convert one raw behavioral JSON file into an events table.

    Parameters
    ----------
    f : str
        Path to a behavioral JSON file readable by ``pd.read_json``. The file
        must provide the columns ``ons``, ``dur``, ``type``, ``problem_idx``,
        ``ex_idx``, ``problem``, ``order``, ``rt``, ``movements``, ``example``
        and ``rating``.

    Returns
    -------
    pandas.DataFrame
        One row per event, sorted by onset and indexed 0..n-1, with columns
        ``onset``, ``duration``, ``trial_type``, ``block_no``, ``trial_no``,
        ``stimuli``, ``stimuli_order``, ``response`` and ``response_time``.
        Zero-duration ``motor`` rows are added for every cursor movement and
        every rating response; ``pause`` and ``pre`` rows are removed.
    """
    # load input data and rename columns to the names used in the events files
    events = pd.read_json(f).rename(columns={
        'ons': 'onset',
        'dur': 'duration',
        'type': 'trial_type',
        'problem_idx': 'block_no',
        'ex_idx': 'trial_no',
        'problem': 'stimuli',
        'order': 'stimuli_order',
        'rt': 'response_time'
    })

    # cursor movements on trials that recorded any: each movement m becomes a
    # motor event at (trial onset + m[1]); movements with m[1] < 0 are skipped
    motor_onsets = [
        row.onset + m[1]
        for _, row in events[events.movements.notna()].iterrows()
        for m in row.movements
        if m[1] >= 0
    ]

    # rating responses: one motor event at (trial onset + response time)
    rate_trials = events[events.trial_type == 'rate']
    motor_onsets += (rate_trials.onset + rate_trials.response_time).tolist()

    # add motor events to the main event table (nothing to add if there are none)
    if motor_onsets:
        motor_df = pd.DataFrame({'trial_type': 'motor', 'onset': motor_onsets, 'duration': 0})
        events = pd.concat([events, motor_df])

    # make a single "response" column: the chosen example where there is one,
    # otherwise the rating
    response = np.where(events.example.isna(), events.rating, events.example)

    # reorganize: order by onset, drop events without an onset, and leave out
    # fixation periods ('pause', 'pre'), which are not modeled explicitly.
    # Filtering before reset_index gives the caller a contiguous index and an
    # independent frame that is safe to modify.
    event_columns = ['onset', 'duration', 'trial_type', 'block_no', 'trial_no', 'stimuli',
                     'stimuli_order', 'response', 'response_time']
    return (
        events
        .assign(response=response)
        .sort_values(by='onset')
        .dropna(axis='index', subset=['onset'])
        .loc[lambda df: ~df.trial_type.isin(['pause', 'pre'])]
        .reset_index(drop=True)
        [event_columns]
    )

Main loop: Iterate through all behavioral files

In [5]:
data_dir = '../../BIDS_data/'

# Write one events file per behavioral run into the matching subject's func folder
for f in original_files:
    sub, run = (str_extract(pattern, f) for pattern in ('sub-[0-9]{2}', 'run-[0-9]{2}'))
    out_f = opj(data_dir, sub, 'func', f'{sub}_task-teaching_{run}_events.tsv')

    out_df = data_wrangling(f)
    out_df.to_csv(out_f, sep='\t', index=False, na_rep='n/a')

## Model-generated regressors (for GLM)

Load model regressors:

In [6]:
# Read the model-derived regressor values and rename the trial-identifying
# columns to block_no / trial_no / stimuli
regressor_column_names = {'block_idx': 'block_no', 'ex_idx': 'trial_no', 'problem': 'stimuli'}
model_reg = pd.read_csv('../2_behavioral/outputs/model_regressor_values.csv')
model_reg = model_reg.rename(columns=regressor_column_names)
model_reg.head()

Unnamed: 0,subject,run,block_no,trial_no,stimuli,pH,pTrue,KL,logp
0,1,1,0,0,22,"[0.3333333333333333, 0.3333333333333333, 0.0, ...",-0.126543,0.516503,-inf
1,1,1,0,1,22,"[0.5, 0.0, 0.0, 0.5]",0.040123,1.2096,-inf
2,1,1,1,0,18,"[0.5, 0.5, 0.0, 0.0]",0.040123,2.341827,-inf
3,1,1,1,1,18,"[0.5, 0.5, 0.0, 0.0]",0.040123,-1.223922,-inf
4,1,1,1,2,18,"[0.5, 0.5, 0.0, 0.0]",0.040123,-1.223922,-inf


Helper function: Merge model-based regressors with vanilla event files

In [7]:
def make_model_reg(f, reg_df=model_reg):
    """Build an events table for one run with parametric regressor rows added.

    Parameters
    ----------
    f : str
        Path to a behavioral file; it is passed to ``data_wrangling`` and its
        name must contain ``sub-XX`` and ``run-XX``.
    reg_df : pandas.DataFrame
        Regressor table with columns ``subject``, ``run``, ``block_no``,
        ``trial_no``, ``pTrue`` and ``KL``.

    Returns
    -------
    pandas.DataFrame
        Columns ``onset``, ``duration``, ``stimuli``, ``trial_type`` and
        ``value``. ``show`` events are relabelled ``show_new`` or
        ``show_missed``; every ``show_new`` event is followed by a ``pTrue``
        and a ``KL`` row that carry the regressor value in ``value``.

    Raises
    ------
    ValueError
        If the run does not contain the same number of ``choose`` and ``show``
        events, since the two are matched by position.
    """
    # get event data (copied so that relabelling below never touches a slice)
    event_df = data_wrangling(f).copy()

    # get regressors for this subject and run
    sub = int_extract('(?<=sub-)[0-9]{2}', f)
    run = int_extract('(?<=run-)[0-9]{2}', f)
    run_reg = reg_df.loc[
        (reg_df.subject == sub) & (reg_df.run == run),
        ['block_no', 'trial_no', 'pTrue', 'KL']
    ]

    # split "show" trials based on whether a hint was selected: the i-th show
    # event is labelled from the response of the i-th choose event
    is_show = event_df.trial_type == 'show'
    missed_trial = pd.isnull(event_df.loc[event_df.trial_type == 'choose', 'response'])
    if len(missed_trial) != is_show.sum():
        raise ValueError(
            f'{f}: found {len(missed_trial)} "choose" events but {is_show.sum()} "show" '
            'events; they must pair up one-to-one'
        )
    event_df.loc[is_show, 'trial_type'] = np.where(missed_trial, 'show_missed', 'show_new')

    # merge with event files: one pTrue row and one KL row per show_new event
    show_trials = (
        event_df
        [event_df.trial_type == 'show_new']
        .copy()
        .drop(columns='trial_type')
        .merge(run_reg, on=['block_no', 'trial_no'], how='left')
        .melt(id_vars=['onset', 'duration', 'block_no', 'trial_no', 'stimuli', 'stimuli_order', 'response', 'response_time'],
              value_vars=['pTrue', 'KL'], var_name='trial_type', value_name='value')
    )

    # put everything together. trial_type becomes a categorical whose category
    # order breaks ties in the sort: original event types first, then pTrue,
    # then KL. The categories are given explicitly so that a run without any
    # show_new events (and hence no pTrue/KL rows) still works.
    trial_type_order = event_df.trial_type.dropna().unique().tolist() + ['pTrue', 'KL']
    model_events = pd.concat([event_df, show_trials])
    model_events = (
        model_events
        .assign(trial_type=pd.Categorical(model_events.trial_type, categories=trial_type_order))
        .sort_values(by=['onset', 'duration', 'trial_type'])
        [['onset', 'duration', 'stimuli', 'trial_type', 'value']]
        .reset_index(drop=True)
    )

    return model_events

Save to file:

In [8]:
out_dir = '../../BIDS_data/derivatives/model_events'
os.makedirs(out_dir, exist_ok=True)

# Write the model-based events of every valid participant, one file per run
for f in original_files:
    sub = str_extract('sub-[0-9]{2}', f)
    run = str_extract('run-[0-9]{2}', f)
    if sub not in valid_participants:
        continue

    out_df = make_model_reg(f)
    func_dir = opj(out_dir, sub, 'func')
    os.makedirs(func_dir, exist_ok=True)
    out_f = opj(func_dir, f'{sub}_task-teaching_{run}_model-main_events.tsv')
    out_df.to_csv(out_f, sep='\t', index=False, na_rep='n/a')

## Control model: Time regressor

Load control regressors:

In [11]:
# Read the control regressor values and rename the trial-identifying columns
# to block_no / trial_no / stimuli
control_column_names = {
    'block_idx': 'block_no',
    'ex_idx': 'trial_no',
    'problem': 'stimuli',
}
time_df = pd.read_csv('../2_behavioral/outputs/control_regressor_values.csv')
time_df = time_df.rename(columns=control_column_names)
print(time_df.shape)
time_df.head()

(3213, 7)


Unnamed: 0,subject,run,block_no,trial_no,stimuli,pTrue,KL
0,1,7,2,0,0,-0.25463,-1.944137
1,1,7,2,1,0,-0.00463,-1.526076
2,1,7,2,2,0,0.24537,3.446987
3,1,1,2,0,1,-0.25463,-1.944137
4,1,1,2,1,1,-0.00463,-1.526076


Make event files:

In [12]:
# Write the control-model events of every valid participant, one file per run
for f in original_files:
    sub = str_extract('sub-[0-9]{2}', f)
    run = str_extract('run-[0-9]{2}', f)
    if sub not in valid_participants:
        continue

    out_df = make_model_reg(f, reg_df=time_df)
    func_dir = opj(out_dir, sub, 'func')
    os.makedirs(func_dir, exist_ok=True)
    out_f = opj(func_dir, f'{sub}_task-teaching_{run}_model-control_events.tsv')
    out_df.to_csv(out_f, sep='\t', index=False, na_rep='n/a')

## Blended model (partialling out the effect of time)

Make event files that blend model-based and control (time) regressors:

In [13]:
out_dir = '../../BIDS_data/derivatives/model_events'
os.makedirs(out_dir, exist_ok=True)

# Write blended event files: the main model's events plus a "time" regressor
# taken from the control model
for f in original_files:
    sub = str_extract('sub-[0-9]{2}', f)
    run = str_extract('run-[0-9]{2}', f)
    out_f = opj(out_dir, sub, 'func', f'{sub}_task-teaching_{run}_model-blended_events.tsv')

    if sub in valid_participants:
        model_sub = make_model_reg(f)

        # From the control model keep only complete pTrue rows and relabel them
        # as "time". Building the frame with .loc/.assign instead of assigning
        # into a filtered slice avoids pandas' SettingWithCopyWarning.
        time_sub = (
            make_model_reg(f, reg_df=time_df)
            .dropna()
            .loc[lambda df: df['trial_type'] == 'pTrue']
            .assign(trial_type='time')
        )

        out_df = pd.concat([model_sub, time_sub]).sort_values(by=['onset', 'trial_type']).reset_index(drop=True)
        os.makedirs(opj(out_dir, sub, 'func'), exist_ok=True)
        out_df.to_csv(out_f, sep='\t', index=False, na_rep='n/a')