# Teaching task event files
Natalia Vélez, March 2022

In [None]:
import os,sys
import pandas as pd
import numpy as np
from os.path import join as opj

sys.path.append('..')
from utils import str_extract, int_extract, gsearch, read_json, write_json

sys.path.append('../2_behavioral')
import teaching_models as teach

Load valid participants:

In [None]:
# Subject numbers are read as floats and formatted as BIDS-style labels.
# ndmin=1 keeps a one-line file (a single participant) iterable: without it
# np.loadtxt returns a 0-d array and the comprehension raises TypeError.
valid_participants = np.loadtxt('../1_preprocessing/outputs/valid_participants.txt', ndmin=1)
valid_participants = ['sub-%02d' % s for s in valid_participants]
print(valid_participants)

### Vanilla event files (for top-level folder)

Find behavioral files:

In [None]:
data_dir = '../../'

# Collect the behavioral JSON files returned by gsearch, leave out practice
# runs, and keep the remaining paths in sorted order
original_files = sorted(
    f for f in gsearch(data_dir, 'behavioral', '*.json')
    if 'run-practice' not in f
)

print(f'Found {len(original_files)} behavioral files')
print(*original_files[:10], sep='\n')

Helper function: Transforms raw data into BIDS-compliant events files

In [None]:
def data_wrangling(f):
    """Convert one raw behavioral JSON file into a BIDS-style events table.

    Raw columns are renamed to BIDS-style names, zero-duration ``motor``
    events are added for recorded responses, the ``example`` and ``rating``
    columns are collapsed into a single ``response`` column, and ``pause`` /
    ``pre`` rows are removed.

    Parameters
    ----------
    f : str or file-like
        Anything accepted by ``pandas.read_json``. The data must provide the
        columns ``ons``, ``dur``, ``type``, ``problem_idx``, ``ex_idx``,
        ``problem``, ``order``, ``rt``, ``movements``, ``example`` and
        ``rating``.

    Returns
    -------
    pandas.DataFrame
        Events sorted by onset with columns ``onset``, ``duration``,
        ``trial_type``, ``block_no``, ``trial_no``, ``stimuli``,
        ``stimuli_order``, ``response`` and ``response_time``, indexed
        0..n-1.
    """
    columns = ['onset', 'duration', 'trial_type', 'block_no', 'trial_no', 'stimuli',
               'stimuli_order', 'response', 'response_time']

    # load input data and switch to BIDS-style column names
    events = pd.read_json(f).rename(columns={
        'ons': 'onset',
        'dur': 'duration',
        'type': 'trial_type',
        'problem_idx': 'block_no',
        'ex_idx': 'trial_no',
        'problem': 'stimuli',
        'order': 'stimuli_order',
        'rt': 'response_time'
    })

    # motor events from trials that recorded cursor movements: the second
    # element of each movement is added to the trial onset; movements with a
    # negative value there are skipped
    motor_events = []
    for _, row in events[events.movements.notna()].iterrows():
        motor_events.extend(
            {'trial_type': 'motor', 'onset': row.onset + m[1], 'duration': 0}
            for m in row.movements
            if m[1] >= 0
        )

    # motor events from rate trials: one event at onset + response time
    for _, row in events[events.trial_type == 'rate'].iterrows():
        motor_events.append(
            {'trial_type': 'motor', 'onset': row.onset + row.response_time, 'duration': 0}
        )

    # add to main event df
    events = pd.concat([events, pd.DataFrame(motor_events)])

    # make a single "response" column: the example where there is one,
    # otherwise the rating
    events['response'] = np.where(events.example.isna(), events.rating, events.example)

    # Sort by onset with a stable sort, so rows sharing an onset keep the
    # order in which they were assembled (trial rows before motor rows).
    # Rows without an onset are dropped, e.g. a motor event computed from a
    # missing response time.
    events = (
        events
        .sort_values(by='onset', kind='mergesort')
        .dropna(axis='index', subset=['onset'])
    )

    # don't explicitly model fixation periods
    events = events[~events.trial_type.isin(['pause', 'pre'])]

    # reset the index last so the returned table is numbered 0..n-1 with no
    # gaps left by the rows removed above
    return events[columns].reset_index(drop=True)

Main loop: Iterate through all behavioral files

In [None]:
data_dir = '../../BIDS_data/'

# Write one tab-separated events file per behavioral file, placed in the
# matching subject's func folder; missing values are written as "n/a"
for f in original_files:
    sub, run = (str_extract(pattern, f) for pattern in ('sub-[0-9]{2}', 'run-[0-9]{2}'))
    out_f = opj(data_dir, sub, 'func', f'{sub}_task-teaching_{run}_events.tsv')

    out_df = data_wrangling(f)
    out_df.to_csv(out_f, sep='\t', index=False, na_rep='n/a')

### Model-generated regressors (for GLM)

Load model regressors

In [None]:
# Read the model regressor values, then rename their columns to the names
# used in the events files
model_reg = pd.read_csv('../2_behavioral/outputs/model_regressor_values.csv')
model_reg = model_reg.rename(
    columns={'block_idx': 'block_no', 'ex_idx': 'trial_no', 'problem': 'stimuli'}
)
model_reg.head()

In [None]:
# Spot check: regressor rows for subject 1, run 3
model_reg.loc[model_reg.subject.eq(1) & model_reg.run.eq(3)]

Helper function: Merge model-based regressors with vanilla event files

In [None]:
def make_model_reg(f):
    """Build the model-based events table for one behavioral file.

    Starts from the events returned by ``data_wrangling(f)``, relabels each
    ``show`` trial as ``show_new`` or ``show_missed`` depending on whether
    the matching ``choose`` trial has a response, and adds one ``pTrue`` row
    and one ``KL`` row per ``show_new`` trial carrying the model regressor
    value for that block/trial.

    Parameters
    ----------
    f : str
        Path of a behavioral file. The subject and run numbers are parsed
        from ``sub-XX`` / ``run-XX`` in the path and matched against the
        ``subject`` and ``run`` columns of the module-level ``model_reg``.

    Returns
    -------
    pandas.DataFrame
        Columns ``onset``, ``duration``, ``stimuli``, ``trial_type`` and
        ``value``. ``value`` is only filled on ``pTrue`` / ``KL`` rows. Rows
        are sorted by onset, then duration, then trial type (original event
        types in order of first appearance, followed by ``pTrue``, ``KL``).

    Raises
    ------
    ValueError
        If the run does not have the same number of ``show`` and ``choose``
        trials, since the two are paired by position.
    """
    id_cols = ['onset', 'duration', 'block_no', 'trial_no', 'stimuli',
               'stimuli_order', 'response', 'response_time']

    # get event data
    event_df = data_wrangling(f)

    # get regressors for this subject and run
    # NOTE(review): int_extract is a project helper; presumably it returns
    # the matched digits as an int -- confirm against utils
    sub = int_extract('(?<=sub-)[0-9]{2}', f)
    run = int_extract('(?<=run-)[0-9]{2}', f)
    run_reg = model_reg.loc[
        (model_reg.subject == sub) & (model_reg.run == run),
        ['block_no', 'trial_no', 'pTrue', 'KL'],
    ]

    # split "show" trials based on whether a hint was selected; the i-th
    # show trial is paired with the i-th choose trial
    is_choose = event_df.trial_type == 'choose'
    is_show = event_df.trial_type == 'show'
    if is_choose.sum() != is_show.sum():
        # fail loudly instead of letting a length-1 array broadcast silently
        raise ValueError(
            f'{f}: found {is_show.sum()} show trials but '
            f'{is_choose.sum()} choose trials; they must be paired one-to-one'
        )
    missed_trial = event_df.loc[is_choose, 'response'].isna().to_numpy()
    event_df.loc[is_show, 'trial_type'] = np.where(missed_trial, 'show_missed', 'show_new')

    # merge with event files: one pTrue row and one KL row per show_new trial
    show_trials = (
        event_df[event_df.trial_type == 'show_new']
        .drop(columns='trial_type')
        .merge(run_reg, on=['block_no', 'trial_no'], how='left')
        .melt(id_vars=id_cols, value_vars=['pTrue', 'KL'],
              var_name='trial_type', value_name='value')
    )

    # put everything together
    # set_categories (rather than reorder_categories) tolerates runs with no
    # show_new trials, where the pTrue/KL labels never occur in the data
    trial_order = event_df.trial_type.unique().tolist() + ['pTrue', 'KL']
    model_events = pd.concat([event_df, show_trials], ignore_index=True)
    model_events = (
        model_events
        .assign(trial_type=model_events.trial_type.astype('category').cat.set_categories(trial_order))
        .sort_values(by=['onset', 'duration', 'trial_type'])
        [['onset', 'duration', 'stimuli', 'trial_type', 'value']]
        .reset_index(drop=True)
    )

    return model_events

Save to file:

In [None]:
out_dir = '../../BIDS_data/derivatives/model_events'
os.makedirs(out_dir, exist_ok=True)

# Write model-based events for every file that belongs to a valid participant
for f in original_files:
    sub = str_extract('sub-[0-9]{2}', f)
    run = str_extract('run-[0-9]{2}', f)
    out_f = opj(out_dir, sub, 'func', f'{sub}_task-teaching_{run}_model-main_events.tsv')

    # skip files from participants that are not in the valid list
    if sub not in valid_participants:
        continue

    print(f'Loading file: {f}')
    out_df = make_model_reg(f)

    print(f'Saving to: {out_f}\n')
    # make sure the subject's func folder exists before writing
    os.makedirs(os.path.dirname(out_f), exist_ok=True)
    out_df.to_csv(out_f, sep='\t', index=False, na_rep='n/a')

## Exploratory: Identifiable vs. non-identifiable problems

Helper function: For every alternative hypothesis, does the correct answer (the first hypothesis in the problem) contain at least one pixel that the alternative lacks?

In [None]:
def is_identifiable(prob):
    """Check whether the first hypothesis can be told apart from each alternative.

    Parameters
    ----------
    prob : dict
        Its values are array-likes in which nonzero cells mark the pixels of
        one hypothesis. The first value is compared against all the others,
        so every hypothesis is expected to have the same shape.

    Returns
    -------
    bool
        True when, for every alternative, the first hypothesis has at least
        one nonzero cell that is zero in that alternative. Also True when
        there are no alternatives.
    """
    # Flat (row-major) positions of the nonzero cells of each hypothesis.
    # np.flatnonzero gives the same indices as ravelling np.nonzero over a
    # (6, 6) grid, but does not tie the function to that grid size.
    flat_indices = [np.flatnonzero(h) for h in prob.values()]
    target, alternatives = flat_indices[0], flat_indices[1:]

    # the first hypothesis needs at least one cell missing from each alternative
    return all(np.setdiff1d(target, alt).size > 0 for alt in alternatives)

Check: Which problems are identifiable?

In [None]:
# One flag per problem in teach.problems, in the same order
identifiable_bool = list(map(is_identifiable, teach.problems))
print(f'{sum(identifiable_bool)} problems are uniquely identifiable')

Sanity checks: Plot an identifiable problem:

In [None]:
# Print the identifiability flag computed for problem 0 (expected to be True
# for this sanity check), then pass the same index to teach.plot_problem;
# the return value is bound to `_` so the notebook does not echo it
ident_idx = 0
print(identifiable_bool[ident_idx])
_ = teach.plot_problem(ident_idx)

Plot a non-identifiable problem:

In [None]:
# Print the identifiability flag computed for problem 2 (expected to be False
# for this sanity check), then pass the same index to teach.plot_problem;
# the return value is bound to `_` so the notebook does not echo it
nonident_idx = 2
print(identifiable_bool[nonident_idx])
_ = teach.plot_problem(nonident_idx)

Make events: preview the events table that `data_wrangling` produces for the first behavioral file

In [None]:
# Run the wrangling helper on the first behavioral file; as the last
# expression in the cell, the resulting table is displayed by the notebook
f = original_files[0]
data_wrangling(f)