# Preprocess event data

In [1]:
import pandas as pd
from config import *

In [55]:
# Name of the experiment to preprocess; it selects the parquet file loaded below.
experiment = "ANTI_SACCADE"
# CLEANED_DIR is not defined in this notebook — presumably it is provided by
# `from config import *`; verify against the config module.
df_raw = pd.read_parquet(f"{CLEANED_DIR}/{experiment}.pq")

In [None]:
# Display the freshly loaded raw events for a quick visual check.
df_raw

## Experiment-specific preprocessing

### Anti-saccade

In [None]:
def transform_numeric_columns(df):
    """Return a copy of ``df`` with the known numeric columns cast to numbers.

    Values that cannot be parsed become NaN (``errors='coerce'``). The input
    frame is left untouched, so the raw data can be re-used or the cell re-run
    without the conversion having already been applied to it.

    Args:
        df: Event frame containing every column listed in ``numeric_columns``.

    Returns:
        A new DataFrame with the same columns in the same order.

    Raises:
        KeyError: If one of the expected columns is missing from ``df``.
    """
    numeric_columns = ['participant_id', 'trial_id', 'time', 'stimulus_x', 'stimulus_y', 'start_time', 'end_time',
                       'duration', 'x', 'y', 'avg_pupil_size', 'start_x', 'start_y', 'end_x', 'end_y', 'amplitude',
                       'peak_velocity', 'time_elapsed', 'delay']
    converted = {col: pd.to_numeric(df[col], errors='coerce') for col in numeric_columns}

    # ``assign`` works on a copy, so the caller's frame is never modified.
    return df.assign(**converted)

def coalesce_time_elapsed(df):
    """Merge ``delay`` into ``time_elapsed`` and drop the ``delay`` column.

    ``time_elapsed`` keeps its own value where present; where it is missing
    the value of ``delay`` from the same row is used instead.

    Args:
        df: Frame with ``time_elapsed`` and ``delay`` columns.

    Returns:
        A new DataFrame without ``delay``; ``df`` itself is not modified.
    """
    # Back-filling along the columns pulls ``delay`` into empty ``time_elapsed`` cells.
    merged = df[['time_elapsed', 'delay']].bfill(axis=1)['time_elapsed']
    with_elapsed = df.assign(time_elapsed=merged)
    return with_elapsed.drop(columns=['delay'])

def fill_values_side(df):
    """Propagate the ``side`` value to every row of its trial.

    Rows are ordered by participant, trial and time; within each
    (participant, trial) group missing ``side`` entries are filled forwards
    and then backwards.

    Args:
        df: Frame with ``participant_id``, ``trial_id``, ``time`` and ``side``.

    Returns:
        A new DataFrame, ordered by participant, trial and time.
    """
    trial_keys = ['participant_id', 'trial_id']

    def _fill_side(trial):
        # Forward fill first, then backward fill for rows before the first value.
        return trial.assign(side=trial['side'].ffill().bfill())

    ordered = df.sort_values(trial_keys + ['time'])
    # Selecting all columns explicitly keeps the grouping columns in each group.
    per_trial = ordered.groupby(trial_keys, group_keys=False)[df.columns]
    return per_trial.apply(_fill_side)

def stimulus_onset_time(df):
    """Recompute the timestamp of the ``'255 0 0'`` rows of every trial.

    Within each (participant, trial) group, every row whose ``colour`` is
    ``'255 0 0'`` gets its ``time`` replaced by::

        time of the first '255 255 255' row
        + 1000 * time_elapsed of the first 'TRIAL_VAR_DATA' row

    (the factor 1000 presumably converts seconds to milliseconds — confirm
    against the recording format). All other rows keep their ``time``.

    The two reference values are looked up once per trial and applied with a
    vectorised mask instead of being recomputed for every row.

    Args:
        df: Frame with ``participant_id``, ``trial_id``, ``time``, ``colour``,
            ``event`` and ``time_elapsed`` columns.

    Returns:
        A new DataFrame, ordered by participant, trial and (original) time.

    Raises:
        IndexError: If a trial contains a ``'255 0 0'`` row but no
            ``'255 255 255'`` row or no ``'TRIAL_VAR_DATA'`` row.
    """
    trial_keys = ['participant_id', 'trial_id']

    def _shift_stimulus_time(trial):
        is_stimulus = trial['colour'] == '255 0 0'
        new_time = trial['time']
        # Only look up the reference rows when they are needed, so trials
        # without a '255 0 0' row never fail on a missing reference.
        if is_stimulus.any():
            fixation_time = trial.loc[trial['colour'] == '255 255 255', 'time'].iloc[0]
            elapsed = trial.loc[trial['event'] == 'TRIAL_VAR_DATA', 'time_elapsed'].iloc[0]
            new_time = new_time.mask(is_stimulus, fixation_time + 1000 * elapsed)
        return trial.assign(time=new_time)

    return (
        df.sort_values(trial_keys + ['time'])
        .groupby(trial_keys, group_keys=False)[df.columns]
        .apply(_shift_stimulus_time)
    )

In [59]:
# Experiment-specific steps, applied in order; each call receives the result
# of the previous one.
df_exp_trans = transform_numeric_columns(df_raw)
df_exp_trans = coalesce_time_elapsed(df_exp_trans)
df_exp_trans = fill_values_side(df_exp_trans)
df_exp_trans = stimulus_onset_time(df_exp_trans)

In [None]:
# Display the result of the experiment-specific steps.
df_exp_trans

## General preprocessing

In [None]:
def coalesce_time(df):
    """Fill missing ``time`` values with ``end_time`` from the same row.

    The coalesced column is selected explicitly instead of assigning a
    two-column frame to one column, and the result is returned as a new frame
    so the caller's data is not modified.

    Args:
        df: Frame with ``time`` and ``end_time`` columns.

    Returns:
        A new DataFrame whose ``time`` equals the original ``time`` where
        present and ``end_time`` otherwise.
    """
    # Back-filling along the columns pulls ``end_time`` into empty ``time`` cells.
    coalesced = df[["time", "end_time"]].bfill(axis=1)["time"]

    return df.assign(time=coalesced)
    
def remove_start_events(df):
    """Return ``df`` without the rows whose ``event`` is ``SFIX`` or ``SSACC``.

    Rows with a missing ``event`` are kept, because a missing value compares
    unequal to both labels.

    Args:
        df: Frame with an ``event`` column.

    Returns:
        An independent copy of the remaining rows, with their original index
        labels.
    """
    is_start_event = (df["event"] == "SFIX") | (df["event"] == "SSACC")

    # Explicit copy: a bare ``.loc`` selection stays linked to ``df``, which
    # makes later in-place column assignments on the result raise
    # SettingWithCopyWarning on pandas versions without copy-on-write.
    return df.loc[~is_start_event, :].copy()

def group_df(df):
    """Group ``df`` by participant and trial after ordering its rows by time.

    Args:
        df: Frame with ``participant_id``, ``trial_id`` and ``time`` columns.

    Returns:
        A pandas groupby object over ``(participant_id, trial_id)`` built on a
        copy of ``df`` sorted by participant, trial and time.
    """
    trial_keys = ["participant_id", "trial_id"]
    ordered = df.sort_values(trial_keys + ["time"])
    return ordered.groupby(trial_keys)

def standardise_time(df):
    """Express ``time``, ``start_time`` and ``end_time`` relative to trial start.

    The smallest ``time`` of each (participant, trial) group is subtracted
    from the three time columns, so every trial starts at 0.

    The per-trial minimum is computed once and reused for all three columns.
    A minimum does not depend on row order, so the frame is grouped directly
    rather than sorted first; the offsets therefore share ``df``'s index and
    line up row by row.

    Args:
        df: Frame with ``participant_id``, ``trial_id``, ``time``,
            ``start_time`` and ``end_time`` columns.

    Returns:
        A new DataFrame with the shifted time columns; ``df`` is not modified.
    """
    trial_start = df.groupby(["participant_id", "trial_id"])["time"].transform("min")

    return df.assign(
        time=df["time"] - trial_start,
        start_time=df["start_time"] - trial_start,
        end_time=df["end_time"] - trial_start,
    )

def fill_values(df):
    """Forward-fill ``colour``, ``stimulus_x`` and ``stimulus_y`` within each trial.

    Rows are grouped per (participant, trial) in time order via ``group_df``;
    within each group a missing value takes the most recent earlier value.
    All three columns are filled in one grouped pass and written to a new
    frame, so the caller's data is not modified.

    Args:
        df: Frame with ``participant_id``, ``trial_id``, ``time``, ``colour``,
            ``stimulus_x`` and ``stimulus_y`` columns.

    Returns:
        A new DataFrame in the row order of ``df`` with the three columns filled.
    """
    stimulus_columns = ["colour", "stimulus_x", "stimulus_y"]
    filled = group_df(df)[stimulus_columns].ffill()

    # ``filled`` is indexed like the time-sorted frame; ``assign`` aligns it
    # back onto ``df`` by index label.
    return df.assign(**{col: filled[col] for col in stimulus_columns})

In [62]:
# General preprocessing steps, applied in order; each call receives the
# result of the previous one.
df_trans = remove_start_events(df_exp_trans)
df_trans = coalesce_time(df_trans)
df_trans = standardise_time(df_trans)
df_trans = fill_values(df_trans)

In [63]:
# Display the fully preprocessed events.
df_trans

Unnamed: 0,experiment,participant_id,trial_id,time,event,colour,stimulus_x,stimulus_y,eye,start_time,...,y,avg_pupil_size,start_x,start_y,end_x,end_y,amplitude,peak_velocity,side,time_elapsed
0,ANTI_SACCADE,106,0.0,0.000000,TRIALID,,,,,,...,,,,,,,,,right,
1,ANTI_SACCADE,106,0.0,25.000000,START,,,,,,...,,,,,,,,,right,
4,ANTI_SACCADE,106,0.0,60.000000,FIXPOINT,255 255 255,960.0,540.0,,,...,,,,,,,,,right,
17,ANTI_SACCADE,106,0.0,2756.673455,FIXPOINT,255 0 0,1290.0,540.0,,,...,,,,,,,,,right,
18,ANTI_SACCADE,106,0.0,3761.000000,TRIAL_VAR_DATA,255 0 0,1290.0,540.0,,,...,,,,,,,,,right,2.696673
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118559,ANTI_SACCADE,404,15.0,2270.000000,ESACC,255 0 0,629.0,540.0,R,2231.0,...,,,1984.9,,2159.3,,5.25,453.0,left,
118560,ANTI_SACCADE,404,15.0,2272.000000,ESACC,255 0 0,629.0,540.0,L,2238.0,...,,,1877.9,28.1,1945.0,,1.30,179.0,left,
118561,ANTI_SACCADE,404,15.0,2466.000000,EFIX,255 0 0,629.0,540.0,R,2271.0,...,,871.0,,,,,,,left,
118562,ANTI_SACCADE,404,15.0,2630.000000,EFIX,255 0 0,629.0,540.0,L,2273.0,...,12.2,1080.0,,,,,,,left,
