In [20]:
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(''))) + '/src')
from config import *

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import numpy as np
from functools import reduce
from tqdm import tqdm

experiment = "KING_DEVICK"

df_event = pd.read_parquet(PREPROCESSED_DIR / f"{experiment}_events.pq").reset_index(drop=True)
df_sample = (pd.read_parquet(PREPROCESSED_DIR / f'{experiment}_samples.pq')
 .sort_values(["experiment", "participant_id", "trial_id","time"])
)

def rename_columns(df):
    """Renames columns by joining multi-level column names with different delimiters."""
    # Iterate over all column names
    df.columns = [f"{col[0]}" if col[1] == '' else f"{col[0]}_{col[1]}" for col in df.columns.values]
    return df



# Event

In [22]:
def get_avg_mistakes_pr_trial(df: pd.DataFrame) -> pd.DataFrame:
    df = (df
          .query("event == 'TRIAL_VAR_DATA'")
          .groupby(["experiment", "participant_id"])
          .agg(avg_mistakes_pr_trial = ('marks', 'mean'))
          .reset_index()
    )
    return df
    
def get_avg_time_elapsed_pr_trial(df: pd.DataFrame) -> pd.DataFrame:
    df = (df
          .query("event == 'TRIAL_VAR_DATA'")
          .groupby(["experiment", "participant_id"])
          .agg(avg_time_elapsed_pr_trial = ('time_elapsed', 'mean'))
          .reset_index()
    )
    return df



def get_pre_calculated_metrics_feature(df: pd.DataFrame) -> pd.DataFrame:
    """
    Returns pd.Dataframe with columns ['experiment','participant_id', X_FEATURES],
    where X_FEATURES is a collection of features found by the following cartesian product:
    {'peak_velocity', 'amplitude', 'duration', 'avg_pupil_size'} x {np.mean, np.min, np.max, np.median, np.std}
    """
    features_df = (df.groupby(["experiment", "participant_id"])
    .agg(
        mean_peak_velocity_sacc = ('peak_velocity', lambda x: x[df.loc[x.index, 'event'] == 'ESACC'].mean()),
        mean_amplitude_sacc = ('amplitude', lambda x: x[df.loc[x.index, 'event'] == 'ESACC'].mean()),
        mean_duration_sacc = ('duration', lambda x: x[df.loc[x.index, 'event'] == 'ESACC'].mean()),
        mean_duration_fix = ('duration', lambda x: x[df.loc[x.index, 'event'] == 'EFIX'].mean()),
        mean_pupil_size_fix = ('avg_pupil_size', lambda x: x[df.loc[x.index, 'event'] == 'EFIX'].mean()),
    )
    .reset_index()
    )    
    return features_df



# Sample

In [23]:
def get_acceleration_feature(df: pd.DataFrame) -> pd.DataFrame:
    """Finds acceleration features for anti saccade experiment

    Args:
        df (pd.DataFrame): Dataframe with raw samples

    Returns:
        pd.DataFrame: Dataframe with columns ['experiment','participant_id', X_FEATURES]
        where X_FEATURES is a collection of features found by the following cartesian product:
        {'total_acceleration_magnitude_left', 'total_acceleration_magnitude_right'} x {np.mean, np.min, np.max, np.median, np.std}
    """
    logging.info("Extracting acceleration")
    acceleration = (df.join((df
    .groupby(["experiment", "participant_id", "trial_id"])[['x_velocity_left', 'y_velocity_left', 'x_velocity_right', 'y_velocity_right']].shift(1)
    .rename(columns={'x_velocity_left': 'x_velocity_left_lagged'
            , 'y_velocity_left': 'y_velocity_left_lagged'
            , 'x_velocity_right': 'x_velocity_right_lagged'
            , 'y_velocity_right': 'y_velocity_right_lagged'}))
    ).assign(x_acceleration_left = lambda x: (x["x_velocity_left"] - x["x_velocity_left_lagged"]) / (1/2000),
            y_acceleration_left = lambda x: (x["y_velocity_left"] - x["y_velocity_left_lagged"]) / (1/2000),
            x_acceleration_right = lambda x: (x["x_velocity_right"] - x["x_velocity_right_lagged"]) / (1/2000),
            y_acceleration_right = lambda x: (x["y_velocity_right"] - x["y_velocity_right_lagged"]) / (1/2000))
    .assign(total_acceleration_magnitude_left = lambda x: np.sqrt( np.power(x["x_acceleration_left"], 2) + np.power(x["y_acceleration_left"], 2)),
            total_acceleration_magnitude_right = lambda x: np.sqrt( np.power(x["x_acceleration_right"], 2) + np.power(x["y_acceleration_right"], 2)))
    .groupby(["experiment", "participant_id"])
    .agg({'total_acceleration_magnitude_left': [np.mean, np.min, np.max, np.median, np.std],
        'total_acceleration_magnitude_right': [np.mean, np.min, np.max, np.median, np.std]
        })
    .reset_index()
    .pipe(rename_columns)
    )
    return acceleration


# Eye disconjugacy
# Paper: https://www.liebertpub.com/doi/full/10.1089/neu.2014.3687

def get_disconjugacy_feature(df:pd.DataFrame) -> pd.DataFrame:
    logging.info("Extracting disconjugacy")
    disconjugacy = (df
        .sort_values(["experiment", "participant_id", "trial_id", "time"])
        .query("x_left == x_left & x_right == x_right & y_left == y_left & y_right == y_right") # same as not null
        .groupby(["experiment", "participant_id"])
        .apply(lambda group: group.assign(
            x_left_rolling=group["x_left"].rolling(window=5, min_periods=1).mean(),
            x_right_rolling=group["x_right"].rolling(window=5, min_periods=1).mean(),
            y_left_rolling=group["y_left"].rolling(window=5, min_periods=1).mean(),
            y_right_rolling=group["y_right"].rolling(window=5, min_periods=1).mean()
        ))
        .reset_index(drop=True)
        .assign(
            X_diffs = lambda x: ((x["x_left_rolling"] - x["x_right_rolling"]) - 0)**2,
            Y_diffs = lambda x: ((x["y_left_rolling"] - x["y_right_rolling"]) - 0)**2
        )
        .groupby(["experiment", "participant_id"])
        .apply(lambda group: group.assign(
            X_squared_scaled = group["X_diffs"] / group.shape[0],
            Y_squared_scaled = group["Y_diffs"] / group.shape[0]
        ))
        .reset_index(drop=True)
        .groupby(["experiment", "participant_id"])
        .agg(
            Var_X = ("X_squared_scaled", "sum"),
            Var_Y = ("Y_squared_scaled", "sum")
        )
        .assign(
            Var_total = lambda x: x["Var_X"] + x["Var_Y"]
        )
        .reset_index()
        [["experiment", "participant_id", "Var_total"]]
    )
    return disconjugacy



# Combining

In [24]:
def get_king_devick_features(df_event: pd.DataFrame, df_sample:pd.DataFrame) -> pd.DataFrame:
    """Runs all king devick features extractions

    Args:
        df (pd.DataFrame): The preprocessed dataframe

    Returns:
        pd.DataFrame: Dataframe with columns ["experiment", "participant_id", X_FEATURES], where X_FEATURES is a collection of features
    """
    logging.info("Starting fitts law feature extraction")
    
    logging.info("Starting event feature extraction")
    event_feature_functions = [get_avg_mistakes_pr_trial, get_avg_time_elapsed_pr_trial, get_pre_calculated_metrics_feature]
    df_event_features_list = [f(df=df_event) for f in event_feature_functions]
    
    logging.info("Starting sample feature extraction")
    sample_feature_functions = [get_acceleration_feature, get_disconjugacy_feature]
    df_sample_features_list = [f(df=df_sample) for f in sample_feature_functions]
    
    df_features_list = df_event_features_list + df_sample_features_list
    
    df_features = reduce(lambda x, y: pd.merge(x, y, on = ["experiment", "participant_id"]), df_features_list)
    
    return df_features


features = get_king_devick_features(df_event=df_event, df_sample=df_sample)
    

    

2025-04-16 16:25:14,236 - INFO - 2866491546.get_king_devick_features:10 - Starting fitts law feature extraction
2025-04-16 16:25:14,238 - INFO - 2866491546.get_king_devick_features:12 - Starting event feature extraction
2025-04-16 16:25:14,456 - INFO - 2866491546.get_king_devick_features:16 - Starting sample feature extraction
2025-04-16 16:25:14,456 - INFO - 615019999.get_acceleration_feature:12 - Extracting acceleration
2025-04-16 16:25:30,490 - INFO - 615019999.get_disconjugacy_feature:39 - Extracting disconjugacy
  .apply(lambda group: group.assign(
  .apply(lambda group: group.assign(


# Save

In [25]:
features.to_parquet(FEATURES_DIR / f"{experiment}_features.pq")
