# Evil Bastard Features

In [1]:
import pandas as pd
import numpy as np

from config import *
from features.feature_utils import *

In [2]:
experiment = "EVIL_BASTARD"
participant_ids = pd.read_parquet(
        f"{PREPROCESSED_DIR}/{experiment}_samples.pq", 
        columns=["participant_id"]
    )

participant_ids = participant_ids["participant_id"].unique()

In [3]:
def get_samples_df(participant_id):
    df_samples = pd.read_parquet(
        f"{PREPROCESSED_DIR}/{experiment}_samples.pq",
        filters=[('participant_id', '=', participant_id)]
        )
    return df_samples
    
def get_events_df(participant_id):
    df_events = pd.read_parquet(
        f"{PREPROCESSED_DIR}/{experiment}_events.pq",
        filters=[('participant_id', '=', participant_id)]
        )
    
    return df_events

In [4]:
participant_id = participant_ids[10]

df_samples = get_samples_df(participant_id)
df_events = get_events_df(participant_id)

In [5]:
def combine_samples_events(df_samples, df_events):
    # Extract fixpoints
    df_fixpoints = df_events[df_events["event"]=="FIXPOINT"].loc[:,["participant_id", "trial_id", "time", "event", "colour", "stimulus_x", "stimulus_y"]]

    # Insert fixpoints in sample data
    # Ensure you are modifying actual copies
    df_samples = df_samples.copy()
    df_fixpoints = df_fixpoints.copy()

    # Make sure both DataFrames are sorted by time
    df_samples = df_samples.sort_values(["time", "trial_id", "participant_id"])
    df_fixpoints = df_fixpoints.sort_values(["time", "trial_id", "participant_id"])

    # Rename 'colour' column to 'fixpoint' so it's ready to merge
    df_fixpoints = df_fixpoints.rename(columns={"colour": "fixpoint"})

    # Perform a backward-looking join: for each row in sample_df, find the most recent fixpoint time
    df_samples = pd.merge_asof(
        df_samples,
        df_fixpoints,
        on="time",
        by=["participant_id", "trial_id"],
        direction="nearest",
        tolerance=10
    )

    df_samples["fixpoint"] = df_samples["fixpoint"].map({RED:"red", GREEN:"green", BLUE:"blue", WHITE:"white"})
    
    return df_samples

In [32]:
df_combined = combine_samples_events(df_samples, df_events)

In [None]:
def evil_bastard_get_distance_to_stimulus_features(df: pd.DataFrame) -> pd.DataFrame:
    features = (df
        .assign(
            distance_to_fixpoint_left = lambda x: (x["x_left"]-x["stimulus_x"])**2+(x["y_left"]-x["stimulus_y"])**2,
            distance_to_fixpoint_right = lambda x: (x["x_right"]-x["stimulus_x"])**2+(x["y_right"]-x["stimulus_y"])**2
        )
        .assign(
            distance_to_fixpoint = lambda x: (x["distance_to_fixpoint_left"]+x["distance_to_fixpoint_right"])/2
        )
        .agg({
            'distance_to_fixpoint': ["mean", "min", "max", "median", "std"],
        })
        .reset_index()
    )
    
    return features

In [None]:
def get_evil_bastard_features() -> pd.DataFrame:
    """Runs all anti saccade features extractions

    Args:
        df_event (pd.DataFrame): The preprocessed event dataframe
        df_samples (pd.DataFrame): The preprocessed sample dataframe

    Returns:
        pd.DataFrame: Dataframe with columns ["experiment", "participant_id", X_FEATURES], where X_FEATURES is a collection of features
    """
    logging.info("Extracting anti saccade features")
    
    logging.info("Starting event feature extraction")
    event_feature_functions = [get_pre_calculated_metrics_feature]
    df_event_features_list = [f(df=df_event) for f in event_feature_functions]

    logging.info("Starting sample feature extraction")
    sample_feature_functions = [get_acceleration_feature]
    df_sample_features_list = [f(df=df_sample) for f in sample_feature_functions]
    
    logging.info("Starting combined feture extraction")
    combined_feature_functions = [evil_bastard_get_distance_to_stimulus_features]
    df_combined_features_list = [f(df=df_combined) for f in combined_feature_functions]
    
    df_features_list = df_event_features_list + df_sample_features_list + df_combined_features_list
    
    df_features = reduce(lambda x, y: pd.merge(x, y, on = ["experiment", "participant_id"]), df_features_list)
    
    logging.info("Finished extracting anti saccade features")
    
    return df_features

In [6]:
def get_evil_bastard_features(df_event: pd.DataFrame, df_sample:pd.DataFrame) -> pd.DataFrame:
    """Runs all anti saccade features extractions

    Args:
        df (pd.DataFrame): The preprocessed dataframe

    Returns:
        pd.DataFrame: Dataframe with columns ["experiment", "participant_id", X_FEATURES], where X_FEATURES is a collection of features
    """
    # Get unique participant ids
    experiment = "EVIL_BASTARD"
    participant_ids = pd.read_parquet(
            f"{PREPROCESSED_DIR}/{experiment}_samples.pq", 
            columns=["participant_id"]
        )
    participant_ids = participant_ids["participant_id"].unique()
    
    # event_feature_functions = [get_pre_calculated_metrics_feature, get_n_correct_trials_feature, get_prop_trials_feature, get_reaction_time_feature]
    # df_event_features_list = [f(df=df_event) for f in event_feature_functions]
    
    for participant_id in participant_ids:
        df_samples = get_samples_df(participant_id)
        
        sample_feature_functions = [get_acceleration_feature]#, get_distance_to_stimulus_features]
        df_sample_features_list = [f(df=df_sample) for f in sample_feature_functions]
    
    # df_features_list = df_event_features_list + df_sample_features_list
    
    # df_features = reduce(lambda x, y: pd.merge(x, y, on = ["experiment", "participant_id"]), df_features_list)
    
    return df_sample_features_list #df_features


features = get_evil_bastard_features(df_event=df_events, df_sample=df_samples)

2025-04-21 11:49:04,109 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acceleration
2025-04-21 11:49:04,590 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acceleration
2025-04-21 11:49:05,020 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acceleration
2025-04-21 11:49:05,491 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acceleration
2025-04-21 11:49:05,998 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acceleration
2025-04-21 11:49:06,571 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acceleration
2025-04-21 11:49:07,020 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acceleration
2025-04-21 11:49:07,541 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acceleration
2025-04-21 11:49:08,047 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acceleration
2025-04-21 11:49:08,674 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acc

In [None]:
participant_id = participant_ids[10]
filters = [
    ('participant_id', '=', participant_id),
]

samples_df = pd.read_parquet(
    f"{PREPROCESSED_DIR}/{experiment}_samples.pq",
    filters=filters
    )

filters = [
    ('participant_id', '=', int(participant_id)),
]

events_df = pd.read_parquet(
    f"{PREPROCESSED_DIR}/{experiment}_events.pq",
    filters=filters
    )

In [7]:
features

[     experiment  participant_id  total_acceleration_magnitude_left_mean  \
 0  EVIL_BASTARD             146                             1563.253278   
 
    total_acceleration_magnitude_left_min  \
 0                                    0.0   
 
    total_acceleration_magnitude_left_max  \
 0                           97659.203355   
 
    total_acceleration_magnitude_left_median  \
 0                                    1000.0   
 
    total_acceleration_magnitude_left_std  \
 0                            3902.689435   
 
    total_acceleration_magnitude_right_mean  \
 0                              1581.885881   
 
    total_acceleration_magnitude_right_min  \
 0                                     0.0   
 
    total_acceleration_magnitude_right_max  \
 0                           119249.989518   
 
    total_acceleration_magnitude_right_median  \
 0                                 894.427191   
 
    total_acceleration_magnitude_right_std  
 0                             4197.048884 