# Evil Bastard Features

In [1]:
import pandas as pd
import numpy as np

from config import *
from features.feature_utils import *

In [16]:
# Experiment key used to build the parquet file names below.
experiment = "EVIL_BASTARD"

# Read only the id column of the sample file and reduce it to the distinct ids.
participant_ids = (
    pd.read_parquet(
        f"{PREPROCESSED_DIR}/{experiment}_samples.pq",
        columns=["participant_id"],
    )["participant_id"]
    .unique()
)

In [15]:
def get_samples_df(participant_id):
    """Load the preprocessed sample rows of one participant.

    Reads ``{PREPROCESSED_DIR}/{experiment}_samples.pq`` (``experiment`` is the
    notebook-level variable) and passes an equality filter on the
    ``participant_id`` column to the parquet reader.

    Args:
        participant_id: Value compared against the ``participant_id`` column.

    Returns:
        pd.DataFrame: The frame returned by ``pd.read_parquet`` for that filter.
    """
    samples_path = f"{PREPROCESSED_DIR}/{experiment}_samples.pq"
    participant_filter = [('participant_id', '=', participant_id)]
    return pd.read_parquet(samples_path, filters=participant_filter)
    
def get_events_df(participant_id):
    """Load the preprocessed event rows of one participant.

    Reads ``{PREPROCESSED_DIR}/{experiment}_events.pq`` (``experiment`` is the
    notebook-level variable) and passes an equality filter on the
    ``participant_id`` column to the parquet reader.

    Args:
        participant_id: Value compared against the ``participant_id`` column.

    Returns:
        pd.DataFrame: The frame returned by ``pd.read_parquet`` for that filter.
    """
    events_path = f"{PREPROCESSED_DIR}/{experiment}_events.pq"
    participant_filter = [('participant_id', '=', participant_id)]
    return pd.read_parquet(events_path, filters=participant_filter)

In [17]:
# Load the sample and event frames of one hard-coded participant.
participant_id = 147

df_sample, df_event = get_samples_df(participant_id), get_events_df(participant_id)

In [2]:
def combine_samples_events(df_samples, df_events, tolerance=10, direction="nearest"):
    """Attach fixpoint event information to the sample rows.

    Rows of ``df_events`` whose ``event`` equals ``"FIXPOINT"`` are matched onto
    ``df_samples`` with ``pd.merge_asof`` on ``time``, separately for every
    (``participant_id``, ``trial_id``) pair. With the default arguments a sample
    receives the fixpoint row that is nearest in time (before or after it),
    but only if that row lies within ``tolerance`` units of ``time``; samples
    without such a match get missing values in the merged columns.

    Args:
        df_samples (pd.DataFrame): Sample rows. Must contain ``participant_id``,
            ``trial_id`` and ``time``.
        df_events (pd.DataFrame): Event rows. Must contain ``participant_id``,
            ``trial_id``, ``time``, ``event``, ``colour``, ``stimulus_x`` and
            ``stimulus_y``.
        tolerance: Maximum ``time`` difference accepted for a match, forwarded
            to ``pd.merge_asof``. Defaults to 10, the previously hard-coded value.
        direction (str): Search direction forwarded to ``pd.merge_asof``
            (``"backward"``, ``"forward"`` or ``"nearest"``). Defaults to
            ``"nearest"``, the previously hard-coded value.

    Returns:
        pd.DataFrame: The samples, ordered by ``time``, with the fixpoint
        columns merged in. ``fixpoint`` holds the colour name ("red", "green",
        "blue" or "white"); colours outside that mapping become missing.
        The input frames are not modified.
    """
    fixpoint_columns = ["participant_id", "trial_id", "time", "event", "colour", "stimulus_x", "stimulus_y"]
    sort_keys = ["time", "trial_id", "participant_id"]

    # Boolean indexing, sort_values and rename each return a new frame, so the
    # caller's inputs are never mutated and no explicit copies are required.
    # merge_asof needs both sides ordered by the `on` key, hence the sorting.
    df_fixpoints = (
        df_events.loc[df_events["event"] == "FIXPOINT", fixpoint_columns]
        .sort_values(sort_keys)
        # 'colour' becomes 'fixpoint' so the merged column already has its final name
        .rename(columns={"colour": "fixpoint"})
    )
    df_samples = df_samples.sort_values(sort_keys)

    # As-of join within each participant/trial: pick the fixpoint row selected
    # by `direction`, and only when it is at most `tolerance` away in time.
    df_samples = pd.merge_asof(
        df_samples,
        df_fixpoints,
        on="time",
        by=["participant_id", "trial_id"],
        direction=direction,
        tolerance=tolerance,
    )

    # Translate the colour constants into readable names.
    df_samples["fixpoint"] = df_samples["fixpoint"].map({RED:"red", GREEN:"green", BLUE:"blue", WHITE:"white"})

    return df_samples

In [22]:
def evil_bastard_get_distance_to_stimulus_features(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise the gaze-to-stimulus offset per participant.

    For every row the sum of squared x/y offsets between the gaze position and
    (``stimulus_x``, ``stimulus_y``) is computed for the left and for the right
    eye. The two values are averaged when both are present; otherwise the one
    that is present is used (missing when neither is). The combined value is
    aggregated per (``experiment``, ``participant_id``) with mean, min, max,
    median and std.

    NOTE(review): no square root is taken, so the "distance" columns hold
    squared distances — confirm this is intended.

    Args:
        df (pd.DataFrame): Rows containing ``experiment``, ``participant_id``,
            ``x_left``, ``y_left``, ``x_right``, ``y_right``, ``stimulus_x``
            and ``stimulus_y``.

    Returns:
        pd.DataFrame: One row per (``experiment``, ``participant_id``) group
        holding the aggregates, after ``rename_columns`` has been applied.
    """
    # Per-eye squared offset from the stimulus position.
    left_sq = (df["x_left"] - df["stimulus_x"]) ** 2 + (df["y_left"] - df["stimulus_y"]) ** 2
    right_sq = (df["x_right"] - df["stimulus_x"]) ** 2 + (df["y_right"] - df["stimulus_y"]) ** 2

    has_left = left_sq.notna()
    has_right = right_sq.notna()

    # Fallback when only one eye is available: left if present, else right.
    single_eye = np.where(has_left, left_sq, right_sq)
    both_or_single = np.where(has_left & has_right, (left_sq + right_sq) / 2, single_eye)

    per_sample = df.assign(
        distance_to_fixpoint_left=left_sq,
        distance_to_fixpoint_right=right_sq,
        distance_to_fixpoint=both_or_single,
    )

    summary = (
        per_sample
        .groupby(["experiment", "participant_id"])
        .agg({"distance_to_fixpoint": ["mean", "min", "max", "median", "std"]})
        .reset_index()
    )

    return rename_columns(summary)

In [25]:
def get_evil_bastard_features() -> pd.DataFrame:
    """Run all Evil Bastard feature extractions, participant by participant.

    The participant ids are read from the cleaned samples file. For each id the
    preprocessed event and sample rows are loaded, the samples are combined
    with the fixpoint events, and the event-, sample- and combined-level
    feature functions are applied. The per-participant results are merged on
    ["experiment", "participant_id"] and finally concatenated.

    Returns:
        pd.DataFrame: Dataframe with columns ["experiment", "participant_id", X_FEATURES],
        where X_FEATURES is a collection of features.

    Raises:
        ValueError: If the cleaned samples file lists no participants.
    """

    logging.info("Extracting evil bastard features")

    experiment = "EVIL_BASTARD"
    merge_keys = ["experiment", "participant_id"]

    # Feature extractors, grouped by the frame they consume. They do not depend
    # on the participant, so they are defined once outside the loop.
    event_feature_functions = [get_pre_calculated_metrics_feature]
    sample_feature_functions = [get_acceleration_feature, get_disconjugacy_feature]
    combined_feature_functions = [evil_bastard_get_distance_to_stimulus_features]

    # Read the participant ids to identify the groups to process.
    # NOTE(review): ids come from CLEANED_DIR while the data below is read from
    # PREPROCESSED_DIR — confirm this mix is intended.
    df_index = pd.read_parquet(
        f"{CLEANED_DIR}/{experiment}_samples.pq",
        columns=["participant_id"]
    )
    participant_groups = df_index["participant_id"].unique()

    # Fail with a clear message instead of pandas' "No objects to concatenate".
    if len(participant_groups) == 0:
        raise ValueError(f"No participants found in {CLEANED_DIR}/{experiment}_samples.pq")

    df_features_all_participants = []
    for participant_id in tqdm(participant_groups, total=len(participant_groups)):
        logging.info(f"Processing participant {participant_id}")

        # Load only this participant's rows to keep memory usage bounded.
        filters = [('participant_id', '=', participant_id)]
        df_event = pd.read_parquet(PREPROCESSED_DIR / f"{experiment}_events.pq", filters=filters)
        df_sample = (pd.read_parquet(PREPROCESSED_DIR / f'{experiment}_samples.pq', filters=filters)
        .sort_values(["experiment", "participant_id", "trial_id","time"])
        )
        df_combined = combine_samples_events(df_sample, df_event)

        logging.info("Starting event feature extraction")
        df_event_features_list = [f(df=df_event) for f in event_feature_functions]

        logging.info("Starting sample feature extraction")
        df_sample_features_list = [f(df=df_sample) for f in sample_feature_functions]

        logging.info("Starting combined feature extraction")
        df_combined_features_list = [f(df=df_combined) for f in combined_feature_functions]

        df_features_par_list = df_event_features_list + df_sample_features_list + df_combined_features_list

        # Successive inner merges: one row per participant with all feature columns.
        df_features_par = reduce(lambda x, y: pd.merge(x, y, on=merge_keys), df_features_par_list)

        df_features_all_participants.append(df_features_par)

    df_features = pd.concat(df_features_all_participants, ignore_index=True)

    logging.info("Finished extracting evil bastard features")

    return df_features

In [26]:
# Run the full extraction over all participants.
features = get_evil_bastard_features()

2025-04-21 14:54:54,308 - INFO - 1672357721.get_evil_bastard_features:12 - Extracting anti saccade features
  0%|          | 0/160 [00:00<?, ?it/s]2025-04-21 14:54:55,594 - INFO - 1672357721.get_evil_bastard_features:25 - Processing participant 106
2025-04-21 14:54:56,274 - INFO - 1672357721.get_evil_bastard_features:34 - Starting event feature extraction
2025-04-21 14:54:56,316 - INFO - 1672357721.get_evil_bastard_features:38 - Starting sample feature extraction
2025-04-21 14:54:56,317 - INFO - feature_utils.get_acceleration_feature:52 - Extracting acceleration
2025-04-21 14:54:56,743 - INFO - feature_utils.get_disconjugacy_feature:78 - Extracting disconjugacy
2025-04-21 14:54:57,458 - INFO - 1672357721.get_evil_bastard_features:42 - Starting combined feature extraction
  1%|          | 1/160 [00:02<05:34,  2.11s/it]2025-04-21 14:54:57,701 - INFO - 1672357721.get_evil_bastard_features:25 - Processing participant 111
2025-04-21 14:54:58,287 - INFO - 1672357721.get_evil_bastard_features

In [27]:
# Display the extracted feature table (one row per participant).
features

Unnamed: 0,experiment,participant_id,mean_peak_velocity_sacc,mean_amplitude_sacc,mean_duration_sacc,mean_duration_fix,mean_pupil_size_fix,total_acceleration_magnitude_left_mean,total_acceleration_magnitude_left_min,total_acceleration_magnitude_left_max,...,total_acceleration_magnitude_right_min,total_acceleration_magnitude_right_max,total_acceleration_magnitude_right_median,total_acceleration_magnitude_right_std,Var_total,distance_to_fixpoint_mean,distance_to_fixpoint_min,distance_to_fixpoint_max,distance_to_fixpoint_median,distance_to_fixpoint_std
0,EVIL_BASTARD,106,116.038585,1.584904,32.110932,843.723054,767.543413,1268.603058,0.0,86052.309673,...,0.0,80120.908632,894.427191,1783.711251,4642.709862,2613.375498,230.575,280956.650,1883.7750,4042.165196
1,EVIL_BASTARD,111,103.167702,1.277267,25.541408,516.936634,500.387129,1221.966845,0.0,40207.959411,...,0.0,39412.688312,721.110255,1867.690928,14687.659841,7076.549747,898.265,90951.250,6078.0100,4388.571471
2,EVIL_BASTARD,133,188.970779,1.549501,197.035714,642.496100,737.018721,1510.381772,0.0,602429.348555,...,0.0,584171.378963,848.528137,5236.211939,3913.895544,5489.021497,42.340,8530872.250,3004.2500,42439.159119
3,EVIL_BASTARD,134,128.344882,1.766840,46.557480,769.863299,478.424963,2399.475077,0.0,101071.459869,...,0.0,50144.192086,1649.242250,2112.149274,10244.882536,3961.751179,15.970,29616.715,3515.4575,2264.977217
4,EVIL_BASTARD,135,327.098326,2.458905,106.974895,1040.525813,1289.015296,1702.245618,0.0,169681.702019,...,0.0,221269.970850,565.685425,3929.154500,9089.673140,5352.465950,93.150,2711466.975,2455.6650,55666.162898
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,EVIL_BASTARD,399,120.522995,1.278930,52.648128,497.850000,987.452041,1523.804878,0.0,457799.257317,...,0.0,431139.327828,1400.000000,4692.044447,4912.564123,4613.211637,33.930,3366379.970,3643.0700,9799.291161
156,EVIL_BASTARD,401,183.789352,1.975907,108.430556,1012.086134,1093.014706,2217.161061,0.0,186809.956908,...,0.0,187656.281536,1886.796226,3155.213917,8895.970132,3920.370553,610.145,358639.810,3233.7175,8695.792951
157,EVIL_BASTARD,402,141.332500,1.614175,51.842500,565.250000,587.212264,1635.805030,0.0,163501.804271,...,0.0,174924.212161,1166.190379,2646.228997,3314.633765,3367.077532,59.275,252602.370,2117.2975,4621.681623
158,EVIL_BASTARD,403,106.698842,1.721236,24.488417,919.809947,899.969805,1237.499786,0.0,46099.023851,...,0.0,300808.577005,1341.640786,1709.376880,25888.019187,11688.098541,4094.650,43168.910,11007.2475,4183.020734
