# Evil Bastard Features

# !!! RUN Pipeline

In [1]:
import pandas as pd
import numpy as np

from config import *

In [2]:
def rename_columns(df):
    """Flatten two-level column labels into plain strings, modifying ``df`` in place.

    Each column label is indexed as ``label[0]`` / ``label[1]``. When the second
    level is the empty string the new name is just the first level; otherwise the
    two levels are joined with an underscore (``"<level0>_<level1>"``).

    Args:
        df: DataFrame whose column labels have (at least) two levels.

    Returns:
        The same DataFrame object, with its columns replaced by the flat names.
    """
    flat_names = []
    for label in df.columns.values:
        top, sub = label[0], label[1]
        if sub == '':
            # No second-level name (e.g. group keys after reset_index).
            flat_names.append(f"{top}")
        else:
            flat_names.append(f"{top}_{sub}")

    df.columns = flat_names
    return df

In [3]:
experiment = "EVIL_BASTARD"

# Read only the participant_id column of the preprocessed samples file and
# reduce it to the distinct participant ids.
participant_ids = (
    pd.read_parquet(
        f"{PREPROCESSED_DIR}/{experiment}_samples.pq",
        columns=["participant_id"],
    )["participant_id"]
    .unique()
)

In [4]:
# Select a single participant (position 10 in participant_ids) to work with below.
participant_id = participant_ids[10]

In [5]:
def get_samples_df(participant_id):
    """Load the sample rows of one participant from the preprocessed samples file.

    Reads ``{PREPROCESSED_DIR}/{experiment}_samples.pq`` (both names are taken
    from module scope) and keeps only rows whose ``participant_id`` equals the
    given value.

    Args:
        participant_id: Id to filter on; passed to the parquet filter unchanged.

    Returns:
        DataFrame with the matching sample rows.
    """
    samples_path = f"{PREPROCESSED_DIR}/{experiment}_samples.pq"
    participant_filter = [('participant_id', '=', participant_id)]
    return pd.read_parquet(samples_path, filters=participant_filter)
    
def get_events_df(participant_id):
    """Load the event rows of one participant from the preprocessed events file.

    Reads ``{PREPROCESSED_DIR}/{experiment}_events.pq`` (both names are taken
    from module scope) and keeps only rows whose ``participant_id`` equals the
    given value.

    Args:
        participant_id: Id to filter on; converted with ``int()`` before being
            used in the parquet filter.

    Returns:
        DataFrame with the matching event rows.
    """
    events_path = f"{PREPROCESSED_DIR}/{experiment}_events.pq"
    participant_filter = [('participant_id', '=', int(participant_id))]
    return pd.read_parquet(events_path, filters=participant_filter)

In [6]:
# Select a single participant (position 10 in participant_ids).
participant_id = participant_ids[10]

# Load that participant's samples and events via the helper functions.
df_samples = get_samples_df(participant_id)
df_events = get_events_df(participant_id)

In [None]:
df_samples

In [None]:
def combine_samples_events(df_samples, df_events):
    """Attach the most recent FIXPOINT event to every sample row.

    For each sample, the latest FIXPOINT event of the same participant and trial
    whose ``time`` is less than or equal to the sample's ``time`` is looked up
    (backward as-of join). The event's ``event``, ``colour`` (renamed to
    ``fixpoint``), ``stimulus_x`` and ``stimulus_y`` columns are added to the
    sample row; samples that precede the first fixpoint of their trial get NaN.

    Args:
        df_samples: Sample rows with at least ``participant_id``, ``trial_id``
            and ``time`` columns.
        df_events: Event rows with at least ``participant_id``, ``trial_id``,
            ``time``, ``event``, ``colour``, ``stimulus_x`` and ``stimulus_y``.

    Returns:
        A new DataFrame (the inputs are not modified) with the sample columns
        plus ``event``, ``fixpoint``, ``stimulus_x`` and ``stimulus_y``, ordered
        by participant, trial and time. ``fixpoint`` holds one of
        ``"red"``/``"green"``/``"blue"``/``"white"`` or NaN.
    """
    group_keys = ["participant_id", "trial_id"]
    fixpoint_cols = ["participant_id", "trial_id", "time", "event", "colour", "stimulus_x", "stimulus_y"]

    # Work on copies so the caller's frames are left untouched.
    df_samples = df_samples.copy()
    df_fixpoints = (
        df_events.loc[df_events["event"] == "FIXPOINT", fixpoint_cols]
        .rename(columns={"colour": "fixpoint"})
        .copy()
    )

    # merge_asof requires the join keys to have identical dtypes on both sides.
    # NOTE(review): assumes FIXPOINT rows have no missing ids/times - confirm.
    for frame in (df_samples, df_fixpoints):
        for key in ("time", "participant_id", "trial_id"):
            frame[key] = frame[key].astype("int64")

    # merge_asof requires both frames to be sorted by the `on` key itself.
    df_samples = df_samples.sort_values("time", kind="stable")
    df_fixpoints = df_fixpoints.sort_values("time", kind="stable")

    # Backward as-of join on time, matched exactly on participant and trial.
    df_combined = pd.merge_asof(
        df_samples,
        df_fixpoints,
        on="time",
        by=group_keys,
        direction="backward",
    )

    # Translate the colour constants (RED, GREEN, ... come from module scope,
    # presumably config) into readable names; unknown values become NaN.
    df_combined["fixpoint"] = df_combined["fixpoint"].map(
        {RED: "red", GREEN: "green", BLUE: "blue", WHITE: "white"}
    )

    # Restore the participant / trial / time ordering for downstream use.
    df_combined = (
        df_combined
        .sort_values(group_keys + ["time"], kind="stable")
        .reset_index(drop=True)
    )
    return df_combined

In [None]:
# Annotate the selected participant's samples with their FIXPOINT event columns.
df_combined = combine_samples_events(df_samples, df_events)

In [None]:
df_combined.head()

Unnamed: 0,experiment,participant_id,trial_id,time,x_left,y_left,pupil_size_left,x_velocity_left,y_velocity_left,x_right,...,pupil_size_right,x_velocity_right,y_velocity_right,x_resolution,y_resolution,error_message,event,fixpoint,stimulus_x,stimulus_y
0,EVIL_BASTARD,146,0,0,,,0.0,,,,...,0.0,,,59.2,63.3,.C.C.,,,,
1,EVIL_BASTARD,146,0,0,,,0.0,,,,...,0.0,,,59.2,63.3,.C.C.,,,,
2,EVIL_BASTARD,146,0,1,,,0.0,,,,...,0.0,,,59.2,63.3,.C.C.,,,,
3,EVIL_BASTARD,146,0,1,,,0.0,,,,...,0.0,,,59.2,63.3,.C.C.,,,,
4,EVIL_BASTARD,146,0,2,,,0.0,,,,...,0.0,,,59.2,63.3,.C.C.,,,,


In [29]:
def get_acceleration_feature(df: pd.DataFrame, sampling_rate_hz: float = 2000) -> pd.DataFrame:
    """Summarise the acceleration magnitude of each eye per participant.

    Acceleration is the change in velocity between consecutive rows of the same
    (experiment, participant_id, trial_id) group divided by the sample interval
    ``1 / sampling_rate_hz``. Rows are used in the order they appear in ``df``,
    so ``df`` should already be ordered by time within each trial. The first row
    of every trial has no predecessor and yields NaN, which the aggregations
    skip.

    Args:
        df (pd.DataFrame): Raw samples with columns ``experiment``,
            ``participant_id``, ``trial_id``, ``x_velocity_left``,
            ``y_velocity_left``, ``x_velocity_right`` and ``y_velocity_right``.
        sampling_rate_hz (float): Number of samples per second. Defaults to
            2000, the value previously hard-coded here (presumably the
            recording rate of the eye tracker - confirm).

    Returns:
        pd.DataFrame: One row per (experiment, participant_id) with columns
        ['experiment', 'participant_id', X_FEATURES], where X_FEATURES is the
        cartesian product
        {'total_acceleration_magnitude_left', 'total_acceleration_magnitude_right'}
        x {'mean', 'min', 'max', 'median', 'std'}, joined with an underscore.

    Raises:
        ValueError: If ``sampling_rate_hz`` is not positive.
    """
    if sampling_rate_hz <= 0:
        raise ValueError("sampling_rate_hz must be positive")

    trial_keys = ["experiment", "participant_id", "trial_id"]
    participant_keys = ["experiment", "participant_id"]
    velocity_cols = ["x_velocity_left", "y_velocity_left", "x_velocity_right", "y_velocity_right"]
    summary_stats = ["mean", "min", "max", "median", "std"]

    sample_interval = 1 / sampling_rate_hz

    # Velocity difference to the previous row within the same trial, per second.
    acceleration = df.groupby(trial_keys)[velocity_cols].diff() / sample_interval

    # Euclidean norm of the (x, y) acceleration vector for each eye.
    magnitudes = df[participant_keys].assign(
        total_acceleration_magnitude_left=np.sqrt(
            np.power(acceleration["x_velocity_left"], 2)
            + np.power(acceleration["y_velocity_left"], 2)
        ),
        total_acceleration_magnitude_right=np.sqrt(
            np.power(acceleration["x_velocity_right"], 2)
            + np.power(acceleration["y_velocity_right"], 2)
        ),
    )

    return (
        magnitudes
        .groupby(participant_keys)
        .agg({
            "total_acceleration_magnitude_left": summary_stats,
            "total_acceleration_magnitude_right": summary_stats,
        })
        .reset_index()
        # Flatten the (column, statistic) MultiIndex into "<column>_<statistic>".
        .pipe(rename_columns)
    )

In [25]:
def get_distance_to_stimulus_features(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise how far the gaze is from the stimulus position.

    For every row the squared Euclidean distance between each eye's gaze
    position and the stimulus position is computed and averaged over the two
    eyes. Note that no square root is taken, so the reported values are squared
    distances.

    Args:
        df (pd.DataFrame): Samples with columns ``x_left``, ``y_left``,
            ``x_right``, ``y_right``, ``stimulus_x`` and ``stimulus_y``.

    Returns:
        pd.DataFrame: Five rows with columns ``index`` (the statistic name:
        mean, min, max, median, std) and ``distance_to_fixpoint`` (its value
        over all rows of ``df``). The result is not grouped by participant.
    """
    # Use the frame that was passed in rather than a notebook-level global.
    squared_left = (df["x_left"] - df["stimulus_x"]) ** 2 + (df["y_left"] - df["stimulus_y"]) ** 2
    squared_right = (df["x_right"] - df["stimulus_x"]) ** 2 + (df["y_right"] - df["stimulus_y"]) ** 2

    # Average over both eyes; NaN in either eye propagates to the row value.
    both_eyes = ((squared_left + squared_right) / 2).rename("distance_to_fixpoint")

    features = (
        both_eyes
        .to_frame()
        .agg({
            "distance_to_fixpoint": ["mean", "min", "max", "median", "std"],
        })
        .reset_index()
    )

    return features

In [22]:
features

Unnamed: 0,index,distance_to_fixpoint
0,mean,3340.778456
1,min,98.495
2,max,284924.1
3,median,2226.65
4,std,8480.892266


In [None]:
def get_evil_bastard_features(df_event: pd.DataFrame, df_sample: pd.DataFrame) -> "list[pd.DataFrame]":
    """Run the Evil Bastard feature extractions on the supplied data.

    Only sample-based features are implemented so far. Each feature function is
    called once on ``df_sample``; the feature functions do their own grouping by
    experiment and participant, so no per-participant loop is needed here.

    Args:
        df_event (pd.DataFrame): Preprocessed events. Currently unused; kept for
            the planned event-based features.
        df_sample (pd.DataFrame): Preprocessed samples passed to every sample
            feature function.

    Returns:
        list[pd.DataFrame]: One DataFrame per sample feature function, in the
        order the functions are listed. Currently a single frame produced by
        ``get_acceleration_feature``.
    """
    # TODO: add event features (get_pre_calculated_metrics_feature,
    # get_n_correct_trials_feature, get_prop_trials_feature,
    # get_reaction_time_feature), each called with df=df_event.

    # TODO: add get_distance_to_stimulus_features once its output is keyed by
    # experiment / participant_id.
    sample_feature_functions = [get_acceleration_feature]
    df_sample_features_list = [f(df=df_sample) for f in sample_feature_functions]

    # TODO: merge event and sample feature frames on
    # ["experiment", "participant_id"] and return the single merged DataFrame.
    return df_sample_features_list


# NOTE: `events_df` and `samples_df` are assigned in a later cell of this notebook; that cell must be run first.
features = get_evil_bastard_features(df_event=events_df, df_sample=samples_df)

In [None]:
# Select a single participant (position 10 in participant_ids).
participant_id = participant_ids[10]

# Samples: filter on the id exactly as stored in participant_ids.
samples_df = pd.read_parquet(
    f"{PREPROCESSED_DIR}/{experiment}_samples.pq",
    filters=[('participant_id', '=', participant_id)],
)

# Events: the id is converted with int() before filtering.
filters = [('participant_id', '=', int(participant_id))]
events_df = pd.read_parquet(
    f"{PREPROCESSED_DIR}/{experiment}_events.pq",
    filters=filters,
)

In [34]:
features

[     experiment  participant_id  total_acceleration_magnitude_left_mean  \
 0  EVIL_BASTARD             146                             1563.253278   
 
    total_acceleration_magnitude_left_min  \
 0                                    0.0   
 
    total_acceleration_magnitude_left_max  \
 0                           97659.203355   
 
    total_acceleration_magnitude_left_median  \
 0                                    1000.0   
 
    total_acceleration_magnitude_left_std  \
 0                            3902.689435   
 
    total_acceleration_magnitude_right_mean  \
 0                              1581.885881   
 
    total_acceleration_magnitude_right_min  \
 0                                     0.0   
 
    total_acceleration_magnitude_right_max  \
 0                           119249.989518   
 
    total_acceleration_magnitude_right_median  \
 0                                 894.427191   
 
    total_acceleration_magnitude_right_std  
 0                             4197.048884 