In [97]:
import pandas as pd
from config import *
reaction = pd.read_parquet(PROCESSED_DIR / "REACTION.pq")

In [98]:
def check_trialid_event(df: pd.DataFrame) -> pd.DataFrame:
    df_check = (df.
        query("event == 'TRIALID'").
        groupby(["participant_id", "trial_id", "event"])["event"].
        count().
        reset_index(name='n_TRIALID').
        query("n_TRIALID == 1")
    )
    
    filtered_df = (df.
        loc[df["participant_id"].isin(df_check["participant_id"]),:].
        loc[df["trial_id"].isin(df_check["trial_id"]),:]
    )
    
    print("Removed", len(df)-len(filtered_df), "rows")
    
    print("Removed rows with [p_id, t_id]:\n", pd.unique(pd.concat([filtered_df,df]).drop_duplicates(keep=False)[["participant_id", "trial_id"]].values.ravel("K")))
        
    return filtered_df

def check_fixpoint_amount(df: pd.DataFrame) -> pd.DataFrame:
    df_check = (df.
        query("event == 'FIXPOINT'").
        groupby(["participant_id", "trial_id", "event"])["event"].
        count().
        reset_index(name='n_fixpoints').
        query("n_fixpoints == 2")
    )
    
    filtered_df = (df.
        loc[df["participant_id"].isin(df_check["participant_id"]),:].
        loc[df["trial_id"].isin(df_check["trial_id"]),:]
    )
        
    print("Removed", len(df)-len(filtered_df), "rows")
    
    print("Removed rows with [p_id, t_id]:\n", pd.unique(pd.concat([filtered_df,df]).drop_duplicates(keep=False)[["participant_id", "trial_id"]].values.ravel("K")))
    
    return filtered_df

def check_red_fixpoint(df: pd.DataFrame) -> pd.DataFrame:
    df_check = (df.
        query("event == 'FIXPOINT' & colour == '255 0 0'").
        groupby(["participant_id", "trial_id", "event"])["event"].
        count().
        reset_index(name='n_red_fixpoints').
        query("n_red_fixpoints == 1")
    )    
    filtered_df = (df.
        loc[df["participant_id"].isin(df_check["participant_id"]),:].
        loc[df["trial_id"].isin(df_check["trial_id"]),:]
    )
    
    print("Removed", len(df)-len(filtered_df), "rows")
    print("Removed rows with [p_id, t_id]:\n", pd.unique(pd.concat([filtered_df,df]).drop_duplicates(keep=False)[["participant_id", "trial_id"]].values.ravel("K")))
    
    return filtered_df

def check_white_fixpoint(df: pd.DataFrame) -> pd.DataFrame:
    df_check = (df.
        query("event == 'FIXPOINT' & colour == '255 255 255'").
        groupby(["participant_id", "trial_id", "event"])["event"].
        count().
        reset_index(name='n_white_fixpoints').
        query("n_white_fixpoints == 1")
    )
    filtered_df = (df.
        loc[df["participant_id"].isin(df_check["participant_id"]),:].
        loc[df["trial_id"].isin(df_check["trial_id"]),:]
    )
        
    print("Removed", len(df)-len(filtered_df), "rows")
    print("Removed rows with [p_id, t_id]:\n", pd.unique(pd.concat([filtered_df,df]).drop_duplicates(keep=False)[["participant_id", "trial_id"]].values.ravel("K")))

    return filtered_df

def check_trial_var_data(df: pd.DataFrame) -> pd.DataFrame:
    df_check = (df.
        query("event == 'TRIAL_VAR_DATA'").
        groupby(["participant_id", "trial_id", "event"])["event"].
        count().
        reset_index(name='n_trial_var_data_events').
        query("n_trial_var_data_events == 1")
    )
    filtered_df = (df.
        loc[df["participant_id"].isin(df_check["participant_id"]),:].
        loc[df["trial_id"].isin(df_check["trial_id"]),:]
    )
        
    print("Removed", len(df)-len(filtered_df), "rows")
    print("Removed rows with [p_id, t_id]:\n", pd.unique(pd.concat([filtered_df,df]).drop_duplicates(keep=False)[["participant_id", "trial_id"]].values.ravel("K")))

    return filtered_df

def check_start_event(df: pd.DataFrame) -> pd.DataFrame:
    df_check = (df.
        query("event == 'START'").
        groupby(["participant_id", "trial_id", "event"])["event"].
        count().
        reset_index(name='n_start').
        query("n_start == 1")
    )
    
    filtered_df = (df.
        loc[df["participant_id"].isin(df_check["participant_id"]),:].
        loc[df["trial_id"].isin(df_check["trial_id"]),:]
    )
            
    print("Removed", len(df)-len(filtered_df), "rows")
    print("Removed rows with [p_id, t_id]:\n", pd.unique(pd.concat([filtered_df,df]).drop_duplicates(keep=False)[["participant_id", "trial_id"]].values.ravel("K")))

    return filtered_df

def check_end_event(df: pd.DataFrame) -> pd.DataFrame:
    df_check = (df.
        query("event == 'END'").
        groupby(["participant_id", "trial_id", "event"])["event"].
        count().
        reset_index(name='n_end').
        query("n_end == 1")
    )
    filtered_df = (df.
        loc[df["participant_id"].isin(df_check["participant_id"]),:].
        loc[df["trial_id"].isin(df_check["trial_id"]),:]
    )
    
    print("Removed", len(df)-len(filtered_df), "rows")
    print("Removed rows with [p_id, t_id]:\n", pd.unique(pd.concat([filtered_df,df]).drop_duplicates(keep=False)[["participant_id", "trial_id"]].values.ravel("K")))
    
    return filtered_df

df_new = (reaction.
    pipe(check_trialid_event).
    pipe(check_fixpoint_amount).
    pipe(check_red_fixpoint).
    pipe(check_white_fixpoint).
    pipe(check_trial_var_data).
    pipe(check_start_event).
    pipe(check_end_event)
)



Removed 31 rows
Removed rows with [p_id, t_id]:
 ['237' nan]
Removed 3961 rows
Removed rows with [p_id, t_id]:
 ['103' '105' '109' '113' '117' '118' '119' '120' '121' '122' '127' 0.0 1.0
 2.0 3.0 4.0 5.0 6.0 7.0]
Removed 0 rows
Removed rows with [p_id, t_id]:
 []
Removed 0 rows
Removed rows with [p_id, t_id]:
 []
Removed 0 rows
Removed rows with [p_id, t_id]:
 []
Removed 2351 rows
Removed rows with [p_id, t_id]:
 ['237' 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0
 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0]
Removed 0 rows
Removed rows with [p_id, t_id]:
 []
