# Experiment 2 preprocessing

The purpose of this script is to join all participant's behavioral data into a single dataframe, define new variables relevant for analyses, and filter trials and subjects based on reaction times and catch trial responses

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# Loading raw data

In [13]:
#Concatenating all participants in a single dataframe
import glob

path = r'/media/wiseman/HDD/DMFgit/PredRelv/raw/exp2/' # use your path to raw data
all_files = glob.glob(path + "/*.csv")

li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)
    
df = pd.concat(li, axis=0, ignore_index=True)

In [14]:
len(df["id"].unique()) # n = 65

65

In [15]:
learn_df = df[df.phase==0] # learning phase 
implicit_df = df[df.phase==1] # implicit test phase
explicit_df = df[df.phase==2] # explicit recall phase

In [17]:
implicit_df.groupby("modality")["id"].nunique()

modality
auditory    34
visual      31
Name: id, dtype: int64

# Checking catch trial detection

The crucial part is making sure that particpants responded to visual catch trials in auditory blocks. We will mark auditory blocks of each participant where they did not seem to respond to catch trials

In [18]:
dat = implicit_df[(implicit_df.modality == "auditory") & (implicit_df.catch == 1)].groupby(["id", "block"], as_index=0)["correct"].mean()
dat[dat.correct == 0] # if they responded to at least 1 of 8 visual catch considering it valid

implicit_df["invalid_catch"] = np.zeros(len(implicit_df))

for row in range(len(dat)):
    subj = dat.iloc[row,:]["id"]
    block = dat.iloc[row,:]["block"]
    if dat.iloc[row,:]["correct"] == 0:
        implicit_df.loc[(implicit_df['id'] == subj) & (implicit_df['block'] == block), 'invalid_catch'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["invalid_catch"] = np.zeros(len(implicit_df))


# Defining new variables

In [22]:
# Another pred column which always refers to attended modality
implicit_df["pred"] = np.where(implicit_df["modality"] == "visual", implicit_df["v_pred"], implicit_df["a_pred"])
# And another one referring to unattended
implicit_df["ign_pred"] = np.where(implicit_df["modality"] == "visual", implicit_df["a_pred"], implicit_df["v_pred"])
# Lastly one referring to unattended modality itself
implicit_df["ign_mod"] = np.where(implicit_df["modality"] == "visual", "auditory", "visual")

# These columns transform the values of pred and ign_pred: "EXP" --> "1" ,  "VP" --> "0"
implicit_df["relevant_expected"] = np.where(implicit_df.pred == "EXP", 1, 0); implicit_df["relevant_expected"] = implicit_df["relevant_expected"].astype("str")
implicit_df["irrelevant_expected"] = np.where(implicit_df.ign_pred == "EXP", 1, 0); implicit_df["irrelevant_expected"] = implicit_df["irrelevant_expected"].astype("str")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["pred"] = np.where(implicit_df["modality"] == "visual", implicit_df["v_pred"], implicit_df["a_pred"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["ign_pred"] = np.where(implicit_df["modality"] == "visual", implicit_df["a_pred"], implicit_df["v_pred"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

## Explicit phase performance

Only considering a learned pair if both expected and unexpected trials in the explicit phase were correctly classified

In [23]:
# Adding a column to the implicit test dataframe to indicate if the participant learned the visual cue
v_explicit = explicit_df.groupby(["id", "v_leading"], as_index=0)["correct"].sum()
new_col = []
for row in range(len(implicit_df)):
    subj = implicit_df.iloc[row,:]["id"]
    cue = implicit_df.iloc[row,:]["v_leading"]
    
    condition = (v_explicit.id == subj) & (v_explicit.v_leading == cue) & (v_explicit["correct"] > 3)
    if condition.any(): new_col.append(1)
    else: new_col.append(0)

implicit_df["v_learned"] = new_col


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["v_learned"] = new_col


In [24]:
# Adding a column to the implicit test dataframe to indicate if the participant learned the auditory cue
a_explicit = explicit_df.groupby(["id", "a_leading"], as_index=0)["correct"].sum()
new_col = []
for row in range(len(implicit_df)):
    subj = implicit_df.iloc[row,:]["id"]
    cue = implicit_df.iloc[row,:]["a_leading"]

    condition = (a_explicit.id == subj) & (a_explicit.a_leading == cue) & (a_explicit["correct"] > 3)
    if condition.any(): new_col.append(1)
    else: new_col.append(0)

implicit_df["a_learned"] = new_col

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["a_learned"] = new_col


In [25]:
# new columns encoding learned pairs of attended and unattended modalities
implicit_df["att_learned"] = np.where(implicit_df["modality"] == "visual", implicit_df["v_learned"], implicit_df["a_learned"])
implicit_df["ign_learned"] = np.where(implicit_df["modality"] == "visual", implicit_df["a_learned"], implicit_df["v_learned"])

implicit_df["att_learned"] = implicit_df["att_learned"].astype("str")
implicit_df["ign_learned"] = implicit_df["ign_learned"].astype("str")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["att_learned"] = np.where(implicit_df["modality"] == "visual", implicit_df["v_learned"], implicit_df["a_learned"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["ign_learned"] = np.where(implicit_df["modality"] == "visual", implicit_df["a_learned"], implicit_df["v_learned"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guid

## Filtering dataframe

In [31]:
# We first remove all starting block trials, to remove first trial of a participant and the effect of the between block intervals.implicit_df
implicit_df = implicit_df[implicit_df.ntrial != 0] 
implicit_df.reset_index(inplace=True, drop=True)

In [35]:
len(implicit_df)

45237

In [33]:
def RT_filter(df):
    # this function was used to filter those trials with a RT that are above X standard deviation from the mean
    rt_mean = df['RT'].mean()
    up_lim = rt_mean + 3 * df['RT'].std() # Before 2 sd
    x = df.loc[df['RT'] < up_lim, : ]
    return x #the same dataframe with RT filtered

In [34]:
# Filtering RT outliers (>3 std) within each participant
df_clean = implicit_df.groupby(["id"], as_index= False).apply(RT_filter).reset_index() 
implicit_df = df_clean.iloc[:,2:]

  df_clean = implicit_df.groupby(["id"], as_index= False).apply(RT_filter).reset_index()


## Adding modifications to response variables

In [36]:
# eliminate catch trials and trials where they responded catch by mistake
implicit_df = implicit_df[(implicit_df["resp"] != "catch") & (implicit_df["catch"] == 0)]
implicit_df.reset_index(inplace=True,drop=True)

In [37]:
# Temporary column where diff value of same (non-target) trials is set to 0
implicit_df["change_r"] = np.where(implicit_df["target"]==0, 0, implicit_df["diff"])

In [38]:
# Rescaling change values from 0 to 1
dfmax = implicit_df.groupby(["id", "modality"], as_index=0)["change_r"].max()
new_col = []
for row in range(len(implicit_df)):
     subj = implicit_df.iloc[row,:]["id"]
     mod = implicit_df.iloc[row,:]["modality"]
     max = dfmax[(dfmax.id == subj) & (dfmax.modality == mod)]["change_r"]
     new_col.append(implicit_df.iloc[row,:]["change_r"] / max)

implicit_df["change"] = np.asarray(new_col) # If not passed as array for some reason it results in error when plotting

In [39]:
# pymer wants the response variable to be numeric
implicit_df["response"] = np.where(implicit_df["resp"] == "diferente", 1, 0)

# Saving dataframe 

In [40]:
implicit_df.to_csv("/media/wiseman/HDD/DMFgit/PredRelv/behav_analyses/data/exp2_implicit.csv")
learn_df.to_csv("/media/wiseman/HDD/DMFgit/PredRelv/behav_analyses/data/exp2_learn.csv")
explicit_df.to_csv("/media/wiseman/HDD/DMFgit/PredRelv/behav_analyses/data/exp2_explicit.csv")