# Experiment 1 preprocessing

The purpose of this script is to join all participant's behavioral data into a single dataframe, define new variables relevant for analyses, and filter trials and subjects based on reaction times and catch trial responses

In [21]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## Loading raw data

In [22]:
#Concatenating all participants in a single dataframe
import glob

# path = r'/media/wiseman/HDD/DMFgit/PredRelv/data/exp1/raw/raw/' # use your path
path = r'/media/wiseman/HDD/DMFgit/PredRelv/raw/exp1/' # use your path
all_files = glob.glob(path + "*.csv") # get all the files in the path

# read all the csv files in the list
li = []
for filename in all_files: # loop through all the files and read them in
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)


# concatenate all the dataframes in the list
df = pd.concat(li, axis=0).reset_index(drop=True)

Create separate dataframes for each experimental phase

In [23]:
learn_df = df[df.phase==0] # learning phase 
implicit_df = df[df.phase==1] # implicit test phase
explicit_df = df[df.phase==2] # explicit recall phase

## Implicit phase

### Checking catch trial detection

The crucial part is making sure that particpants responded to visual catch trials in auditory blocks. We will mark auditory blocks of each participant where they did not seem to respond to catch trials

In [25]:
dat = implicit_df[(implicit_df.modality == "auditory") & (implicit_df.catch == 1)].groupby(["id", "block"], as_index=0)["correct"].mean()
#dat[dat.correct == 0] # if they responded to at least 1 of 8 visual catch considering it valid

implicit_df["invalid_catch"] = np.zeros(len(implicit_df))

for row in range(len(dat)):
    subj = dat.iloc[row,:]["id"]
    block = dat.iloc[row,:]["block"]
    if dat.iloc[row,:]["correct"] == 0:
        implicit_df.loc[(implicit_df['id'] == subj) & (implicit_df['block'] == block), 'invalid_catch'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["invalid_catch"] = np.zeros(len(implicit_df))


### Defining new variables

1. Creating column that enumerates blocks within modality

In [26]:
conditions = [implicit_df["block"] == 0, implicit_df["block"] == 2, implicit_df["block"] == 4, implicit_df["block"] == 6, implicit_df["block"] == 8, implicit_df["block"] == 1, implicit_df["block"] == 3, implicit_df["block"] == 5, implicit_df["block"] == 7, implicit_df["block"] == 9]
choices = [1,2,3,4,5,1,2,3,4,5] # 5 blocks per modality in test phase
implicit_df["block_mod"] = np.select(conditions, choices)  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["block_mod"] = np.select(conditions, choices)


2. Columns that encode expectation based on attention condition

In [27]:
# Another pred column which always refers to attended modality
implicit_df["pred"] = np.where(implicit_df["modality"] == "visual", implicit_df["v_pred"], implicit_df["a_pred"])
# And another one referring to unattended
implicit_df["ign_pred"] = np.where(implicit_df["modality"] == "visual", implicit_df["a_pred"], implicit_df["v_pred"])
# Lastly one referring to unattended modality itself
implicit_df["ign_mod"] = np.where(implicit_df["modality"] == "visual", "auditory", "visual")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["pred"] = np.where(implicit_df["modality"] == "visual", implicit_df["v_pred"], implicit_df["a_pred"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["ign_pred"] = np.where(implicit_df["modality"] == "visual", implicit_df["a_pred"], implicit_df["v_pred"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

3. Transforming the values of pred and ign_pred: "EXP" --> "1" ,  "VP" --> "0"

In [28]:
implicit_df["relevant_expected"] = np.where(implicit_df.pred == "EXP", 1, 0); implicit_df["relevant_expected"] = implicit_df["relevant_expected"].astype("str")
implicit_df["irrelevant_expected"] = np.where(implicit_df.ign_pred == "EXP", 1, 0); implicit_df["irrelevant_expected"] = implicit_df["irrelevant_expected"].astype("str")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["relevant_expected"] = np.where(implicit_df.pred == "EXP", 1, 0); implicit_df["relevant_expected"] = implicit_df["relevant_expected"].astype("str")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["relevant_expected"] = np.where(implicit_df.pred == "EXP", 1, 0); implicit_df["relevant_expected"] = implicit_df["relevant_expected"].astype("str")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the doc

## Explicit phase 

We consider a stimulus pair association learned if the participant correctly classified 3 out of 4 times it was presented in the explicit phase

In [29]:
# Adding a column to the implicit test dataframe to indicate if the participant learned the visual cue
v_explicit = explicit_df.groupby(["id", "v_leading"], as_index=0)["correct"].sum()
new_col = []
for row in range(len(implicit_df)):
    subj = implicit_df.iloc[row,:]["id"]
    cue = implicit_df.iloc[row,:]["v_leading"]
    
    condition = (v_explicit.id == subj) & (v_explicit.v_leading == cue) & (v_explicit["correct"] > 3)
    if condition.any(): new_col.append(1)
    else: new_col.append(0)

implicit_df["v_learned"] = new_col


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["v_learned"] = new_col


In [30]:
# Adding a column to the implicit test dataframe to indicate if the participant learned the auditory cue
a_explicit = explicit_df.groupby(["id", "a_leading"], as_index=0)["correct"].sum()
new_col = []
for row in range(len(implicit_df)):
    subj = implicit_df.iloc[row,:]["id"]
    cue = implicit_df.iloc[row,:]["a_leading"]

    condition = (a_explicit.id == subj) & (a_explicit.a_leading == cue) & (a_explicit["correct"] > 3)
    if condition.any(): new_col.append(1)
    else: new_col.append(0)

implicit_df["a_learned"] = new_col

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["a_learned"] = new_col


In [31]:
# new columns encoding learned pairs of attended and unattended modalities
implicit_df["att_learned"] = np.where(implicit_df["modality"] == "visual", implicit_df["v_learned"], implicit_df["a_learned"])
implicit_df["ign_learned"] = np.where(implicit_df["modality"] == "visual", implicit_df["a_learned"], implicit_df["v_learned"])

implicit_df["att_learned"] = implicit_df["att_learned"].astype("str")
implicit_df["ign_learned"] = implicit_df["ign_learned"].astype("str")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["att_learned"] = np.where(implicit_df["modality"] == "visual", implicit_df["v_learned"], implicit_df["a_learned"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  implicit_df["ign_learned"] = np.where(implicit_df["modality"] == "visual", implicit_df["a_learned"], implicit_df["v_learned"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guid

## Filtering dataframe

Removing all initial trials of every block, and catch trials from the dataframe that will be used for the analyses

In [32]:
# We first remove all starting block trials, to remove first trial of a participant and the effect of the between block intervals.implicit_df
implicit_df = implicit_df[(implicit_df.ntrial != 0) & (implicit_df.catch==0)] 
implicit_df.reset_index(inplace=True, drop=True)

In [33]:
def RT_filter(df):
    # this function was used to filter those trials with a RT that are 3 standard deviations away from the mean
    rt_mean = df['RT'].mean()
    up_lim = rt_mean + 3 * df['RT'].std() # 
    x = df.loc[df['RT'] < up_lim, : ]
    return x #the same dataframe with RT filtered

In [34]:
# Filtering RT outliers (>3 std) within each participant
df_clean = implicit_df.groupby(["id"], as_index= False).apply(RT_filter).reset_index() 
implicit_df = df_clean.iloc[:,2:]

  df_clean = implicit_df.groupby(["id"], as_index= False).apply(RT_filter).reset_index()


## Adding modifications to response variables

In [35]:
# First we will have to eliminate trials were they responded catch by mistake
implicit_df = implicit_df[implicit_df["resp"] != "catch"]
implicit_df.reset_index(inplace=True,drop=True)

In [36]:
# Temporary column where diff value of same (non-target) trials is set to 0
implicit_df["change_r"] = np.where(implicit_df["target"]==0, 0, implicit_df["diff"])

In [37]:
# Rescaling change values from 0 to 1
dfmax = implicit_df.groupby(["id", "modality"], as_index=0)["change_r"].max()
new_col = []
for row in range(len(implicit_df)):
     subj = implicit_df.iloc[row,:]["id"]
     mod = implicit_df.iloc[row,:]["modality"]
     max = dfmax[(dfmax.id == subj) & (dfmax.modality == mod)]["change_r"]
     new_col.append(implicit_df.iloc[row,:]["change_r"] / max)

implicit_df["change"] = np.asarray(new_col) # If not passed as array for some reason it results in error when plotting

In [38]:
# pymer wants the response variable to be numeric
implicit_df["response"] = np.where(implicit_df["resp"] == "diferente", 1, 0)

# Saving dataframe 

In [39]:
implicit_df.to_csv("/media/wiseman/HDD/DMFgit/PredRelv/behav_analyses/data/exp1_implicit.csv")
learn_df.to_csv("/media/wiseman/HDD/DMFgit/PredRelv/behav_analyses/data/exp1_learn.csv")
explicit_df.to_csv("/media/wiseman/HDD/DMFgit/PredRelv/behav_analyses/data/exp1_explicit.csv")