Importing necessary modules

In [141]:
from pathlib import Path
import pandas as pd
import numpy as np

Accessing the CSV files to analyse. To use the example CSV, download it and replace "..." in the code below with the path of the folder that contains it on your computer.

In [163]:

# Define the exclude_outliers function
def exclude_outliers(df, column_name, lower_bound, lower_std_devs, upper_std_devs):
    """Return the rows of ``df`` whose ``column_name`` value is not an outlier.

    A row is kept only when its value satisfies all three conditions:

    * it is greater than or equal to ``lower_bound`` (an absolute cut-off);
    * it is greater than or equal to ``mean - lower_std_devs * std``;
    * it is less than or equal to ``mean + upper_std_devs * std``.

    The mean and standard deviation are computed on the column as passed in,
    i.e. before any row is removed. If the standard deviation is NaN (for
    example when the column holds a single value), every comparison against
    the standard-deviation bounds is False and no rows are returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter; it is not modified.
    column_name : str
        Name of the column to test.
    lower_bound : float
        Absolute minimum accepted value (inclusive).
    lower_std_devs : float
        Number of standard deviations allowed below the mean.
    upper_std_devs : float
        Number of standard deviations allowed above the mean.

    Returns
    -------
    pandas.DataFrame
        The retained rows, with their original index labels.
    """
    values = df[column_name]
    centre = values.mean()
    spread = values.std()

    floor = centre - lower_std_devs * spread
    ceiling = centre + upper_std_devs * spread

    # All three conditions use inclusive comparisons.
    keep = (values >= lower_bound) & (values >= floor) & (values <= ceiling)
    return df[keep]

# Collect every regular file ending in '.csv' from the data folder.
# Path.iterdir() yields entries in arbitrary order, so the paths are sorted to
# make the processing order (and the row order of the output) reproducible.
# Directories whose name happens to end in '.csv' are skipped.
csv_files = sorted(
    path
    for path in Path('...').iterdir()
    if path.is_file() and path.suffix == '.csv'
)


Initializing empty data frame and selecting columns of interest

In [164]:

# DataFrame that will hold the final output; starts with no rows and no columns
output_df = pd.DataFrame()

# Names of the columns that are kept for the analysis
columns_of_interest = [
    "participant",
    "distPres",
    "targPos",
    "distPos",
    "distCol",
    "key_resp_2.corr",
    "key_resp_2.rt",
    "trial.thisN",
]


Reading every csv with a for loop, performing necessary analyses

In [165]:
# Inclusion / trimming criteria applied to every participant file
MIN_ACCURACY = 0.70  # keep files whose overall accuracy is strictly above this
MIN_RT = 0.200       # absolute lower bound for key_resp_2.rt (same units as that column)
RT_STD_DEVS = 2.5    # trim response times further than this many SDs from the mean

# One summary record per included file. The records are collected in a list
# and turned into a DataFrame once, after the loop: this avoids calling
# pd.concat on every iteration and makes the cell safe to re-run, because
# output_df is rebuilt from scratch instead of being appended to.
participant_rows = []

for csv_file in csv_files:
    # Read the CSV file into a DataFrame
    try:
        df = pd.read_csv(csv_file)
    except pd.errors.EmptyDataError:
        # Skip empty file
        continue

    # Select only the columns of interest (raises KeyError if one is missing)
    df = df[columns_of_interest]

    # Overall accuracy; computed before rows without a trial number are removed
    accuracy = df["key_resp_2.corr"].mean()

    # Phrased as "not (a > b)" so that a NaN accuracy is excluded as well
    if not (accuracy > MIN_ACCURACY):
        continue

    # Omit practice and NA trials based on trial.thisN column
    df = df.dropna(subset=["trial.thisN"])

    # Exclude outliers in 'key_resp_2.rt' column
    df = exclude_outliers(df, 'key_resp_2.rt', MIN_RT, RT_STD_DEVS, RT_STD_DEVS)

    # Nothing left to summarise: report it and move on instead of failing
    # on the participant lookup below.
    if df.empty:
        print(f"Skipping {csv_file}: no trials left after filtering")
        continue

    # Distractor-present and distractor-absent trials.
    # NOTE(review): both comparisons are against the strings "2" and "FALSE";
    # they only match if distPres is read as text with exactly these values.
    # Confirm against the data files.
    df_dist = df[df["distPres"] == "2"]
    df_nodist = df[df["distPres"] == "FALSE"]

    # Distractor position, taken as the mean of distPos over the
    # distractor-present trials.
    # NOTE(review): presumably distPos is constant within a file; if it
    # varies, the mean may not equal any targPos value. TODO confirm.
    dist_pos = df_dist["distPos"].mean()

    # Mean response time of correct distractor-absent trials
    mean_nodist = df_nodist[df_nodist["key_resp_2.corr"] == 1]["key_resp_2.rt"].mean()

    # Mean response time of correct distractor-present trials
    mean_distpres = df_dist[df_dist["key_resp_2.corr"] == 1]["key_resp_2.rt"].mean()

    # Target location effect (distractor-absent trials only).
    # Unlike the two means above, these are not restricted to correct trials.

    # Target appears at the distractor position
    target_distpos = df_nodist[(df_nodist["targPos"] == dist_pos)]["key_resp_2.rt"].mean()

    # Target appears at any other position
    target_nodistpos = df_nodist[(df_nodist["targPos"] != dist_pos)]["key_resp_2.rt"].mean()

    # Accuracy of distractor-absent and distractor-present trials
    acc_nodist = df_nodist["key_resp_2.corr"].mean()
    acc_distpres = df_dist["key_resp_2.corr"].mean()

    # Participant identifier, read from the first remaining row so that a
    # file with a single surviving trial does not raise IndexError.
    n_part = df["participant"].iloc[0]

    # Key order defines the column order of the output table
    participant_rows.append({
        "n_part": n_part,
        "accuracy": accuracy,
        "mean_nodist": mean_nodist,
        "mean_distpres": mean_distpres,
        "target_nodistpos": target_nodistpos,
        "target_distpos": target_distpos,
        "acc_nodist": acc_nodist,
        "acc_distpres": acc_distpres,
    })

# Build the final output DataFrame in one step (empty if no file qualified)
output_df = pd.DataFrame(participant_rows)

Writing the output to another CSV file. Replace "..." in the code below with the path of the file to create.

In [166]:
# Destination of the final output table
output_csv_file = Path("...")

# Save the table as CSV; the DataFrame index is not written to the file
output_df.to_csv(path_or_buf=output_csv_file, index=False)
