The purpose of this script is to generate .csv files containing behavioural summary statistics (mean accuracy and RT) for each condition in the PE experiment, the aloud-minus-silent (PE) difference for each subject, and per-subject RT distance matrices.

# Import necessary packages

In [None]:
# IPython magic: star-imports numpy and matplotlib.pyplot names into the
# notebook namespace and renders figures inline.
%pylab inline
import pandas as pd
import os
import numpy as np
from IPython.core.display import display, HTML
# Widen the notebook container to 80% of the browser window
display(HTML("<style>.container { width:80% !important; }</style>"))
import itertools

# Show up to 150 rows when a dataframe is displayed
pd.set_option('display.max_rows', 150)

In [None]:
# Define top dir
# The project's top-level directory is stored in a small text file; read it and
# strip newlines. The context manager guarantees the file handle is closed.
with open('../top_dir_win.txt') as top_dir_file:
    topDir = top_dir_file.read().replace('\n', '')

# Define path to behavioural data, subjects and conditions

In [None]:
#### Specify subjects, conditions and input/output locations

# Participant IDs subject-002 ... subject-030 (inclusive, zero-padded to 3 digits).
# To exclude a participant, filter their ID out of this list.
subjects = ['subject-%03d' % subject_number for subject_number in range(2, 31)]

# Test conditions that get their own summary stats file
conditions = ['aloud', 'silent', 'foil']

# Behavioural data live under the top dir; per-subject test csvs are read from fmri_runs1
behav_data_path = topDir + "/behavioural_data/"
in_file_path = behav_data_path + "/fmri_runs1/"

# Specify directory for output (and make it if it doesn't already exist)
out_file_path = behav_data_path + "/PE_behavioural_summary_stats"
if not os.path.exists(out_file_path):
    os.makedirs(out_file_path)


# Make summary stats files (mean accuracy & mean rt) for each condition

In [None]:
# Build one summary stats file per condition:
#   <out_file_path>/<condition>_behav_summary_stats.csv
# with one row per subject (subject_id, condition, mean_accuracy, mean_rt).

# Columns to read from each subject's test csv, and the short names we give them
test_cols = ['test_cond', 'tested_word', 'test_resp.corr', 'test_resp.rt']
short_names = {'test_cond': "CONDITION", 'tested_word': 'WORD',
               'test_resp.corr': 'ACC', 'test_resp.rt': 'RT'}

# Read every subject's data ONCE up front, rather than re-reading the same csv
# for each condition.
subject_data = {}
for subject in subjects:

    # Define path to behavioural csv for this subject, read in the data
    in_file = in_file_path + subject + '/' + subject + "_test.csv"
    df = pd.read_csv(in_file, usecols=test_cols).rename(columns=short_names)

    # Remove rows that have no tested word (NaN in WORD)
    subject_data[subject] = df.dropna(subset=['WORD'])

# Loop through conditions, creating a summary stats dataframe for each
for condition in conditions:

    # Collect one row of summary stats per subject
    summary_rows = []
    for subject in subjects:

        # Grab the data for this condition
        df = subject_data[subject]
        df_condition = df[df["CONDITION"] == condition]

        # Use the Series' own .mean() (which skips NaN values) instead of the bare
        # `mean` that only exists when `%pylab inline` has been run.
        mean_acc = df_condition['ACC'].mean()
        mean_rt = df_condition['RT'].mean()

        summary_rows.append([subject, condition, mean_acc, mean_rt])

    # Assemble the dataframe in one go (avoids growing it row by row)
    summary_stats = pd.DataFrame(summary_rows,
                                 columns=['subject_id', 'condition', 'mean_accuracy', 'mean_rt'])

    # Save summary stats to disk
    summary_stats.to_csv(out_file_path + '/' + condition + '_behav_summary_stats.csv', index=False)



# Make summary stats file for Production Effect (aloud - silent)

In [None]:
# Build the Production Effect summary file (<out_file_path>/PE_behav_summary_stats.csv):
# one row per subject holding aloud-minus-silent differences in mean accuracy and mean rt.

# Collect one row of PE values per subject
pe_rows = []

# Loop through subjects. For each subject, get mean accuracy and rt for BOTH aloud and silent,
# compute aloud-silent (i.e. magnitude of PE), and store the result
for subject in subjects:

    # Define path to behavioural csv for this subject, read in the data
    in_file = in_file_path + subject + '/' + subject + "_test.csv"
    cols = ['test_cond', 'tested_word', 'test_resp.corr', 'test_resp.rt']
    df = pd.read_csv(in_file, usecols=cols)

    # Rename columns for convenience
    df = df.rename(columns={'test_cond': "CONDITION", 'tested_word': 'WORD',
                            'test_resp.corr': 'ACC', 'test_resp.rt': 'RT'})

    # Remove rows that have no tested word (NaN in WORD)
    df = df.dropna(subset=['WORD'])

    # Grab the aloud and silent trials separately
    df_aloud = df[df["CONDITION"] == 'aloud']
    df_silent = df[df["CONDITION"] == 'silent']

    # Accuracy difference. Series.mean() (which skips NaN values) is used instead of
    # the bare `mean` that only exists when `%pylab inline` has been run.
    mean_acc_PE = df_aloud['ACC'].mean() - df_silent['ACC'].mean()

    # Do the same for rt
    mean_rt_PE = df_aloud['RT'].mean() - df_silent['RT'].mean()

    pe_rows.append([subject, 'PE', mean_acc_PE, mean_rt_PE])

# Assemble the dataframe in one go (avoids growing it row by row)
summary_stats_PE = pd.DataFrame(pe_rows,
                                columns=['subject_id', 'condition', 'mean_accuracy', 'mean_rt'])

# Save to disk
summary_stats_PE.to_csv(out_file_path + '/PE_behav_summary_stats.csv', index=False)

# Make behavioural DSMs (pairwise RT distance matrices)

In [None]:
# For each subject (and each condition listed below), write two files:
#   <subject>_<condition>_rt_raw.csv : the RT recorded for each tested word
#   <subject>_<condition>_rt_dsm.csv : word-by-word matrix of absolute RT differences
for subject in subjects:

    # Define path to behavioural csv for this subject, read in the data
    in_file = in_file_path + subject + '/' + subject + "_test.csv"
    cols = ['test_cond', 'tested_word', 'test_resp.corr', 'test_resp.rt']
    df = pd.read_csv(in_file, usecols=cols)

    # Rename columns for convenience
    df = df.rename(columns={'test_cond': "CONDITION", 'tested_word': 'WORD',
                            'test_resp.corr': 'ACC', 'test_resp.rt': 'RT'})

    # Remove rows that have no tested word (NaN in WORD)
    df = df.dropna(subset=['WORD'])

    for condition in ['aloud']:

        # Define output paths
        rt_out_path = "../../MRIanalyses/assets/" + subject + "/" + subject + "_" + condition + "_rt_raw.csv"
        dsm_out_path = "../../MRIanalyses/assets/" + subject + "/" + subject + "_" + condition + "_rt_dsm.csv"

        this_df = df[df['CONDITION'] == condition]

        # Save raw RTs for each item to disk
        this_df[['WORD', 'RT']].to_csv(rt_out_path, index=False)

        # Order the items alphabetically by word so the rows/columns of the matrix
        # are sorted (stable sort keeps the original order of any repeated word)
        ordered = this_df.sort_values('WORD', kind='mergesort')
        word_list = list(ordered['WORD'])
        rts = np.asarray(ordered['RT'], dtype=float)

        # All pairwise |rt_i - rt_j| at once via broadcasting: a column vector minus
        # a row vector gives the full n x n difference matrix. This replaces the
        # per-pair dataframe lookups, which were slow and broke on repeated words.
        distances = np.abs(rts[:, np.newaxis] - rts[np.newaxis, :])

        # Enter zeros along the diagonal (also when an RT is NaN or there is only one word).
        # NOTE: the diagonal is now written to the csv as 0.0 rather than 0.
        np.fill_diagonal(distances, 0.0)

        # Label rows/columns with the words and save matrix to disk
        matrix = pd.DataFrame(distances, index=word_list, columns=word_list)
        matrix.to_csv(dsm_out_path)

