The purpose of this script is to parse output from PsychoPy into fMRI log files (event timing files for each condition / stimulus, which can later be fed to FEAT). You must run this script individually for each participant.

# Import necessaries

In [None]:
%pylab inline
import pandas as pd
import os
import numpy as np
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

pd.set_option('display.max_rows', 150)

In [None]:
# Define top dir
# The top-level directory is stored in a small text file one level up; newlines are removed.
# The context manager closes the file handle as soon as the contents have been read.
with open('../top_dir_win.txt') as top_dir_file:
    topDir = top_dir_file.read().replace('\n', '')

In [None]:
# Specify subject and input data
subj = "subject-030"

# Directory the per-run .csv and .log files for this subject are read from
inFilePath = topDir + "/behavioural_data/fmri_runs2/" + subj + '/'

# Specify directory for output, creating it (and any missing parent directories)
# if it doesn't already exist. exist_ok=True replaces the separate existence check,
# so there is no gap between checking for the directory and creating it.
outFilePath = topDir + "/MRIanalyses/" + 'assets/' + subj + '/' + subj + "_log_files/"
os.makedirs(outFilePath, exist_ok=True)


# Load behavioural data from each run

In [None]:
# Only these columns are loaded from each run's CSV file
studyCols = [
    'cond',
    'studied_word',
    'studyWord.started',
]

# The easiest way to deal with multiple datasets is to define a function for reading in the data
def readData(run_n):
    """Load the trials for one run from its CSV file.

    Reads ``inFilePath + subj + "_quickrun" + run_n + ".csv"``, keeping only
    the columns listed in ``studyCols``, drops every row that has a NaN in any
    of those columns, renames the columns to CONDITION / WORD / WORDON, and
    tags each row with the run it came from.

    Parameters
    ----------
    run_n : str
        Run number as a string (e.g. '1'). It is concatenated into the file
        name and stored unchanged in the RUN column.

    Returns
    -------
    pandas.DataFrame
        One row per retained trial, with columns CONDITION, WORD, WORDON and
        RUN. Row labels are those of the original CSV rows (not renumbered).
    """
    inFile = inFilePath + subj + "_quickrun" + run_n + ".csv"

    df = (
        pd.read_csv(inFile, usecols=studyCols)
        # Remove NaNs
        .dropna()
        # Rename columns for convenience
        .rename(columns={'cond': 'CONDITION', 'studied_word': 'WORD', 'studyWord.started': 'WORDON'})
    )

    # Add run # (kept as the string that was passed in)
    df['RUN'] = run_n

    return df

# One frame per run; the run number is passed as a string because readData
# concatenates it into the file name
df1 = readData(run_n='1')
df2 = readData(run_n='2')
df3 = readData(run_n='3')
df4 = readData(run_n='4')

# Adjust stimulus timing
Note that stim timings are relative to hitting "start" in PsychoPy; however, there is a variable (uncontrolled) period between "start" and the actual onset of trials/scans, because the experiment begins with an instruction/"please wait" screen. Therefore, we need to make timings relative to the onset of trials, which was triggered by the MRI tech hitting "s" as they initiated the scan.

In [None]:
# Define a function to fix stim timing
def correctTimings(run_n, tr=1.8, n_dummy=5, run_duration=180):
    """Re-reference one run's word onsets to the start of scanning.

    Looks up the time of the "Keypress: s" entry in the run's log file and
    subtracts it, plus the time taken by the dummy scans, from WORDON. Two
    columns are added to the run's dataframe:

    * ``WORDON_adj``     - WORDON - (keypress time + tr * n_dummy)
    * ``WORDON_4concat`` - WORDON_adj shifted by ``run_duration`` for every
      preceding run, for use when runs are concatenated.

    The dataframe is the module-level variable named ``'df' + run_n``. It is
    modified in place, so the returned object is that same frame.

    Parameters
    ----------
    run_n : str
        Run number as a string (e.g. '1'); used to build the log file name,
        to find the dataframe, and (converted to a number) to count
        preceding runs.
    tr : float, optional
        Duration of one TR, in the same units as WORDON. Default 1.8.
    n_dummy : int, optional
        Number of dummy scans (TRs) to subtract. Default 5.
    run_duration : float, optional
        Offset added per preceding run (default 180, i.e. 100 TRs of 1.8).

    Returns
    -------
    pandas.DataFrame
        The run's dataframe with WORDON_adj and WORDON_4concat added.

    Raises
    ------
    ValueError
        If the log file contains no line with "Keypress: s".
    NameError
        If no module-level dataframe named ``'df' + run_n`` exists.
    """
    # Read in log file (which tells us exactly when the "s" was pressed, relative to PsychoPy starting)
    log_file = inFilePath + subj + '_quickrun' + run_n + '.log'

    # Pull out the line(s) containing "Keypress: s" (there should only be one instance in the whole run).
    # The context manager closes the log file once it has been scanned.
    # NOTE(review): this is a substring match, so a key whose name merely starts with "s"
    # would also match - confirm the logs never contain such keypresses.
    with open(log_file) as f:
        start_lines = [line for line in f if "Keypress: s" in line]

    if not start_lines:
        raise ValueError('No "Keypress: s" line found in ' + log_file)
    if len(start_lines) > 1:
        # As before, the last matching line wins; flag it so the run can be checked by hand.
        print("WARNING:", len(start_lines), '"Keypress: s" lines found in', log_file, "- using the last one")
    startLine = start_lines[-1]

    # Extract timing (the first tab-separated field of the line)
    absStart = float(startLine.split("\t")[0])

    # Call the df for this run: a plain lookup by name in the module namespace (no eval needed)
    df_name = 'df' + run_n
    try:
        df = globals()[df_name]
    except KeyError:
        raise NameError("name '" + df_name + "' is not defined") from None

    # Subtract the absolute start time from WORDON. Also subtract the time for dummy scans
    # (by default 5 TRs, with each TR being 1.8 s)
    df['WORDON_adj'] = df['WORDON'] - (absStart + (tr * n_dummy))

    # Assuming we will concatenate runs, we need to adjust the timing of runs 2, 3, and 4 as if
    # these trials began immediately after the last run. By default each run contains 100 TRs
    # of 1.8 s, so 180 s is added for every preceding run.
    n_preceding = float(run_n) - 1
    df['WORDON_4concat'] = df['WORDON_adj'] + (run_duration * n_preceding)

    # Return the adjusted dataframe
    return df
            
# Use the function to adjust stim timing in each df. Note that the first timepoint should always begin at 
# approximately 1.5 seconds, because the first cue (which ought to start at timepoint 0) lasts 1.5 seconds.
# correctTimings adds its columns to the existing per-run frame and returns that same frame
df1_adj = correctTimings(run_n='1')
df2_adj = correctTimings(run_n='2')
df3_adj = correctTimings(run_n='3')
df4_adj = correctTimings(run_n='4')



In [None]:
# Take a look
# df2_adj

In [None]:
# Stack the adjusted frames from the 4 runs, in run order
run_frames = [df1_adj, df2_adj, df3_adj, df4_adj]
df = pd.concat(run_frames)

# Every row gets the same word duration
df['wordDuration'] = 2.5

# Renumber the rows 0..n-1, discarding the per-run row labels carried over from each frame
df = df.reset_index(drop=True)

# Quality checking! 
Confirm that each word appears 4 times, and that each word is always in the same condition

In [None]:
# df should hold exactly 120 rows (30 words X 4 runs); report what was found
n_rows = len(df)
if n_rows != 120:
    print("WARNING - there are", n_rows, "rows in df!")
else:
    print("120 rows in df - all good!")

In [None]:
# Confirm that every word occurs 4 times (will only show printout if there is a problem).
# Occurrences are counted once for the whole column, and each offending word is reported
# a single time rather than once per row it appears in.
word_counts = df['WORD'].value_counts()
for word, n_seen in word_counts[word_counts != 4].items():
    print('WARNING: ', word, "appears", n_seen, "times!")

In [None]:
# Confirm that every word is always in the same condition.

# To do this, we need to make a copy of df containing only WORD and CONDITION
df_debug = df[['WORD','CONDITION']].copy()

# Optional: Try artificially messing with word-condition mapping, to test whether this works
# df_debug.loc[df_debug.index[[0,5,7,11,19,55]], 'CONDITION'] = 'aloud'

# Count the distinct conditions each word was seen in. Any word with more than one is a
# problem, however its trials are split between conditions (looking only for
# WORD/CONDITION pairs that occur once would miss e.g. a 2/2 split).
conds_per_word = df_debug.groupby('WORD')['CONDITION'].nunique()
x = conds_per_word[conds_per_word > 1]

if x.empty:
    print("Each word is always in the same condition")
else:
    print("WARNING:",len(x)," word(s) appeared in two conditions")
    # List the offending words so they can be inspected directly
    print("Affected word(s):", list(x.index))


In [None]:
# We can also check manually, if desired
# df.sort_values(["WORD","CONDITION"])

# Generate condition-wise output log files for FSL


In [None]:
# Loop through runs and conditions, writing one tab-separated, header-less file per
# run/condition pair with three columns: WORDON_adj, wordDuration and a constant 1
conditions = ['aloud','silent']
runs = ['1','2','3','4']

for run in runs:
    for cond in conditions:
        # Pick rows and columns in a single .loc call and take a copy, so that the column
        # added below goes onto an independent frame rather than a slice derived from df
        # (the chained form is what triggers pandas' SettingWithCopyWarning)
        in_cell = (df['CONDITION'] == cond) & (df['RUN'] == run)
        dfTmp = df.loc[in_cell, ['WORDON_adj','wordDuration']].copy()

        # The file is still written when nothing matches, but say so
        if dfTmp.empty:
            print("WARNING: no", cond, "trials found in run", run, "- the output file will be empty")

        # Constant third column (presumably the weight of FSL's 3-column format - confirm)
        dfTmp['col3'] = 1
        dfTmp.to_csv(outFilePath + '/' + subj + '_quickread' + str(run) + '_' + cond + '_alltrials.txt', sep='\t', header=False, index=False)

In [None]:
# # Do the same for concatenated runs
# for cond in conditions:
#     dfTmp = df[df['CONDITION'].isin([cond])][['WORDON_4concat','wordDuration']]
#     dfTmp['col3'] = 1
#     dfTmp.to_csv(outFilePath + '/' + subj + '_quickread_concat' + '_' + cond + '.txt', sep='\t', header=False, index=False) 

# Generate trial-wise output log files for FSL

In [None]:
# Save a separate log file for each word/run combination.
# wordList is kept because the commented-out cell below also refers to it
wordList = list(set(df['WORD']))

# Restrict to the conditions and runs listed above, then group by word, condition and run.
# groupby only yields combinations that actually occur in the data, so there is no need to
# try every word against both conditions and skip the empty results, and the full frame is
# filtered once instead of once per combination.
selected = df[df['CONDITION'].isin(conditions) & df['RUN'].isin(runs)]

for (word, cond, run), trials in selected.groupby(['WORD', 'CONDITION', 'RUN']):
    # Copy the two timing columns so the column added below goes onto an independent frame
    dfTmp = trials[['WORDON_adj','wordDuration']].copy()

    # Constant third column, as in the condition-wise files
    dfTmp['col3'] = 1
    dfTmp.to_csv(outFilePath + '/' + subj + '_quickread' + str(run) + '_' + cond + '_' + word + '.txt', sep='\t', header=False, index=False)


In [None]:
# Loop over words again, but this time save a log file for each word, containing times for all 4 runs
# for word in wordList:
#     for cond in conditions:
#         for run in runs:
#             dfTmp = df[df['CONDITION'].isin([cond]) & df['WORD'].isin([word])][['WORDON_4concat','wordDuration']]
            
#             # Python will try to make a file for every possible combination of word & condition. This means we get 2X the number of
#             # files we want, because it tries to match every word to both silent and aloud, resulting in one empty dfTmp for every word. To avoid
#             # this, skip every iteration where dfTmp is empty.
#             if dfTmp.empty:
#                 continue
            
#             dfTmp['col3'] = 1
#             dfTmp.to_csv(trialWisePath_concat + '/' + subj + '_quickread_concat' + '_' + cond + '_' + word + '.txt', sep='\t', header=False, index=False) 
