# Data Preprocessing for Experiment

In [1]:
import os
import warnings

import numpy as np
import pandas as pd

In [2]:
# Folder that holds the raw log files for this experiment.
# Defaults to the original machine-specific location; set the ATTMEM_RAW_DATA
# environment variable to point the notebook at a different folder without
# editing this cell.
path = os.environ.get(
    'ATTMEM_RAW_DATA',
    r'C:\Users\L\Google Drive\PhD\experiments\ATTMEM\ATTMEM_pilot_0\experiment\raw_data',
)


In [3]:
# Collect the full path of every '.log' file in the raw-data folder.
# - sorted(): os.listdir returns entries in arbitrary order, so sorting makes
#   the order in which files are later read and concatenated reproducible.
# - endswith('.log'): requires a real '.log' suffix (a bare entry named 'log'
#   is no longer picked up).
# - The backslash separator is kept on purpose: the filename parsing further
#   down splits each path on '\\'.
files = [f'{path}\\{f}' for f in sorted(os.listdir(path)) if f.endswith('.log')]


In [5]:
#%% Aggregate the data
# Read every log file, tag its rows with metadata taken from the filename and
# from the file header, trim the rows that precede the first stimulus event,
# and collect the resulting frames per experiment phase.
encoding_files = []
retrieval_files = []

for f in files:

    # Parse the filename. As used below, the stem is split on '_':
    #   part 0   -> '<subject>-<version>' (both parsed as integers)
    #   part 2   -> experiment phase
    #   last part -> block number (integer)
    fname = f.split('\\')[-1].split('.')[0]
    fparts = fname.split('_')
    subject_number, exp_version = [int(p) for p in fparts[0].split('-')]
    experiment_phase = fparts[2]
    block_number = int(fparts[-1])

    # Get the file write date & time from the second line of the file.
    # Reset for every file: otherwise a file without the expected header would
    # silently reuse the timestamp of the previously processed file (or raise
    # NameError if it happened to be the first file).
    date = time = None
    with open(f) as fp:
        fp.readline()                 # first line is not used
        header_line = fp.readline()   # second line; '' if the file is shorter
    if 'Logfile written' in header_line:
        date, time = header_line.strip().split(' ')[-2:]

    if date is None:
        warnings.warn(
            f'No "Logfile written" header found on line 2 of {f}; '
            'write_time is set to NaT for this file.'
        )
        write_time = None
    else:
        write_time = date + ' ' + time

    # The tabular section is read after skipping the first three lines.
    df = pd.read_csv(f, sep='\t', skiprows=3)

    # Assign first, then convert the column, so the resulting dtype matches
    # what the Series-based conversion has always produced (None -> NaT).
    df['write_time'] = write_time
    df['write_time'] = pd.to_datetime(df['write_time'])
    df['version'] = exp_version
    df['phase'] = experiment_phase
    df['block'] = block_number

    # Strip out first few rows until first stimulus event: the first 'Picture'
    # event whose code is not 'GETTING READY'.
    # start_idx is an index *label*, so slice by label with .loc (equivalent to
    # the positional slice for the default RangeIndex produced by read_csv).
    # NOTE(review): a file with no such event raises IndexError here.
    start_idx = df.loc[(df['Event Type']=='Picture') & (df['Code']!='GETTING READY')].index[0]
    df = df.loc[start_idx:]

    # Files from any other phase are intentionally left out of both lists.
    if experiment_phase == 'encoding':
        encoding_files.append(df)
    elif experiment_phase == 'retrieval':
        retrieval_files.append(df)

# Concatenate all files together (df1: encoding phase, df2: retrieval phase)
df1 = pd.concat(encoding_files).reset_index(drop=True)
df2 = pd.concat(retrieval_files).reset_index(drop=True)


In [6]:
# Preview the first rows of the concatenated encoding-phase data.
df1.head()

Unnamed: 0,Subject,Trial,Event Type,Code,CONDITION(num),SCREEN_POSITION(num),IMAGE_ID(num),TARGET_NAME(str),DIST_1_NAME(str),DIST_2_NAME(str),...,Duration,Uncertainty.1,ReqTime,ReqDur,Stim Type,Pair Index,write_time,version,phase,block
21,1.0,6.0,Picture,"3,1,5,honeybee,crocodile,ladybug02,zebra",3.0,1.0,5.0,honeybee,crocodile,ladybug02,...,27160.0,1.0,13000.0,27000.0,hit,24.0,2019-11-03 08:14:36,1,encoding,1
22,1.0,6.0,Pulse,99,,,,,,,...,,,,,,,2019-11-03 08:14:36,1,encoding,1
23,1.0,6.0,Response,1,,,,,,,...,,,,,,,2019-11-03 08:14:36,1,encoding,1
24,1.0,6.0,Pulse,99,,,,,,,...,,,,,,,2019-11-03 08:14:36,1,encoding,1
25,1.0,6.0,Pulse,99,,,,,,,...,,,,,,,2019-11-03 08:14:36,1,encoding,1
