# Crossmodal Capture Study Data Cleaning

In [1]:
#importing necessary packages
import pandas as pd
import numpy as np
from glob import glob
import AnalysisHelper as ah

Import Behavioral Data

In [2]:
# Load the raw behavioral data from the study's data folder into the MSDraw dataframe.
# NOTE(review): presumably ah.import_all_csvs reads and concatenates every CSV in the
# folder — confirm in AnalysisHelper.
# NOTE(review): absolute, machine-specific path; edit it before running on another computer.
MSDraw = ah.import_all_csvs('/Users/sheaduarte/Desktop/MSduck_frog22220/data/')

In [3]:
# Accuracy flag: 'corrCheck' is True on every row where the key that was pressed
# ('key_resp_2.keys') equals the expected answer ('corrAns'), and False otherwise.
MSDraw['corrCheck'] = MSDraw['corrAns'].eq(MSDraw['key_resp_2.keys'])


In [4]:
# Drop the practice trials (expLab == 'prac') and keep only the columns listed below.
# MSD_minusPrac still contains outliers and incorrect trials; only the practice
# trials have been removed.
MSD_minusPrac = MSDraw.loc[
    MSDraw['expLab'] != 'prac',
    [
        'targetAnim', 'picL', 'picR', 'visualR', 'visualL', 'condLabel',
        'corrAns', 'audioPos', 'exp1a.thisRepN', 'exp1a.thisTrialN',
        'exp1a.thisN', 'exp1a.thisIndex', 'key_resp_2.keys',
        'key_resp_2.rt', 'date', 'frameRate', 'expName', 'participant',
        'corrCheck',
    ],
]

In [5]:
# Swap the raw column names for more readable labels.
# Columns that are not listed here keep their original names.
MSD_minusPrac = MSD_minusPrac.rename(
    columns={
        'picL': 'left anim',
        'picR': 'right anim',
        'visualR': 'right landolt',
        'visualL': 'left landolt',
        'condLabel': 'condition',
        'exp1a.thisRepN': 'repetition',
        'exp1a.thisTrialN': 'trial in rep',
        'exp1a.thisN': 'trial',
        'exp1a.thisIndex': 'trial idx',
        'key_resp_2.keys': 'key pressed',
        'key_resp_2.rt': 'reaction time',
    }
)

In [6]:
# Apply the AnalysisHelper function ah.SjAcc to each participant's rows to get
# per-participant accuracy (it relies on the 'corrCheck' column created above —
# TODO confirm in AnalysisHelper).
# NOTE(review): this runs on MSDraw, which still includes the practice trials
# (they are only filtered out of MSD_minusPrac) — confirm that is intended.
MSDraw.groupby('participant').apply(ah.SjAcc)

0.9629629629629629
0.9894179894179894
0.9488536155202821
0.9541446208112875
0.9453262786596119
0.9858906525573192
0.9876543209876543
0.9682539682539683
0.9929453262786596
0.9717813051146384


In [7]:
# Run ah.remove_outliers on the 'reaction time' column separately for every
# participant x condition group.
MSDall = (
    MSD_minusPrac
    .groupby(['participant', 'condition'])
    .apply(ah.remove_outliers, var='reaction time')
)
# Shift trial numbers by one so that they start at 1 instead of 0.
MSDall['trial'] = MSDall['trial'] + 1
# MSDall holds correct and incorrect trials; MSD_behavioral keeps only the
# correct ones, which is what most analyses should use.
MSD_behavioral = MSDall[MSDall['corrCheck'].eq(True)]

In [8]:
# Give both dataframes a fresh 0..n-1 index; the previous index is discarded.
MSDall, MSD_behavioral = (
    frame.reset_index(drop=True) for frame in (MSDall, MSD_behavioral)
)
# Save the full trial set (correct and incorrect trials) to disk.
MSDall.to_csv('/Users/sheaduarte/Desktop/MSduck_frog22220/MSDall.csv')
# Show the dataframe as the cell output.
MSDall

Unnamed: 0,targetAnim,left anim,right anim,right landolt,left landolt,condition,corrAns,audioPos,repetition,trial in rep,trial,trial idx,key pressed,reaction time,date,frameRate,expName,participant,corrCheck
0,frog,duckFront.png,frogFront.png,landoltCTop.png,landoltCL.png,Hcong,h,ribbitR.wav,0.0,13.0,14.0,60.0,h,0.800653,2020_Feb_26_0910,59.952707,duckFrog,2,True
1,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCL.png,Hcong,b,ribbitR.wav,0.0,18.0,19.0,36.0,b,0.800833,2020_Feb_26_0910,59.952707,duckFrog,2,True
2,frog,frogFront.png,duckFront.png,landoltCR.png,landoltCTop.png,Hcong,h,ribbitL.wav,0.0,23.0,24.0,40.0,h,0.483786,2020_Feb_26_0910,59.952707,duckFrog,2,True
3,frog,frogFront.png,duckFront.png,landoltCL.png,landoltCBottom.png,Hcong,b,ribbitL.wav,0.0,26.0,27.0,64.0,b,0.850971,2020_Feb_26_0910,59.952707,duckFrog,2,True
4,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCL.png,Hcong,b,ribbitR.wav,0.0,27.0,28.0,68.0,b,0.667272,2020_Feb_26_0910,59.952707,duckFrog,2,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5376,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCR.png,LinCongWN,b,wnL.wav,5.0,69.0,470.0,23.0,b,0.967617,2020_Mar_11_1110,59.950336,duckFrog,16,True
5377,frog,frogFront.png,duckFront.png,landoltCL.png,landoltCTop.png,LinCongWN,h,wnR.wav,6.0,4.0,485.0,27.0,h,1.200920,2020_Mar_11_1110,59.950336,duckFrog,16,True
5378,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCR.png,LinCongWN,b,wnL.wav,6.0,5.0,486.0,23.0,b,1.067533,2020_Mar_11_1110,59.950336,duckFrog,16,True
5379,frog,duckFront.png,frogFront.png,landoltCTop.png,landoltCR.png,LinCongWN,h,wnL.wav,6.0,40.0,521.0,31.0,h,1.267934,2020_Mar_11_1110,59.950336,duckFrog,16,True


Import Eyetracking Data

In [9]:
# Import every eyetracking CSV from the eyetracking_data folder into one raw dataframe.
# The file list is sorted because glob() returns paths in arbitrary, OS-dependent
# order; sorting keeps the row order of MSDeyeRaw identical from run to run.
MSDeyeFiles = sorted(glob('/Users/sheaduarte/Desktop/MSduck_frog22220/eyetracking_data/*.csv'))
# Fail with a clear message instead of pd.concat's generic "No objects to concatenate".
if not MSDeyeFiles:
    raise FileNotFoundError(
        'No eyetracking CSV files found in '
        '/Users/sheaduarte/Desktop/MSduck_frog22220/eyetracking_data/'
    )
# Each file is read separately and the frames are stacked row-wise; the per-file
# row indices are kept as they are (no ignore_index), matching the original behavior.
MSDeyeRaw = pd.concat([pd.read_csv(f) for f in MSDeyeFiles])

In [10]:
# Replace the long upper-case column names of the eyetracking export with shorter,
# more readable ones. 'participant' and 'trial' are the names later used as merge
# keys against the behavioral data.
MSDeyeRaw = MSDeyeRaw.rename(
    {
        'CURRENT_FIX_DURATION': 'current_fix_dur',
        'CURRENT_FIX_INDEX': 'current_fix_idx',
        'CURRENT_FIX_INTEREST_AREA_DWELL_TIME': 'current_IA_dwell',
        'CURRENT_FIX_INTEREST_AREA_FIX_COUNT': 'current_IA_fix_count',
        'CURRENT_FIX_INTEREST_AREA_ID': 'current_IA_id',
        'CURRENT_FIX_INTEREST_AREA_INDEX': 'current_IA_idx',
        'CURRENT_FIX_INTEREST_AREA_LABEL': 'current_fix_label',
        'CURRENT_FIX_NEAREST_INTEREST_AREA': 'nearest_IA',
        'CURRENT_FIX_NEAREST_INTEREST_AREA_DISTANCE': 'nearest_IA_distance',
        'CURRENT_FIX_NEAREST_INTEREST_AREA_LABEL': 'nearest_IA_label',
        'CURRENT_FIX_RUN_DWELL_TIME': 'current_fix_run_dwell',
        'CURRENT_FIX_START': 'current_fix_start',
        'RECORDING_SESSION_LABEL': 'participant',
        'TRIAL_FIXATION_TOTAL': 'total_fixations',
        'TRIAL_INDEX': 'trial',
    },
    axis='columns',
)

In [11]:
# Build the eyetracking dataframe with ah.EyeTracking_MasterDF, passing the
# (renamed) column names it should use for the fixation index, nearest
# interest-area label, fixation duration and per-trial fixation total.
MSD_eye = ah.EyeTracking_MasterDF(
    MSDeyeRaw,
    currentFixationIdx='current_fix_idx',
    nearestIAVar='nearest_IA_label',
    currentFixDurationVar='current_fix_dur',
    fixationTotalVar='total_fixations',
)

Merge behavioral and eye data

In [12]:
# Join the eyetracking data with the correct-trial behavioral data, matching rows
# on trial number and participant. how='outer' keeps rows from either side that
# have no partner in the other.
# NOTE(review): rows only match when 'trial' and 'participant' hold the same
# values on both sides — confirm the eyetracking labels line up with the
# behavioral ones.
MSD = pd.merge(
    left=MSD_eye,
    right=MSD_behavioral,
    how='outer',
    on=['trial', 'participant'],
)

In [13]:
# Create the CSV of clean data: write the merged eyetracking + behavioral
# dataframe (MSD) to disk.
# NOTE(review): to_csv also writes the dataframe index as an unnamed first column
# by default; pass index=False if that column is not wanted downstream.
MSD.to_csv('/Users/sheaduarte/Desktop/MSduck_frog22220/MSD.csv')