# Crossmodal Capture Study Data Cleaning

In [1]:
#importing necessary packages
import pandas as pd
import numpy as np
from glob import glob
import os

In [2]:
# Switch the working directory to the folder that contains AnalysisHelper.py.
# NOTE(review): presumably this is what lets `import AnalysisHelper` in the next
# cell resolve -- confirm. The path is an absolute, machine-specific location, so
# it has to be edited before the notebook can run anywhere else.
os.chdir(os.path.dirname(os.path.abspath('/Users/sheaduarte/Desktop/School/PSC290python/final/AnalysisHelper.py')))

In [3]:
import AnalysisHelper as ah
from AnalysisHelper import EyeTrackingHelper

### Import Behavioral Data

In [4]:
# Load the raw multisensory experiment (MSD) behavioral data from the
# behavioral data folder via the project helper.
# NOTE(review): import_all_csvs presumably reads every CSV in the folder into a
# single DataFrame -- confirm in AnalysisHelper.
MSDraw = ah.import_all_csvs('/Users/sheaduarte/Desktop/School/PSC290python/final/duarte/data/behavioral')

In [5]:
# Accuracy flag: True on rows where the recorded key press ('key_resp_2.keys')
# equals the expected answer ('corrAns'), False everywhere else.
MSDraw['corrCheck'] = MSDraw['corrAns'].eq(MSDraw['key_resp_2.keys'])

Clean it up:
- filter columns
- rename columns
- add some grouping columns that I forgot to specify when building the experiment

In [6]:
# Keep only rows whose expLab is not 'prac' (i.e. drop the practice trials) ...
MSD_minusPrac = MSDraw.loc[MSDraw['expLab'] != 'prac']
# ... then pass the frame through the keep-columns file and the rename file.
MSD_minusPrac = ah.filter_columns(
    '/Users/sheaduarte/Desktop/School/PSC290python/final/duarte/Shea_keep_columns.txt',
    MSD_minusPrac,
)
MSD_minusPrac = ah.rename_columns(
    '/Users/sheaduarte/Desktop/School/PSC290python/final/duarte/Shea_column_rename.txt',
    MSD_minusPrac,
)

In [7]:
# add sound type column
def Add_SoundTypeCol(df, condition = 'condition'):
    """Add a ``sound_type`` column labelling each row 'Animal' or 'White Noise'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the column named by ``condition``.
    condition : str, default 'condition'
        Name of the column holding the condition labels.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object; ``sound_type`` is added (or overwritten) in place.

    Notes
    -----
    Rows whose condition is 'Hcong', 'HinCong', 'Lcong' or 'LinCong' are labelled
    'Animal'. Every other row -- including rows with a missing condition -- is
    labelled 'White Noise'.
    """
    animal_conditions = ['Hcong', 'HinCong', 'Lcong', 'LinCong']
    # Look the column up by name so the `condition` argument is actually honoured
    # (it used to be ignored in favour of a hard-coded `df.condition`).
    is_animal = df[condition].isin(animal_conditions)
    df['sound_type'] = is_animal.map({True: 'Animal', False: 'White Noise'})
    return df

# Add the 'sound_type' column ('Animal' / 'White Noise') to the non-practice trials.
MSD_minusPrac = Add_SoundTypeCol(MSD_minusPrac)

def Add_SoundLocCol(df, condition = 'condition'):
    """Add a ``sound_loc`` column derived from the condition label.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the column named by ``condition``.
    condition : str, default 'condition'
        Name of the column holding the condition labels.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object; ``sound_loc`` is added (or overwritten) in place.

    Notes
    -----
    Each condition and its 'WN' variant share one label: Hcong -> 'Duck1',
    HinCong -> 'Frog1', Lcong -> 'Frog2', LinCong -> 'Duck2'. Values that are not
    in the mapping (including missing values) are carried over unchanged.
    """
    location_by_condition = {
        'Hcong': 'Duck1', 'HcongWN': 'Duck1',
        'HinCong': 'Frog1', 'HinCongWN': 'Frog1',
        'Lcong': 'Frog2', 'LcongWN': 'Frog2',
        'LinCong': 'Duck2', 'LinCongWN': 'Duck2',
    }
    # Look the column up by name so the `condition` argument is actually honoured
    # (it used to be ignored in favour of a hard-coded df['condition']).
    # `replace` (not `map`) is used so unmapped values pass through untouched.
    df['sound_loc'] = df[condition].replace(location_by_condition)
    return df
# Add the 'sound_loc' column, then display the frame to inspect the result.
MSD_minusPrac = Add_SoundLocCol(MSD_minusPrac)
MSD_minusPrac

Unnamed: 0,targetAnim,left_anim,right_anim,right_landolt,left_landolt,condition,corrAns,audioPos,repetition,trial_in_rep,...,trial_idx,key_pressed,reaction_time,date,frameRate,expName,participant,corrCheck,sound_type,sound_loc
0,,,,,,,,,,,...,,,,2020_Mar_04_1508,59.971444,duckFrog,12,False,White Noise,
6,,,,,,,,,,,...,,,,2020_Mar_04_1508,59.971444,duckFrog,12,False,White Noise,
7,frog,duckFront.png,frogFront.png,landoltCTop.png,landoltCL.png,Hcong,h,ribbitR.wav,0.0,0.0,...,28.0,h,0.684346,2020_Mar_04_1508,59.971444,duckFrog,12,True,Animal,Duck1
8,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCR.png,HinCongWN,b,wnL.wav,0.0,1.0,...,23.0,b,1.418252,2020_Mar_04_1508,59.971444,duckFrog,12,True,White Noise,Frog1
9,frog,duckFront.png,frogFront.png,landoltCTop.png,landoltCL.png,HinCong,h,quackL.wav,0.0,2.0,...,46.0,h,1.218290,2020_Mar_04_1508,59.971444,duckFrog,12,True,Animal,Frog1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
562,duck,frogFront.png,duckFront.png,landoltCTop.png,landoltCR.png,HinCong,h,ribbitL.wav,6.0,75.0,...,6.0,h,0.784165,2020_Feb_26_1011,59.958637,duckFrog,3,True,Animal,Frog1
563,duck,frogFront.png,duckFront.png,landoltCTop.png,landoltCL.png,HcongWN,h,wnR.wav,6.0,76.0,...,5.0,h,1.051047,2020_Feb_26_1011,59.958637,duckFrog,3,True,White Noise,Duck1
564,frog,duckFront.png,frogFront.png,landoltCTop.png,landoltCL.png,LinCong,h,quackL.wav,6.0,77.0,...,30.0,h,0.784375,2020_Feb_26_1011,59.958637,duckFrog,3,True,Animal,Duck2
565,duck,frogFront.png,duckFront.png,landoltCBottom.png,landoltCR.png,HinCong,b,ribbitL.wav,6.0,78.0,...,62.0,b,0.650831,2020_Feb_26_1011,59.958637,duckFrog,3,True,Animal,Frog1


In [8]:
# quick accuracy check: ah.SjAcc is applied once per participant group
# NOTE(review): the values shown in the output look like per-participant
# proportions of correct trials -- confirm against ah.SjAcc.
MSD_minusPrac.groupby('participant').apply(ah.SjAcc)

0.9626334519572953
0.9893238434163701
0.9483985765124555
0.9537366548042705
0.9466192170818505
0.9857651245551602
0.9875444839857651
0.9679715302491103
0.9928825622775801
0.9750889679715302


In [9]:
# Outlier removal: ah.remove_outliers is run on reaction_time separately for
# every participant x condition group.
MSDall = (
    MSD_minusPrac
    .groupby(['participant', 'condition'])
    .apply(ah.remove_outliers, var='reaction_time')
)

# Shift every trial number up by one.
# NOTE(review): presumably so trial numbers line up with the eye-tracking data,
# whose trials start at 1 -- confirm.
MSDall['trial'] = MSDall['trial'] + 1

# Correct-response trials only, with a fresh 0..n-1 index.
MSD_behavioral = MSDall[MSDall['corrCheck'] == True].reset_index(drop=True)

# Give the full (correct + incorrect) frame a fresh 0..n-1 index as well.
MSDall = MSDall.reset_index(drop=True)

# Write the outlier-cleaned trials to the interim data folder.
MSDall.to_csv('/Users/sheaduarte/Desktop/School/PSC290python/final/duarte/data/interim/MSDall.csv')

In [10]:
# Display the correct-trial behavioral frame for a visual sanity check.
MSD_behavioral

Unnamed: 0,targetAnim,left_anim,right_anim,right_landolt,left_landolt,condition,corrAns,audioPos,repetition,trial_in_rep,...,trial_idx,key_pressed,reaction_time,date,frameRate,expName,participant,corrCheck,sound_type,sound_loc
0,frog,duckFront.png,frogFront.png,landoltCTop.png,landoltCL.png,Hcong,h,ribbitR.wav,0.0,13.0,...,60.0,h,0.800653,2020_Feb_26_0910,59.952707,duckFrog,2,True,Animal,Duck1
1,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCL.png,Hcong,b,ribbitR.wav,0.0,18.0,...,36.0,b,0.800833,2020_Feb_26_0910,59.952707,duckFrog,2,True,Animal,Duck1
2,frog,frogFront.png,duckFront.png,landoltCR.png,landoltCTop.png,Hcong,h,ribbitL.wav,0.0,23.0,...,40.0,h,0.483786,2020_Feb_26_0910,59.952707,duckFrog,2,True,Animal,Duck1
3,frog,frogFront.png,duckFront.png,landoltCL.png,landoltCBottom.png,Hcong,b,ribbitL.wav,0.0,26.0,...,64.0,b,0.850971,2020_Feb_26_0910,59.952707,duckFrog,2,True,Animal,Duck1
4,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCL.png,Hcong,b,ribbitR.wav,0.0,27.0,...,68.0,b,0.667272,2020_Feb_26_0910,59.952707,duckFrog,2,True,Animal,Duck1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5253,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCR.png,LinCongWN,b,wnL.wav,5.0,69.0,...,23.0,b,0.967617,2020_Mar_11_1110,59.950336,duckFrog,16,True,White Noise,Duck2
5254,frog,frogFront.png,duckFront.png,landoltCL.png,landoltCTop.png,LinCongWN,h,wnR.wav,6.0,4.0,...,27.0,h,1.200920,2020_Mar_11_1110,59.950336,duckFrog,16,True,White Noise,Duck2
5255,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCR.png,LinCongWN,b,wnL.wav,6.0,5.0,...,23.0,b,1.067533,2020_Mar_11_1110,59.950336,duckFrog,16,True,White Noise,Duck2
5256,frog,duckFront.png,frogFront.png,landoltCTop.png,landoltCR.png,LinCongWN,h,wnL.wav,6.0,40.0,...,31.0,h,1.267934,2020_Mar_11_1110,59.950336,duckFrog,16,True,White Noise,Duck2


### Import Eyetracking Data

In [11]:
# Load the raw eye-tracking data from the eyetracking data folder via the same
# project helper used for the behavioral data.
# NOTE(review): import_all_csvs presumably reads every CSV in the folder into a
# single DataFrame -- confirm in AnalysisHelper.
MSDeyeRaw = ah.import_all_csvs('/Users/sheaduarte/Desktop/School/PSC290python/final/duarte/data/eyetracking')

In [12]:
# Filter, then rename, the eye-tracking columns using the eye-tracking-specific
# keep-columns and rename-columns text files.
# Note: MSDeyeRaw is overwritten here, so the unfiltered frame is only
# recoverable by re-running the import cell.
MSDeyeRaw = ah.filter_columns('/Users/sheaduarte/Desktop/School/PSC290python/final/duarte/SheaEyetracking_keep_columns.txt', MSDeyeRaw)
MSDeyeRaw = ah.rename_columns('/Users/sheaduarte/Desktop/School/PSC290python/final/duarte/SheaEyetracking_rename_columns.txt', MSDeyeRaw)

In [13]:
# Wrap the filtered eye-tracking frame in an EyeTrackingHelper object and
# display the frame the helper exposes as RawEyeDF.
MSDeye = ah.EyeTrackingHelper(MSDeyeRaw)
MSDeye.RawEyeDF

Unnamed: 0,participant,trial,total_fixations,current_fix_dur,current_fix_idx,current_IA_id,current_IA_label,running_dwell_time,current_fix_IA_fixcount,current_fix_dur.1,current_fix_start_time,nearest_IA,nearest_IA_distance,nearest_IA_label
0,4,1,7,32,1,.,.,.,.,.,0,.,.,.
1,4,1,7,1256,2,.,.,.,.,.,48,.,.,.
2,4,1,7,352,3,.,.,.,.,.,1380,.,.,.
3,4,1,7,428,4,.,.,.,.,.,1752,.,.,.
4,4,1,7,288,5,.,.,.,.,.,2828,.,.,.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2038,10,559,3,176,3,2,targetIA,394,2,394,1110,2,1.11,targetIA
2039,10,560,4,444,1,1,fixationIA,444,1,444,4,1,0.58,fixationIA
2040,10,560,4,286,2,2,targetIA,634,3,634,526,2,0.66,targetIA
2041,10,560,4,136,3,2,targetIA,634,3,634,828,2,1.11,targetIA


In [14]:
# Build the master eye-tracking DataFrame from the data held by MSDeye.
# Note: this study uses a fixation interest area, so the result is kept only as a
# temporary frame (`tmp`); the next cell passes it to Add_First_Fixation to apply
# the first-fixation method as well.
tmp = MSDeye.CleanEyeTracking_MasterDF(currentFixationIdx = 'current_fix_idx', nearestIAVar = 'nearest_IA_label', currentFixDurationVar='current_fix_dur', fixationTotalVar='total_fixations')

In [15]:
# Run the first-fixation step on the temporary master frame, telling the helper
# which columns hold fixations 1-3 and their durations.
# NOTE(review): the merged table shown later has `first_fixation` and
# `first_fix_dwell` columns, which presumably come from this step -- confirm in
# AnalysisHelper.
MSD_eye = MSDeye.Add_First_Fixation(df = tmp, fixation1Var = 'fixation1',fixation2Var= 'fixation2', fixation3Var='fixation3', fixDuration1='fix_dur_1', fixDuration2='fix_dur_2', fixDuration3='fix_dur_3')

### Merge behavioral and eye tracking data

In [22]:
# Join eye-tracking and behavioral rows on (trial, participant). Because the
# join is 'outer', a trial present in only one of the two frames is kept and
# the columns from the other frame are left as NaN for it.
MSD = MSD_eye.merge(MSD_behavioral, how = 'outer', on = ['trial', 'participant'])

# Persist the merged table to the processed-data folder.
MSD.to_csv('/Users/sheaduarte/Desktop/School/PSC290python/final/duarte/data/processed/MSD.csv')

# (rows, columns) of the merged table
MSD.shape

(5600, 33)

In [25]:
# check over the data
# Lift pandas' column display limit so every column of the merged frame is shown.
# Note: set_option changes the setting for the rest of the session, not just for
# this cell.
pd.set_option('display.max_columns', None)
MSD.head(20)

Unnamed: 0,trial,participant,total_fixations,latency,fixation0,fixation1,fix_dur_1,fixation2,fix_dur_2,fixation3,fix_dur_3,fixation4,first_fixation,first_fix_dwell,targetAnim,left_anim,right_anim,right_landolt,left_landolt,condition,corrAns,audioPos,repetition,trial_in_rep,trial_idx,key_pressed,reaction_time,date,frameRate,expName,corrCheck,sound_type,sound_loc
0,1,4,7,32,.,.,1256.0,.,352.0,.,428.0,.,.,1256.0,,,,,,,,,,,,,,,,,,,
1,2,4,10,1632,fixationIA,targetIA,136.0,targetIA,204.0,targetIA,540.0,targetIA,targetIA,136.0,,,,,,,,,,,,,,,,,,,
2,3,4,4,1148,fixationIA,targetIA,180.0,targetIA,280.0,targetIA,428.0,,targetIA,180.0,frog,frogFront.png,duckFront.png,landoltCL.png,landoltCTop.png,HcongWN,h,wnL.wav,0.0,2.0,41.0,h,1.568276,2020_Feb_26_1123,59.966808,duckFrog,True,White Noise,Duck1
3,4,4,4,1332,fixationIA,distIA,576.0,targetIA,132.0,targetIA,1228.0,,distIA,576.0,,,,,,,,,,,,,,,,,,,
4,5,4,5,1276,fixationIA,targetIA,652.0,targetIA,140.0,targetIA,344.0,distIA,targetIA,652.0,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCR.png,HcongWN,b,wnR.wav,0.0,4.0,37.0,b,2.20214,2020_Feb_26_1123,59.966808,duckFrog,True,White Noise,Duck1
5,6,4,5,996,fixationIA,distIA,596.0,targetIA,100.0,targetIA,168.0,targetIA,distIA,596.0,frog,duckFront.png,frogFront.png,landoltCTop.png,landoltCR.png,HinCongWN,h,wnL.wav,0.0,5.0,63.0,h,2.135226,2020_Feb_26_1123,59.966808,duckFrog,True,White Noise,Frog1
6,7,4,6,1196,fixationIA,distIA,220.0,distIA,268.0,targetIA,164.0,targetIA,distIA,220.0,duck,duckFront.png,frogFront.png,landoltCR.png,landoltCBottom.png,LinCong,b,ribbitR.wav,0.0,6.0,10.0,b,1.868055,2020_Feb_26_1123,59.966808,duckFrog,True,Animal,Duck2
7,8,4,4,992,fixationIA,distIA,1024.0,targetIA,168.0,targetIA,476.0,,distIA,1024.0,frog,duckFront.png,frogFront.png,landoltCBottom.png,landoltCR.png,HinCongWN,b,wnL.wav,0.0,7.0,55.0,b,2.218382,2020_Feb_26_1123,59.966808,duckFrog,True,White Noise,Frog1
8,9,4,5,428,fixationIA,fixationIA,584.0,distIA,680.0,targetIA,176.0,targetIA,distIA,680.0,frog,frogFront.png,duckFront.png,landoltCL.png,landoltCBottom.png,HinCong,b,quackR.wav,0.0,8.0,66.0,b,1.918261,2020_Feb_26_1123,59.966808,duckFrog,True,Animal,Frog1
9,10,4,6,1008,fixationIA,distIA,200.0,distIA,300.0,targetIA,172.0,targetIA,distIA,200.0,frog,duckFront.png,frogFront.png,landoltCTop.png,landoltCL.png,HinCong,h,quackL.wav,0.0,9.0,46.0,h,1.651253,2020_Feb_26_1123,59.966808,duckFrog,True,Animal,Frog1
