In [1]:
import pandas as pd 
import numpy as np
import glob
import re

In [2]:
def bitmask_flag_change(Data_Frame, colname):
    """ Creates a list of values that flag the change in validity of the Eyetracking data, 
    diferentiates between the beggining and enf of an invalid event. 
    
Parameters
-------------
     
     :dataframe(DataFrame): your data frame,
     :colname(str): Name of the column that containts the validity mask 
returns 
----------

    List of len DataFrame with values that mark changes on the validity column
     """
    change_flag = ["No_change"] # initialize flag 
    for i in range(1, len(Data_Frame)):
        if Data_Frame[colname][i] == Data_Frame[colname][i-1]: # compare each value with the previous value
          change_flag.append("No_change") 
        elif Data_Frame[colname][i] != Data_Frame[colname][i-1]:
            if Data_Frame[colname][i] == 3:
                  change_flag.append("Invalid_Section_Ends")
            elif Data_Frame[colname][i] == 0:
                change_flag.append("Invalid_Section_Starts")
            else:
                change_flag.append("ERROR")
    return change_flag

In [3]:
def median_correction(x):
    """
    Perform 5-point median correction on input signal x.
    """
    y = np.zeros_like(x)
    length = len(x)
    for i in range(2, length-2):
        y[i] = np.nanmedian(x[i-2:i+3])
    y[0:2] = np.nanmedian(x[0:5])        
    y[length-2:length] = np.nanmedian(x[length-5:length])
    return y

In [4]:
# Since the collider names are too detailed, here we create a dictionary with patterns to classify them into our categories of interest

patterns = {
    r'\d{2}_Sa': 'Passive_Agent',
    r'\d{2}_Cma': 'Active_Agent',
    r'Building_\d+': 'Building'
}

patterns.update(dict.fromkeys([
    'Castle-TaskBuilding_56', 'Crane_59', 'HighSilo-TaskBuilding_49',
    'Windmill-TaskBuilding_10_1', 'Church-TaskBuilding_16'
], 'Global_Landmark'))

patterns.update(dict.fromkeys([
    r'^TaskBuilding_2$', r'^TaskBuilding_3$', r'^TaskBuilding_5$', r'^TaskBuilding_8$', r'^TaskBuilding_9$', 
    r'^TaskBuilding_11$', r'^TaskBuilding_13$', r'^TaskBuilding_14$', r'^TaskBuilding_20$', r'^TaskBuilding_21$', 
    r'^TaskBuilding_23$', r'^TaskBuilding_27$', r'^TaskBuilding_29$', r'^TaskBuilding_32$', r'^TaskBuilding_34$',  
    r'^TaskBuilding_38$', r'^TaskBuilding_41$', r'^TaskBuilding_42$', r'^TaskBuilding_44$', r'^TaskBuilding_45$', 
    r'^TaskBuilding_47$', r'^TaskBuilding_50$', r'^TaskBuilding_51$', r'^TaskBuilding_52$', 
    'BasketballCourt_58', 'Construction_57', 
    r'^Graffity_02$', r'^Graffity_03$', r'^Graffity_05$', r'^Graffity_08$', r'^Graffity_09$', r'^Graffity_11$', 
    r'^Graffity_13$', r'^Graffity_14$', r'^Graffity_20$', r'^Graffity_21$', r'^Graffity_23$', r'^Graffity_27$', 
    r'^Graffity_29$', r'^Graffity_32$', r'^Graffity_34$', r'^Graffity_38$', r'^Graffity_41$', r'^Graffity_42$', 
    r'^Graffity_44$', r'^Graffity_45$', r'^Graffity_47$', r'^Graffity_50$', r'^Graffity_51$', r'^Graffity_52$'
], 'TaskBuilding_Public'))

patterns.update(dict.fromkeys([
    r'^TaskBuilding_1$', r'^TaskBuilding_4$', r'^TaskBuilding_6$', r'^TaskBuilding_7$', r'^TaskBuilding_12$', 
    r'^TaskBuilding_15$', r'^TaskBuilding_17$', r'^TaskBuilding_18$', r'^TaskBuilding_19$', r'^TaskBuilding_22$', 
    r'^TaskBuilding_24$', r'^TaskBuilding_25$', r'^TaskBuilding_26$', r'^TaskBuilding_28$', r'^TaskBuilding_30$',  
    r'^TaskBuilding_31$', r'^TaskBuilding_33$', r'^TaskBuilding_35$', r'^TaskBuilding_36$', r'^TaskBuilding_37$', 
    r'^TaskBuilding_39$', r'^TaskBuilding_40$', r'^TaskBuilding_43$', r'^TaskBuilding_48$', r'^TaskBuilding_54$', 
    r'^TaskBuilding_55$', r'^Graffity_01$', r'^Graffity_04$', r'^Graffity_06$', r'^Graffity_07$', r'^Graffity_12$', 
    r'^Graffity_15$', r'^Graffity_17$', r'^Graffity_18$', r'^Graffity_19$', r'^Graffity_22$', r'^Graffity_24$', 
    r'^Graffity_25$', r'^Graffity_26$', r'^Graffity_28$', r'^Graffity_30$', r'^Graffity_31$', r'^Graffity_33$', 
    r'^Graffity_35$', r'^Graffity_36$', r'^Graffity_37$', r'^Graffity_39$', r'^Graffity_40$', r'^Graffity_43$', 
    r'^Graffity_48$', r'^Graffity_54$', r'^Graffity_55$'
], 'TaskBuilding_Residential'))

default_val = 'Background'

In [5]:
path = "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions" 

  
# csv files in the path
files = glob.glob(path + "/*.csv")
  
# defining an empty list to store 
# content
data_frame = pd.DataFrame()
content = []
  
# checking all the csv files in the 
# specified path
for filename in files:
    
    # reading content of csv file
    # content.append(filename)
    One_participant = pd.read_csv(filename)
    One_participant.drop(['Unnamed: 0'],axis=1, inplace=True)
    #Apply function that marks beggining and end of invalid sections
    One_participant["Bitmask_flag"] = bitmask_flag_change(One_participant, "combinedGazeValidityBitmask")
    One_participant.reset_index(inplace=True)
    # Get index of begging and end of invalid events
    indexLastValid = One_participant[One_participant["Bitmask_flag"] == "Invalid_Section_Starts"].index
    indexLastInValid = One_participant[One_participant["Bitmask_flag"] == "Invalid_Section_Ends"].index
    # Get 20 rows prior to the beggining of the invalid event 
    indexLower = indexLastValid - 20
    indexUpper = indexLastValid
    # Since it's possible that the invalid event occured less than 20 rows from the beggining of the file
    # we need to correct so that the highest possible index is the firs value in the file
    indexLower_r = [0 if i < 0 else i for i in indexLower]
    #Create a list with the most common element 200ms before the invalid section started
    Elements_to_replace = [One_participant.iloc[down:up,20].mode().iloc[0] for down, up in zip(indexLower_r, indexUpper)]
    #Create new column for interpolated events
    One_participant["Interpolated_collider"] = One_participant["hitObjectColliderName"]
    #Concat the row indexes that need replacement
    ranges = list(zip(indexLastValid, indexLastInValid))
    #Replace the invalid event with the mode of 20 events prior
    for i, (lower, upper) in enumerate(ranges):
        One_participant.iloc[lower:upper,-2]  = Elements_to_replace[i]
    # Here we look for the patterns contained in the dictionary and create the more general/informative variable Collider_Categorical
    One_participant['Collider_Categorical'] = One_participant['Interpolated_collider'].apply(lambda x: next((val for key, val in patterns.items() if re.match(key, x)), default_val))
    # Replace coordinates and eucledian distances with Nans
    One_participant.loc[One_participant['combinedGazeValidityBitmask'] == 0, ['hitPointOnObject_x', 'hitPointOnObject_y', 'hitPointOnObject_z',' eyePositionCombinedWorld.x', 'eyePositionCombinedWorld.y', 'eyePositionCombinedWorld.z', 'Eucledian_distance']] =  np.nan
    indexLastValid = []
    indexLastInValid = []
    indexLower = []
    indexUpper = []
    
    # 5 point median filter
    One_participant["hitPointOnObject_x"] = median_correction(One_participant.hitPointOnObject_x)
    One_participant["hitPointOnObject_y"] = median_correction(One_participant.hitPointOnObject_y)
    One_participant["hitPointOnObject_z"] = median_correction(One_participant.hitPointOnObject_z)
    One_participant["eyePositionCombinedWorld.x"] = median_correction( One_participant["eyePositionCombinedWorld.x"])
    One_participant["eyePositionCombinedWorld.y"] = median_correction( One_participant["eyePositionCombinedWorld.y"])
    One_participant["eyePositionCombinedWorld.z"] = median_correction( One_participant["eyePositionCombinedWorld.z"])
     #Saves an individual file per session per subject with out duplicates
    One_participant.to_csv(f"/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/04_Interpolated/{filename[-10:-4]}.csv", index=True)
    print(filename[-10:-4])
    content.append(One_participant)

# converting content to data frame
data_frame = pd.concat(content)

  y[i] = np.nanmedian(x[i-2:i+3])


1031_1


  y[i] = np.nanmedian(x[i-2:i+3])


1031_2


  y[i] = np.nanmedian(x[i-2:i+3])


1031_3


  y[i] = np.nanmedian(x[i-2:i+3])


1031_4


  y[i] = np.nanmedian(x[i-2:i+3])


1031_5


  y[i] = np.nanmedian(x[i-2:i+3])


1268_1


  y[i] = np.nanmedian(x[i-2:i+3])


1268_2


  y[i] = np.nanmedian(x[i-2:i+3])


1268_3


  y[i] = np.nanmedian(x[i-2:i+3])


1268_4


  y[i] = np.nanmedian(x[i-2:i+3])


1268_5


  y[i] = np.nanmedian(x[i-2:i+3])


1574_1


  y[i] = np.nanmedian(x[i-2:i+3])


1574_2


  y[i] = np.nanmedian(x[i-2:i+3])


1574_3


  y[i] = np.nanmedian(x[i-2:i+3])


1574_4


  y[i] = np.nanmedian(x[i-2:i+3])


1574_5


  y[i] = np.nanmedian(x[i-2:i+3])


1843_1


  y[i] = np.nanmedian(x[i-2:i+3])


1843_2


  y[i] = np.nanmedian(x[i-2:i+3])


1843_3


  y[i] = np.nanmedian(x[i-2:i+3])


1843_4


  y[i] = np.nanmedian(x[i-2:i+3])


1843_5


  y[i] = np.nanmedian(x[i-2:i+3])


2069_1


  y[i] = np.nanmedian(x[i-2:i+3])


2069_2


  y[i] = np.nanmedian(x[i-2:i+3])


2069_3


  y[i] = np.nanmedian(x[i-2:i+3])


2069_4


  y[i] = np.nanmedian(x[i-2:i+3])


2069_5


  y[i] = np.nanmedian(x[i-2:i+3])


3193_1


  y[i] = np.nanmedian(x[i-2:i+3])


3193_2


  y[i] = np.nanmedian(x[i-2:i+3])


3193_3


  y[i] = np.nanmedian(x[i-2:i+3])


3193_4


  y[i] = np.nanmedian(x[i-2:i+3])


3193_5


  y[i] = np.nanmedian(x[i-2:i+3])


3540_1


  y[i] = np.nanmedian(x[i-2:i+3])


3540_2


  y[i] = np.nanmedian(x[i-2:i+3])


3540_3


  y[i] = np.nanmedian(x[i-2:i+3])


3540_4


  y[i] = np.nanmedian(x[i-2:i+3])


3540_5


  y[i] = np.nanmedian(x[i-2:i+3])


4580_1


  y[i] = np.nanmedian(x[i-2:i+3])


4580_2


  y[i] = np.nanmedian(x[i-2:i+3])


4580_3


  y[i] = np.nanmedian(x[i-2:i+3])


4580_4


  y[i] = np.nanmedian(x[i-2:i+3])


4580_5


  y[i] = np.nanmedian(x[i-2:i+3])


4598_1


  y[i] = np.nanmedian(x[i-2:i+3])


4598_2


  y[i] = np.nanmedian(x[i-2:i+3])


4598_3


  y[i] = np.nanmedian(x[i-2:i+3])


4598_4


  y[i] = np.nanmedian(x[i-2:i+3])


4598_5


  y[i] = np.nanmedian(x[i-2:i+3])


4847_1


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


4847_2


  y[i] = np.nanmedian(x[i-2:i+3])
  y[0:2] = np.nanmedian(x[0:5])


4847_3


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


4847_4


  y[i] = np.nanmedian(x[i-2:i+3])


4847_5


  y[i] = np.nanmedian(x[i-2:i+3])


4875_1


  y[i] = np.nanmedian(x[i-2:i+3])


4875_2


  y[i] = np.nanmedian(x[i-2:i+3])


4875_3


  y[i] = np.nanmedian(x[i-2:i+3])


4875_4


  y[i] = np.nanmedian(x[i-2:i+3])


4875_5


  y[i] = np.nanmedian(x[i-2:i+3])


5161_1


  y[i] = np.nanmedian(x[i-2:i+3])


5161_2


  y[i] = np.nanmedian(x[i-2:i+3])


5161_3


  y[i] = np.nanmedian(x[i-2:i+3])


5161_4


  y[i] = np.nanmedian(x[i-2:i+3])


5161_5


  y[i] = np.nanmedian(x[i-2:i+3])


5189_1


  y[i] = np.nanmedian(x[i-2:i+3])


5189_2


  y[i] = np.nanmedian(x[i-2:i+3])


5189_3


  y[i] = np.nanmedian(x[i-2:i+3])


5189_4


  y[i] = np.nanmedian(x[i-2:i+3])


5189_5


  y[i] = np.nanmedian(x[i-2:i+3])


5743_1


  y[i] = np.nanmedian(x[i-2:i+3])
  y[0:2] = np.nanmedian(x[0:5])


5743_2


  y[i] = np.nanmedian(x[i-2:i+3])


5743_3


  y[i] = np.nanmedian(x[i-2:i+3])


5743_4


  y[i] = np.nanmedian(x[i-2:i+3])


5743_5


  y[i] = np.nanmedian(x[i-2:i+3])


5766_1


  y[i] = np.nanmedian(x[i-2:i+3])


5766_2


  y[i] = np.nanmedian(x[i-2:i+3])


5766_3


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


5766_4


  y[i] = np.nanmedian(x[i-2:i+3])


5766_5


  y[i] = np.nanmedian(x[i-2:i+3])


5851_1


  y[i] = np.nanmedian(x[i-2:i+3])


5851_2


  y[i] = np.nanmedian(x[i-2:i+3])


5851_3


  y[i] = np.nanmedian(x[i-2:i+3])


5851_4


  y[i] = np.nanmedian(x[i-2:i+3])


5851_5


  y[i] = np.nanmedian(x[i-2:i+3])


5972_1


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


5972_2


  y[i] = np.nanmedian(x[i-2:i+3])


5972_3


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


5972_4


  y[i] = np.nanmedian(x[i-2:i+3])


5972_5


  y[i] = np.nanmedian(x[i-2:i+3])


6406_1


  y[i] = np.nanmedian(x[i-2:i+3])


6406_2


  y[i] = np.nanmedian(x[i-2:i+3])


6406_3


  y[i] = np.nanmedian(x[i-2:i+3])


6406_4


  y[i] = np.nanmedian(x[i-2:i+3])


6406_5


  y[i] = np.nanmedian(x[i-2:i+3])


7081_1


  y[i] = np.nanmedian(x[i-2:i+3])


7081_2


  y[i] = np.nanmedian(x[i-2:i+3])
  y[0:2] = np.nanmedian(x[0:5])


7081_3


  y[i] = np.nanmedian(x[i-2:i+3])


7081_4


  y[i] = np.nanmedian(x[i-2:i+3])


7081_5


  y[i] = np.nanmedian(x[i-2:i+3])


7393_1


  y[i] = np.nanmedian(x[i-2:i+3])


7393_2


  y[i] = np.nanmedian(x[i-2:i+3])


7393_3


  y[i] = np.nanmedian(x[i-2:i+3])


7393_4


  y[i] = np.nanmedian(x[i-2:i+3])


7393_5


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


7823_1


  y[i] = np.nanmedian(x[i-2:i+3])


7823_2


  y[i] = np.nanmedian(x[i-2:i+3])


7823_3


  y[i] = np.nanmedian(x[i-2:i+3])


7823_4


  y[i] = np.nanmedian(x[i-2:i+3])


7823_5


  y[i] = np.nanmedian(x[i-2:i+3])


7935_1


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


7935_2


  y[i] = np.nanmedian(x[i-2:i+3])


7935_3


  y[i] = np.nanmedian(x[i-2:i+3])


7935_4


  y[i] = np.nanmedian(x[i-2:i+3])


7935_5


  y[i] = np.nanmedian(x[i-2:i+3])


8629_1


  y[i] = np.nanmedian(x[i-2:i+3])


8629_2


  y[i] = np.nanmedian(x[i-2:i+3])


8629_3


  y[i] = np.nanmedian(x[i-2:i+3])


8629_4


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


8629_5


  y[i] = np.nanmedian(x[i-2:i+3])


9297_1


  y[i] = np.nanmedian(x[i-2:i+3])


9297_2


  y[i] = np.nanmedian(x[i-2:i+3])


9297_3


  y[i] = np.nanmedian(x[i-2:i+3])


9297_4


  y[i] = np.nanmedian(x[i-2:i+3])


9297_5


  y[i] = np.nanmedian(x[i-2:i+3])


9627_1


  y[i] = np.nanmedian(x[i-2:i+3])


9627_2


  y[i] = np.nanmedian(x[i-2:i+3])


9627_3


  y[i] = np.nanmedian(x[i-2:i+3])


9627_4


  y[i] = np.nanmedian(x[i-2:i+3])


9627_5


  y[i] = np.nanmedian(x[i-2:i+3])


5191_1


  y[i] = np.nanmedian(x[i-2:i+3])


5191_2


  y[i] = np.nanmedian(x[i-2:i+3])


5191_3


  y[i] = np.nanmedian(x[i-2:i+3])


5191_4


  y[i] = np.nanmedian(x[i-2:i+3])


5191_5


  y[i] = np.nanmedian(x[i-2:i+3])


6254_1


  y[i] = np.nanmedian(x[i-2:i+3])


6254_2


  y[i] = np.nanmedian(x[i-2:i+3])


6254_4


  y[i] = np.nanmedian(x[i-2:i+3])


6254_5


  y[i] = np.nanmedian(x[i-2:i+3])


9535_1


  y[i] = np.nanmedian(x[i-2:i+3])


9535_2


  y[i] = np.nanmedian(x[i-2:i+3])


9535_3


  y[i] = np.nanmedian(x[i-2:i+3])


9535_4


  y[i] = np.nanmedian(x[i-2:i+3])


9535_5


  y[i] = np.nanmedian(x[i-2:i+3])


1142_2


  y[i] = np.nanmedian(x[i-2:i+3])


1142_3


  y[i] = np.nanmedian(x[i-2:i+3])


1142_4


  y[i] = np.nanmedian(x[i-2:i+3])


1142_5


  y[i] = np.nanmedian(x[i-2:i+3])


1234_1


  y[i] = np.nanmedian(x[i-2:i+3])


1234_2


  y[i] = np.nanmedian(x[i-2:i+3])


1234_3


  y[i] = np.nanmedian(x[i-2:i+3])


1234_4


  y[i] = np.nanmedian(x[i-2:i+3])


1234_5


  y[i] = np.nanmedian(x[i-2:i+3])


6266_1


  y[i] = np.nanmedian(x[i-2:i+3])


6266_2


  y[i] = np.nanmedian(x[i-2:i+3])


6266_3


  y[i] = np.nanmedian(x[i-2:i+3])


6266_4


  y[i] = np.nanmedian(x[i-2:i+3])


6266_5


  y[i] = np.nanmedian(x[i-2:i+3])


1142_1


In [6]:
data_frame.tail(50)

Unnamed: 0,index,SubjectID,Session,SessionSubsection,timeStampDataPointEnd,eyeOpennessLeft,eyeOpennessRight,pupilDiameterMillimetersLeft,pupilDiameterMillimetersRight,leftGazeValidityBitmask,...,hitPointOnObject_y,hitPointOnObject_z,Eucledian_distance,Collider_Categorical,Face_Hits,Time_Shift,Continuous_Time,Bitmask_flag,Interpolated_collider,eyePositionCombinedWorld.x
116045,116045,1142,1,3,1687343000.0,1.0,1.0,4.494644,4.18515,31,...,0.733514,100.771011,4.633499,Background,Not_Agent,0.011407,50.796,No_change,Dumpster_v2_2,
116046,116046,1142,1,3,1687343000.0,0.99835,0.937989,4.502533,4.192703,31,...,0.735419,100.766327,4.634478,Background,Not_Agent,0.021329,50.797,No_change,Dumpster_v2_2,
116047,116047,1142,1,3,1687343000.0,1.0,1.0,4.502594,4.192657,31,...,0.735715,100.766235,4.634451,Background,Not_Agent,0.011904,50.797,No_change,Dumpster_v2_2,
116048,116048,1142,1,3,1687343000.0,1.0,1.0,4.514389,4.210312,31,...,0.735817,100.765305,4.63465,Background,Not_Agent,0.022321,50.797,No_change,Dumpster_v2_2,
116049,116049,1142,1,3,1687343000.0,1.0,1.0,4.51297,4.221741,31,...,0.737244,100.764633,4.634825,Background,Not_Agent,0.021823,50.798,No_change,Dumpster_v2_2,
116050,116050,1142,1,3,1687343000.0,1.0,1.0,4.51329,4.221954,31,...,0.738034,100.764633,4.634898,Background,Not_Agent,0.011408,50.798,No_change,Dumpster_v2_2,
116051,116051,1142,1,3,1687343000.0,1.0,1.0,4.51329,4.221954,31,...,0.738869,100.764633,4.634883,Background,Not_Agent,0.010911,50.798,No_change,Dumpster_v2_2,
116052,116052,1142,1,3,1687343000.0,1.0,1.0,4.508698,4.217957,31,...,0.739336,100.767494,4.634378,Background,Not_Agent,0.010417,50.798,No_change,Dumpster_v2_2,
116053,116053,1142,1,3,1687343000.0,1.0,1.0,4.513,4.217758,31,...,0.740494,100.768806,4.634201,Background,Not_Agent,0.012399,50.798,No_change,Dumpster_v2_2,
116054,116054,1142,1,3,1687343000.0,1.0,1.0,4.513901,4.205582,31,...,0.740494,100.769936,4.633972,Background,Not_Agent,0.010912,50.798,No_change,Dumpster_v2_2,
