In [1]:
import pandas as pd 
import numpy as np
import glob
import re

In [2]:
def bitmask_flag_change(Data_Frame, colname):
    """ Creates a list of values that flag the change in validity of the Eyetracking data, 
    diferentiates between the beggining and enf of an invalid event. 
    
Parameters
-------------
     
     :dataframe(DataFrame): your data frame,
     :colname(str): Name of the column that containts the validity mask 
returns 
----------

    List of len DataFrame with values that mark changes on the validity column
     """
    change_flag = ["No_change"] # initialize flag 
    for i in range(1, len(Data_Frame)):
        if Data_Frame[colname][i] == Data_Frame[colname][i-1]: # compare each value with the previous value
          change_flag.append("No_change") 
        elif Data_Frame[colname][i] != Data_Frame[colname][i-1]:
            if Data_Frame[colname][i] == 3:
                  change_flag.append("Invalid_Section_Ends")
            elif Data_Frame[colname][i] == 0:
                change_flag.append("Invalid_Section_Starts")
            else:
                change_flag.append("ERROR")
    return change_flag

In [3]:
def median_correction(x):
    """
    Perform 5-point median correction on input signal x.
    """
    y = np.zeros_like(x)
    length = len(x)
    for i in range(2, length-2):
        y[i] = np.nanmedian(x[i-2:i+3])
    y[0:2] = np.nanmedian(x[0:5])        
    y[length-2:length] = np.nanmedian(x[length-5:length])
    return y

In [4]:
# Since the collider names are too detailed, here we create a dictionary with patterns to classify them into our categories of interest

patterns = {'\d{2}_Sa':'Passive_Agent', '\d{2}_Cma':'Active_Agent', 'Building_\d+': 'Building'}
patterns.update(dict.fromkeys(['Castle-TaskBuilding_56', 'Crane_59','HighSilo-TaskBuilding_49', 'Windmill-TaskBuilding_10_1', 'Church-TaskBuilding_16'], 'Global_Landmark'))
patterns.update(dict.fromkeys(['^TaskBuilding_2$','^TaskBuilding_3$', '^TaskBuilding_5$', '^TaskBuilding_8$', '^TaskBuilding_9$', '^TaskBuilding_11$', '^TaskBuilding_13$', '^TaskBuilding_14$', '^TaskBuilding_20$', 
                               '^TaskBuilding_21$', '^TaskBuilding_23$','^TaskBuilding_27$', '^TaskBuilding_29$', '^TaskBuilding_32$', '^TaskBuilding_34$',  '^TaskBuilding_38$', '^TaskBuilding_41$', '^TaskBuilding_42$', 
                               '^TaskBuilding_44$', '^TaskBuilding_45$', '^TaskBuilding_47$', '^TaskBuilding_50$', '^TaskBuilding_51$', '^TaskBuilding_52$', 'BasketballCourt_58', 'Construction_57', 
                               '^Graffity_02$', '^Graffity_03$', '^Graffity_05$', '^Graffity_08$', '^Graffity_09$', '^Graffity_11$', '^Graffity_13$', '^Graffity_14$', '^Graffity_20$', 
                               '^Graffity_21$', '^Graffity_23$', '^Graffity_27$', '^Graffity_29$', '^Graffity_32$', '^Graffity_34$', '^Graffity_38$', '^Graffity_41$', '^Graffity_42$', 
                               '^Graffity_44$', '^Graffity_45$', '^Graffity_47$',  '^Graffity_50$', '^Graffity_51$', '^Graffity_52$'], 'TaskBuilding_Public'))

patterns.update(dict.fromkeys(['^TaskBuilding_1$','^TaskBuilding_4$', '^TaskBuilding_6$', '^TaskBuilding_7$', '^TaskBuilding_12$', '^TaskBuilding_15$', '^TaskBuilding_17$', '^TaskBuilding_18$', '^TaskBuilding_19$', 
                               '^TaskBuilding_22$', '^TaskBuilding_24$','^TaskBuilding_25$', '^TaskBuilding_26$', '^TaskBuilding_28$', '^TaskBuilding_30$',  '^TaskBuilding_31$', '^TaskBuilding_33$', '^TaskBuilding_35$', 
                               '^TaskBuilding_36$', '^TaskBuilding_37$', '^TaskBuilding_39$', '^TaskBuilding_40$', '^TaskBuilding_43$', '^TaskBuilding_48$', '^TaskBuilding_54$','^TaskBuilding_55$',
                               '^Graffity_01$','^Graffity_04$', '^Graffity_06$', '^Graffity_07$', '^Graffity_12$', '^Graffity_15$', '^Graffity_17$', '^Graffity_18$', '^Graffity_19$', '^Graffity_22$', 
                               '^Graffity_24$','^Graffity_25$', '^Graffity_26$', '^Graffity_28$', '^Graffity_30$',  '^Graffity_31$', '^Graffity_33$', '^Graffity_35$', '^Graffity_36$', '^Graffity_37$', '^Graffity_39$', 
                               '^Graffity_40$', '^Graffity_43$', '^Graffity_48$', '^Graffity_54$', '^Graffity_55$' ], 'TaskBuilding_Residential'))
default_val = 'Background'

In [5]:
path = "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/" 

  
# csv files in the path
files = glob.glob(path + "/*.csv")
  
# defining an empty list to store 
# content
data_frame = pd.DataFrame()
content = []
  
# checking all the csv files in the 
# specified path
for filename in files:
    
    # reading content of csv file
    # content.append(filename)
    One_participant = pd.read_csv(filename)
    One_participant.drop(['Unnamed: 0'],axis=1, inplace=True)
    #Apply function that marks beggining and end of invalid sections
    One_participant["Bitmask_flag"] = bitmask_flag_change(One_participant, "combinedGazeValidityBitmask")
    One_participant.reset_index(inplace=True)
    # Get index of begging and end of invalid events
    indexLastValid = One_participant[One_participant["Bitmask_flag"] == "Invalid_Section_Starts"].index
    indexLastInValid = One_participant[One_participant["Bitmask_flag"] == "Invalid_Section_Ends"].index
    # Get 20 rows prior to the beggining of the invalid event 
    indexLower = indexLastValid - 20
    indexUpper = indexLastValid
    # Since it's possible that the invalid event occured less than 20 rows from the beggining of the file
    # we need to correct so that the highest possible index is the firs value in the file
    indexLower_r = [0 if i < 0 else i for i in indexLower]
    #Create a list with the most common element 200ms before the invalid section started
    Elements_to_replace = [One_participant.iloc[down:up,20].mode().iloc[0] for down, up in zip(indexLower_r, indexUpper)]
    #Create new column for interpolated events
    One_participant["Interpolated_collider"] = One_participant["hitObjectColliderName"]
    #Concat the row indexes that need replacement
    ranges = list(zip(indexLastValid, indexLastInValid))
    #Replace the invalid event with the mode of 20 events prior
    for i, (lower, upper) in enumerate(ranges):
        One_participant.iloc[lower:upper,-2]  = Elements_to_replace[i]
    # Here we look for the patterns contained in the dictionary and create the more general/informative variable Collider_Categorical
    One_participant['Collider_Categorical'] = One_participant['Interpolated_collider'].apply(lambda x: next((val for key, val in patterns.items() if re.match(key, x)), default_val))
    # Replace coordinates and eucledian distances with Nans
    One_participant.loc[One_participant['combinedGazeValidityBitmask'] == 0, ['hitPointOnObject_x', 'hitPointOnObject_y', 'hitPointOnObject_z',' eyePositionCombinedWorld.x', 'eyePositionCombinedWorld.y', 'eyePositionCombinedWorld.z', 'Eucledian_distance']] =  np.nan
    indexLastValid = []
    indexLastInValid = []
    indexLower = []
    indexUpper = []
    
    # 5 point median filter
    One_participant["hitPointOnObject_x"] = median_correction(One_participant.hitPointOnObject_x)
    One_participant["hitPointOnObject_y"] = median_correction(One_participant.hitPointOnObject_y)
    One_participant["hitPointOnObject_z"] = median_correction(One_participant.hitPointOnObject_z)
    One_participant["eyePositionCombinedWorld.x"] = median_correction( One_participant["eyePositionCombinedWorld.x"])
    One_participant["eyePositionCombinedWorld.y"] = median_correction( One_participant["eyePositionCombinedWorld.y"])
    One_participant["eyePositionCombinedWorld.z"] = median_correction( One_participant["eyePositionCombinedWorld.z"])
     #Saves an individual file per session per subject with out duplicates
    One_participant.to_csv(f"/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/04_Interpolated/{filename[-10:-4]}.csv", index=True)
    print(filename[-10:-4])
    content.append(One_participant)

# converting content to data frame
data_frame = pd.concat(content)

  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1031_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1031_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1031_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1031_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1031_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1268_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1268_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1268_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1268_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1268_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1574_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1574_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1574_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1574_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1574_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1843_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1843_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1843_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1843_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


1843_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


2069_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


2069_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


2069_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


2069_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


2069_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


3193_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


3193_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


3193_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


3193_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


3193_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


3540_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


3540_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


3540_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


3540_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


3540_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4580_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4580_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4580_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4580_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4580_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4598_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4598_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4598_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4598_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4598_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4847_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4847_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4847_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4847_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4847_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4875_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4875_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4875_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4875_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


4875_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5161_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5161_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5161_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5161_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5161_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5189_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5189_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5189_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5189_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5189_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5743_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5743_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5743_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5743_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5743_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5766_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5766_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5766_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5766_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5766_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5851_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5851_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5851_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5851_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5851_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5972_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5972_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5972_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5972_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5972_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


6406_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


6406_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


6406_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


6406_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


6406_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7081_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7081_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7081_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7081_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7081_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7393_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7393_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7393_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7393_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7393_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7823_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7823_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7823_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7823_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7823_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7935_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7935_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7935_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7935_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


7935_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


8629_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


8629_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


8629_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


8629_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


8629_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


9297_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


9297_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


9297_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


9297_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


9297_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


9627_1


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


9627_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


9627_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


9627_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


9627_5


In [6]:
data_frame.tail(50)

Unnamed: 0,index,SubjectID,Session,SessionSubsection,timeStampDataPointEnd,combinedGazeValidityBitmask,eyePositionCombinedWorld.x,eyePositionCombinedWorld.y,eyePositionCombinedWorld.z,eyeDirectionCombinedWorld.y,...,hitPointOnObject_y,hitPointOnObject_z,Eucledian_distance,Collider_Categorical,Face_Hits,Time_Shift,Continuous_Time,Bitmask_flag,Interpolated_collider,eyePositionCombinedWorld.x.1
114435,114435,9627,5,3,1668586000.0,3,156.188293,-0.46633,194.69696,0.040248,...,2.447964,184.033463,21.007784,Building,Not_Agent,0.015872,40.003,No_change,Building_94,
114436,114436,9627,5,3,1668586000.0,0,156.07785,-0.465736,194.690529,-1.118201,...,1.41045,183.93515,,Background,Not_Agent,0.01488,40.004,121.812462,road_base_network.004,
114437,114437,9627,5,3,1668586000.0,0,156.000809,-0.465142,194.684097,-1.119434,...,0.372935,183.836838,,Background,Not_Agent,0.01736,40.004,121.812462,road_base_network.004,
114438,114438,9627,5,3,1668586000.0,0,155.917892,,,-1.121739,...,,,,Background,Not_Agent,0.015872,40.004,121.812462,road_base_network.004,
114439,114439,9627,5,3,1668586000.0,0,155.840256,,,-1.122891,...,,,,Background,Not_Agent,0.015377,40.004,121.812462,road_base_network.004,
114440,114440,9627,5,3,1668586000.0,0,155.767273,-0.458764,194.595917,-1.124001,...,3.469473,170.495651,,Background,Not_Agent,0.015376,40.005,121.812462,road_base_network.004,
114441,114441,9627,5,3,1668586000.0,0,155.691559,-0.458491,194.5895,-1.124369,...,3.206465,156.405624,,Background,Not_Agent,0.015872,40.005,121.812462,road_base_network.004,
114442,114442,9627,5,3,1668586000.0,3,155.641266,-0.458764,194.583084,0.059437,...,3.345662,157.15863,66.231726,Background,Not_Agent,0.016864,40.005,Invalid_Section_Ends,CollisionObject1,
114443,114443,9627,5,3,1668586000.0,3,155.559509,-0.459119,194.576935,0.023262,...,3.14456,163.827141,146.308296,Background,Not_Agent,0.01488,40.005,No_change,Cypress_v1_2 (5),
114444,114444,9627,5,3,1668586000.0,3,155.482727,-0.459474,194.570786,0.0361,...,3.345662,157.15863,105.507892,Building,Not_Agent,0.016037,40.006,No_change,Building_103,
