In [1]:
import pandas as pd 
import numpy as np
import glob
import re

In [2]:
def bitmask_flag_change(Data_Frame, colname):
    """ Creates a list of values that flag the change in validity of the Eyetracking data, 
    diferentiates between the beggining and enf of an invalid event. 
    
Parameters
-------------
     
     :dataframe(DataFrame): your data frame,
     :colname(str): Name of the column that containts the validity mask 
returns 
----------

    List of len DataFrame with values that mark changes on the validity column
     """
    change_flag = ["No_change"] # initialize flag 
    for i in range(1, len(Data_Frame)):
        if Data_Frame[colname][i] == Data_Frame[colname][i-1]: # compare each value with the previous value
          change_flag.append("No_change") 
        elif Data_Frame[colname][i] != Data_Frame[colname][i-1]:
            if Data_Frame[colname][i] == 3:
                  change_flag.append("Invalid_Section_Ends")
            elif Data_Frame[colname][i] == 0:
                change_flag.append("Invalid_Section_Starts")
            else:
                change_flag.append("ERROR")
    return change_flag

In [3]:
def median_correction(x):
    """
    Perform 5-point median correction on input signal x.
    """
    y = np.zeros_like(x)
    length = len(x)
    for i in range(2, length-2):
        y[i] = np.nanmedian(x[i-2:i+3])
    y[0:2] = np.nanmedian(x[0:5])        
    y[length-2:length] = np.nanmedian(x[length-5:length])
    return y

In [4]:
# Since the collider names are too detailed, here we create a dictionary with patterns to classify them into our categories of interest

patterns = {'\d{2}_Sa':'Passive_Agent', '\d{2}_Cma':'Active_Agent', 'Building_\d+': 'Building'}
patterns.update(dict.fromkeys(['Castle-TaskBuilding_56', 'Crane_59','HighSilo-TaskBuilding_49', 'Windmill-TaskBuilding_10_1', 'Church-TaskBuilding_16'], 'Global_Landmark'))
patterns.update(dict.fromkeys(['^TaskBuilding_2$','^TaskBuilding_3$', '^TaskBuilding_5$', '^TaskBuilding_8$', '^TaskBuilding_9$', '^TaskBuilding_11$', '^TaskBuilding_13$', '^TaskBuilding_14$', '^TaskBuilding_20$', 
                               '^TaskBuilding_21$', '^TaskBuilding_23$','^TaskBuilding_27$', '^TaskBuilding_29$', '^TaskBuilding_32$', '^TaskBuilding_34$',  '^TaskBuilding_38$', '^TaskBuilding_41$', '^TaskBuilding_42$', 
                               '^TaskBuilding_44$', '^TaskBuilding_45$', '^TaskBuilding_47$', '^TaskBuilding_50$', '^TaskBuilding_51$', '^TaskBuilding_52$', 'BasketballCourt_58', 'Construction_57', 
                               '^Graffity_02$', '^Graffity_03$', '^Graffity_05$', '^Graffity_08$', '^Graffity_09$', '^Graffity_11$', '^Graffity_13$', '^Graffity_14$', '^Graffity_20$', 
                               '^Graffity_21$', '^Graffity_23$', '^Graffity_27$', '^Graffity_29$', '^Graffity_32$', '^Graffity_34$', '^Graffity_38$', '^Graffity_41$', '^Graffity_42$', 
                               '^Graffity_44$', '^Graffity_45$', '^Graffity_47$',  '^Graffity_50$', '^Graffity_51$', '^Graffity_52$'], 'TaskBuilding_Public'))

patterns.update(dict.fromkeys(['^TaskBuilding_1$','^TaskBuilding_4$', '^TaskBuilding_6$', '^TaskBuilding_7$', '^TaskBuilding_12$', '^TaskBuilding_15$', '^TaskBuilding_17$', '^TaskBuilding_18$', '^TaskBuilding_19$', 
                               '^TaskBuilding_22$', '^TaskBuilding_24$','^TaskBuilding_25$', '^TaskBuilding_26$', '^TaskBuilding_28$', '^TaskBuilding_30$',  '^TaskBuilding_31$', '^TaskBuilding_33$', '^TaskBuilding_35$', 
                               '^TaskBuilding_36$', '^TaskBuilding_37$', '^TaskBuilding_39$', '^TaskBuilding_40$', '^TaskBuilding_43$', '^TaskBuilding_48$', '^TaskBuilding_54$','^TaskBuilding_55$',
                               '^Graffity_01$','^Graffity_04$', '^Graffity_06$', '^Graffity_07$', '^Graffity_12$', '^Graffity_15$', '^Graffity_17$', '^Graffity_18$', '^Graffity_19$', '^Graffity_22$', 
                               '^Graffity_24$','^Graffity_25$', '^Graffity_26$', '^Graffity_28$', '^Graffity_30$',  '^Graffity_31$', '^Graffity_33$', '^Graffity_35$', '^Graffity_36$', '^Graffity_37$', '^Graffity_39$', 
                               '^Graffity_40$', '^Graffity_43$', '^Graffity_48$', '^Graffity_54$', '^Graffity_55$' ], 'TaskBuilding_Residential'))
default_val = 'Background'

In [5]:
path = "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/New" 

  
# csv files in the path
files = glob.glob(path + "/*.csv")
  
# defining an empty list to store 
# content
data_frame = pd.DataFrame()
content = []
  
# checking all the csv files in the 
# specified path
for filename in files:
    
    # reading content of csv file
    # content.append(filename)
    One_participant = pd.read_csv(filename)
    One_participant.drop(['Unnamed: 0'],axis=1, inplace=True)
    #Apply function that marks beggining and end of invalid sections
    One_participant["Bitmask_flag"] = bitmask_flag_change(One_participant, "combinedGazeValidityBitmask")
    One_participant.reset_index(inplace=True)
    # Get index of begging and end of invalid events
    indexLastValid = One_participant[One_participant["Bitmask_flag"] == "Invalid_Section_Starts"].index
    indexLastInValid = One_participant[One_participant["Bitmask_flag"] == "Invalid_Section_Ends"].index
    # Get 20 rows prior to the beggining of the invalid event 
    indexLower = indexLastValid - 20
    indexUpper = indexLastValid
    # Since it's possible that the invalid event occured less than 20 rows from the beggining of the file
    # we need to correct so that the highest possible index is the firs value in the file
    indexLower_r = [0 if i < 0 else i for i in indexLower]
    #Create a list with the most common element 200ms before the invalid section started
    Elements_to_replace = [One_participant.iloc[down:up,20].mode().iloc[0] for down, up in zip(indexLower_r, indexUpper)]
    #Create new column for interpolated events
    One_participant["Interpolated_collider"] = One_participant["hitObjectColliderName"]
    #Concat the row indexes that need replacement
    ranges = list(zip(indexLastValid, indexLastInValid))
    #Replace the invalid event with the mode of 20 events prior
    for i, (lower, upper) in enumerate(ranges):
        One_participant.iloc[lower:upper,-2]  = Elements_to_replace[i]
    # Here we look for the patterns contained in the dictionary and create the more general/informative variable Collider_Categorical
    One_participant['Collider_Categorical'] = One_participant['Interpolated_collider'].apply(lambda x: next((val for key, val in patterns.items() if re.match(key, x)), default_val))
    # Replace coordinates and eucledian distances with Nans
    One_participant.loc[One_participant['combinedGazeValidityBitmask'] == 0, ['hitPointOnObject_x', 'hitPointOnObject_y', 'hitPointOnObject_z',' eyePositionCombinedWorld.x', 'eyePositionCombinedWorld.y', 'eyePositionCombinedWorld.z', 'Eucledian_distance']] =  np.nan
    indexLastValid = []
    indexLastInValid = []
    indexLower = []
    indexUpper = []
    
    # 5 point median filter
    One_participant["hitPointOnObject_x"] = median_correction(One_participant.hitPointOnObject_x)
    One_participant["hitPointOnObject_y"] = median_correction(One_participant.hitPointOnObject_y)
    One_participant["hitPointOnObject_z"] = median_correction(One_participant.hitPointOnObject_z)
    One_participant["eyePositionCombinedWorld.x"] = median_correction( One_participant["eyePositionCombinedWorld.x"])
    One_participant["eyePositionCombinedWorld.y"] = median_correction( One_participant["eyePositionCombinedWorld.y"])
    One_participant["eyePositionCombinedWorld.z"] = median_correction( One_participant["eyePositionCombinedWorld.z"])
     #Saves an individual file per session per subject with out duplicates
    One_participant.to_csv(f"/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/04_Interpolated/{filename[-10:-4]}.csv", index=True)
    print(filename[-10:-4])
    content.append(One_participant)

# converting content to data frame
data_frame = pd.concat(content)

  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5191_2


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5191_3


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5191_4


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5191_5


  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


5191_1


In [6]:
data_frame.tail(50)

Unnamed: 0,index,SubjectID,Session,SessionSubsection,timeStampDataPointEnd,combinedGazeValidityBitmask,eyePositionCombinedWorld.x,eyePositionCombinedWorld.y,eyePositionCombinedWorld.z,eyeDirectionCombinedWorld.y,...,hitPointOnObject_y,hitPointOnObject_z,Eucledian_distance,Collider_Categorical,Face_Hits,Time_Shift,Continuous_Time,Bitmask_flag,Interpolated_collider,eyePositionCombinedWorld.x.1
113173,113173,5191,1,3,1691568000.0,3,-215.569519,3.992218,173.075516,-0.042176,...,2.367489,163.14357,27.583096,Background,Not_Agent,0.011407,34.343,No_change,Garage_185,
113174,113174,5191,1,3,1691568000.0,3,-215.594467,3.992218,173.066162,-0.043066,...,1.988649,163.14357,27.436397,Background,Not_Agent,0.021824,34.344,No_change,Garage_185,
113175,113175,5191,1,3,1691568000.0,3,-215.594757,3.992242,173.066162,-0.04284,...,1.625756,158.844772,55.331866,Background,Not_Agent,0.010913,34.344,No_change,terrain_R.001,
113176,113176,5191,1,3,1691568000.0,3,-215.595139,3.992242,173.066162,-0.040593,...,1.625756,158.844772,58.962274,Background,Not_Agent,0.011409,34.344,No_change,terrain_R.001,
113177,113177,5191,1,3,1691568000.0,3,-215.596069,3.992262,173.067383,-0.048921,...,1.615508,158.844772,48.690221,Background,Not_Agent,0.021823,34.344,No_change,terrain_R.001,
113178,113178,5191,1,3,1691568000.0,3,-215.596603,3.992293,173.068268,-0.06201,...,1.614481,161.283539,37.868478,Background,Not_Agent,0.011904,34.344,No_change,terrain_R.001,
113179,113179,5191,1,3,1691568000.0,3,-215.597687,3.992441,173.069595,-0.059434,...,1.614481,161.283539,40.067477,Background,Not_Agent,0.021328,34.345,No_change,terrain_R.001,
113180,113180,5191,1,3,1691568000.0,3,-215.598206,3.992519,173.070587,-0.057037,...,1.608413,161.283539,42.252445,Background,Not_Agent,0.011409,34.345,No_change,terrain_R.001,
113181,113181,5191,1,3,1691568000.0,3,-215.598694,3.992636,173.071838,-0.049622,...,1.601179,160.622803,48.131002,Background,Not_Agent,0.011408,34.345,No_change,terrain_R.001,
113182,113182,5191,1,3,1691568000.0,3,-215.599777,3.99292,173.074066,-0.043126,...,1.596539,160.622803,55.656081,Background,Not_Agent,0.021824,34.346,No_change,terrain_R.001,
