In [1]:
import pandas as pd 
import numpy as np
import glob
import re

In [2]:
def bitmask_flag_change(Data_Frame, colname):
    """ Creates a list of values that flag the change in validity of the Eyetracking data, 
    diferentiates between the beggining and enf of an invalid event. 
    
Parameters
-------------
     
     :dataframe(DataFrame): your data frame,
     :colname(str): Name of the column that containts the validity mask 
returns 
----------

    List of len DataFrame with values that mark changes on the validity column
     """
    change_flag = ["No_change"] # initialize flag 
    for i in range(1, len(Data_Frame)):
        if Data_Frame[colname][i] == Data_Frame[colname][i-1]: # compare each value with the previous value
          change_flag.append("No_change") 
        elif Data_Frame[colname][i] != Data_Frame[colname][i-1]:
            if Data_Frame[colname][i] == 3:
                  change_flag.append("Invalid_Section_Ends")
            elif Data_Frame[colname][i] == 0:
                change_flag.append("Invalid_Section_Starts")
            else:
                change_flag.append("ERROR")
    return change_flag

In [3]:
def median_correction(x):
    """
    Perform 5-point median correction on input signal x.
    """
    y = np.zeros_like(x)
    length = len(x)
    for i in range(2, length-2):
        y[i] = np.nanmedian(x[i-2:i+3])
    y[0:2] = np.nanmedian(x[0:5])        
    y[length-2:length] = np.nanmedian(x[length-5:length])
    return y

In [6]:
# Since the collider names are too detailed, here we create a dictionary with patterns to classify them into our categories of interest

patterns = {
    r'\d{2}_Sa': 'Passive_Agent',
    r'\d{2}_Cma': 'Active_Agent',
    r'Building_\d+': 'Building'
}

patterns.update(dict.fromkeys([
    'Castle-TaskBuilding_56', 'Crane_59', 'HighSilo-TaskBuilding_49',
    'Windmill-TaskBuilding_10_1', 'Church-TaskBuilding_16'
], 'Global_Landmark'))

patterns.update(dict.fromkeys([
    r'^TaskBuilding_2$', r'^TaskBuilding_3$', r'^TaskBuilding_5$', r'^TaskBuilding_8$', r'^TaskBuilding_9$', 
    r'^TaskBuilding_11$', r'^TaskBuilding_13$', r'^TaskBuilding_14$', r'^TaskBuilding_20$', r'^TaskBuilding_21$', 
    r'^TaskBuilding_23$', r'^TaskBuilding_27$', r'^TaskBuilding_29$', r'^TaskBuilding_32$', r'^TaskBuilding_34$',  
    r'^TaskBuilding_38$', r'^TaskBuilding_41$', r'^TaskBuilding_42$', r'^TaskBuilding_44$', r'^TaskBuilding_45$', 
    r'^TaskBuilding_47$', r'^TaskBuilding_50$', r'^TaskBuilding_51$', r'^TaskBuilding_52$', 
    'BasketballCourt_58', 'Construction_57', 
    r'^Graffity_02$', r'^Graffity_03$', r'^Graffity_05$', r'^Graffity_08$', r'^Graffity_09$', r'^Graffity_11$', 
    r'^Graffity_13$', r'^Graffity_14$', r'^Graffity_20$', r'^Graffity_21$', r'^Graffity_23$', r'^Graffity_27$', 
    r'^Graffity_29$', r'^Graffity_32$', r'^Graffity_34$', r'^Graffity_38$', r'^Graffity_41$', r'^Graffity_42$', 
    r'^Graffity_44$', r'^Graffity_45$', r'^Graffity_47$', r'^Graffity_50$', r'^Graffity_51$', r'^Graffity_52$'
], 'TaskBuilding_Public'))

patterns.update(dict.fromkeys([
    r'^TaskBuilding_1$', r'^TaskBuilding_4$', r'^TaskBuilding_6$', r'^TaskBuilding_7$', r'^TaskBuilding_12$', 
    r'^TaskBuilding_15$', r'^TaskBuilding_17$', r'^TaskBuilding_18$', r'^TaskBuilding_19$', r'^TaskBuilding_22$', 
    r'^TaskBuilding_24$', r'^TaskBuilding_25$', r'^TaskBuilding_26$', r'^TaskBuilding_28$', r'^TaskBuilding_30$',  
    r'^TaskBuilding_31$', r'^TaskBuilding_33$', r'^TaskBuilding_35$', r'^TaskBuilding_36$', r'^TaskBuilding_37$', 
    r'^TaskBuilding_39$', r'^TaskBuilding_40$', r'^TaskBuilding_43$', r'^TaskBuilding_48$', r'^TaskBuilding_54$', 
    r'^TaskBuilding_55$', r'^Graffity_01$', r'^Graffity_04$', r'^Graffity_06$', r'^Graffity_07$', r'^Graffity_12$', 
    r'^Graffity_15$', r'^Graffity_17$', r'^Graffity_18$', r'^Graffity_19$', r'^Graffity_22$', r'^Graffity_24$', 
    r'^Graffity_25$', r'^Graffity_26$', r'^Graffity_28$', r'^Graffity_30$', r'^Graffity_31$', r'^Graffity_33$', 
    r'^Graffity_35$', r'^Graffity_36$', r'^Graffity_37$', r'^Graffity_39$', r'^Graffity_40$', r'^Graffity_43$', 
    r'^Graffity_48$', r'^Graffity_54$', r'^Graffity_55$'
], 'TaskBuilding_Residential'))
default_val = 'Background'

In [7]:
path = "/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/" 

  
# csv files in the path
files = glob.glob(path + "/*.csv")
  
# defining an empty list to store 
# content
data_frame = pd.DataFrame()
content = []
  
# checking all the csv files in the 
# specified path
for filename in files:
    
    # reading content of csv file
    # content.append(filename)
    One_participant = pd.read_csv(filename)
    One_participant.drop(['Unnamed: 0'],axis=1, inplace=True)
    #Apply function that marks beggining and end of invalid sections
    One_participant["Bitmask_flag"] = bitmask_flag_change(One_participant, "combinedGazeValidityBitmask")
    One_participant.reset_index(inplace=True)
    # Get index of begging and end of invalid events
    indexLastValid = One_participant[One_participant["Bitmask_flag"] == "Invalid_Section_Starts"].index
    indexLastInValid = One_participant[One_participant["Bitmask_flag"] == "Invalid_Section_Ends"].index
    # Get 20 rows prior to the beggining of the invalid event 
    indexLower = indexLastValid - 20
    indexUpper = indexLastValid
    # Since it's possible that the invalid event occured less than 20 rows from the beggining of the file
    # we need to correct so that the highest possible index is the firs value in the file
    indexLower_r = [0 if i < 0 else i for i in indexLower]
    #Create a list with the most common element 200ms before the invalid section started
    Elements_to_replace = [One_participant.iloc[down:up,20].mode().iloc[0] for down, up in zip(indexLower_r, indexUpper)]
    #Create new column for interpolated events
    One_participant["Interpolated_collider"] = One_participant["hitObjectColliderName"]
    #Concat the row indexes that need replacement
    ranges = list(zip(indexLastValid, indexLastInValid))
    #Replace the invalid event with the mode of 20 events prior
    for i, (lower, upper) in enumerate(ranges):
        One_participant.iloc[lower:upper,-2]  = Elements_to_replace[i]
    # Here we look for the patterns contained in the dictionary and create the more general/informative variable Collider_Categorical
    One_participant['Collider_Categorical'] = One_participant['Interpolated_collider'].apply(lambda x: next((val for key, val in patterns.items() if re.match(key, x)), default_val))
    # Replace coordinates and eucledian distances with Nans
    One_participant.loc[One_participant['combinedGazeValidityBitmask'] == 0, ['hitPointOnObject_x', 'hitPointOnObject_y', 'hitPointOnObject_z',' eyePositionCombinedWorld.x', 'eyePositionCombinedWorld.y', 'eyePositionCombinedWorld.z', 'Eucledian_distance']] =  np.nan
    indexLastValid = []
    indexLastInValid = []
    indexLower = []
    indexUpper = []
    
    # 5 point median filter
    One_participant["hitPointOnObject_x"] = median_correction(One_participant.hitPointOnObject_x)
    One_participant["hitPointOnObject_y"] = median_correction(One_participant.hitPointOnObject_y)
    One_participant["hitPointOnObject_z"] = median_correction(One_participant.hitPointOnObject_z)
    One_participant["eyePositionCombinedWorld.x"] = median_correction( One_participant["eyePositionCombinedWorld.x"])
    One_participant["eyePositionCombinedWorld.y"] = median_correction( One_participant["eyePositionCombinedWorld.y"])
    One_participant["eyePositionCombinedWorld.z"] = median_correction( One_participant["eyePositionCombinedWorld.z"])
     #Saves an individual file per session per subject with out duplicates
    One_participant.to_csv(f"/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/04_Interpolated/{filename[-10:-4]}.csv", index=True)
    print(filename[-10:-4])
    content.append(One_participant)

# converting content to data frame
data_frame = pd.concat(content)

  y[i] = np.nanmedian(x[i-2:i+3])


0365_1


  y[i] = np.nanmedian(x[i-2:i+3])


0365_2


  y[i] = np.nanmedian(x[i-2:i+3])


0365_3


  y[i] = np.nanmedian(x[i-2:i+3])


0365_4


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


1754_1


  y[i] = np.nanmedian(x[i-2:i+3])


1754_2


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


1754_3


  y[i] = np.nanmedian(x[i-2:i+3])


1754_4


  y[i] = np.nanmedian(x[i-2:i+3])


1754_5


  y[i] = np.nanmedian(x[i-2:i+3])


2258_1


  y[i] = np.nanmedian(x[i-2:i+3])


2258_2


  y[i] = np.nanmedian(x[i-2:i+3])


2258_3


  y[i] = np.nanmedian(x[i-2:i+3])


2258_4


  y[i] = np.nanmedian(x[i-2:i+3])


2258_5


  y[i] = np.nanmedian(x[i-2:i+3])


2361_2


  y[i] = np.nanmedian(x[i-2:i+3])


2361_3


  y[i] = np.nanmedian(x[i-2:i+3])


2361_4


  y[i] = np.nanmedian(x[i-2:i+3])


2361_5


  y[i] = np.nanmedian(x[i-2:i+3])


2693_1


  y[i] = np.nanmedian(x[i-2:i+3])


2693_2


  y[i] = np.nanmedian(x[i-2:i+3])
  y[0:2] = np.nanmedian(x[0:5])


2693_3


  y[i] = np.nanmedian(x[i-2:i+3])


2693_4


  y[i] = np.nanmedian(x[i-2:i+3])


2693_5


  y[i] = np.nanmedian(x[i-2:i+3])


3246_1


  y[i] = np.nanmedian(x[i-2:i+3])


3246_2


  y[i] = np.nanmedian(x[i-2:i+3])


3246_3


  y[i] = np.nanmedian(x[i-2:i+3])


3246_5


  y[i] = np.nanmedian(x[i-2:i+3])


3310_1


  y[i] = np.nanmedian(x[i-2:i+3])


3310_2


  y[i] = np.nanmedian(x[i-2:i+3])


3310_3


  y[i] = np.nanmedian(x[i-2:i+3])


3310_4


  y[i] = np.nanmedian(x[i-2:i+3])


3310_5


  y[i] = np.nanmedian(x[i-2:i+3])


3572_1


  y[i] = np.nanmedian(x[i-2:i+3])


3572_2


  y[i] = np.nanmedian(x[i-2:i+3])


3572_3


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


3572_4


  y[i] = np.nanmedian(x[i-2:i+3])


3976_1


  y[i] = np.nanmedian(x[i-2:i+3])


3976_2


  y[i] = np.nanmedian(x[i-2:i+3])


3976_3


  y[i] = np.nanmedian(x[i-2:i+3])


3976_4


  y[i] = np.nanmedian(x[i-2:i+3])


3976_5


  y[i] = np.nanmedian(x[i-2:i+3])


4176_1


  y[i] = np.nanmedian(x[i-2:i+3])


4176_2


  y[i] = np.nanmedian(x[i-2:i+3])


4176_3


  y[i] = np.nanmedian(x[i-2:i+3])


4176_4


  y[i] = np.nanmedian(x[i-2:i+3])


4176_5


  y[i] = np.nanmedian(x[i-2:i+3])


4796_1


  y[i] = np.nanmedian(x[i-2:i+3])


4796_2


  y[i] = np.nanmedian(x[i-2:i+3])
  y[0:2] = np.nanmedian(x[0:5])


4796_3


  y[i] = np.nanmedian(x[i-2:i+3])


4796_4


  y[i] = np.nanmedian(x[i-2:i+3])


4796_5


  y[i] = np.nanmedian(x[i-2:i+3])


4917_1


  y[i] = np.nanmedian(x[i-2:i+3])


4917_2


  y[i] = np.nanmedian(x[i-2:i+3])


4917_3


  y[i] = np.nanmedian(x[i-2:i+3])


4917_4


  y[i] = np.nanmedian(x[i-2:i+3])


4917_5


  y[i] = np.nanmedian(x[i-2:i+3])


0365_5


  y[i] = np.nanmedian(x[i-2:i+3])


0479_1


  y[i] = np.nanmedian(x[i-2:i+3])


0479_2


  y[i] = np.nanmedian(x[i-2:i+3])


0479_3


  y[i] = np.nanmedian(x[i-2:i+3])


5531_1


  y[i] = np.nanmedian(x[i-2:i+3])


5531_3


  y[i] = np.nanmedian(x[i-2:i+3])


5531_4


  y[i] = np.nanmedian(x[i-2:i+3])


5531_5


  y[i] = np.nanmedian(x[i-2:i+3])


5741_1


  y[i] = np.nanmedian(x[i-2:i+3])


5741_2


  y[i] = np.nanmedian(x[i-2:i+3])


5741_3


  y[i] = np.nanmedian(x[i-2:i+3])


5741_4


  y[i] = np.nanmedian(x[i-2:i+3])


5741_5


  y[i] = np.nanmedian(x[i-2:i+3])


6642_1


  y[i] = np.nanmedian(x[i-2:i+3])


6642_2


  y[i] = np.nanmedian(x[i-2:i+3])


6642_3


  y[i] = np.nanmedian(x[i-2:i+3])


6642_4


  y[i] = np.nanmedian(x[i-2:i+3])


6642_5


  y[i] = np.nanmedian(x[i-2:i+3])


7093_1


  y[i] = np.nanmedian(x[i-2:i+3])


7093_2


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


7093_3


  y[i] = np.nanmedian(x[i-2:i+3])


7093_4


  y[i] = np.nanmedian(x[i-2:i+3])


7093_5


  y[i] = np.nanmedian(x[i-2:i+3])


7264_1


  y[i] = np.nanmedian(x[i-2:i+3])


7264_2


  y[i] = np.nanmedian(x[i-2:i+3])


7264_3


  y[i] = np.nanmedian(x[i-2:i+3])


7264_4


  y[i] = np.nanmedian(x[i-2:i+3])


7264_5


  y[i] = np.nanmedian(x[i-2:i+3])


7412_1


  y[i] = np.nanmedian(x[i-2:i+3])


7412_2


  y[i] = np.nanmedian(x[i-2:i+3])


7412_3


  y[i] = np.nanmedian(x[i-2:i+3])


7412_4


  y[i] = np.nanmedian(x[i-2:i+3])


7412_5


  y[i] = np.nanmedian(x[i-2:i+3])


7842_1


  y[i] = np.nanmedian(x[i-2:i+3])


7842_2


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


7842_3


  y[i] = np.nanmedian(x[i-2:i+3])


7842_4


  y[i] = np.nanmedian(x[i-2:i+3])


7842_5


  y[i] = np.nanmedian(x[i-2:i+3])


8007_1


  y[i] = np.nanmedian(x[i-2:i+3])


8007_2


  y[i] = np.nanmedian(x[i-2:i+3])


8007_3


  y[i] = np.nanmedian(x[i-2:i+3])


8007_4


  y[i] = np.nanmedian(x[i-2:i+3])


8007_5


  y[i] = np.nanmedian(x[i-2:i+3])


8469_1


  y[i] = np.nanmedian(x[i-2:i+3])


8469_2


  y[i] = np.nanmedian(x[i-2:i+3])


8469_3


  y[i] = np.nanmedian(x[i-2:i+3])


8469_4


  y[i] = np.nanmedian(x[i-2:i+3])


8469_5


  y[i] = np.nanmedian(x[i-2:i+3])


8673_1


  y[i] = np.nanmedian(x[i-2:i+3])


8673_2


  y[i] = np.nanmedian(x[i-2:i+3])


8673_3


  y[i] = np.nanmedian(x[i-2:i+3])


8673_4


  y[i] = np.nanmedian(x[i-2:i+3])


8673_5


  y[i] = np.nanmedian(x[i-2:i+3])


0479_4


  y[i] = np.nanmedian(x[i-2:i+3])


0479_5


  y[i] = np.nanmedian(x[i-2:i+3])


/365_1


  y[i] = np.nanmedian(x[i-2:i+3])


/365_2


  y[i] = np.nanmedian(x[i-2:i+3])


9472_1


  y[i] = np.nanmedian(x[i-2:i+3])


9472_2


  y[i] = np.nanmedian(x[i-2:i+3])


9472_3


  y[i] = np.nanmedian(x[i-2:i+3])


9472_4


  y[i] = np.nanmedian(x[i-2:i+3])


9472_5


  y[i] = np.nanmedian(x[i-2:i+3])


9502_1


  y[i] = np.nanmedian(x[i-2:i+3])


9502_2


  y[i] = np.nanmedian(x[i-2:i+3])


9502_3


  y[i] = np.nanmedian(x[i-2:i+3])
  y[length-2:length] = np.nanmedian(x[length-5:length])


9502_5


  y[i] = np.nanmedian(x[i-2:i+3])


9601_1


  y[i] = np.nanmedian(x[i-2:i+3])


9601_2


  y[i] = np.nanmedian(x[i-2:i+3])


9601_3


  y[i] = np.nanmedian(x[i-2:i+3])


9601_4


  y[i] = np.nanmedian(x[i-2:i+3])


9601_5


  y[i] = np.nanmedian(x[i-2:i+3])


/365_3


  y[i] = np.nanmedian(x[i-2:i+3])


/365_4


  y[i] = np.nanmedian(x[i-2:i+3])


/365_5


  y[i] = np.nanmedian(x[i-2:i+3])


/479_1


  y[i] = np.nanmedian(x[i-2:i+3])


/479_2


  y[i] = np.nanmedian(x[i-2:i+3])


/479_3


  y[i] = np.nanmedian(x[i-2:i+3])


7_1_II


  y[i] = np.nanmedian(x[i-2:i+3])


2361_1


  y[i] = np.nanmedian(x[i-2:i+3])


3246_4


  y[i] = np.nanmedian(x[i-2:i+3])


4597_2


  y[i] = np.nanmedian(x[i-2:i+3])


4597_3


  y[i] = np.nanmedian(x[i-2:i+3])


4597_4


  y[i] = np.nanmedian(x[i-2:i+3])


4597_5


  y[i] = np.nanmedian(x[i-2:i+3])


9502_4


  y[i] = np.nanmedian(x[i-2:i+3])


9586_1


  y[i] = np.nanmedian(x[i-2:i+3])


9586_2


  y[i] = np.nanmedian(x[i-2:i+3])


9586_3


  y[i] = np.nanmedian(x[i-2:i+3])


9586_4


  y[i] = np.nanmedian(x[i-2:i+3])


9586_5


  y[i] = np.nanmedian(x[i-2:i+3])


/479_4


  y[i] = np.nanmedian(x[i-2:i+3])


/479_5


  y[i] = np.nanmedian(x[i-2:i+3])


5238_1


  y[i] = np.nanmedian(x[i-2:i+3])


5238_2


  y[i] = np.nanmedian(x[i-2:i+3])


5238_3


  y[i] = np.nanmedian(x[i-2:i+3])


5238_4


  y[i] = np.nanmedian(x[i-2:i+3])


8695_2


  y[i] = np.nanmedian(x[i-2:i+3])


8695_3


  y[i] = np.nanmedian(x[i-2:i+3])


8695_4


  y[i] = np.nanmedian(x[i-2:i+3])


4597_1


  y[i] = np.nanmedian(x[i-2:i+3])


8695_5


In [None]:
data_frame.tail(50)