In [1]:
import os
import re
import numpy as np
import pandas as pd
import glob

In [2]:
# Raw collider names are too detailed for analysis, so every regex / name
# below maps a family of colliders onto one coarse category of interest.
# Keys are tried in insertion order by the classification step.

patterns = dict([
    (r'\d{2}_Sa', 'Passive_Agent'),
    (r'\d{2}_Cma', 'Active_Agent'),
    (r'Building_\d+', 'Building'),
])

# Individually named colliders classified as global landmarks.
patterns.update({
    landmark: 'Global_Landmark'
    for landmark in (
        'Castle-TaskBuilding_56', 'Crane_59', 'HighSilo-TaskBuilding_49',
        'Windmill-TaskBuilding_10_1', 'Church-TaskBuilding_16',
    )
})

# Building numbers per task-building category. Each number yields an exact
# (anchored) TaskBuilding_<n> pattern and a zero-padded Graffity_<nn> pattern.
_public_ids = (
    2, 3, 5, 8, 9, 11, 13, 14, 20, 21, 23, 27,
    29, 32, 34, 38, 41, 42, 44, 45, 47, 50, 51, 52,
)
_residential_ids = (
    1, 4, 6, 7, 12, 15, 17, 18, 19, 22, 24, 25, 26,
    28, 30, 31, 33, 35, 36, 37, 39, 40, 43, 48, 54, 55,
)

# Public task buildings: numbered buildings, two named colliders, then graffiti.
patterns.update({f'^TaskBuilding_{n}$': 'TaskBuilding_Public' for n in _public_ids})
patterns.update({
    name: 'TaskBuilding_Public'
    for name in ('BasketballCourt_58', 'Construction_57')
})
patterns.update({f'^Graffity_{n:02d}$': 'TaskBuilding_Public' for n in _public_ids})

# Residential task buildings: numbered buildings, then graffiti.
patterns.update({f'^TaskBuilding_{n}$': 'TaskBuilding_Residential' for n in _residential_ids})
patterns.update({f'^Graffity_{n:02d}$': 'TaskBuilding_Residential' for n in _residential_ids})

# Category assigned when no pattern matches.
default_val = 'Background'

In [3]:
path = "/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders"

# Every CSV in the folder is processed. glob returns paths in an arbitrary,
# filesystem-dependent order, so sort them to get a reproducible row order
# in the combined frame.
files = sorted(glob.glob(path + "/*.csv"))
if not files:
    # Without this check pd.concat([]) fails at the very end with an
    # uninformative "No objects to concatenate" error.
    raise FileNotFoundError(f"No CSV files found in {path}")

# Compile the classification regexes once instead of looking them up per row.
compiled_patterns = [(re.compile(key), val) for key, val in patterns.items()]


def classify_collider(name):
    """Return the category of the first pattern matching the start of `name`.

    Patterns are tried in the insertion order of `patterns`; `default_val`
    is returned when none of them matches.
    """
    for regex, category in compiled_patterns:
        if regex.match(name):
            return category
    return default_val


# One cleaned frame per input file; concatenated once after the loop.
content = []

for filename in files:
    df = pd.read_csv(filename)
    # Columns that are not used by any later step of this notebook.
    df = df.drop(columns=['rayCastHitsCombinedEyes', 'timeStampGetVerboseData', 'hitObjectColliderBoundsCenter', 'timeStampDataPointStart',
                          'bodyTrackerRotation.x', 'bodyTrackerRotation.y', 'bodyTrackerRotation.z'])

    # Since we have two collider hits per frame, compute the distance between
    # each hit point and the participant's body position.
    hit_point = df[["hitPointOnObject_x", "hitPointOnObject_y", "hitPointOnObject_z"]].to_numpy()
    body_position = df[["playerBodyPosition.x", "playerBodyPosition.y", "playerBodyPosition.z"]].to_numpy()
    df['Eucledian_distance'] = np.linalg.norm(hit_point - body_position, axis=1)

    # Map the detailed collider name onto the more general categorical variable.
    df['Collider_Categorical'] = df['hitObjectColliderName'].apply(classify_collider)
    # Distance of the row directly above (NaN for the first row of the file).
    df['Previous_Euclidean_value'] = df['Eucledian_distance'].shift(1)

    # A second hit is kept only if it is not background and is closer to the
    # participant than the row directly above it.
    df['Collider_stays'] = (df["ordinalOfHit"] == 2) & (df['Collider_Categorical'] != 'Background') & (df['Eucledian_distance'] < df['Previous_Euclidean_value'])
    # Keeps the original row number as an 'index' column (dropped in a later cell).
    df = df.reset_index()

    # Drop all second hits that do not meet the criteria.
    rejected_second_hits = df[(df["ordinalOfHit"] == 2) & ~df['Collider_stays']].index
    depleted_data = df.drop(index=rejected_second_hits).reset_index(drop=True)

    # For every second hit that stays, the row directly above it (the first
    # collider) has to go instead, hence the shift by one.
    kept_second_hits = depleted_data[depleted_data['Collider_stays']].index
    depleted_data_1 = depleted_data.drop(index=kept_second_hits - 1)

    # Differentiate face from body hits on agents by the collider type.
    depleted_data_1["Face_Hits"] = "Not_Agent"
    is_agent = depleted_data_1["Collider_Categorical"].str.contains(pat="_Agent")
    mask_Face = is_agent & (depleted_data_1["hitColliderType"] == "UnityEngine.SphereCollider")
    mask_Body = is_agent & (depleted_data_1["hitColliderType"] == "UnityEngine.BoxCollider")
    depleted_data_1.loc[mask_Face, 'Face_Hits'] = "Face"
    depleted_data_1.loc[mask_Body, 'Face_Hits'] = "Body"

    content.append(depleted_data_1)
    print(filename)

# Combine all participants into a single frame.
data_frame = pd.concat(content)


/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders/3246.csv
/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders/5238.csv
/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders/0365.csv
/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders/0479.csv
/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders/1754.csv
/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders/2258.csv
/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders/2361.csv
/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders/2693.csv
/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders/3310.csv
/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/02_Individuals_Colliders/3572.csv
/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/

In [4]:
# Display the column labels of the combined frame to check which columns survived the cleaning step.
data_frame.columns

Index(['index', 'SubjectID', 'Session', 'SessionSubsection',
       'timeStampDataPointEnd', 'eyeOpennessLeft', 'eyeOpennessRight',
       'pupilDiameterMillimetersLeft', 'pupilDiameterMillimetersRight',
       'leftGazeValidityBitmask',
       ...
       'hitColliderType', 'ordinalOfHit', 'hitPointOnObject_x',
       'hitPointOnObject_y', 'hitPointOnObject_z', 'Eucledian_distance',
       'Collider_Categorical', 'Previous_Euclidean_value', 'Collider_stays',
       'Face_Hits'],
      dtype='object', length=109)

In [5]:
# Remove the helper columns that were only needed to choose between collider hits.
# Re-binding the name and using errors='ignore' keeps this cell safe to re-run:
# the in-place version raised KeyError once the columns were already gone.
data_frame = data_frame.drop(
    columns=['index', 'Collider_stays', 'Previous_Euclidean_value'],
    errors='ignore',
)

In [6]:
# Persist the combined, categorised data set. The row index is written as the
# first (unnamed) column because `index` is left at its default.
data_frame.to_csv(
    path_or_buf="/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/Data_Sets/Complete_data_Categorical_L.csv"
)

In [7]:
# NOTE(review): `Trash` is not defined in the visible notebook, so this cell raises NameError.
# Presumably a deliberate stop so that "Run All" halts before the time-processing section — confirm,
# and consider replacing it with a markdown note, since the cells below do not run on a plain Run All.
Trash

NameError: name 'Trash' is not defined

## Dealing with time 
 
- Create continuous time
- Delete duplicate time stamps
- Create individual csv files per subject per session

In [None]:
# Reload the combined data set from disk (lets this section run without repeating the cells above).
# The file was saved with its row index as the first, unnamed column; index_col=0 restores it as
# the index instead of creating a spurious 'Unnamed: 0' data column that the in-memory frame lacks.
data_frame = pd.read_csv(
    "/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/Data_Sets/Complete_data_Categorical_L.csv",
    index_col=0,
)

In [8]:
# Continuous time only makes sense within a single session, so count the rows
# of every (SubjectID, Session) combination that is present in the data.
all_participants_sessions = (
    data_frame.groupby(["SubjectID", "Session"])["hitColliderType"]
    .count()
    .to_frame()
    .reset_index()
)
# Subjects can have a different number of sessions; listing only the existing
# (subject, session) pairs lets the next step subset the data without empty combinations.
L_all_participants_sessions = list(
    zip(all_participants_sessions["SubjectID"], all_participants_sessions["Session"])
)

In [9]:
# Number of (subject, session) pairs found in the data.
len(L_all_participants_sessions)

147

In [10]:
# Bounds on the difference between consecutive time stamps (same units as
# timeStampDataPointEnd). Rows at or below the lower bound are treated as
# duplicate time stamps; rows at or above the upper bound follow a gap in the recording.
MIN_TIME_STEP = 0.001
MAX_TIME_STEP = 1

# Filtered frame of every processed (subject, session) pair.
temporalss = []
for tuples in L_all_participants_sessions:
    subject, session = tuples
    temporal = data_frame[(data_frame['SubjectID'] == subject) & (data_frame['Session'] == session)]
    # Order the session chronologically before differencing the time stamps.
    temporal_cr = temporal.sort_values(by=['timeStampDataPointEnd']).reset_index(drop=True)
    temporal_cr["Time_Shift"] = temporal_cr["timeStampDataPointEnd"].diff()

    # Delete duplicate time stamps (and rows that follow a large gap). The first
    # row of a session has a NaN Time_Shift and is therefore dropped as well.
    valid_step = (temporal_cr["Time_Shift"] > MIN_TIME_STEP) & (temporal_cr["Time_Shift"] < MAX_TIME_STEP)
    temporal_c_ND = temporal_cr[valid_step].copy()
    if temporal_c_ND.empty:
        # Nothing left to anchor the continuous time on; skip instead of
        # failing with an IndexError on the first-row lookup below.
        print(f"Skipping {tuples}: no rows left after time-stamp filtering")
        continue

    # Time elapsed since the first remaining sample, divided by 60 and rounded
    # to 3 decimals (presumably seconds converted to minutes — confirm units).
    first_time = temporal_c_ND["timeStampDataPointEnd"].iloc[0]
    temporal_c_ND["Continuous_Time"] = np.round((temporal_c_ND["timeStampDataPointEnd"] - first_time) / 60, 3)

    # One CSV per subject per session.
    temporal_c_ND.to_csv(f"/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/{subject}_{session}.csv", index=True)
    temporalss.append(temporal_c_ND)
    print(tuples)

(365, 1)
(365, 2)
(365, 3)
(365, 4)
(365, 5)
(479, 1)
(479, 2)
(479, 3)
(479, 4)
(479, 5)
(1754, 1)
(1754, 2)
(1754, 3)
(1754, 4)
(1754, 5)
(2258, 1)
(2258, 2)
(2258, 3)
(2258, 4)
(2258, 5)
(2361, 1)
(2361, 2)
(2361, 3)
(2361, 4)
(2361, 5)
(2693, 1)
(2693, 2)
(2693, 3)
(2693, 4)
(2693, 5)
(3246, 1)
(3246, 2)
(3246, 3)
(3246, 4)
(3246, 5)
(3310, 1)
(3310, 2)
(3310, 3)
(3310, 4)
(3310, 5)
(3572, 1)
(3572, 2)
(3572, 3)
(3572, 4)
(3976, 1)
(3976, 2)
(3976, 3)
(3976, 4)
(3976, 5)
(4176, 1)
(4176, 2)
(4176, 3)
(4176, 4)
(4176, 5)
(4597, 1)
(4597, 2)
(4597, 3)
(4597, 4)
(4597, 5)
(4796, 1)
(4796, 2)
(4796, 3)
(4796, 4)
(4796, 5)
(4917, 1)
(4917, 2)
(4917, 3)
(4917, 4)
(4917, 5)
(5238, 1)
(5238, 2)
(5238, 3)
(5238, 4)
(5531, 1)
(5531, 2)
(5531, 3)
(5531, 4)
(5531, 5)
(5741, 1)
(5741, 2)
(5741, 3)
(5741, 4)
(5741, 5)
(6642, 1)
(6642, 2)
(6642, 3)
(6642, 4)
(6642, 5)
(7093, 1)
(7093, 2)
(7093, 3)
(7093, 4)
(7093, 5)
(7264, 1)
(7264, 2)
(7264, 3)
(7264, 4)
(7264, 5)
(7412, 1)
(7412, 2)
(7412, 3)


## Change files that were damaged during recordings

In [11]:
# Participant 2361: session 1 keeps only sub-section 1, session 2 drops sub-section 1.
# The session files store the row index as their first column; index_col=0 reads it back as the
# index so the rewritten file keeps the same columns (no extra 'Unnamed: 0') and the cell can be re-run.
a2361_1 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/2361_1.csv", index_col=0)
a2361_1_1 = a2361_1[a2361_1.SessionSubsection == 1].copy()
a2361_1_1.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/2361_1.csv")
a2361_2 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/2361_2.csv", index_col=0)
a2361_2 = a2361_2[a2361_2.SessionSubsection != 1].copy()
a2361_2.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/2361_2.csv")

In [12]:
# Participant 3246: session 4 drops sub-section 1, session 5 drops sub-section 2.
# The session files store the row index as their first column; index_col=0 reads it back as the
# index so the rewritten file keeps the same columns (no extra 'Unnamed: 0') and the cell can be re-run.
a3246_4 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/3246_4.csv", index_col=0)
a3246_4 = a3246_4[a3246_4.SessionSubsection != 1].copy()
a3246_4.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/3246_4.csv")
a3246_5 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/3246_5.csv", index_col=0)
a3246_5 = a3246_5[a3246_5.SessionSubsection != 2].copy()
a3246_5.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/3246_5.csv")

In [13]:
# Participant 3310, session 4: drop sub-sections 2 and 3.
# The session files store the row index as their first column; index_col=0 reads it back as the
# index so the rewritten file keeps the same columns (no extra 'Unnamed: 0') and the cell can be re-run.
a3310_4 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/3310_4.csv", index_col=0)
a3310_4 = a3310_4[~a3310_4.SessionSubsection.isin([2, 3])].copy()
a3310_4.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/3310_4.csv")

In [14]:
# Participant 3976: sessions 1 and 3 both drop sub-sections 1 and 4.
# The session files store the row index as their first column; index_col=0 reads it back as the
# index so the rewritten file keeps the same columns (no extra 'Unnamed: 0') and the cell can be re-run.
a3976_1 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/3976_1.csv", index_col=0)
a3976_1 = a3976_1[~a3976_1.SessionSubsection.isin([1, 4])].copy()
a3976_1.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/3976_1.csv")
a3976_3 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/3976_3.csv", index_col=0)
a3976_3 = a3976_3[~a3976_3.SessionSubsection.isin([1, 4])].copy()
a3976_3.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/3976_3.csv")

In [15]:
# Participant 5741: session 4 drops sub-sections 1 and 2, session 5 drops sub-section 4.
# The session files store the row index as their first column; index_col=0 reads it back as the
# index so the rewritten file keeps the same columns (no extra 'Unnamed: 0') and the cell can be re-run.
a5741_4 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/5741_4.csv", index_col=0)
a5741_4 = a5741_4[~a5741_4.SessionSubsection.isin([1, 2])].copy()
a5741_4.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/5741_4.csv")
a5741_5 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/5741_5.csv", index_col=0)
# NOTE(review): the original condition tested `SessionSubsection != 4` twice. The redundant copy is
# removed here without changing the result; if a second sub-section was meant to be excluded, add it.
a5741_5 = a5741_5[a5741_5.SessionSubsection != 4].copy()
a5741_5.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/5741_5.csv")

In [16]:
# Participant 5531: session 1 drops sub-section 3; the session 2 file is deleted entirely.
# The session files store the row index as their first column; index_col=0 reads it back as the
# index so the rewritten file keeps the same columns (no extra 'Unnamed: 0') and the cell can be re-run.
a5531_1 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/5531_1.csv", index_col=0)
a5531_1 = a5531_1[a5531_1.SessionSubsection != 3].copy()
a5531_1.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/5531_1.csv")
# Only remove the file if it is still there, so re-running the cell does not raise FileNotFoundError.
if os.path.exists("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/5531_2.csv"):
    os.remove("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/5531_2.csv")

In [17]:
# Participant 7264, session 1: drop sub-sections 3 and 4.
# The session files store the row index as their first column; index_col=0 reads it back as the
# index so the rewritten file keeps the same columns (no extra 'Unnamed: 0') and the cell can be re-run.
a7264_1 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/7264_1.csv", index_col=0)
a7264_1 = a7264_1[~a7264_1.SessionSubsection.isin([3, 4])].copy()
a7264_1.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/7264_1.csv")

In [18]:
# Participant 8469, session 1: drop sub-sections 2 and 3.
# The session files store the row index as their first column; index_col=0 reads it back as the
# index so the rewritten file keeps the same columns (no extra 'Unnamed: 0') and the cell can be re-run.
a8469_1 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/8469_1.csv", index_col=0)
a8469_1 = a8469_1[~a8469_1.SessionSubsection.isin([2, 3])].copy()
a8469_1.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/8469_1.csv")

In [19]:
# Participant 9601, session 5: drop sub-section 4.
# The session files store the row index as their first column; index_col=0 reads it back as the
# index so the rewritten file keeps the same columns (no extra 'Unnamed: 0') and the cell can be re-run.
a9601_5 = pd.read_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/9601_5.csv", index_col=0)
a9601_5 = a9601_5[a9601_5.SessionSubsection != 4].copy()
# Write the filtered 9601 frame back to its own file. The original line saved `a8469_1` to
# 8469_1.csv again (copy-paste slip), so this fix was never persisted.
a9601_5.to_csv("/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions/9601_5.csv")

In [None]:
# Display how many rows of the filtered 9601 session-5 frame fall into each collider category.
a9601_5.Collider_Categorical.value_counts()