In [1]:
import os
import re
import numpy as np
import pandas as pd
import glob

In [2]:
# Since the collider names are too detailed, here we create a dictionary with patterns
# to classify them into our categories of interest.
#
# Keys are regular expressions and values are category labels. The keys are written as
# raw strings / f-strings without backslashes so that sequences such as "\d" reach the
# regex engine untouched (a plain '\d' literal is an invalid string escape and triggers
# a warning on recent Python versions).
# Insertion order is kept identical to the hand-written version, because the first
# matching key decides the category.

# Numeric ids of the task buildings in each category. Every id is used twice: once as
# "TaskBuilding_<id>" and once as the zero-padded "Graffity_<id>" collider.
PUBLIC_TASK_BUILDING_IDS = [2, 3, 5, 8, 9, 11, 13, 14, 20, 21, 23, 27, 29, 32, 34, 38,
                            41, 42, 44, 45, 47, 50, 51, 52]
RESIDENTIAL_TASK_BUILDING_IDS = [1, 4, 6, 7, 12, 15, 17, 18, 19, 22, 24, 25, 26, 28, 30,
                                 31, 33, 35, 36, 37, 39, 40, 43, 48, 54, 55]


def _task_building_keys(ids):
    """Return the fully anchored patterns '^TaskBuilding_<id>$' for the given ids."""
    return [f"^TaskBuilding_{building_id}$" for building_id in ids]


def _graffiti_keys(ids):
    """Return the fully anchored patterns '^Graffity_<id>$' with two-digit, zero-padded ids."""
    return [f"^Graffity_{building_id:02d}$" for building_id in ids]


patterns = {r'\d{2}_Sa': 'Passive_Agent', r'\d{2}_Cma': 'Active_Agent', r'Building_\d+': 'Building'}
patterns.update(dict.fromkeys(
    ['Castle-TaskBuilding_56', 'Crane_59', 'HighSilo-TaskBuilding_49',
     'Windmill-TaskBuilding_10_1', 'Church-TaskBuilding_16'],
    'Global_Landmark'))
patterns.update(dict.fromkeys(
    _task_building_keys(PUBLIC_TASK_BUILDING_IDS)
    + ['BasketballCourt_58', 'Construction_57']
    + _graffiti_keys(PUBLIC_TASK_BUILDING_IDS),
    'TaskBuilding_Public'))
patterns.update(dict.fromkeys(
    _task_building_keys(RESIDENTIAL_TASK_BUILDING_IDS)
    + _graffiti_keys(RESIDENTIAL_TASK_BUILDING_IDS),
    'TaskBuilding_Residential'))

# Category assigned to every collider name that matches none of the patterns.
default_val = 'Background'

In [3]:
path = "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders"

# Raw columns that are dropped right after loading each file.
UNUSED_COLUMNS = [
    'Unnamed: 0', 'rayCastHitsCombinedEyes', 'timeStampGetVerboseData',
    'hitObjectColliderBoundsCenter', 'timeStampDataPointStart',
    'bodyTrackerPosition.x', 'bodyTrackerPosition.y', 'bodyTrackerPosition.z',
    'hmdPosition.x', 'hmdPosition.y', 'hmdPosition.z',
    'hmdDirectionForward.x', 'hmdDirectionForward.y', 'hmdDirectionForward.z',
    'hmdRotation.x', 'hmdRotation.y', 'hmdRotation.z',
    'hmdDirectionUp.x', 'hmdDirectionUp.y', 'hmdDirectionUp.z',
    'bodyTrackerRotation.x', 'bodyTrackerRotation.y', 'bodyTrackerRotation.z',
]
HIT_POINT_COLUMNS = ["hitPointOnObject_x", "hitPointOnObject_y", "hitPointOnObject_z"]
BODY_POSITION_COLUMNS = ["playerBodyPosition.x", "playerBodyPosition.y", "playerBodyPosition.z"]


def select_collider_hits(df, category_patterns=None, fallback=None):
    """Classify collider hits and keep one hit per pair of first/second hits.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw hits. Must contain 'hitObjectColliderName', 'ordinalOfHit',
        'hitColliderType' and the hit-point / player-body position columns.
        The frame is not modified.
    category_patterns : dict, optional
        Mapping regex -> category; the first key for which ``re.match`` succeeds wins.
        Defaults to the notebook-level ``patterns`` dictionary.
    fallback : str, optional
        Category for names matching no pattern. Defaults to the notebook-level
        ``default_val``.

    Returns
    -------
    pandas.DataFrame
        The surviving rows with the added columns 'Eucledian_distance',
        'Collider_Categorical', 'Previous_Euclidean_value', 'Collider_stays' and
        'Face_Hits', plus the previous index stored as a column by ``reset_index``.
    """
    if category_patterns is None:
        category_patterns = patterns
    if fallback is None:
        fallback = default_val

    def classify(name):
        # First matching pattern decides the category.
        return next((label for regex, label in category_patterns.items() if re.match(regex, name)),
                    fallback)

    # Since we have two collider hits per frame, we calculate the distance between
    # each hit and the participant (plain arrays on both sides of the subtraction).
    offsets = df[HIT_POINT_COLUMNS].to_numpy() - df[BODY_POSITION_COLUMNS].to_numpy()
    hits = df.assign(
        Eucledian_distance=np.linalg.norm(offsets, axis=1),
        # More general/informative variable derived from the detailed collider name.
        Collider_Categorical=df['hitObjectColliderName'].apply(classify),
    )
    hits['Previous_Euclidean_value'] = hits['Eucledian_distance'].shift(1)

    # A second hit stays only if it is not background and is closer to the
    # participant than the row directly above it.
    hits['Collider_stays'] = (
        (hits["ordinalOfHit"] == 2)
        & (hits['Collider_Categorical'] != fallback)
        & (hits['Eucledian_distance'] < hits['Previous_Euclidean_value'])
    )
    # Keeps the previous index as a column ('index', or 'level_0' when an 'index'
    # column already exists); a later cell drops these helper columns.
    hits = hits.reset_index()

    # Drop all the second-hit colliders that do not comply with the criteria.
    rejected_second = hits[(hits["ordinalOfHit"] == 2) & ~hits['Collider_stays']].index
    depleted = hits.drop(index=rejected_second).reset_index(drop=True)

    # For every second hit that stays, the row directly on top has to go instead.
    # NOTE(review): assumes that row is the first hit of the same frame - confirm.
    kept_second = depleted[depleted['Collider_stays']].index
    selected = depleted.drop(index=kept_second - 1)

    # Differentiate body from face hits on agents via the Unity collider type.
    selected["Face_Hits"] = "Not_Agent"
    is_agent = selected["Collider_Categorical"].str.contains(pat="_Agent")
    selected.loc[is_agent & (selected["hitColliderType"] == "UnityEngine.SphereCollider"), 'Face_Hits'] = "Face"
    selected.loc[is_agent & (selected["hitColliderType"] == "UnityEngine.BoxCollider"), 'Face_Hits'] = "Body"
    return selected


# csv files in the path (sorted so that the processing order is reproducible)
files = sorted(glob.glob(path + "/*.csv"))

# list collecting the processed frame of every file
content = []

# checking all the csv files in the specified path
for filename in files:
    raw_hits = pd.read_csv(filename).drop(columns=UNUSED_COLUMNS)
    content.append(select_collider_hits(raw_hits))
    print(filename)

# converting content to data frame
if content:
    # ignore_index gives the combined frame unique row labels instead of repeating
    # each file's own 0..n labels.
    data_frame = pd.concat(content, ignore_index=True)
else:
    # pd.concat raises on an empty list; say what is actually wrong instead.
    print(f"No csv files found in {path}")
    data_frame = pd.DataFrame()


/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders/1031.csv
/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders/1268.csv
/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders/1574.csv
/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders/1843.csv
/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders/2069.csv
/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders/3193.csv
/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders/3540.csv
/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders/4580.csv
/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders/4598.csv
/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/02_Individuals_Colliders/4847.csv
/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/

In [4]:
# Display the column labels of the combined frame.
data_frame.columns

Index(['level_0', 'index', 'SubjectID', 'Session', 'SessionSubsection',
       'timeStampDataPointEnd', 'combinedGazeValidityBitmask',
       'eyePositionCombinedWorld.x', 'eyePositionCombinedWorld.y',
       'eyePositionCombinedWorld.z', 'eyeDirectionCombinedWorld.y',
       'eyeDirectionCombinedWorld.z', 'eyeDirectionCombinedLocal.x',
       'eyeDirectionCombinedLocal.y', 'eyeDirectionCombinedLocal.z',
       'playerBodyPosition.x', 'playerBodyPosition.y', 'playerBodyPosition.z',
       'hitColliderType', 'hitObjectColliderName', 'ordinalOfHit',
       'hitPointOnObject_x', 'hitPointOnObject_y', 'hitPointOnObject_z',
       'Eucledian_distance', 'Collider_Categorical',
       'Previous_Euclidean_value', 'Collider_stays', 'Face_Hits'],
      dtype='object')

In [5]:
# Remove the helper columns that were only needed while choosing between collider hits.
# Reassigning the result and using errors="ignore" keeps the cell safe to re-run:
# a second execution no longer fails with KeyError on columns that are already gone.
data_frame = data_frame.drop(
    columns=['level_0', 'index', 'Collider_stays', 'Previous_Euclidean_value'],
    errors="ignore",
)

In [6]:
# Persist the combined data set; the index is written as well, since to_csv is left
# at its default for that option.
complete_dataset_csv = "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/Data_Sets/Complete_data_Categorical_L.csv"
data_frame.to_csv(path_or_buf=complete_dataset_csv)

In [7]:
# NOTE(review): `Trash` is never defined, so executing this cell raises NameError
# (see the recorded output). Presumably a deliberate stop so that "Run All" halts
# before the time-processing section - confirm, or delete the cell.
Trash

NameError: name 'Trash' is not defined

## Dealing with time 
 
- Create continuous time 
- Delete duplicate time stamps
- Create individual csv files per subject per session

In [None]:
# Reload the combined data set from disk. The file was written together with its
# index, so the first column is read back as the index; otherwise it would show up
# as an extra "Unnamed: 0" data column that the in-memory frame does not have.
data_frame = pd.read_csv(
    "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/Data_Sets/Complete_data_Categorical_L.csv",
    index_col=0,
)

In [8]:
# Continuous time only makes sense within a single session, so list every
# (SubjectID, Session) combination that occurs in the data, together with its row count.
all_participants_sessions = (
    data_frame.groupby(["SubjectID", "Session"])["hitColliderType"]
    .count()
    .reset_index()
)
# Subjects can have a different number of sessions; building the pairs from the data
# means we later subset only for subject-session pairs that actually exist.
L_all_participants_sessions = list(
    zip(all_participants_sessions["SubjectID"], all_participants_sessions["Session"])
)

In [9]:
# Number of (SubjectID, Session) pairs that will be processed.
len(L_all_participants_sessions)

145

In [10]:
TIME_COLUMN = "timeStampDataPointEnd"
# A sample is kept only when the gap to the previous sample lies strictly between
# these two bounds (same unit as TIME_COLUMN).
MIN_TIME_STEP = 0.001
MAX_TIME_STEP = 1
SESSION_OUTPUT_DIR = "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Pre_processed/03_Individuals_IndividualSessions"


def add_continuous_time(session_rows, min_step=MIN_TIME_STEP, max_step=MAX_TIME_STEP):
    """Order one session by time, drop duplicate/gapped samples and add elapsed time.

    Parameters
    ----------
    session_rows : pandas.DataFrame
        Rows of a single subject-session pair; must contain 'timeStampDataPointEnd'.
    min_step, max_step : float
        Exclusive bounds on the difference to the previous timestamp. Rows outside
        the bounds are removed; this also removes the first row, whose difference
        is NaN.

    Returns
    -------
    pandas.DataFrame
        The surviving rows with the extra columns 'Time_Shift' and 'Continuous_Time'
        (time since the first surviving row, divided by 60 and rounded to 3 decimals;
        presumably seconds converted to minutes - confirm). May be empty.
    """
    # A stable sort keeps rows with identical timestamps in their original order, so
    # which of them survives the duplicate filter is deterministic.
    ordered = session_rows.sort_values(by=[TIME_COLUMN], kind="mergesort").reset_index(drop=True)
    ordered["Time_Shift"] = ordered[TIME_COLUMN].diff()
    # Delete duplicate time stamps (step too small) and samples following a gap.
    in_range = (ordered["Time_Shift"] > min_step) & (ordered["Time_Shift"] < max_step)
    kept = ordered[in_range].copy()
    stamps = kept[TIME_COLUMN]
    # Guard against sessions where nothing survives the filter.
    origin = stamps.iloc[0] if len(stamps) else 0.0
    kept["Continuous_Time"] = np.round((stamps - origin) / 60, 3)
    return kept


temporalss = []
for pair in L_all_participants_sessions:
    subject, session = pair
    session_rows = data_frame[(data_frame['SubjectID'] == subject) & (data_frame['Session'] == session)]
    session_timed = add_continuous_time(session_rows)
    if session_timed.empty:
        # Previously this case crashed with IndexError on iloc[0].
        print(f"Skipping {pair}: no samples left after time filtering")
        continue
    # One csv file per subject per session.
    session_timed.to_csv(f"{SESSION_OUTPUT_DIR}/{subject}_{session}.csv", index=True)
    temporalss.append(session_timed)
    print(pair)

(1031, 1)
(1031, 2)
(1031, 3)
(1031, 4)
(1031, 5)
(1142, 1)
(1142, 2)
(1142, 3)
(1142, 4)
(1142, 5)
(1234, 1)
(1234, 2)
(1234, 3)
(1234, 4)
(1234, 5)
(1268, 1)
(1268, 2)
(1268, 3)
(1268, 4)
(1268, 5)
(1574, 1)
(1574, 2)
(1574, 3)
(1574, 4)
(1574, 5)
(1843, 1)
(1843, 2)
(1843, 3)
(1843, 4)
(1843, 5)
(2069, 1)
(2069, 2)
(2069, 3)
(2069, 4)
(2069, 5)
(3193, 1)
(3193, 2)
(3193, 3)
(3193, 4)
(3193, 5)
(3540, 1)
(3540, 2)
(3540, 3)
(3540, 4)
(3540, 5)
(4580, 1)
(4580, 2)
(4580, 3)
(4580, 4)
(4580, 5)
(4598, 1)
(4598, 2)
(4598, 3)
(4598, 4)
(4598, 5)
(4847, 1)
(4847, 2)
(4847, 3)
(4847, 4)
(4847, 5)
(4875, 1)
(4875, 2)
(4875, 3)
(4875, 4)
(4875, 5)
(5161, 1)
(5161, 2)
(5161, 3)
(5161, 4)
(5161, 5)
(5189, 1)
(5189, 2)
(5189, 3)
(5189, 4)
(5189, 5)
(5191, 1)
(5191, 2)
(5191, 3)
(5191, 4)
(5191, 5)
(5743, 1)
(5743, 2)
(5743, 3)
(5743, 4)
(5743, 5)
(5766, 1)
(5766, 2)
(5766, 3)
(5766, 4)
(5766, 5)
(5851, 1)
(5851, 2)
(5851, 3)
(5851, 4)
(5851, 5)
(5972, 1)
(5972, 2)
(5972, 3)
(5972, 4)
(5972, 5)
