In [1]:
import os
import re
import numpy as np
import pandas as pd
import glob

In [2]:
# The raw collider names are too detailed for the analysis, so each name is mapped to
# one of a few categories of interest. Every key of `patterns` is a regular expression;
# the keys are tried in insertion order with re.match (anchored at the start of the
# collider name) and the first one that matches decides the category.
#
# Literal collider names are escaped and followed by a "(?!\d)" guard. Without it a short
# id is a prefix of longer ones, e.g. 'TaskBuilding_2' (public) would also match
# 'TaskBuilding_22' (residential) and win because it is listed first.
def _exact_ids(names):
    """Return one regex per literal collider name that rejects a longer numeric id."""
    return [re.escape(name) + r'(?!\d)' for name in names]


patterns = {
    r'\d{2}_Sa': 'Passive_Agent',
    r'\d{2}_Cma': 'Active_Agent',
    r'Building_\d+': 'Building',
}
patterns.update(dict.fromkeys(_exact_ids([
    'Castle-TaskBuilding_56', 'Crane_59', 'HighSilo-TaskBuilding_49',
    'Windmill-TaskBuilding_10_1', 'Church-TaskBuilding_16',
]), 'Global_Landmark'))
patterns.update(dict.fromkeys(_exact_ids([
    'TaskBuilding_2', 'TaskBuilding_3', 'TaskBuilding_5', 'TaskBuilding_8', 'TaskBuilding_9',
    'TaskBuilding_11', 'TaskBuilding_13', 'TaskBuilding_14', 'TaskBuilding_20', 'TaskBuilding_21',
    'TaskBuilding_23', 'TaskBuilding_27', 'TaskBuilding_29', 'TaskBuilding_32', 'TaskBuilding_34',
    'TaskBuilding_38', 'TaskBuilding_41', 'TaskBuilding_42', 'TaskBuilding_44', 'TaskBuilding_45',
    'TaskBuilding_47', 'TaskBuilding_50', 'TaskBuilding_51', 'TaskBuilding_52',
    'BasketballCourt_58', 'Construction_57',
    'Graffity_02', 'Graffity_03', 'Graffity_05', 'Graffity_08', 'Graffity_09',
    'Graffity_11', 'Graffity_13', 'Graffity_14', 'Graffity_20', 'Graffity_21',
    'Graffity_23', 'Graffity_27', 'Graffity_29', 'Graffity_32', 'Graffity_34',
    'Graffity_38', 'Graffity_41', 'Graffity_42', 'Graffity_44', 'Graffity_45',
    'Graffity_47', 'Graffity_50', 'Graffity_51', 'Graffity_52',
]), 'TaskBuilding_Public'))
patterns.update(dict.fromkeys(_exact_ids([
    'TaskBuilding_1', 'TaskBuilding_4', 'TaskBuilding_6', 'TaskBuilding_7', 'TaskBuilding_12',
    'TaskBuilding_15', 'TaskBuilding_17', 'TaskBuilding_18', 'TaskBuilding_19', 'TaskBuilding_22',
    'TaskBuilding_24', 'TaskBuilding_25', 'TaskBuilding_26', 'TaskBuilding_28', 'TaskBuilding_30',
    'TaskBuilding_31', 'TaskBuilding_33', 'TaskBuilding_35', 'TaskBuilding_36', 'TaskBuilding_37',
    'TaskBuilding_39', 'TaskBuilding_40', 'TaskBuilding_43', 'TaskBuilding_48', 'TaskBuilding_54',
    'TaskBuilding_55',
    'Graffity_01', 'Graffity_04', 'Graffity_06', 'Graffity_07', 'Graffity_12',
    'Graffity_15', 'Graffity_17', 'Graffity_18', 'Graffity_19', 'Graffity_22',
    'Graffity_24', 'Graffity_25', 'Graffity_26', 'Graffity_28', 'Graffity_30',
    'Graffity_31', 'Graffity_33', 'Graffity_35', 'Graffity_36', 'Graffity_37',
    'Graffity_39', 'Graffity_40', 'Graffity_43', 'Graffity_48', 'Graffity_54',
    'Graffity_55',
]), 'TaskBuilding_Residential'))
# Category used when no pattern matches
default_val = 'Background'

In [3]:
path = "/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders"

# csv files in the path; sorted so the row order of the concatenated result does not
# depend on the order in which glob happens to list the directory
files = sorted(glob.glob(path + "/*.csv"))

# Columns that are dropped right after loading
unused_columns = [
    'Unnamed: 0', 'rayCastHitsCombinedEyes', 'timeStampGetVerboseData',
    'hitObjectColliderBoundsCenter', 'timeStampDataPointStart',
    'bodyTrackerPosition.x', 'bodyTrackerPosition.y', 'bodyTrackerPosition.z',
    'hmdPosition.x', 'hmdPosition.y', 'hmdPosition.z',
    'hmdDirectionForward.x', 'hmdDirectionForward.y', 'hmdDirectionForward.z',
    'hmdRotation.x', 'hmdRotation.y', 'hmdRotation.z',
    'hmdDirectionUp.x', 'hmdDirectionUp.y', 'hmdDirectionUp.z',
    'bodyTrackerRotation.x', 'bodyTrackerRotation.y', 'bodyTrackerRotation.z',
]
hit_point_columns = ["hitPointOnObject_x", "hitPointOnObject_y", "hitPointOnObject_z"]
player_position_columns = ["playerBodyPosition.x", "playerBodyPosition.y", "playerBodyPosition.z"]

# One processed frame per csv file; concatenated once after the loop
content = []

for filename in files:
    df = pd.read_csv(filename).drop(columns=unused_columns)

    # Since we have two collider hits per frame, we calculate the distance between each hit
    # and the participant (plain arrays on both sides, so the subtraction is positional)
    df['Eucledian_distance'] = np.linalg.norm(
        df[hit_point_columns].values - df[player_position_columns].values, axis=1)

    # Look up the first pattern that matches the collider name and store the more
    # general/informative category; names without a match fall back to default_val
    df['Collider_Categorical'] = df['hitObjectColliderName'].apply(
        lambda x: next((val for key, val in patterns.items() if re.match(key, x)), default_val))

    # Distance of the row directly above (NaN for the first row of the file)
    df['Previous_Euclidean_value'] = df['Eucledian_distance'].shift(1)

    # A second hit is kept only if it is not background and lies closer to the
    # participant than the row directly above it
    df['Collider_stays'] = ((df["ordinalOfHit"] == 2)
                            & (df['Collider_Categorical'] != 'Background')
                            & (df['Eucledian_distance'] < df['Previous_Euclidean_value']))
    # Keeps the original row number as an 'index' column
    df.reset_index(inplace=True)

    # Drop all the second-hit colliders that do not comply with the criteria
    indexCollider = df[(df["ordinalOfHit"] == 2) & (df['Collider_stays'] == False)].index
    depleted_data = df.drop(index=indexCollider)
    depleted_data.reset_index(inplace=True, drop=True)

    # For every second hit that stays, the row directly on top of it has to go instead
    indexColliderStays = depleted_data[depleted_data['Collider_stays'] == True].index
    indexColliderDelete = indexColliderStays - 1
    depleted_data_1 = depleted_data.drop(indexColliderDelete)

    # Variable that differentiates body from face hits on agents, decided by collider type
    depleted_data_1["Face_Hits"] = "Not_Agent"
    is_agent = depleted_data_1["Collider_Categorical"].str.contains(pat="_Agent")
    mask_Face = is_agent & (depleted_data_1["hitColliderType"] == "UnityEngine.SphereCollider")
    mask_Body = is_agent & (depleted_data_1["hitColliderType"] == "UnityEngine.BoxCollider")
    depleted_data_1.loc[mask_Face, 'Face_Hits'] = "Face"
    depleted_data_1.loc[mask_Body, 'Face_Hits'] = "Body"

    content.append(depleted_data_1)
    print(filename)

# Fail with an explicit message instead of pandas' generic "No objects to concatenate"
if not content:
    raise FileNotFoundError(f"No csv files found in {path}")

# converting content to data frame
data_frame = pd.concat(content)


/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/3246.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/0365.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/0479.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/1754.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/2258.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/2361.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/2693.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/3310.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/3572.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/3976.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/4176.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/4597.csv
/Volumes/SSD/00_Data_Processing/Pre_proc

In [4]:
#df = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/01_Indivuduals_FlatS/0479.csv")

In [5]:
# Inspect the columns of the combined frame
data_frame.columns

Index(['level_0', 'index', 'SubjectID', 'Session', 'SessionSubsection',
       'timeStampDataPointEnd', 'combinedGazeValidityBitmask',
       'eyePositionCombinedWorld.x', 'eyePositionCombinedWorld.y',
       'eyePositionCombinedWorld.z', 'eyeDirectionCombinedWorld.y',
       'eyeDirectionCombinedWorld.z', 'eyeDirectionCombinedLocal.x',
       'eyeDirectionCombinedLocal.y', 'eyeDirectionCombinedLocal.z',
       'playerBodyPosition.x', 'playerBodyPosition.y', 'playerBodyPosition.z',
       'hitColliderType', 'hitObjectColliderName', 'ordinalOfHit',
       'hitPointOnObject_x', 'hitPointOnObject_y', 'hitPointOnObject_z',
       'Eucledian_distance', 'Collider_Categorical',
       'Previous_Euclidean_value', 'Collider_stays', 'Face_Hits'],
      dtype='object')

In [6]:
# Remove the bookkeeping columns created during collider selection.
# errors='ignore' because 'level_0' only exists if an index was reset twice without
# drop=True (the processing cell resets the second time with drop=True), and so that
# re-running this cell after the columns are already gone does not raise KeyError.
data_frame = data_frame.drop(
    columns=['level_0', 'index', 'Collider_stays', 'Previous_Euclidean_value'],
    errors='ignore',
)

In [7]:
# Persist the combined frame; to_csv is called with its defaults, so the index is written too
data_frame.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/Complete_data_Categorical_L.csv")

In [8]:
# NOTE(review): bare undefined name; evaluating it raises NameError (see the recorded output),
# which stops a top-to-bottom run at this point. Presumably a deliberate barrier in front of
# the cells below (some of them delete files) - confirm before removing.
Trash

NameError: name 'Trash' is not defined

## Dealing with time 
- Create continuous time
- Delete duplicate time stamps

In [None]:
#data_frame =  pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/Complete_data_Categorical.csv")

In [None]:
# Continuous time only makes sense within one session, so everything below works per
# (SubjectID, Session) pair. Counting rows per pair is simply a way to list the pairs
# that actually occur in the data.
all_participants_sessions = (
    data_frame
    .groupby(["SubjectID", "Session"])["hitColliderType"]
    .count()
    .reset_index()
)
# Subjects can have a different number of sessions, so the existing pairs are kept as
# (subject, session) tuples and later used to subset the data one pair at a time.
L_all_participants_sessions = list(
    zip(all_participants_sessions["SubjectID"], all_participants_sessions["Session"])
)

In [None]:
# Number of (subject, session) pairs
len(L_all_participants_sessions)

In [None]:
# Per-session frames with a continuous time axis. The list is called `temporals`
# because that is the name the later pd.concat(temporals) cells use.
temporals = []
for tuples in L_all_participants_sessions:
    subject, session = tuples
    temporal = data_frame[(data_frame['SubjectID'] == subject) & (data_frame['Session'] == session)]
    # Order the samples in time and measure the gap to the previous sample
    temporal_c = temporal.sort_values(by=['timeStampDataPointEnd'])
    temporal_cr = temporal_c.reset_index(drop=True)
    temporal_cr["Time_Shift"] = temporal_cr.timeStampDataPointEnd.diff()
    # Delete duplicate time stamps (gap <= 0.001) and samples that follow a gap of 1 or more.
    # The first sample has no predecessor (NaN gap), so it is removed as well.
    temporal_c_ND = temporal_cr[(temporal_cr["Time_Shift"] > 0.001) & (temporal_cr["Time_Shift"] < 1)].copy()
    if temporal_c_ND.empty:
        # Nothing left to anchor the time axis on; iloc[0, ...] below would raise IndexError
        print(f"{tuples}: no samples left after filtering, skipped")
        continue
    number = temporal_c_ND.columns.get_loc('timeStampDataPointEnd')
    first_time = temporal_c_ND.iloc[0, number]
    # Time since the first kept sample, divided by 60 and rounded to 3 decimals
    # (presumably seconds -> minutes; confirm the unit of timeStampDataPointEnd)
    temporal_c_ND["Continuous_Time"] = np.round(((temporal_c_ND.iloc[:, number] - first_time)/60), 3)
    temporal_c_ND.to_csv(f"/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/{subject}_{session}.csv", index=True)
    temporals.append(temporal_c_ND)
    print(tuples)

In [None]:
# Independent copy of the session-4 rows.
# NOTE(review): `Temporals` is assigned in a cell further down (pd.concat of the per-session
# frames), so this cell only works once that cell has been run.
Four = Temporals[Temporals.Session == 4].copy()

## Change files that were damaged during recordings

In [None]:
# Remove session 4 of subject 0479. Session files are written as f"{subject}_{session}.csv",
# and another cell of this notebook reads "479_5.csv", so the file may exist without the
# leading zero; both spellings are tried. Missing files are skipped so the cell can be re-run.
for damaged_name in ("0479_4.csv", "479_4.csv"):
    damaged_path = "/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/" + damaged_name
    if os.path.exists(damaged_path):
        os.remove(damaged_path)

# Subject 2361: session 1 keeps only subsection 1, session 2 keeps everything except subsection 1.
# index_col=0 restores the index column written by to_csv, so rewriting the file does not
# add another unnamed column.
a2361_1 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/2361_1.csv", index_col=0)
a2361_1_1 = a2361_1[a2361_1.SessionSubsection == 1].copy()
a2361_1_1.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/2361_1.csv")
a2361_2 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/2361_2.csv", index_col=0)
a2361_2 = a2361_2[a2361_2.SessionSubsection != 1].copy()
a2361_2.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/2361_2.csv")

In [None]:
# Subject 3246: session 4 drops subsection 1, session 5 drops subsection 2.
# index_col=0 restores the index column written by to_csv, so rewriting the file does not
# add another unnamed column.
a3246_4 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/3246_4.csv", index_col=0)
a3246_4 = a3246_4[a3246_4.SessionSubsection != 1].copy()
a3246_4.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/3246_4.csv")
a3246_5 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/3246_5.csv", index_col=0)
# The session-5 mask must be applied to the session-5 frame itself
a3246_5 = a3246_5[a3246_5.SessionSubsection != 2].copy()
a3246_5.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/3246_5.csv")

In [None]:
# Subject 3310, session 4: drop subsections 2 and 3.
# index_col=0 restores the index column written by to_csv, so rewriting the file does not
# add another unnamed column.
a3310_4 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/3310_4.csv", index_col=0)
a3310_4 = a3310_4[(a3310_4.SessionSubsection != 2)&(a3310_4.SessionSubsection != 3)].copy()
a3310_4.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/3310_4.csv")

In [None]:
# Subject 3976, sessions 1 and 3: drop subsections 1 and 4 in both.
# index_col=0 restores the index column written by to_csv, so rewriting the files does not
# add another unnamed column.
a3976_1 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/3976_1.csv", index_col=0)
a3976_1 = a3976_1[(a3976_1.SessionSubsection != 1)&(a3976_1.SessionSubsection != 4)].copy()
a3976_1.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/3976_1.csv")
a3976_3 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/3976_3.csv", index_col=0)
a3976_3 = a3976_3[(a3976_3.SessionSubsection != 1)&(a3976_3.SessionSubsection != 4)].copy()
a3976_3.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/3976_3.csv")

In [None]:
# Subject 5741: session 4 drops subsections 1 and 2, session 5 drops subsection 4.
# index_col=0 restores the index column written by to_csv, so rewriting the files does not
# add another unnamed column.
a5741_4 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/5741_4.csv", index_col=0)
a5741_4 = a5741_4[(a5741_4.SessionSubsection != 1)&(a5741_4.SessionSubsection != 2)].copy()
a5741_4.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/5741_4.csv")
a5741_5 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/5741_5.csv", index_col=0)
# NOTE(review): this filter used to test "!= 4" twice; if a second subsection was meant to
# be excluded as well, add it here - confirm against the recording notes.
a5741_5 = a5741_5[a5741_5.SessionSubsection != 4].copy()
a5741_5.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/5741_5.csv")

In [None]:
# Subject 5531: session 1 drops subsection 3, session 2 is removed entirely.
# index_col=0 restores the index column written by to_csv, so rewriting the file does not
# add another unnamed column.
a5531_1 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/5531_1.csv", index_col=0)
a5531_1 = a5531_1[(a5531_1.SessionSubsection != 3)].copy()
a5531_1.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/5531_1.csv")
# Guarded so that re-running the cell after the file is gone does not raise FileNotFoundError
if os.path.exists("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/5531_2.csv"):
    os.remove("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/5531_2.csv")

In [None]:
# Subject 7264, session 1: drop subsections 3 and 4.
# index_col=0 restores the index column written by to_csv, so rewriting the file does not
# add another unnamed column.
a7264_1 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/7264_1.csv", index_col=0)
a7264_1 = a7264_1[(a7264_1.SessionSubsection != 3)&(a7264_1.SessionSubsection != 4)].copy()
a7264_1.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/7264_1.csv")

In [None]:
# Subject 8469, session 1: drop subsections 2 and 3.
# index_col=0 restores the index column written by to_csv, so rewriting the file does not
# add another unnamed column.
a8469_1 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/8469_1.csv", index_col=0)
a8469_1 = a8469_1[(a8469_1.SessionSubsection != 2)&(a8469_1.SessionSubsection != 3)].copy()
a8469_1.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/8469_1.csv")

In [None]:
# Subject 9601, session 5: drop subsection 4.
# index_col=0 restores the index column written by to_csv, so rewriting the file does not
# add another unnamed column.
a9601_5 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/9601_5.csv", index_col=0)
a9601_5 = a9601_5[(a9601_5.SessionSubsection != 4)].copy()
a9601_5.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/9601_5.csv")

In [None]:
# Preview of the session-4 subset (up to the first 20000 rows)
Four.head(20000)

In [None]:
# Index labels of the session-4 rows whose gap to the previous sample is larger than 1.
# NOTE(review): the cell that creates Time_Shift keeps only rows with Time_Shift < 1,
# so this list may always come out empty - confirm.
List = list(Four[Four.Time_Shift > 1].index)

In [None]:
# Each flagged index label shifted back by one (the row directly before it)
List_P1 = [element - 1 for element in List]

In [None]:
# Show the flagged index labels
List

In [None]:
# Inspect a 30-row window by position.
# NOTE(review): hard-coded positions, presumably around one of the flagged rows - confirm.
Four.iloc[764060:764090]

In [None]:
# Number of rows in the session-4 subset
len(Four)

In [None]:
# Columns of `temporal_c`, i.e. whatever the per-session loop assigned last
temporal_c.columns

In [None]:
# Last 50 rows of the combined frame
data_frame.tail(50)

In [None]:
# Stack the per-session frames collected in `temporals` into one frame
Temporals = pd.concat(temporals)

In [None]:
# NOTE(review): pandas exposes cumsum as a Series/DataFrame method, not as a module-level
# name, so `pd.cumsum` raises AttributeError. Which column was meant to be accumulated
# cannot be told from this notebook - confirm the intent or delete the cell.
data_frame["CumSum"] = pd.cumsum

In [None]:
# Summary statistics of the continuous time axis.
# NOTE(review): in the visible cells Continuous_Time is only added to the per-session
# copies, not to `data_frame` itself - confirm which frame should be described here.
data_frame.Continuous_Time.describe()

In [None]:
# One panel per subject, with the raw time stamps plotted against the session number.
# NOTE(review): this import would normally live in the imports cell at the top of the notebook.
import seaborn as sns 
sns.catplot(data=Temporals, col="SubjectID", y="timeStampDataPointEnd", x="Session")

In [None]:
# Value in the first row of `temporal_c` at column position `number`
# (both names are left over from the last iteration of the per-session loop)
temporal_c.iloc[0, number]

In [None]:
# Integer position of the time-stamp column in `temporal_c`
temporal_c.columns.get_loc('timeStampDataPointEnd')

In [None]:
# Row-wise concatenation of the per-session frames (despite the name, the result is one DataFrame)
data_frames_list = pd.concat(temporals, axis=0)

In [None]:
# Last rows of the concatenated frame
data_frames_list.tail()

In [None]:
# Persist the concatenated per-session data; to_csv is called with its defaults, so the index is written too
data_frames_list.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/Data_Complete_ND.csv")

In [None]:
# Load a single session file for a spot check
a = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/03_Individuals_IndividualSessions/479_5.csv")

In [None]:
# Relative frequency of each combinedGazeValidityBitmask value in that session
a.combinedGazeValidityBitmask.value_counts(normalize=True)

# If we want to keep only the perfect cases, run this

In [None]:
# NOTE(review): not valid Python - running this cell raises a SyntaxError, which stops a
# top-to-bottom run here. Presumably a deliberate barrier in front of the cells below - confirm.
Trash it 

In [None]:
# Keep only the rows of the listed subjects.
# The path is absolute like every other path in this notebook; without the leading slash
# it would be resolved relative to the current working directory.
clean_data = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/clean_seven_participants.csv")
The_perfect_Set = clean_data[clean_data['SubjectID'].isin([1754, 2258, 2693])]

In [None]:
# Keep only the rows of the listed subjects. The mask is built from clean_data_1 itself,
# so it lines up row by row with the frame being filtered.
clean_data_1 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/clean_seven_participants_1.csv")
The_perfect_Set_1 = clean_data_1[clean_data_1['SubjectID'].isin([4176, 4796, 4917, 5238])]

In [None]:
# Keep only the rows of the listed subjects. The mask is built from clean_data_2 itself,
# so it lines up row by row with the frame being filtered.
clean_data_2 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/clean_seven_participants_2.csv")
The_perfect_Set_2 = clean_data_2[clean_data_2['SubjectID'].isin([6642, 7412, 7842, 8007])]

In [None]:
# Keep only the rows of the listed subjects
clean_data_3 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/clean_seven_participants_3.csv")
The_perfect_Set_3 = clean_data_3[clean_data_3['SubjectID'].isin([8469, 8673, 9472, 9601])]