In [1]:
import re
import numpy as np
import pandas as pd
import glob

In [2]:
# Folder that holds the individual collider CSV files.
path = "/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/"

# Collect the CSV files in that folder. glob returns them in arbitrary
# (filesystem) order, so sort to keep the row order of the combined frame
# reproducible between runs and machines.
files = sorted(glob.glob(path.rstrip("/") + "/*.csv"))
if not files:
    # pd.concat([]) would otherwise fail with "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {path}")

# Read every file into its own frame, logging each file name once it is loaded.
content = []
for filename in files:
    df = pd.read_csv(filename)
    content.append(df)
    print(filename)

# Stack the per-file frames into one long frame (per-file row labels are kept).
data_frame = pd.concat(content)


/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/0479.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/1754.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/2258.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/2361.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/2693.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/3246.csv
/Volumes/SSD/00_Data_Processing/Pre_processed/02_Individuals_Colliders/3310.csv


In [3]:
# List the column names of the combined frame.
data_frame.columns

Index(['Unnamed: 0', 'index', 'SubjectID', 'Session', 'SessionSubsection',
       'timeStampDataPointStart', 'timeStampDataPointEnd',
       'timeStampGetVerboseData', 'combinedGazeValidityBitmask',
       'rayCastHitsCombinedEyes', 'eyePositionCombinedWorld.x',
       'eyePositionCombinedWorld.y', 'eyePositionCombinedWorld.z',
       'eyeDirectionCombinedWorld.y', 'eyeDirectionCombinedWorld.z',
       'eyeDirectionCombinedLocal.x', 'eyeDirectionCombinedLocal.y',
       'eyeDirectionCombinedLocal.z', 'hmdPosition.x', 'hmdPosition.y',
       'hmdPosition.z', 'hmdDirectionForward.x', 'hmdDirectionForward.y',
       'hmdDirectionForward.z', 'hmdRotation.x', 'hmdRotation.y',
       'hmdRotation.z', 'hmdDirectionUp.x', 'hmdDirectionUp.y',
       'hmdDirectionUp.z', 'playerBodyPosition.x', 'playerBodyPosition.y',
       'playerBodyPosition.z', 'bodyTrackerPosition.x',
       'bodyTrackerPosition.y', 'bodyTrackerPosition.z',
       'bodyTrackerRotation.x', 'bodyTrackerRotation.y',
       'bod

In [4]:
# Remove four columns from the combined frame before further processing.
columns_to_remove = [
    'Unnamed: 0',
    'rayCastHitsCombinedEyes',
    'timeStampGetVerboseData',
    'hitObjectColliderBoundsCenter',
]
data_frame = data_frame.drop(columns=columns_to_remove)

In [5]:
# A frame can contain two collider hits, so compute for every hit (row) the
# Euclidean distance between the hit point and the participant's body position.
hit_xyz = data_frame[["hitPointOnObject_x", "hitPointOnObject_y", "hitPointOnObject_z"]].to_numpy()
body_xyz = data_frame[["playerBodyPosition.x", "playerBodyPosition.y", "playerBodyPosition.z"]].to_numpy()
data_frame['Eucledian_distance'] = np.linalg.norm(hit_xyz - body_xyz, axis=1)

In [6]:
# The collider names are too detailed, so this dictionary maps name patterns to
# the categories of interest. Keys are regular expressions (written as raw
# strings so that \d is not treated as a string escape); insertion order matters
# because the first matching key decides the category.
# INCLUDE CHURCH
patterns = {
    r'\d{2}_Sa': 'Passive_Agent',
    r'\d{2}_Cma': 'Active_Agent',
    r'Building_\d+': 'Building',
}

patterns.update(dict.fromkeys([
    'Castle-TaskBuilding_56', 'HighSilo-TaskBuilding_49',
    'Windmill-TaskBuilding_10_1', 'Church_TaskBuilding_16',
], 'Global_Landmark'))

patterns.update(dict.fromkeys([
    'TaskBuilding_2', 'TaskBuilding_3', 'TaskBuilding_5', 'TaskBuilding_8',
    'TaskBuilding_9', 'TaskBuilding_11', 'TaskBuilding_13', 'TaskBuilding_14',
    'TaskBuilding_20', 'TaskBuilding_21', 'TaskBuilding_23', 'TaskBuilding_27',
    'TaskBuilding_29', 'TaskBuilding_32', 'TaskBuilding_34', 'TaskBuilding_38',
    'TaskBuilding_41', 'TaskBuilding_42', 'TaskBuilding_44', 'TaskBuilding_45',
    'TaskBuilding_47', 'TaskBuilding_50', 'TaskBuilding_51', 'TaskBuilding_52',
    'BasketballCourt_58', 'Construction_57',
    'Graffity_02', 'Graffity_03', 'Graffity_05', 'Graffity_08',
    'Graffity_09', 'Graffity_11', 'Graffity_13', 'Graffity_14',
    'Graffity_20', 'Graffity_21', 'Graffity_23', 'Graffity_27',
    'Graffity_29', 'Graffity_32', 'Graffity_34', 'Graffity_38',
    'Graffity_41', 'Graffity_42', 'Graffity_44', 'Graffity_45',
    'Graffity_47', 'Graffity_50', 'Graffity_51', 'Graffity_52',
], 'TaskBuilding_Public'))

patterns.update(dict.fromkeys([
    'TaskBuilding_1', 'TaskBuilding_4', 'TaskBuilding_6', 'TaskBuilding_7',
    'TaskBuilding_12', 'TaskBuilding_15', 'TaskBuilding_17', 'TaskBuilding_18',
    'TaskBuilding_19', 'TaskBuilding_22', 'TaskBuilding_24', 'TaskBuilding_25',
    'TaskBuilding_26', 'TaskBuilding_28', 'TaskBuilding_30', 'TaskBuilding_31',
    'TaskBuilding_33', 'TaskBuilding_35', 'TaskBuilding_36', 'TaskBuilding_37',
    'TaskBuilding_39', 'TaskBuilding_40', 'TaskBuilding_43', 'TaskBuilding_48',
    'TaskBuilding_54', 'Graffity_55',
], 'TaskBuilding_Residential'))

# Category assigned when no pattern matches.
default_val = 'Background'

In [7]:
# Look up each collider name in the pattern dictionary and store the more
# general/informative category in the new column Collider_Categorical.
def classify_collider(name):
    """Return the category of the first pattern matching the start of `name`.

    Patterns are tried in dictionary insertion order and anchored at the start
    of the name (re.match). A match is rejected when it is immediately followed
    by another digit, so that e.g. the key 'TaskBuilding_2' does not also
    capture 'TaskBuilding_22'. Returns `default_val` when nothing matches.
    """
    for key, val in patterns.items():
        # (?!\d): the matched prefix must not stop in the middle of a number.
        if re.match(rf'(?:{key})(?!\d)', name):
            return val
    return default_val


data_frame['Collider_Categorical'] = data_frame['hitObjectColliderName'].apply(classify_collider)

In [8]:
# The double hits were unfolded into long format: when a frame has more than one
# hit, the second hit is the row directly underneath the first. To find the hit
# closest to the participant we therefore compare each row's Euclidean distance
# with the distance of the row just above it.
previous_distance = data_frame['Eucledian_distance'].shift(1)
data_frame['Previous_Euclidean_value'] = previous_distance

# Conditions for keeping a second hit: it is the second hit of the frame, it is
# not a Background collider, and it is closer than the row above it.
is_second_hit = data_frame["ordinalOfHit"].eq(2)
is_not_background = data_frame['Collider_Categorical'].ne('Background')
is_closer = data_frame['Eucledian_distance'].lt(data_frame['Previous_Euclidean_value'])
data_frame['Collider_stays'] = is_second_hit & is_not_background & is_closer

In [9]:
# Remember the (rows, columns) shape of the frame at this point.
original_shape = data_frame.shape

In [10]:
# Move the current row labels into a column so that rows are labelled 0..n-1.
data_frame = data_frame.reset_index()
# Labels of all second hits that do not comply with the criteria (to be dropped).
rejected_second_hit = (data_frame["ordinalOfHit"] == 2) & ~data_frame['Collider_stays']
indexCollider = data_frame.loc[rejected_second_hit].index

In [11]:
# New frame without the rows whose labels are listed in indexCollider.
depleted_data = data_frame.drop(labels=indexCollider, axis="index")

In [12]:
# Relabel the remaining rows 0..n-1 so the row directly above is "label - 1".
depleted_data = depleted_data.reset_index(drop=True)
# Labels of all second colliders that stay.
indexColliderStays = depleted_data.loc[depleted_data['Collider_stays']].index
# Wherever a second collider stays, the first collider (the row directly on
# top) is the one that has to go, hence the shift by one.
indexColliderDelete = indexColliderStays - 1
depleted_data_1 = depleted_data.drop(index=indexColliderDelete)

In [None]:
# Save the fully cleaned frame. depleted_data_1 is the version from which the
# first hit has also been removed wherever the second hit was kept; writing
# depleted_data would leave both hits of those frames in the file.
depleted_data_1.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/clean_seven_participants.csv")

In [None]:
# NOTE(review): Make_Error is not defined in the visible cells, so evaluating it
# raises NameError; presumably a deliberate stop so that "Run All" halts here.
Make_Error

In [None]:
# Reload the cleaned data from disk and keep the rows of three participants.
clean_data = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/clean_seven_participants.csv")
selected_ids = [1754, 2258, 2693]
The_perfect_Set = clean_data[clean_data['SubjectID'].isin(selected_ids)]

In [None]:
# Load the "_1" batch of cleaned data and keep the rows of four participants.
clean_data_1 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/clean_seven_participants_1.csv")
# Every ID is tested against clean_data_1 itself; a mask built from a different
# frame does not line up with the rows of this one.
The_perfect_Set_1 = clean_data_1[clean_data_1['SubjectID'].isin([4176, 4796, 4917, 5238])]


In [None]:
# Load the "_2" batch of cleaned data and keep the rows of four participants.
clean_data_2 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/clean_seven_participants_2.csv")
# Every ID is tested against clean_data_2 itself; a mask built from a different
# frame does not line up with the rows of this one.
The_perfect_Set_2 = clean_data_2[clean_data_2['SubjectID'].isin([6642, 7412, 7842, 8007])]

In [None]:
# Load the "_3" batch of cleaned data and keep the rows of four participants.
clean_data_3 = pd.read_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/clean_seven_participants_3.csv")
selected_ids_3 = [8469, 8673, 9472, 9601]
The_perfect_Set_3 = clean_data_3[clean_data_3['SubjectID'].isin(selected_ids_3)]

In [None]:
# Remove two columns. The result is rebound to a new frame instead of dropping
# in place, because The_perfect_Set is a filtered selection of clean_data.
The_perfect_Set = The_perfect_Set.drop(columns=["Unnamed: 0", "level_0"])

# Visual inspection of data


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Global plot styling in one call: default figure size 12x8 and fonts scaled by 1.3.
sns.set(font_scale=1.3, rc={"figure.figsize": (12, 8)})

In [None]:
# Create figure and axes objects of a predefined size: four stacked panels.
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(18,4), sharex=True)
# One panel per batch of cleaned data, counting rows per SubjectID split by
# Session. Each batch is drawn exactly once (clean_data_3 on the last panel).
batches = [clean_data, clean_data_1, clean_data_2, clean_data_3]
for ax, batch in zip(axes, batches):
    sns.countplot(data=batch, x="SubjectID", hue="Session", ax=ax)
# NOTE(review): with sharex=True only the bottom panel shows tick labels, yet the
# batches hold different SubjectIDs - confirm the shared axis is intended.
plt.xticks(rotation = -35);
#90*60*30 = 162000 I have around 100 frames per second but this is not stable (some missing data for some participants)

In [None]:
# Preview the first rows of The_perfect_Set.
The_perfect_Set.head()

In [None]:
# Remove two columns (rebinding instead of dropping in place, because
# The_perfect_Set_2 is a filtered selection of clean_data_2), then preview the
# result. head() has to be the last expression of the cell to be displayed.
The_perfect_Set_2 = The_perfect_Set_2.drop(columns=["Unnamed: 0", "level_0"])
The_perfect_Set_2.head()

In [None]:
# Stack The_perfect_Set and The_perfect_Set_1 row-wise into one frame.
Concat= pd.concat([The_perfect_Set, The_perfect_Set_1])

In [None]:
# Append The_perfect_Set_2 to the running concatenation. pd.concat takes the
# frames as a single list; passing them as separate positional arguments fails.
Concat = pd.concat([Concat, The_perfect_Set_2])

In [None]:
# Remove the "level_0" column if it is still present. errors="ignore" keeps the
# cell from raising KeyError when the column has already been dropped.
The_perfect_Set = The_perfect_Set.drop(columns=["level_0"], errors="ignore")

In [None]:
# (rows, columns) of The_perfect_Set_1.
The_perfect_Set_1.shape

In [None]:
# Preview the first rows of The_perfect_Set_1.
The_perfect_Set_1.head()

In [None]:
# Write Complete_14 to CSV.
# NOTE(review): Complete_14 is not defined in any visible cell - confirm which
# frame is meant before running this.
Complete_14.to_csv("/Volumes/SSD/00_Data_Processing/Pre_processed/complete_sevenParticipants.csv")

In [None]:
# (rows, columns) of The_perfect_Set.
The_perfect_Set.shape

In [None]:
# Point plot of the 'norm' column per 'ID', with a custom y-axis label.
# NOTE(review): Medianss is not defined in any visible cell - confirm where it
# comes from.
sns.pointplot(data=Medianss, x='ID', y='norm').set(ylabel='Normalized difference between average absolut error (non meaningful - meaningful)')

In [None]:
# Rows of participant 1754 in session 1, restricted to sub-sections 1 and 3.
is_subject = clean_data['SubjectID'] == 1754
is_session = clean_data['Session'] == 1
is_subsection = clean_data['SessionSubsection'].isin([1, 3])
cosa = clean_data[is_subject & is_session & is_subsection]

In [None]:
# Plot the hit points (x against z) of the selection, coloured by collider category.
# relplot accepts only kind="scatter" or kind="line"; any other kind is rejected.
sns.relplot(
    data=cosa, kind="scatter",
    x="hitPointOnObject_x", y="hitPointOnObject_z",
    hue="Collider_Categorical",
    facet_kws=dict(sharex=False),
)

In [None]:
# Preview the first rows of the selection.
cosa.head()

In [None]:
# Rows of participant 1154 in session 1, restricted to sub-sections 1 and 3.
# NOTE(review): no file named 1154.csv appears in the load log above (there is
# a 1754.csv) - confirm the ID.
is_subject_1154 = clean_data['SubjectID'] == 1154
is_session_1 = clean_data['Session'] == 1
is_subsection_1_or_3 = clean_data['SessionSubsection'].isin([1, 3])
cosa_ = clean_data[is_subject_1154 & is_session_1 & is_subsection_1_or_3]

In [None]:
# Span between the first start timestamp and the last end timestamp of the selection.
end_stamps = cosa["timeStampDataPointEnd"].tolist()
start_stamps = cosa["timeStampDataPointStart"].tolist()
value = end_stamps[-1] - start_stamps[0]

In [None]:
# Select the rows whose SubjectID equals 1154.
# NOTE(review): no file named 1154.csv appears in the load log above - confirm the ID.
query = clean_data.query('SubjectID == 1154')

In [None]:
# Summary statistics of the Eucledian_distance column.
clean_data.Eucledian_distance.describe()

In [None]:
# Independent copy of the row at position 50. copy has to be *called*: without
# the parentheses the name would hold the bound method instead of the row.
fifty = clean_data.iloc[50].copy()

In [None]:
#Idea for later
#indexes_to_keep = set(range(data_frame.shape[0])) - set(indexCollider)
#df_sliced = data_frame.take(list(indexes_to_keep))

In [None]:
# Show the first entries of `fifty`.
fifty.head()

In [None]:
# Small sample for quick experiments: the first 50 rows of clean_data.
little = clean_data.head(50)

In [None]:
# Duration of each data point: end timestamp minus start timestamp.
# assign() returns a new frame, so nothing is written into a slice of clean_data.
little = little.assign(delta=little['timeStampDataPointEnd'] - little['timeStampDataPointStart'])

In [None]:
# Display the delta column of the sample.
little['delta']