In [1]:
# Imports used throughout the notebook (the working directory is set in the next cell; adjust it to your location)
import os
import ast
import glob
import numpy as np
import pandas as pd
import json
import time

In [2]:
# Work from the folder that holds one sub-folder of raw JSON recordings per participant.
# Every relative path used further down is resolved against this directory, so
# adjust it to wherever the data volume is mounted on your machine.
raw_data_dir = "/Volumes/TwoTeras/1_Experiment_2/Eye_Tracking/Exploration_short/"
os.chdir(raw_data_dir)

In [3]:
# Subset of columns of interest in the flattened raw data, grouped by signal.
# NOTE(review): 'eyeDirectionCombinedWorld.x' is not listed although the .y and .z
# components are -- confirm whether that omission is intentional.
Keep = [
    # identifiers
    "SubjectID", "Session", "SessionSubsection",
    # timestamps
    'timeStampDataPointStart', 'timeStampDataPointEnd', 'timeStampGetVerboseData',
    # gaze validity and ray-cast hits
    'combinedGazeValidityBitmask',
    'rayCastHitsCombinedEyes',
    # combined eye position / direction
    'eyePositionCombinedWorld.x', 'eyePositionCombinedWorld.y', 'eyePositionCombinedWorld.z',
    'eyeDirectionCombinedWorld.y', 'eyeDirectionCombinedWorld.z',
    'eyeDirectionCombinedLocal.x', 'eyeDirectionCombinedLocal.y', 'eyeDirectionCombinedLocal.z',
    # head-mounted display
    'hmdPosition.x', 'hmdPosition.y', 'hmdPosition.z',
    'hmdDirectionForward.x', 'hmdDirectionForward.y', 'hmdDirectionForward.z',
    'hmdRotation.x', 'hmdRotation.y', 'hmdRotation.z',
    'hmdDirectionUp.x', 'hmdDirectionUp.y', 'hmdDirectionUp.z',
    # player body and body tracker
    'playerBodyPosition.x', 'playerBodyPosition.y', 'playerBodyPosition.z',
    'bodyTrackerPosition.x', 'bodyTrackerPosition.y', 'bodyTrackerPosition.z',
    'bodyTrackerRotation.x', 'bodyTrackerRotation.y', 'bodyTrackerRotation.z',
]

In [4]:
# Participant folders are looked up directly under the current working directory.
data_path = os.getcwd()

# Visible entries only (no dot-files), ordered case-insensitively.
DATA_FOLDER = [entry for entry in os.listdir(data_path) if not entry.startswith('.')]
DATA_FOLDER.sort(key=str.lower)

# An entry counts as a participant when its first four characters are digits.
# np.unique drops duplicates and returns the IDs in ascending order.
subIDs = np.unique([int(entry[0:4]) for entry in DATA_FOLDER if entry[0:4].isdigit()])

# Since some participant IDs start with 0, zero-pad them back to four characters
# so the string form keeps the leading zeros.
IDstrings = [f"{subject:04d}" for subject in subIDs]
print(IDstrings)

['1031', '1142', '1234', '1268', '1574', '1843', '2069', '3193', '3540', '4580', '4598', '4847', '4875', '5161', '5189', '5191', '5743', '5766', '5851', '5972', '6254', '6266', '6406', '7081', '7393', '7823', '7935', '8629', '9297', '9535', '9627']


In [5]:
# One glob pattern per participant, matching every JSON file in that participant's folder
# (relative to the current working directory).
paths = [f"{subject}/*.json" for subject in IDstrings]
print(paths)

# Expand all patterns and keep the matching file names in one case-insensitively sorted list.
matched_files = []
for pattern in paths:
    matched_files.extend(glob.glob(pattern))
Sorted_individual_jsons = sorted(matched_files, key=str.lower)

['1031/*.json', '1142/*.json', '1234/*.json', '1268/*.json', '1574/*.json', '1843/*.json', '2069/*.json', '3193/*.json', '3540/*.json', '4580/*.json', '4598/*.json', '4847/*.json', '4875/*.json', '5161/*.json', '5189/*.json', '5191/*.json', '5743/*.json', '5766/*.json', '5851/*.json', '5972/*.json', '6254/*.json', '6266/*.json', '6406/*.json', '7081/*.json', '7393/*.json', '7823/*.json', '7935/*.json', '8629/*.json', '9297/*.json', '9535/*.json', '9627/*.json']


In [None]:
# Folder that receives one flattened CSV per participant.
# NOTE(review): the working directory set earlier in this notebook is under
# "1_Experiment_2" while this output folder is under "0_Experiment_1" -- confirm
# that writing to the other experiment's tree is intended.
OUTPUT_DIR = "/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/01_Indivuduals_Flat_smaller/"


def _load_recording(filename):
    """Read one recording file and return it as a flat DataFrame.

    The file content is wrapped in "[" ... "]" before parsing so that it is
    read as a JSON array; the data points of the first trial of the first
    array element are then flattened with ``pd.json_normalize``.

    Subject, session and section are taken from fixed character positions of
    ``filename`` (5-8, 17-18 and 23).
    NOTE(review): this assumes every file follows the same naming layout, with
    a 4-character folder prefix -- confirm against the actual file names.

    Parameters
    ----------
    filename : str
        Relative path of the JSON file, as returned by ``glob.glob``.

    Returns
    -------
    pandas.DataFrame or None
        All normalized columns, plus "SubjectID", "Session" and
        "SessionSubsection" as the first three columns, plus the columns
        obtained by expanding 'rayCastHitsCombinedEyes'. ``None`` when the
        file content cannot be decoded or parsed.
    """
    with open(filename, 'r') as file:
        try:
            # make json files parsable
            raw = json.loads("[" + file.read() + "]")
        except ValueError as err:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            # Skip the file instead of carrying on with a stale/undefined `raw`,
            # which would re-append the previous file's data under this file's name.
            print("reading did not work: " + filename + " (" + str(err) + ")")
            return None

    # Unnest the higher level of each file
    currentDF_raw = pd.json_normalize(raw[0]['trials'][0]['dataPoints'])
    print("Subject " + str(filename[5:9]) + " Session " + str(filename[17:19]) + " Section " + str(filename[23:24]) + " has been normalized")
    print('time is: ', time.ctime())

    # insert participant id and session information from the file name
    # (a scalar is broadcast to every row)
    currentDF_raw.insert(0, "SubjectID", int(filename[5:9]), allow_duplicates=True)
    currentDF_raw.insert(1, "Session", int(filename[17:19]), allow_duplicates=True)
    currentDF_raw.insert(2, "SessionSubsection", int(filename[23:24]), allow_duplicates=True)

    # All columns are kept on purpose (needed for the PCA); no column subset is applied here.

    # Normalize the collider hits: one list entry per row, then one column per key.
    # NOTE(review): explode() repeats the row label when a sample holds more than one
    # hit -- confirm the hits-per-sample count, since the column-wise concat below
    # aligns on the index.
    Raycast = currentDF_raw['rayCastHitsCombinedEyes'].explode().apply(pd.Series)
    Combined = pd.concat([currentDF_raw, Raycast], axis=1)
    print('Combined')
    return Combined


# read every file name in folder, one participant (glob pattern) at a time
for path in paths:
    participant_frames = []  # collected and concatenated once, instead of growing a frame per file
    subject_id = None
    for filename in glob.glob(path):
        Combined = _load_recording(filename)
        if Combined is None:
            continue
        participant_frames.append(Combined)
        subject_id = str(filename[5:9])
        print('Appended')

    if not participant_frames:
        # Nothing matched or nothing could be parsed: do not write a file under a
        # stale name left over from the previous participant.
        print("No readable recordings for " + path + ", nothing saved")
        continue

    data_raw = pd.concat(participant_frames, ignore_index=True).sort_values(
        by=["SubjectID", "Session", "SessionSubsection"]
    )
    data_raw.to_csv(OUTPUT_DIR + subject_id + ".csv", index=False)
    print('Saved')

Subject 1031 Session 01 Section 1 has been normalized
time is:  Thu Jun 12 10:43:44 2025
Combined
Appended
Subject 1031 Session 01 Section 2 has been normalized
time is:  Thu Jun 12 10:43:55 2025
Combined
Appended
Subject 1031 Session 01 Section 3 has been normalized
time is:  Thu Jun 12 10:44:07 2025
Combined
Appended
Subject 1031 Session 02 Section 1 has been normalized
time is:  Thu Jun 12 10:44:21 2025
Combined
Appended
Subject 1031 Session 02 Section 2 has been normalized
time is:  Thu Jun 12 10:44:35 2025
Combined
Appended
Subject 1031 Session 02 Section 3 has been normalized
time is:  Thu Jun 12 10:44:48 2025
Combined
Appended
Subject 1031 Session 03 Section 1 has been normalized
time is:  Thu Jun 12 10:45:03 2025
Combined
Appended
Subject 1031 Session 03 Section 2 has been normalized
time is:  Thu Jun 12 10:45:19 2025
Combined
Appended
Subject 1031 Session 03 Section 3 has been normalized
time is:  Thu Jun 12 10:45:34 2025
Combined
Appended
Subject 1031 Session 04 Section 1 has