In [1]:
# Added cell to set Working Directory to your location
import os
import ast
import glob
import numpy as np
import pandas as pd
import json
import time

In [2]:
# Make the raw recordings folder the working directory; the following cells
# list it and build their glob patterns relative to it.
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
working_directory = "/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Exploration_short/"
os.chdir(working_directory)

In [3]:
# Columns to keep from the raw data, one group per line.
# The first three are not part of the raw export: they are inserted from the
# file name before this list is used to select columns.
# 'eyeDirectionCombinedWorld.x' was absent from the earlier version of this
# list although every other vector kept all of its x/y/z components; it is
# restored here so the world-space gaze direction is complete.
# NOTE(review): assumes the raw export provides that column - confirm.
Keep = [
    "SubjectID", "Session", "SessionSubsection",
    'timeStampDataPointStart', 'timeStampDataPointEnd', 'timeStampGetVerboseData',
    'combinedGazeValidityBitmask',
    'rayCastHitsCombinedEyes',
    'eyePositionCombinedWorld.x', 'eyePositionCombinedWorld.y', 'eyePositionCombinedWorld.z',
    'eyeDirectionCombinedWorld.x', 'eyeDirectionCombinedWorld.y', 'eyeDirectionCombinedWorld.z',
    'eyeDirectionCombinedLocal.x', 'eyeDirectionCombinedLocal.y', 'eyeDirectionCombinedLocal.z',
    'hmdPosition.x', 'hmdPosition.y', 'hmdPosition.z',
    'hmdDirectionForward.x', 'hmdDirectionForward.y', 'hmdDirectionForward.z',
    'hmdRotation.x', 'hmdRotation.y', 'hmdRotation.z',
    'hmdDirectionUp.x', 'hmdDirectionUp.y', 'hmdDirectionUp.z',
    'playerBodyPosition.x', 'playerBodyPosition.y', 'playerBodyPosition.z',
    'bodyTrackerPosition.x', 'bodyTrackerPosition.y', 'bodyTrackerPosition.z',
    'bodyTrackerRotation.x', 'bodyTrackerRotation.y', 'bodyTrackerRotation.z',
]

In [4]:
data_path = os.getcwd()

# Visible (non-dot) entries of the working directory, sorted case-insensitively.
DATA_FOLDER = [entry for entry in os.listdir(data_path) if not entry.startswith('.')]
DATA_FOLDER.sort(key=str.lower)

# An entry counts as participant data when its first four characters are
# digits; np.unique removes duplicates and returns the IDs in ascending order.
subIDs = np.unique([int(entry[:4]) for entry in DATA_FOLDER if entry[:4].isdigit()])

# Converting to int dropped any leading zeros, so pad back to four characters
# to obtain the IDs in their string form.
IDstrings = ['{:04d}'.format(subject) for subject in subIDs]
print(IDstrings)

['0365', '0479', '1754', '2258', '2361', '2693', '3246', '3310', '3572', '3976', '4176', '4597', '4796', '4917', '5238', '5531', '5741', '6642', '7093', '7264', '7412', '7842', '8007', '8469', '8673', '8695', '9472', '9502', '9586', '9601']


In [5]:
# One glob pattern per participant, matching every json file in that
# participant's folder (relative to the working directory).
paths = [subject_id + "/*.json" for subject_id in IDstrings]
print(paths)
# Expand all patterns into concrete file names and order them
# case-insensitively, giving the sorted list of json files to open.
Sorted_individual_jsons = [json_file for pattern in paths for json_file in glob.glob(pattern)]
Sorted_individual_jsons.sort(key=str.lower)

['0365/*.json', '0479/*.json', '1754/*.json', '2258/*.json', '2361/*.json', '2693/*.json', '3246/*.json', '3310/*.json', '3572/*.json', '3976/*.json', '4176/*.json', '4597/*.json', '4796/*.json', '4917/*.json', '5238/*.json', '5531/*.json', '5741/*.json', '6642/*.json', '7093/*.json', '7264/*.json', '7412/*.json', '7842/*.json', '8007/*.json', '8469/*.json', '8673/*.json', '8695/*.json', '9472/*.json', '9502/*.json', '9586/*.json', '9601/*.json']


In [None]:
data_raw = pd.DataFrame()

# Folder that receives one flattened CSV per participant.
output_folder = "/Volumes/TwoTeras/0_Experiment_1/Eye_Tracking/Pre_processed/01_Indivuduals_Flat_smaller/"

# Every entry of `paths` is the glob pattern of one participant folder, so
# each pass of the outer loop reads that participant's json files, flattens
# them and writes a single CSV.
for path in paths:
    # Flattened frames of this participant, one per json file. They are joined
    # once after the inner loop: DataFrame.append was removed in pandas 2.0 and
    # re-copied the accumulated frame on every call.
    subject_frames = []
    # ID slice of the last file that was processed; names the output CSV.
    subject_label = None

    for filename in glob.glob(path):
        with open(filename, 'r') as file:
            try:
                # make json files parsable: the content is wrapped in brackets
                # so that it loads as one JSON array
                raw = json.loads("[" + file.read() + "]")
            except ValueError as error:
                # ValueError covers json.JSONDecodeError and decoding errors.
                # Skip the file: carrying on would fail on an undefined `raw`
                # or silently reuse the `raw` of the previous file.
                print("reading did not work for " + filename + ": " + str(error))
                continue

        # Subject, session and section are taken from fixed character
        # positions of the relative file name.
        # NOTE(review): assumes every file name follows the same fixed layout - confirm.
        subject_label = filename[5:9]
        session_label = filename[17:19]
        section_label = filename[23:24]

        # Unnest the higher level of each file
        currentDF_raw = pd.json_normalize(raw[0]['trials'][0]['dataPoints'])
        print( "Subject " + str(subject_label) + " Session " + str(session_label) +" Section " + str(section_label) + " has been normalized")
        print('time is: ', time.ctime())

        # insert participant id and session information from the file name
        row_count = currentDF_raw.shape[0]
        currentDF_raw.insert(0, "SubjectID", [int(subject_label)] * row_count, True)
        currentDF_raw.insert(1, "Session", [int(session_label)] * row_count, True)
        currentDF_raw.insert(2, "SessionSubsection", [int(section_label)] * row_count, True)

        # Take out the unnecessary information
        Reduced = currentDF_raw[Keep]

        # Normalize the collider hits: explode gives one row per list element
        # and apply(pd.Series) spreads each element over columns
        # (presumably each hit is a dict - verify against the raw export).
        Raycast = Reduced['rayCastHitsCombinedEyes'].explode().apply(pd.Series)
        Combined = pd.concat([Reduced, Raycast], axis=1)
        print('Combined')

        subject_frames.append(Combined)
        print('Appended')
        # Drop the reference to the full normalized frame.
        currentDF_raw = pd.DataFrame()

    if not subject_frames:
        # Nothing was read for this pattern. Saving here would either fail or
        # reuse the file name of the previous participant.
        print("No readable json files for " + path + ", nothing saved")
        continue

    data_raw = pd.concat(subject_frames, ignore_index=True)
    data_raw = data_raw.sort_values(by=["SubjectID","Session","SessionSubsection"])
    data_raw.to_csv(output_folder + str(subject_label) + ".csv")
    print('Saved')
    # Release this participant's data before moving on to the next one.
    data_raw = pd.DataFrame()

Subject 0365 Session 01 Section 2 has been normalized
time is:  Thu May 11 16:52:16 2023
Combined
Appended
Subject 0365 Session 01 Section 3 has been normalized
time is:  Thu May 11 16:54:03 2023
Combined
Appended
Subject 0365 Session 02 Section 1 has been normalized
time is:  Thu May 11 16:56:03 2023
Combined
Appended
Subject 0365 Session 02 Section 2 has been normalized
time is:  Thu May 11 16:58:06 2023
Combined
Appended
Subject 0365 Session 02 Section 3 has been normalized
time is:  Thu May 11 17:00:05 2023
Combined
Appended
Subject 0365 Session 03 Section 1 has been normalized
time is:  Thu May 11 17:02:10 2023
Combined
Appended
Subject 0365 Session 03 Section 2 has been normalized
time is:  Thu May 11 17:04:14 2023
Combined
Appended
Subject 0365 Session 03 Section 3 has been normalized
time is:  Thu May 11 17:06:37 2023
Combined
Appended
Subject 0365 Session 04 Section 1 has been normalized
time is:  Thu May 11 17:08:43 2023
Combined
Appended
Subject 0365 Session 04 Section 2 has