In [125]:
### Load JSON data and prepare it for analysis ###

In [13]:
import json
import os
import ast
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Pandas display settings, applied in one pass:
#   display.precision   -> 9 decimal places
#   display.width       -> 1000 characters, so there is enough room for all data
#   display.max_columns -> None, i.e. show all columns
for _option_name, _option_value in (
    ('display.precision', 9),
    ('display.width', 1000),
    ('display.max_columns', None),
):
    pd.set_option(_option_name, _option_value)

# True -> build the dataframe with pd.json_normalize; False -> use pd.read_json
json_normalise = True

In [14]:
## Load JSON file

In [15]:
data_folder = '/home/tom/Documents/SWC/data'
json_filename = '240913_Yansu_Jerry/2024-09-13_11-23-37_YansuFirstSolo.json'
filepath = os.path.join(data_folder, json_filename)

# The file handle is only needed while parsing, so close it straight after json.load
with open(filepath) as f:
    file = json.load(f)

print(file[0])

## Store timings as datetime objects
# timeLocal is colon-separated, e.g. '11:23:37:306' (hours:minutes:seconds:milliseconds)
# NOTE(review): the variables are named *_utc but are read from 'timeLocal' - confirm the time zone
start_time_utc_str = file[0]['timeLocal']
start_time_utc_split = start_time_utc_str.split(':')
hours = int(start_time_utc_split[0])
minutes = int(start_time_utc_split[1])
seconds = int(start_time_utc_split[2])
microseconds = int(start_time_utc_split[3])*1000  # last field is treated as milliseconds

# convert to datetime object (the string carries no date, so 1970-01-01 is used as the base)
base_time = datetime(1970,1,1)
start_time_utc = base_time + timedelta(hours=hours, minutes=minutes, seconds=seconds, microseconds=microseconds)

# convert back to string with millisecond precision (drop the last 3 of the 6 %f digits)
print(start_time_utc.strftime('%H:%M:%S.%f')[:-3])

# repeat for application time
# but here use just a time delta as this is not in reference to any date
# Parse the decimal-seconds string as a float so the fractional part is scaled
# correctly however many decimal digits were logged ('3018.3' is 300 ms, not 3 ms)
start_time_app_str = file[0]['timeApplication']
start_time_app = timedelta(seconds=float(start_time_app_str))
# NOTE: the [:-3] trim assumes str() shows six fractional digits, which timedelta
# only does when its microseconds component is non-zero
print(str(start_time_app)[:-3])

print(file[0].keys())


{'timeLocal': '11:23:37:306', 'timeApplication': '3018.317', 'eventDescription': 'logging start'}
11:23:37.306
0:50:18.317
dict_keys(['timeLocal', 'timeApplication', 'eventDescription'])


In [16]:
## Parse data into pandas dataframe

In [17]:
## Load dataframe with collapsed data dictionary
# json_normalize takes the already-parsed JSON records (nested keys become dotted
# column names), whereas read_json reads directly from the file path
if json_normalise:
    df = pd.json_normalize(file)
else:
    df = pd.read_json(filepath)
    print(type(df))

df.head()


Unnamed: 0,timeLocal,timeApplication,eventDescription,data.playerPosition.0.clientId,data.playerPosition.0.location.x,data.playerPosition.0.location.y,data.playerPosition.0.location.z,data.playerPosition.0.rotation.x,data.playerPosition.0.rotation.y,data.playerPosition.0.rotation.z,data.trialNum,data.trialType,data.playerInfo.0.clientId,data.playerInfo.0.location.x,data.playerInfo.0.location.y,data.playerInfo.0.location.z,data.playerInfo.0.rotation.x,data.playerInfo.0.rotation.y,data.playerInfo.0.rotation.z,data.wall1,data.wall2,data.wallTriggered,data.triggerClient,data.playerScores.0
0,11:23:37:306,3018.317,logging start,,,,,,,,,,,,,,,,,,,,,
1,11:23:37:333,3018.343,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,
2,11:23:37:350,3018.361,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,
3,11:23:37:371,3018.382,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,
4,11:23:37:392,3018.403,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,


In [18]:
##  Convert time columns into datetime format
# Each conversion is skipped when the column already has the target dtype, so this
# cell can be re-run safely: pushing an already converted timedelta column back
# through to_numeric / to_timedelta(unit='s') would misinterpret its values.
if not pd.api.types.is_datetime64_any_dtype(df['timeLocal']):
    # The strings carry no date, so the parsed timestamps get pandas' default date
    df['timeLocal'] = pd.to_datetime(df['timeLocal'], format='%H:%M:%S:%f')

# Use to_timedelta as a vectorised function (a per-row lambda would be a python loop)
if not pd.api.types.is_timedelta64_dtype(df['timeApplication']):
    # timeApplication is logged as a decimal-seconds string, e.g. '3018.317'
    df['timeApplication'] = pd.to_timedelta(pd.to_numeric(df['timeApplication']), unit='s')

In [19]:
# TODO: avoid modifying the dataframe in place; have each step return a new dataframe instead.

In [20]:
### Pre-process data

In [21]:
## Take time in reference to start time
def reference_application_time(df):
    """Return a copy of ``df`` with a ``timeReferenced`` column added.

    ``timeReferenced`` is ``timeApplication`` minus the ``timeApplication``
    value of the first row, i.e. the time elapsed since the first entry.
    The input dataframe is left unmodified.
    """
    application_time = df['timeApplication']
    elapsed_since_first = application_time - application_time.iloc[0]

    # assign() builds a new frame, so the caller's dataframe is untouched
    return df.assign(timeReferenced=elapsed_since_first)


In [25]:
# Add the 'timeReferenced' column: application time relative to the first entry
df = reference_application_time(df)

In [82]:
# Fill nans in trialNum with the correct trial number (starting at 0 for pre-trial data)
# This is needed because trialNum is only recorded at the single timepoint that trialNum changes
def fill_trial_zero(df):
    """Return a copy of ``df`` with the gaps in ``data.trialNum`` filled.

    The entry at index label 0 is overwritten with 0, then every remaining NaN
    is forward-filled from the closest earlier value. Seeding the first entry
    means rows before the first recorded trial number become 0, while NaNs
    after a recorded trial number inherit that number rather than 0.
    The input dataframe is left unmodified.
    """
    trial_col = 'data.trialNum'
    seeded = df.copy()
    seeded.loc[0, trial_col] = 0

    # Replace the column with its forward-filled version in a new frame
    return seeded.assign(**{trial_col: seeded[trial_col].ffill()})

In [83]:
def fill_player_scores_solo(df):
    """Return a copy of ``df`` with the gaps in ``data.playerScores.0`` filled.

    The entry at index label 0 is overwritten with 0 and the remaining NaNs are
    forward-filled, so each row carries the most recent score seen so far.
    The input dataframe is left unmodified.
    """
    score_col = 'data.playerScores.0'
    scores_filled = df.copy()

    # Seed the first row so the forward fill has a starting value
    scores_filled.loc[0, score_col] = 0
    carried_forward = scores_filled[score_col].ffill()
    scores_filled[score_col] = carried_forward

    return scores_filled

In [84]:
def fill_trial_type(df):
    """Return a copy of ``df`` with the gaps in ``data.trialType`` filled.

    The entry at index label 0 is overwritten with 'pre-trials' and the
    remaining missing values are forward-filled, so rows before the first
    recorded trial type are labelled 'pre-trials' and later rows carry the
    most recent trial type. The input dataframe is left unmodified.
    """
    type_col = 'data.trialType'
    labelled = df.copy()
    labelled.loc[0, type_col] = 'pre-trials'

    return labelled.assign(**{type_col: labelled[type_col].ffill()})

In [90]:
def fill_post_final_trial(df):
    """Return a copy of ``df`` with rows after the last finished trial relabelled.

    If the last 'trial start' event comes after the last 'trial end' event
    (a trial was started but has no matching end), every row after that last
    'trial end' gets ``data.trialType`` set to 'post-trials'. In all other
    cases the copy is returned unchanged, including when the log contains no
    'trial end' or no 'trial start' event at all.

    The row after the last 'trial end' is located as ``index label + 1``, so
    the frame is expected to have a sorted integer index.
    The input dataframe is left unmodified.
    """
    df2 = df.copy()
    events = df2['eventDescription']
    trial_end_labels = df2.index[(events == 'trial end').to_numpy()]
    trial_start_labels = df2.index[(events == 'trial start').to_numpy()]

    # Without both event types there is no "last trial" to compare against;
    # comparing missing indices would raise a TypeError
    if trial_end_labels.empty or trial_start_labels.empty:
        return df2

    final_trial_end_idx = trial_end_labels[-1]
    final_trial_start_idx = trial_start_labels[-1]

    if final_trial_end_idx < final_trial_start_idx:
        df2.loc[final_trial_end_idx + 1:, 'data.trialType'] = 'post-trials'

    return df2

In [None]:
def fill_client_id_solo(df):
    """Placeholder for filling the client id columns in solo sessions.

    TODO: the fill logic has not been written yet. For now this returns an
    unmodified copy of ``df``, so it follows the same ``df = fill_*(df)``
    calling convention as the other fill helpers instead of returning None.
    """
    df2 = df.copy()

    return df2

In [91]:
# Seed data.trialNum with 0 and forward-fill it so every row carries a trial number
df = fill_trial_zero(df)

In [92]:
# Seed data.playerScores.0 with 0 and forward-fill it so every row carries a score
df = fill_player_scores_solo(df)

In [93]:
# Label the first row 'pre-trials' and forward-fill data.trialType
df = fill_trial_type(df)

In [94]:
# Relabel rows after the last 'trial end' as 'post-trials' when a later trial was started but not ended
df = fill_post_final_trial(df)

In [102]:
# Preview the first rows of the pre-processed dataframe
df.head()

Unnamed: 0,timeLocal,timeApplication,eventDescription,data.playerPosition.0.clientId,data.playerPosition.0.location.x,data.playerPosition.0.location.y,data.playerPosition.0.location.z,data.playerPosition.0.rotation.x,data.playerPosition.0.rotation.y,data.playerPosition.0.rotation.z,data.trialNum,data.trialType,data.playerInfo.0.clientId,data.playerInfo.0.location.x,data.playerInfo.0.location.y,data.playerInfo.0.location.z,data.playerInfo.0.rotation.x,data.playerInfo.0.rotation.y,data.playerInfo.0.rotation.z,data.wall1,data.wall2,data.wallTriggered,data.triggerClient,data.playerScores.0,timeReferenced
0,1900-01-01 11:23:37.306,0 days 00:50:18.317000,logging start,,,,,,,,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:00
1,1900-01-01 11:23:37.333,0 days 00:50:18.343000,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,0.0,pre-trial,,,,,,,,,,,,0.0,0 days 00:00:00.026000
2,1900-01-01 11:23:37.350,0 days 00:50:18.361000,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,0.0,pre-trial,,,,,,,,,,,,0.0,0 days 00:00:00.044000
3,1900-01-01 11:23:37.371,0 days 00:50:18.382000,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,0.0,pre-trial,,,,,,,,,,,,0.0,0 days 00:00:00.065000
4,1900-01-01 11:23:37.392,0 days 00:50:18.403000,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,0.0,pre-trial,,,,,,,,,,,,0.0,0 days 00:00:00.086000
