In [101]:
### Load JSON data and prepare it for analysis ###

In [102]:
import json
import os
import ast
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Display settings for inspecting wide, high-precision frames in this notebook
pd.options.display.precision = 9       # digits shown after the decimal point
pd.options.display.width = 1000        # enough room to print all data on one line
pd.options.display.max_columns = None  # never truncate the column list

# True  -> build the frame with pd.json_normalize (nested 'data' dict collapsed into columns)
# False -> build the frame with pd.read_json straight from the file
json_normalise = True

In [103]:
## Load JSON file

In [104]:
# data_folder = '/home/tom/Documents/SWC/data' # desktop Duan Lab
# json_filename = '240913_Yansu_Jerry/2024-09-13_11-23-37_YansuFirstSolo.json' 
data_folder = r'D:\Users\Tom\OneDrive\PhD\SWC\data' # desktop home
json_filename = r'first_experiments_2409\240913\2024-09-13_11-23-37_YansuFirstSolo.json'
filepath = os.path.join(data_folder, json_filename)

# Only the read needs the file handle; everything below works on the parsed
# object, so the file is closed as soon as json.load returns.
with open(filepath) as f:
    file = json.load(f)

# First logged message (expected to be the 'logging start' event)
print(file[0])

## Store timings as datetime objects
# 'timeLocal' is formatted 'HH:MM:SS:mmm'
# NOTE(review): the variables below are named *_utc but are read from
# 'timeLocal' - confirm which clock the logger actually writes.
start_time_utc_str = file[0]['timeLocal']
start_time_utc_split = start_time_utc_str.split(':')
hours = int(start_time_utc_split[0])
minutes = int(start_time_utc_split[1])
seconds = int(start_time_utc_split[2])
# assumes the last field is a millisecond count - TODO confirm it is always zero-padded
microseconds = int(start_time_utc_split[3])*1000

# convert to datetime object (arbitrary 1970-01-01 date, only the time of day is meaningful)
base_time = datetime(1970,1,1)
start_time_utc = base_time + timedelta(hours=hours, minutes=minutes, seconds=seconds, microseconds=microseconds)

# convert back to string of millisecond precision (1e-3 s);
# '%f' always yields six digits, so dropping the last three is safe here
print(start_time_utc.strftime('%H:%M:%S.%f')[:-3])

# repeat for application time
# but here use just a time delta as this is not in reference to any date
start_time_app_str = file[0]['timeApplication']
# 'timeApplication' is decimal seconds ('3018.317'). Split on the decimal point
# and right-pad the fraction to six digits so that e.g. '.5' is read as
# 500000 microseconds rather than 5 milliseconds, and a value with no
# fractional part does not raise.
seconds_text, _, fraction_text = start_time_app_str.partition('.')
seconds = int(seconds_text)
microseconds = int(fraction_text.ljust(6, '0')[:6]) if fraction_text else 0

start_time_app = timedelta(seconds=seconds, microseconds=microseconds)

# str(timedelta) only includes the six-digit fraction when it is non-zero,
# so trim to milliseconds only in that case
start_time_app_text = str(start_time_app)
if start_time_app.microseconds:
    start_time_app_text = start_time_app_text[:-3]
print(start_time_app_text)

print(file[0].keys())


{'timeLocal': '11:23:37:306', 'timeApplication': '3018.317', 'eventDescription': 'logging start'}
11:23:37.306
0:50:18.317
dict_keys(['timeLocal', 'timeApplication', 'eventDescription'])


In [105]:
## Parse data into pandas dataframe

In [106]:
## Load dataframe with collapsed data dictionary
# Note json_normalize takes the already-parsed JSON object (`file`),
# whereas read_json reads from the file on disk
if json_normalise:
    # Nested 'data' entries become dot-separated columns (e.g. 'data.trialNum')
    df = pd.json_normalize(file)
else:
    with open(filepath) as f:
        df = pd.read_json(f)
        print(type(df))

df.head()


Unnamed: 0,timeLocal,timeApplication,eventDescription,data.playerPosition.0.clientId,data.playerPosition.0.location.x,data.playerPosition.0.location.y,data.playerPosition.0.location.z,data.playerPosition.0.rotation.x,data.playerPosition.0.rotation.y,data.playerPosition.0.rotation.z,data.trialNum,data.trialType,data.playerInfo.0.clientId,data.playerInfo.0.location.x,data.playerInfo.0.location.y,data.playerInfo.0.location.z,data.playerInfo.0.rotation.x,data.playerInfo.0.rotation.y,data.playerInfo.0.rotation.z,data.wall1,data.wall2,data.wallTriggered,data.triggerClient,data.playerScores.0
0,11:23:37:306,3018.317,logging start,,,,,,,,,,,,,,,,,,,,,
1,11:23:37:333,3018.343,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,
2,11:23:37:350,3018.361,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,
3,11:23:37:371,3018.382,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,
4,11:23:37:392,3018.403,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,


In [107]:
##  Convert time columns into datetime format
# 'timeLocal' carries no date, so the parsed timestamps land on pandas' default date
df['timeLocal'] = pd.to_datetime(df['timeLocal'], format='%H:%M:%S:%f')

# 'timeApplication' is a string of decimal seconds: make it numeric, then a timedelta.
# to_timedelta is vectorised; the earlier per-row lambda version was a python loop:
# df['timeApplication'] = df['timeApplication'].apply(lambda x: timedelta(seconds=int(x) + (x - int(x))))
df['timeApplication'] = pd.to_timedelta(pd.to_numeric(df['timeApplication']), unit='s')

In [108]:
# TODO: avoid modifying `df` in place across cells - give each processing stage its own named dataframe so cells can be re-run safely

In [109]:
### Pre-process data

In [None]:
## Take time in reference to start time
def reference_application_time(df):
    ''' Return a copy of df with an extra 'timeReferenced' column.

        'timeReferenced' is each row's application time minus the application
        time of the first logged message, i.e. time elapsed since datalogging
        began. The input dataframe is left untouched. '''
    referenced = df.copy()
    app_time = df['timeApplication']
    logging_start = app_time.iloc[0]  # application time at the first logged message
    referenced['timeReferenced'] = app_time.sub(logging_start)

    return referenced


In [111]:
# Add 'timeReferenced': application time relative to the first logged row
df = reference_application_time(df)

In [112]:
# trialNum is only recorded at the single timepoint where it changes, so every
# other row is nan and has to be filled in from the most recent recorded value.
def fill_trial_zero(df):
    ''' Return a copy of df with 'data.trialNum' filled on every row.

        Row 0 is set to trial 0 (pre-trial data) and the column is then
        forward-filled, so only the nans before trial 1 end up as 0. '''
    trial_col = 'data.trialNum'
    filled = df.copy()
    filled.loc[0, trial_col] = 0  # seed value for everything before the first trial

    return filled.assign(**{trial_col: filled[trial_col].ffill()})

In [113]:
def fill_player_scores_solo(df):
    ''' Return a copy of df with 'data.playerScores.0' filled on every row.

        The score at row 0 is set to 0 and each later nan takes the most
        recent recorded score (forward fill). '''
    score_col = 'data.playerScores.0'
    scored = df.copy()
    scored.loc[0, score_col] = 0  # starting score before any score is logged
    carried_forward = scored[score_col].ffill()
    scored[score_col] = carried_forward

    return scored

In [114]:
def fill_trial_type(df):
    ''' Return a copy of df with 'data.trialType' filled on every row.

        Row 0 is labelled 'pre-trials'; every missing entry after that takes
        the most recent recorded trial type (forward fill). '''
    type_col = 'data.trialType'
    typed = df.copy()
    typed.loc[0, type_col] = 'pre-trials'  # label for rows before the first trial
    typed[type_col] = typed[type_col].ffill()

    return typed

In [115]:
def fill_post_final_trial(df):
    ''' Return a copy of df with rows after the final completed trial labelled 'post-trials'.

        If the last 'trial start' event comes after the last 'trial end' event
        (a trial was started but never finished), every row after that last
        'trial end' has 'data.trialType' set to 'post-trials'.

        The copy is returned unchanged when the recording contains no
        'trial start' or no 'trial end' event, or when the last trial that
        started also ended. '''
    df2 = df.copy()
    events = df2['eventDescription']
    trial_end_indices = events[events == 'trial end'].index
    trial_start_indices = events[events == 'trial start'].index

    # Without at least one start and one end there is no final trial to
    # compare against (previously this hit a None < int TypeError)
    if trial_end_indices.empty or trial_start_indices.empty:
        return df2

    final_trial_end_idx = trial_end_indices[-1]
    final_trial_start_idx = trial_start_indices[-1]

    if final_trial_end_idx < final_trial_start_idx:
        # .loc slices by label, so this starts at the row after the last 'trial end'
        df2.loc[final_trial_end_idx + 1:, 'data.trialType'] = 'post-trials'

    return df2

In [116]:
def fill_client_id_solo(df):
    ''' Placeholder: currently returns an unmodified copy of df.

        TODO: decide whether this step is needed at all. The author's note
        says it is not, and that the 'playerInfo' fields should be changed to
        'playerPosition' instead. '''
    df2 = df.copy()

    # Return the copy like the other fill_* helpers, so that
    # `df = fill_client_id_solo(df)` cannot replace the dataframe with None
    return df2

In [117]:
def fill_trial_walls(df):
    ''' Return a copy of df with 'data.wall1' / 'data.wall2' filled across each trial.

        The n-th 'trial start', 'slice onset' and 'trial end' events are taken
        to belong to the same trial. For each such trial the wall numbers are
        forward-filled from slice onset to trial end, then back-filled from
        slice onset to trial start. Rows outside these ranges are not touched.

        Only trials that have all three events are processed, so a trailing
        trial that never reached slice onset or trial end is left as it is.

        The three event index lists are printed for inspection. '''
    df2 = df.copy()
    events = df2['eventDescription']

    trial_start_indices = df2[events == 'trial start'].index
    print(trial_start_indices)
    slice_onset_indices = df2[events == 'slice onset'].index
    print(slice_onset_indices)
    trial_end_indices = df2[events == 'trial end'].index
    print(trial_end_indices)

    # Bound the loop by the shortest event list rather than len(starts) - 1:
    # that stays in range for an unfinished final trial and no longer skips
    # the last trial when every trial has completed.
    n_complete_trials = min(len(trial_start_indices),
                            len(slice_onset_indices),
                            len(trial_end_indices))

    for idx in range(n_complete_trials):
        trial_start = trial_start_indices[idx]
        slice_onset = slice_onset_indices[idx]
        trial_end = trial_end_indices[idx]

        for wall_column in ('data.wall1', 'data.wall2'):
            # Forward fill the wall numbers from slice onset to end trial
            df2.loc[slice_onset:trial_end, wall_column] = df2.loc[slice_onset:trial_end, wall_column].ffill()

            # Backwards fill the wall numbers from slice onset to start trial
            df2.loc[trial_start:slice_onset, wall_column] = df2.loc[trial_start:slice_onset, wall_column].bfill()

    return df2
    

In [118]:
# Fill 'data.trialNum' on every row (0 before the first trial, then forward-filled)
df = fill_trial_zero(df)

In [119]:
# Fill 'data.playerScores.0' on every row (0 at the start, then forward-filled)
df = fill_player_scores_solo(df)

In [120]:
# Fill 'data.trialType' on every row ('pre-trials' at the start, then forward-filled)
df = fill_trial_type(df)

In [121]:
# Relabel rows after the last 'trial end' as 'post-trials' when a later 'trial start' exists
df = fill_post_final_trial(df)

In [122]:
# Fill 'data.wall1' / 'data.wall2' across each trial (also prints the event indices used)
df = fill_trial_walls(df)

Index([1435, 2376, 2802, 3242, 3686, 4224, 4626, 5077, 5625, 6145, 6566, 7101, 7599, 8159, 8672, 9214, 9762, 10253, 10685, 11138, 11647, 12213, 12854, 13247, 13700, 14294, 15018, 15571, 16101, 16933, 17287], dtype='int64')
Index([2218, 2640, 3108, 3583, 3984, 4472, 4918, 5426, 5979, 6417, 6876, 7415, 7942, 8460, 9028, 9570, 10128, 10571, 11001, 11438, 12010, 12654, 13139, 13467, 14104, 14819, 15393, 15897, 16693, 17140], dtype='int64')


In [123]:
# First non-null value recorded in 'data.wall2'
df['data.wall2'].dropna().iloc[0]

np.float64(8.0)

In [124]:
# Inspect one row by position; in this recording row 1494 is a 'slice onset' event in trial 1
df.iloc[1494]

timeLocal                           1900-01-01 11:24:08.455000
timeApplication                         0 days 00:50:49.466000
eventDescription                                   slice onset
data.playerPosition.0.clientId                             0.0
data.playerPosition.0.location.x                  -0.020248514
data.playerPosition.0.location.y                   1.984999657
data.playerPosition.0.location.z                  -5.649295807
data.playerPosition.0.rotation.x                 359.921661377
data.playerPosition.0.rotation.y                 162.361968994
data.playerPosition.0.rotation.z                           0.0
data.trialNum                                              1.0
data.trialType                                         HighLow
data.playerInfo.0.clientId                                 NaN
data.playerInfo.0.location.x                               NaN
data.playerInfo.0.location.y                               NaN
data.playerInfo.0.location.z                           

In [126]:
# Rows either side of the first 'trial start' (index 1435 in this recording),
# to check the switch from 'pre-trials' / trial 0 to trial 1
df.iloc[1430:1440]

Unnamed: 0,timeLocal,timeApplication,eventDescription,data.playerPosition.0.clientId,data.playerPosition.0.location.x,data.playerPosition.0.location.y,data.playerPosition.0.location.z,data.playerPosition.0.rotation.x,data.playerPosition.0.rotation.y,data.playerPosition.0.rotation.z,data.trialNum,data.trialType,data.playerInfo.0.clientId,data.playerInfo.0.location.x,data.playerInfo.0.location.y,data.playerInfo.0.location.z,data.playerInfo.0.rotation.x,data.playerInfo.0.rotation.y,data.playerInfo.0.rotation.z,data.wall1,data.wall2,data.wallTriggered,data.triggerClient,data.playerScores.0,timeReferenced
1430,1900-01-01 11:24:07.145,0 days 00:50:48.156000,time,0.0,-0.020248514,1.984999657,-5.649295807,359.921661377,162.361968994,0.0,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:29.839000
1431,1900-01-01 11:24:07.165,0 days 00:50:48.177000,time,0.0,-0.020248514,1.984999657,-5.649295807,359.921661377,162.361968994,0.0,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:29.860000
1432,1900-01-01 11:24:07.187,0 days 00:50:48.198000,time,0.0,-0.020248514,1.984999657,-5.649295807,359.921661377,162.361968994,0.0,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:29.881000
1433,1900-01-01 11:24:07.207,0 days 00:50:48.218000,time,0.0,-0.020248514,1.984999657,-5.649295807,359.921661377,162.361968994,0.0,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:29.901000
1434,1900-01-01 11:24:07.228,0 days 00:50:48.239000,time,0.0,-0.020248514,1.984999657,-5.649295807,359.921661377,162.361968994,0.0,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:29.922000
1435,1900-01-01 11:24:07.259,0 days 00:50:48.270000,trial start,,,,,,,,1.0,HighLow,0.0,-0.020248514,1.984999657,-5.649295807,359.921661377,162.361968994,0.0,,,,,0.0,0 days 00:00:29.953000
1436,1900-01-01 11:24:07.261,0 days 00:50:48.272000,time,0.0,-0.020248514,1.984999657,-5.649295807,359.921661377,162.361968994,0.0,1.0,HighLow,,,,,,,,,,,,0.0,0 days 00:00:29.955000
1437,1900-01-01 11:24:07.269,0 days 00:50:48.280000,time,0.0,-0.020248514,1.984999657,-5.649295807,359.921661377,162.361968994,0.0,1.0,HighLow,,,,,,,,,,,,0.0,0 days 00:00:29.963000
1438,1900-01-01 11:24:07.290,0 days 00:50:48.301000,time,0.0,-0.020248514,1.984999657,-5.649295807,359.921661377,162.361968994,0.0,1.0,HighLow,,,,,,,,,,,,0.0,0 days 00:00:29.984000
1439,1900-01-01 11:24:07.311,0 days 00:50:48.322000,time,0.0,-0.020248514,1.984999657,-5.649295807,359.921661377,162.361968994,0.0,1.0,HighLow,,,,,,,,,,,,0.0,0 days 00:00:30.005000


In [59]:
# Check the first rows after pre-processing (filled trial number/type, score and 'timeReferenced')
df.head()

Unnamed: 0,timeLocal,timeApplication,eventDescription,data.playerPosition.0.clientId,data.playerPosition.0.location.x,data.playerPosition.0.location.y,data.playerPosition.0.location.z,data.playerPosition.0.rotation.x,data.playerPosition.0.rotation.y,data.playerPosition.0.rotation.z,data.trialNum,data.trialType,data.playerInfo.0.clientId,data.playerInfo.0.location.x,data.playerInfo.0.location.y,data.playerInfo.0.location.z,data.playerInfo.0.rotation.x,data.playerInfo.0.rotation.y,data.playerInfo.0.rotation.z,data.wall1,data.wall2,data.wallTriggered,data.triggerClient,data.playerScores.0,timeReferenced
0,1900-01-01 11:23:37.306,0 days 00:50:18.317000,logging start,,,,,,,,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:00
1,1900-01-01 11:23:37.333,0 days 00:50:18.343000,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:00.026000
2,1900-01-01 11:23:37.350,0 days 00:50:18.361000,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:00.044000
3,1900-01-01 11:23:37.371,0 days 00:50:18.382000,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:00.065000
4,1900-01-01 11:23:37.392,0 days 00:50:18.403000,time,0.0,0.0,1.914999962,0.0,0.0,0.0,0.0,0.0,pre-trials,,,,,,,,,,,,0.0,0 days 00:00:00.086000


In [58]:
# Show every 'server-selected trigger activation' event row
df[df['eventDescription'] == 'server-selected trigger activation']

Unnamed: 0,timeLocal,timeApplication,eventDescription,data.playerPosition.0.clientId,data.playerPosition.0.location.x,data.playerPosition.0.location.y,data.playerPosition.0.location.z,data.playerPosition.0.rotation.x,data.playerPosition.0.rotation.y,data.playerPosition.0.rotation.z,data.trialNum,data.trialType,data.playerInfo.0.clientId,data.playerInfo.0.location.x,data.playerInfo.0.location.y,data.playerInfo.0.location.z,data.playerInfo.0.rotation.x,data.playerInfo.0.rotation.y,data.playerInfo.0.rotation.z,data.wall1,data.wall2,data.wallTriggered,data.triggerClient,data.playerScores.0,timeReferenced
2121,1900-01-01 11:24:21.509,0 days 00:51:02.520000,server-selected trigger activation,0.0,-18.552213669,1.984999895,1.871329188,3.469141483,278.484039307,0.0,1.0,HighLow,,,,,,,,7.0,8.0,7.0,0.0,0.0,0 days 00:00:44.203000
2543,1900-01-01 11:24:30.224,0 days 00:51:11.235000,server-selected trigger activation,0.0,-13.879068375,1.984999776,-12.260874748,9.387865067,209.661636353,0.0,2.0,HighLow,,,,,,,,6.0,2.0,6.0,0.0,50.0,0 days 00:00:52.918000
3011,1900-01-01 11:24:39.904,0 days 00:51:20.915000,server-selected trigger activation,0.0,-18.63368988,1.984999657,0.006714127,5.456036568,268.972869873,0.0,3.0,HighLow,,,,,,,,8.0,7.0,7.0,0.0,100.0,0 days 00:01:02.598000
3486,1900-01-01 11:24:49.741,0 days 00:51:30.753000,server-selected trigger activation,0.0,-13.118782043,1.984999776,-13.105634689,14.278943062,195.631469727,-2.2e-07,4.0,HighLow,,,,,,,,6.0,2.0,6.0,0.0,120.0,0 days 00:01:12.436000
3887,1900-01-01 11:24:58.006,0 days 00:51:39.018000,server-selected trigger activation,0.0,18.68775177,1.984999776,0.700445116,8.164925575,69.641807556,0.0,5.0,HighLow,,,,,,,,3.0,1.0,3.0,0.0,170.0,0 days 00:01:20.701000
4375,1900-01-01 11:25:08.080,0 days 00:51:49.092000,server-selected trigger activation,0.0,-12.951211929,1.984999657,13.500212669,3.719601154,329.774505615,-1.07e-07,6.0,HighLow,,,,,,,,8.0,6.0,8.0,0.0,220.0,0 days 00:01:30.775000
4821,1900-01-01 11:25:17.303,0 days 00:51:58.314000,server-selected trigger activation,0.0,12.398611069,1.984999895,-13.97539711,7.188714504,128.850708008,0.0,7.0,HighLow,,,,,,,,2.0,4.0,4.0,0.0,270.0,0 days 00:01:39.997000
5329,1900-01-01 11:25:27.803,0 days 00:52:08.814000,server-selected trigger activation,0.0,-12.301939964,1.984999657,14.068635941,359.163238525,328.209564209,1.3e-08,8.0,HighLow,,,,,,,,8.0,7.0,8.0,0.0,290.0,0 days 00:01:50.497000
5882,1900-01-01 11:25:39.260,0 days 00:52:20.272000,server-selected trigger activation,0.0,-0.383580565,1.984999895,-18.616758347,10.460948944,181.953994751,1.4e-08,9.0,HighLow,,,,,,,,5.0,4.0,5.0,0.0,340.0,0 days 00:02:01.955000
6320,1900-01-01 11:25:48.305,0 days 00:52:29.316000,server-selected trigger activation,0.0,-0.655188143,1.984999776,-18.590547562,17.829362869,162.757522583,-2.24e-07,10.0,HighLow,,,,,,,,5.0,4.0,5.0,0.0,390.0,0 days 00:02:10.999000
