# Data Structure
Data is parsed and grouped by `gidx`, the gaze index, which is shared by all records that belong to one gaze.

The other variables are:

- 'ts' : time stamp 
- 'pcl': pupil center (left eye)
- 'pcr': pupil center (right eye)
- 'pdl': pupil diameter (left eye)
- 'pdr': pupil diameter (right eye)
- 'gdl': gaze direction (left eye)
- 'gdr': gaze direction (right eye)
- 'gp': gaze position 
- 'gp3': gaze position in 3D

After this, the data is stored in a CSV file with the following columns:
- 'gidx'
- 'timestamp'
- 'pupil_center_left_x'
- 'pupil_center_left_y'
- 'pupil_center_left_z'
- 'pupil_center_right_x' 
- 'pupil_center_right_y'
- 'pupil_center_right_z'
- 'pupil_diameter_left'
- 'pupil_diameter_right'
- 'gaze_direction_left_x'
- 'gaze_direction_left_y'
- 'gaze_direction_left_z'
- 'gaze_direction_right_x'
- 'gaze_direction_right_y'
- 'gaze_direction_right_z'
- 'gaze_position_x'
- 'gaze_position_y'
- 'gaze_point_3d_x'
- 'gaze_point_3d_y'
- 'gaze_point_3d_z'

In [19]:
import json
import csv
import pandas as pd

In [20]:
def read_eye_tracking_data(file_path):
    """Load a line-delimited JSON recording into a list.

    Every non-blank line of the file is decoded with ``json.loads`` and the
    decoded objects are returned in file order.

    Args:
        file_path: Path of the file to read; one JSON document per line.

    Returns:
        A list holding one decoded object per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    eye_tracking_data = []

    # JSON text is UTF-8 by specification, so do not depend on the
    # platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Iterate the file lazily instead of materialising every line first.
        for line in file:
            # json.loads raises on an empty string, so blank or
            # whitespace-only lines are skipped rather than aborting the load.
            if not line.strip():
                continue
            eye_tracking_data.append(json.loads(line))

    return eye_tracking_data

def parse_data(eye_tracking_data):
    """Group decoded records by their 'gidx' value.

    Records whose 's' value is not equal to 0, or that carry no 'gidx',
    are ignored. For every remaining record the matching group is created
    on first sight (all fields start as None) and then updated:

    - 'ts' is overwritten with the record's 'ts' (None if absent).
    - 'pc', 'pd' and 'gd' are stored under 'pcl'/'pdl'/'gdl' when the
      record's 'eye' is 'left' and under 'pcr'/'pdr'/'gdr' when it is
      'right'; any other 'eye' value leaves them untouched.
    - 'gp' and 'gp3' are stored whenever they are present.

    Args:
        eye_tracking_data: Iterable of dict-like records.

    Returns:
        A dict mapping each gidx to a dict with the keys
        'ts', 'pcl', 'pcr', 'pdl', 'pdr', 'gdl', 'gdr', 'gp', 'gp3'.
    """
    field_names = ('ts', 'pcl', 'pcr', 'pdl', 'pdr', 'gdl', 'gdr', 'gp', 'gp3')
    per_eye_keys = ('pc', 'pd', 'gd')
    shared_keys = ('gp', 'gp3')

    grouped_data = {}

    for record in eye_tracking_data:
        # NOTE(review): 's' looks like a status flag where 0 means valid -
        # confirm against the recording format.
        if record.get('s') != 0:
            continue

        gidx = record.get('gidx')
        if gidx is None:
            continue

        if gidx not in grouped_data:
            grouped_data[gidx] = dict.fromkeys(field_names)
        entry = grouped_data[gidx]

        # The timestamp is refreshed by every record of the group.
        entry['ts'] = record.get('ts', None)

        # Per-eye measurements land in the '...l' or '...r' slot.
        eye = record.get('eye', None)
        if eye == 'left':
            suffix = 'l'
        elif eye == 'right':
            suffix = 'r'
        else:
            suffix = None

        if suffix is not None:
            for key in per_eye_keys:
                value = record.get(key, None)
                if value is not None:
                    entry[key + suffix] = value

        # Measurements that are not tied to a single eye.
        for key in shared_keys:
            value = record.get(key, None)
            if value is not None:
                entry[key] = value

    return grouped_data

def print_data(grouped_data):
    """Print every group of a grouped-data mapping to stdout.

    For each gidx a header line is printed, followed by one indented line
    per field ('pcl', 'pcr', 'pdl', 'pdr', 'gdl', 'gdr', 'gp', 'gp3') and
    a blank separator. The 'ts' field is not printed.

    Args:
        grouped_data: Mapping of gidx to a dict containing the fields above.
    """
    shown_fields = ('pcl', 'pcr', 'pdl', 'pdr', 'gdl', 'gdr', 'gp', 'gp3')

    for gidx, data_dict in grouped_data.items():
        print(f"Group with gidx={gidx}:")
        for name in shown_fields:
            print(f"  {name}: {data_dict[name]}")
        # print() appends its own newline, so this leaves two empty lines.
        print("\n")

In [52]:
# Directory prefix (with trailing slash) prepended to every file name below.
common_path = "subject7/"

In [50]:
# Load the raw recording and group its records by gidx.
file_path = common_path + 'livedata.json'
eye_tracking_data = read_eye_tracking_data(file_path)
grouped_data = parse_data(eye_tracking_data)

# Keep only the groups in which no field is None.
filtered_grouped_data = {
    gidx: data
    for gidx, data in grouped_data.items()
    if not any(value is None for value in data.values())
}

In [51]:
# Column names of the flattened per-gaze rows, in output order.
header = [
    'gidx', 'timestamp',
    'pupil_center_left_x', 'pupil_center_left_y', 'pupil_center_left_z',
    'pupil_center_right_x', 'pupil_center_right_y', 'pupil_center_right_z',
    'pupil_diameter_left', 'pupil_diameter_right',
    'gaze_direction_left_x', 'gaze_direction_left_y', 'gaze_direction_left_z',
    'gaze_direction_right_x', 'gaze_direction_right_y', 'gaze_direction_right_z',
    'gaze_position_x', 'gaze_position_y',
    'gaze_point_3d_x', 'gaze_point_3d_y', 'gaze_point_3d_z',
]

with open(f'{common_path}data.csv', 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(header)

    # One row per complete group; vector fields are split into components.
    for gidx, data in filtered_grouped_data.items():
        pcl, pcr = data['pcl'], data['pcr']
        gdl, gdr = data['gdl'], data['gdr']
        gp, gp3 = data['gp'], data['gp3']

        csv_writer.writerow([
            gidx, data['ts'],
            pcl[0], pcl[1], pcl[2],
            pcr[0], pcr[1], pcr[2],
            data['pdl'], data['pdr'],
            gdl[0], gdl[1], gdl[2],
            gdr[0], gdr[1], gdr[2],
            gp[0], gp[1],
            gp3[0], gp3[1], gp3[2],
        ])

print("CSV file created successfully.")

CSV file created successfully.


In [53]:
# Columns taken over from the exported file; 'Computer timestamp' is the join key.
exported_columns = [
    'Computer timestamp',
    'Eye movement type',
    'Gaze event duration',
    'Eye movement type index',
    'Fixation point X',
    'Fixation point Y',
]

# Load the exported file and the CSV written by the previous cell.
data1 = pd.read_csv(common_path + 'data_exported.csv')
data = pd.read_csv(common_path + 'data.csv')

# Left join: every row of `data` is kept, exported columns are attached
# where 'timestamp' equals 'Computer timestamp'.
merged_data = data.merge(
    data1[exported_columns],
    how='left',
    left_on='timestamp',
    right_on='Computer timestamp',
)

# The join key from the right-hand side duplicates 'timestamp'; remove it.
merged_data = merged_data.drop(columns=['Computer timestamp'])

# Write the merged table without the index column.
merged_data.to_csv(common_path + 'merged_data.csv', index=False)

print("Merged data saved to 'merged_data.csv'")


Merged data saved to 'merged_data.csv'
