In [77]:
import h5py
import numpy as np
import pandas as pd
import I2MC


# def load_gaze_data(h5_file: h5py.Dataset, output='pandas'):
#     """
#     Load gaze data from a Titta-generated HDF5 file.
#     """
#     with h5py.File(h5_file, 'r') as f:
#         labels = np.array(f['gaze']['axis0'])

#         categorical_data  = np.array(f['gaze']['block0_values'])
#         categorical_labels = labels[28:]

#         measurement_data = np.array(f['gaze']['block1_values'])
#         measurement_labels = labels[2:28]

#         timestamps = np.array(f['gaze']['block2_values'])
#         timestamp_labels = labels[:2]

#     if output == 'pandas':
#         categorical_data = pd.DataFrame(categorical_data, columns=categorical_labels)
#         measurement_data = pd.DataFrame(measurement_data, columns=measurement_labels)
#         timestamps = pd.DataFrame(timestamps, columns=timestamp_labels)

#         return categorical_data, measurement_data, timestamps

#     return (categorical_data, categorical_labels), (measurement_data, measurement_labels), (timestamps, timestamp_labels)

In [92]:
def load_gaze_data(filepath:str):
    """ Load gaze data from a Titta-generated HDF5 file. """
    return pd.read_hdf(filepath, 'gaze')

def preprocess_raw(df: pd.DataFrame):
    """
    Preprocess raw gaze data into minimal form.
    """

    d_minimal = pd.DataFrame(columns=['R_X', 'R_Y', 'L_X', 'L_Y', 'x', 'y', 'time'])
    
    d_minimal['R_X'] = ((df['right_gaze_point_on_display_area_x'].interpolate()) - 0.5) * 1900
    d_minimal['R_Y'] = -((df['right_gaze_point_on_display_area_y'].interpolate()) - 0.5) * 1300
    d_minimal['L_X'] = ((df['left_gaze_point_on_display_area_x'].interpolate()) - 0.5) * 1900
    d_minimal['L_Y'] = -((df['left_gaze_point_on_display_area_y'].interpolate()) - 0.5) * 1300

    d_minimal['x'] = (d_minimal['R_X'] + d_minimal['L_X']) / 2
    d_minimal['y'] = (d_minimal['R_Y'] + d_minimal['L_Y']) / 2
    d_minimal['time'] = df['device_time_stamp']

    return d_minimal

In [103]:

def calculate_dispersion(x, y):
    """
    Calculate the dispersion of a set of x and y points.
    Dispersion = max(x) - min(x) + max(y) - min(y)
    """
    if len(x) < 2:
        return 0
    return (np.max(x) - np.min(x)) + (np.max(y) - np.min(y))

def fixation_detection(gaze_data, dispersion_threshold, min_duration_ms):
    """
    I-DT dispersion-based fixation detection algorithm.

    gaze_data: DataFrame with 'time', 'x', 'y' columns.
    dispersion_threshold: Max dispersion (in pixels or degrees).
    min_duration_ms: Minimum duration for a fixation (in milliseconds).

    Returns a list of fixations, each as a (start_time, end_time, centroid_x, centroid_y).
    """
    fixations = []
    start_idx = 0
    while start_idx < len(gaze_data):
        end_idx = start_idx
        while end_idx < len(gaze_data):
            window = gaze_data.iloc[start_idx:end_idx+1]
            dispersion = calculate_dispersion(window['x'], window['y'])

            if dispersion > dispersion_threshold:
                break

            end_idx += 1

        duration = gaze_data.iloc[end_idx-1]['time'] - gaze_data.iloc[start_idx]['time']

        if duration >= min_duration_ms:
            fixation_x = window['x'].mean()
            fixation_y = window['y'].mean()
            fixations.append((gaze_data.iloc[start_idx]['time'], gaze_data.iloc[end_idx-1]['time'], fixation_x, fixation_y))

        start_idx = end_idx

    return fixations

In [104]:
df_gaze = load_gaze_data('test1.h5')
df = preprocess_raw(df_gaze)
df.describe()

Unnamed: 0,R_X,R_Y,L_X,L_Y,x,y,time
count,31074.0,31074.0,31075.0,31075.0,31074.0,31074.0,31075.0
mean,-47.786884,179.469696,-312.385651,92.016235,-180.099945,135.751541,254574000000.0
std,365.957916,164.144104,500.304626,202.390564,423.468872,173.780807,74736580.0
min,-1283.096558,-1190.302979,-921.628113,-1554.17688,-961.292969,-1302.567383,254444600000.0
25%,-335.787598,109.362915,-724.033844,5.601001,-528.909424,87.274895,254509300000.0
50%,-301.081177,215.324066,-673.271301,121.8535,-491.71109,165.307098,254574000000.0
75%,250.26181,257.117004,91.256924,197.531898,171.036804,225.432922,254638700000.0
max,1326.105713,903.727539,1329.288208,898.290466,1259.873535,890.628296,254703400000.0


In [112]:
log_level    = 1    # 0: no output, 1: output from this script only, 2: provide some output on command line regarding I2MC internal progress
do_plot_data = True # if set to True, plot of fixation detection for each trial will be saved as png-file in output folder.
# the figures works best for short trials (up to around 20 seconds)

opt = {}
# General variables for eye-tracking data
opt['xres']         = 1920.0                # maximum value of horizontal resolution in pixels
opt['yres']         = 1080.0                # maximum value of vertical resolution in pixels
opt['missingx']     = -opt['xres']          # missing value for horizontal position in eye-tracking data (example data uses -xres). used throughout the algorithm as signal for data loss
opt['missingy']     = -opt['yres']          # missing value for vertical position in eye-tracking data (example data uses -yres). used throughout algorithm as signal for data loss
opt['freq']         = 300.0                 # sampling frequency of data (check that this value matches with values actually obtained from measurement!)

# Variables for the calculation of angular measures
# These values are used to calculate noise measures (RMS and BCEA) of
# fixations. The may be left as is, but don't use the noise measures then.
# If either or both are empty, the noise measures are provided in pixels
# instead of degrees.
opt['scrSz']        = [30.0, 18.0]    # screen size in cm
opt['disttoscreen'] = 65.0                  # distance to screen in cm.



In [113]:
fixations, _, _ = I2MC.I2MC(df, opt, log_level==2,logging_offset="      ")

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [110]:
fixations = fixation_detection(df, 1024, 100)
len(fixations)

75

In [111]:
import ast

def map_fixations_to_screen(fixations, stimulus_data_file):
    # Load the stimulus data
    stimulus_data = pd.read_csv(stimulus_data_file)

    # Parse true_position into actual lists of floats
    stimulus_data['true_position'] = stimulus_data['true_position'].apply(ast.literal_eval)

    # Create a list to store the fixation features
    fixation_features = []

    for fixation in fixations:
        start_time, end_time, x, y = fixation
        duration = end_time - start_time

        # Current gaze point
        gaze_position = (x, y)

        # Find the nearest object (minimum Euclidean distance)
        object_coords = [tuple(pos) for pos in stimulus_data['true_position']]
        distances = [np.sqrt((gaze_position[0] - obj_pos[0])**2 + (gaze_position[1] - obj_pos[1])**2) for obj_pos in object_coords]
        
        nearest_object_index = np.argmin(distances)

        # Get data for nearest object and add fixation duration
        nearest_object = stimulus_data.iloc[nearest_object_index].to_dict()
        nearest_object['fixation_duration'] = duration
        nearest_object['fixation_x'] = x  # Add fixation point for reference if needed
        nearest_object['fixation_y'] = y

        fixation_features.append(nearest_object)

    return fixation_features

def save_fixation_features(fixation_features, output_file):
    # Convert the fixation features to a DataFrame
    fixation_features_df = pd.DataFrame(fixation_features)
    
    # Save the DataFrame to a CSV file
    fixation_features_df.to_csv(output_file, index=False)

# Example usage
stimulus_data_file = 'stimulus_data.csv'
output_file = 'fixation_features.csv'

fixation_features = map_fixations_to_screen(fixations, stimulus_data_file)
save_fixation_features(fixation_features, output_file)

# Collect data from multiple trials and create histograms
all_fixation_features = []

# # Assuming you have a list of trials with their corresponding stimulus data files
# trials = ['trial1_stimulus_data.csv', 'trial2_stimulus_data.csv', ...]

# for trial in trials:
#     fixation_features = map_fixations_to_screen(fixations, trial)
#     all_fixation_features.extend(fixation_features)

# all_fixation_features_df = pd.DataFrame(all_fixation_features)

# Create histograms for the amount of time spent on distractor and target features
# distractor_time = all_fixation_features_df[all_fixation_features_df['feature_type'] == 'distractor']['fixation_duration']
# target_time = all_fixation_features_df[all_fixation_features_df['feature_type'] == 'target']['fixation_duration']

# distractor_time.hist(bins=50, alpha=0.5, label='Distractor')
# target_time.hist(bins=50, alpha=0.5, label='Target')

# plt.legend()
# plt.xlabel('Fixation Duration')
# plt.ylabel('Frequency')
# plt.title('Histogram of Fixation Durations on Distractor and Target Features')
# plt.show()