In [5]:
import h5py
import numpy as np
import pandas as pd


def load_gaze_data(h5_file: h5py.Dataset, output='pandas'):
    """
    Load gaze data from a Titta-generated HDF5 file.
    """
    with h5py.File(h5_file, 'r') as f:
        labels = np.array(f['gaze']['axis0'])

        categorical_data  = np.array(f['gaze']['block0_values'])
        categorical_labels = labels[28:]

        measurement_data = np.array(f['gaze']['block1_values'])
        measurement_labels = labels[2:28]

        timestamps = np.array(f['gaze']['block2_values'])
        timestamp_labels = labels[:2]

    if output == 'pandas':
        categorical_data = pd.DataFrame(categorical_data, columns=categorical_labels)
        measurement_data = pd.DataFrame(measurement_data, columns=measurement_labels)
        timestamps = pd.DataFrame(timestamps, columns=timestamp_labels)

        return categorical_data, measurement_data, timestamps

    return (categorical_data, categorical_labels), (measurement_data, measurement_labels), (timestamps, timestamp_labels)

In [6]:

def preprocess_raw(df: pd.DataFrame):
    
    df['rx'] = df[b'right_gaze_point_on_display_area_x'].interpolate()
    df['ry'] = df[b'right_gaze_point_on_display_area_y'].interpolate()
    df['lx'] = df[b'left_gaze_point_on_display_area_x'].interpolate()
    df['ly'] = df[b'left_gaze_point_on_display_area_y'].interpolate()

    d = pd.DataFrame()
    d['x'] = (df['rx'] + df['lx']) / 2
    d['y'] = (df['ry'] + df['ly']) / 2

    return d

In [7]:

def calculate_dispersion(x, y):
    """
    Calculate the dispersion of a set of x and y points.
    Dispersion = max(x) - min(x) + max(y) - min(y)
    """
    if len(x) < 2:
        return 0
    return (np.max(x) - np.min(x)) + (np.max(y) - np.min(y))

def dispersion_fixation_detection(gaze_data, dispersion_threshold, min_duration_ms):
    """
    I-DT dispersion-based fixation detection algorithm.

    gaze_data: DataFrame with 'time', 'x', 'y' columns.
    dispersion_threshold: Max dispersion (in pixels or degrees).
    min_duration_ms: Minimum duration for a fixation (in milliseconds).

    Returns a list of fixations, each as a (start_time, end_time, centroid_x, centroid_y).
    """
    fixations = []
    start_idx = 0
    while start_idx < len(gaze_data):
        end_idx = start_idx
        while end_idx < len(gaze_data):
            window = gaze_data.iloc[start_idx:end_idx+1]
            dispersion = calculate_dispersion(window['x'], window['y'])

            if dispersion > dispersion_threshold:
                break

            end_idx += 1

        duration = gaze_data.iloc[end_idx-1]['time'] - gaze_data.iloc[start_idx]['time']

        if duration >= min_duration_ms:
            fixation_x = window['x'].mean()
            fixation_y = window['y'].mean()
            fixations.append((gaze_data.iloc[start_idx]['time'], gaze_data.iloc[end_idx-1]['time'], fixation_x, fixation_y))

        start_idx = end_idx

    return fixations

In [11]:

categorical, continuous, time = load_gaze_data('test1.h5')
df = preprocess_raw(continuous)
df['time'] = time[b'device_time_stamp']

In [12]:
df.head()

Unnamed: 0,x,y,time
0,,,254444590452
1,328.245789,0.633861,254444598681
2,328.268005,0.634459,254444607189
3,328.266937,0.639287,254444615357
4,328.265869,0.652838,254444623692


In [22]:

fixations = dispersion_fixation_detection(df, dispersion_threshold=32, min_duration_ms=20)
print(fixations)

[(254444590452.0, 254454273489.0, 328.598, 0.4517263), (254454281980.0, 254456566043.0, 310.69104, 0.32589325), (254456574391.0, 254464605386.0, 332.99463, 0.4913532), (254464613938.0, 254466526704.0, 325.30075, 0.52031934), (254466534999.0, 254473630353.0, 325.77878, 0.49208772), (254473638690.0, 254477828415.0, 312.2322, 0.41410905), (254477836795.0, 254478028450.0, 308.6863, 0.54790145), (254478036824.0, 254478691870.0, 325.12234, 0.5263269), (254478700309.0, 254478991978.0, 302.11005, 0.52282107), (254479000204.0, 254481630338.0, 338.96426, 0.5141976), (254481638529.0, 254481721943.0, 319.0298, 0.5439858), (254481730276.0, 254536076097.0, 332.34476, 0.4225398), (254536084373.0, 254537772346.0, 317.84274, 0.26783204), (254537780774.0, 254549463855.0, 334.71088, 0.43794817), (254549472116.0, 254550326515.0, 302.86618, 0.38385627), (254550334917.0, 254652237645.0, 334.5225, 0.43992943), (254652246048.0, 254661820862.0, 330.80365, 0.552817), (254661829197.0, 254678988143.0, 330.59473, 

In [23]:
len(fixations)

21

In [None]:
def map_fixations_to_screen(fixations, stimulus_data_file):
    # Load the stimulus data
    stimulus_data = pd.read_csv(stimulus_data_file)
    
    # Create a list to store the fixation features
    fixation_features = []

    for fixation in fixations:
        start_time, end_time, x, y = fixation
        duration = end_time - start_time

        # Find the object in the stimulus data that corresponds to the fixation location
        object_data = stimulus_data[(stimulus_data['x_min'] <= x) & (stimulus_data['x_max'] >= x) &
                                    (stimulus_data['y_min'] <= y) & (stimulus_data['y_max'] >= y)]

        if not object_data.empty:
            object_features = object_data.iloc[0].to_dict()
            object_features['fixation_duration'] = duration
            fixation_features.append(object_features)

    return fixation_features

def save_fixation_features(fixation_features, output_file):
    # Convert the fixation features to a DataFrame
    fixation_features_df = pd.DataFrame(fixation_features)
    
    # Save the DataFrame to a CSV file
    fixation_features_df.to_csv(output_file, index=False)

# Example usage
stimulus_data_file = 'stimulus_data.csv'
output_file = 'fixation_features.csv'

fixation_features = map_fixations_to_screen(fixations, stimulus_data_file)
save_fixation_features(fixation_features, output_file)

# Collect data from multiple trials and create histograms
all_fixation_features = []

# Assuming you have a list of trials with their corresponding stimulus data files
trials = ['trial1_stimulus_data.csv', 'trial2_stimulus_data.csv', ...]

for trial in trials:
    fixation_features = map_fixations_to_screen(fixations, trial)
    all_fixation_features.extend(fixation_features)

all_fixation_features_df = pd.DataFrame(all_fixation_features)

# Create histograms for the amount of time spent on distractor and target features
distractor_time = all_fixation_features_df[all_fixation_features_df['feature_type'] == 'distractor']['fixation_duration']
target_time = all_fixation_features_df[all_fixation_features_df['feature_type'] == 'target']['fixation_duration']

distractor_time.hist(bins=50, alpha=0.5, label='Distractor')
target_time.hist(bins=50, alpha=0.5, label='Target')

# plt.legend()
# plt.xlabel('Fixation Duration')
# plt.ylabel('Frequency')
# plt.title('Histogram of Fixation Durations on Distractor and Target Features')
# plt.show()