In [8]:
import os
import pandas as pd
import numpy as np
from scipy.stats import entropy

base_dir = 'gaipat_data/participants'

# List every participant folder. A missing data directory is treated as
# "no participants" so the error branch at the bottom reports it, instead of
# os.listdir() raising FileNotFoundError before any message is printed.
if os.path.isdir(base_dir):
    participants = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))]
else:
    participants = []

# one participant to test
# NOTE: os.listdir() returns entries in arbitrary order, so "first" is simply
# whatever the filesystem lists first.
participant_id = participants[0] if participants else None

# Smoke test: load the screen and table gaze files for one participant/figure
# and report their shape and columns. Every failure mode prints an [ERROR]
# line rather than raising.
if participant_id:
    print(f"[INFO] Testing participant: {participant_id}")

    # each participant has figure subfolders: car, house, sc, tb, tc, tsb
    participant_path = os.path.join(base_dir, participant_id)
    figure_names = [d for d in os.listdir(participant_path) if os.path.isdir(os.path.join(participant_path, d))]

    if figure_names:
        figure_name = figure_names[0]  # just pick the first figure for testing
        print(f"[INFO] Testing figure: {figure_name}")

        # build file paths: <participant>/<figure>/{screen,table}/gazepoints.csv
        screen_path = os.path.join(participant_path, figure_name, 'screen', 'gazepoints.csv')
        table_path = os.path.join(participant_path, figure_name, 'table', 'gazepoints.csv')

        if not os.path.exists(screen_path):
            print(f"[ERROR] Missing screen gaze file: {screen_path}")
        elif not os.path.exists(table_path):
            print(f"[ERROR] Missing table gaze file: {table_path}")
        else:
            # Load and display shape + column info.
            # df_screen / df_table are only defined when both files exist.
            df_screen = pd.read_csv(screen_path)
            df_table = pd.read_csv(table_path)

            print(f"[SUCCESS] Loaded screen gaze: {df_screen.shape}")
            print(f"[SUCCESS] Loaded table gaze: {df_table.shape}")
            print(f"[INFO] Screen columns: {df_screen.columns.tolist()}")
            print(f"[INFO] Table columns: {df_table.columns.tolist()}")
    else:
        print(f"[ERROR] No figure folders found for {participant_id}")
else:
    print("[ERROR] No participant folders found.")


[INFO] Testing participant: 87891249
[INFO] Testing figure: sc
[SUCCESS] Loaded screen gaze: (4525, 3)
[SUCCESS] Loaded table gaze: (3432, 3)
[INFO] Screen columns: ['timestamp', 'x', 'y']
[INFO] Table columns: ['timestamp', 'x', 'y']


In [11]:
def merge_preprocess_gaze(df_screen, df_table):
    """
    Merge screen and table gaze samples into one time-ordered dataframe.

    Steps:
    - drop rows with NaN in 'x', 'y' or 'timestamp'
    - add a 'source' column ('screen' or 'table')
    - concatenate both sources
    - convert 'timestamp' from milliseconds to seconds
    - sort by timestamp and reset the index

    The input dataframes are NOT modified: the 'source' label is added to
    copies, so re-running the cell or reusing the raw frames is safe.

    Parameters:
        df_screen (pd.DataFrame): screen gaze samples with 'timestamp', 'x', 'y'
        df_table (pd.DataFrame): table gaze samples with 'timestamp', 'x', 'y'
    Returns:
        pd.DataFrame: merged samples with the input columns plus 'source'
    """
    required = ['x', 'y', 'timestamp']

    # .assign returns a new frame, leaving the caller's dataframes untouched
    labelled = [
        frame.dropna(subset=required).assign(source=label)
        for frame, label in ((df_screen, 'screen'), (df_table, 'table'))
    ]

    # merge dataframes
    df = pd.concat(labelled, ignore_index=True)

    # Convert timestamps from milliseconds to seconds
    df['timestamp'] = df['timestamp'] / 1000.0

    # Stable sort: rows with equal timestamps keep their concat order
    # (screen before table), so the result is deterministic.
    df = df.sort_values('timestamp', kind='mergesort').reset_index(drop=True)

    return df

# Combine the screen and table gaze frames from the loading cell into one
# time-sorted frame. df_screen / df_table only exist if that cell found both
# gazepoints.csv files.
df_merged = merge_preprocess_gaze(df_screen, df_table)

Reusable functions to calculate spatial gaze entropy, both for a single set of gaze points and over sliding time windows

In [13]:
def calc_gaze_entropy(xy_points, bins=10):
    """
    Spatial Shannon entropy of a set of gaze points.

    The points are binned into a 2D histogram and the entropy of the
    resulting occupancy distribution is returned: higher values mean the
    gaze is spread over more cells, lower values mean it is concentrated.

    Bin edges are left to np.histogram2d's defaults, so they span the
    min/max of the points passed in.

    Parameters:
        xy_points (np.ndarray): array of shape (N, 2), columns are x and y
        bins (int): number of histogram bins per axis
    Returns:
        float: entropy in bits, or np.nan when fewer than 2 points are given
    """
    n_samples = xy_points.shape[0]
    if n_samples < 2:
        # too few samples for a meaningful distribution
        return np.nan

    xs, ys = xy_points[:, 0], xy_points[:, 1]

    # occupancy counts per spatial cell (bin edges are discarded)
    counts = np.histogram2d(xs, ys, bins=bins)[0]

    # turn counts into probabilities, keeping only occupied cells so log(0) never occurs
    probs = counts.ravel() / counts.sum()
    occupied = probs[probs > 0]

    # base=2 gives the result in bits
    return entropy(occupied, base=2)

# calculating entropy in 2 second chunks 
def compute_entropy_over_time(df, window_size=2.0, step_size=0.5, bins=10):
    """
    Slide a time window over gaze data and compute spatial entropy per window.
    Helps understand how gaze patterns change over time.

    Windows are half-open, [start, start + window_size), and only windows
    that fit entirely before the last timestamp are evaluated. Windows with
    fewer than 2 samples are skipped.

    Parameters:
        df (pd.DataFrame): gaze samples with 'timestamp', 'x' and 'y' columns
        window_size (float): window length, in the units of 'timestamp'
        step_size (float): distance between consecutive window starts; must be > 0
        bins (int): histogram bins per axis, forwarded to calc_gaze_entropy
    Returns:
        pd.DataFrame: columns 'start_time', 'end_time', 'entropy'. The columns
        are present even when no window qualifies (empty input, recording
        shorter than one window, ...), so downstream column access is safe.
    Raises:
        ValueError: if step_size is not positive (the loop would never advance)
    """
    if step_size <= 0:
        raise ValueError(f"step_size must be positive, got {step_size}")

    results = []
    start_time = df['timestamp'].min()
    end_time = df['timestamp'].max()
    current = start_time

    # For an empty df min()/max() are NaN, the comparison is False and the
    # loop body never runs.
    while current + window_size <= end_time:
        window = df[(df['timestamp'] >= current) & (df['timestamp'] < current + window_size)]
        if len(window) >= 2:
            xy = window[['x', 'y']].to_numpy()
            ent = calc_gaze_entropy(xy, bins=bins)
            results.append({
                'start_time': current,
                'end_time': current + window_size,
                'entropy': ent
            })
        current += step_size

    # Explicit columns keep the schema stable when results is empty.
    return pd.DataFrame(results, columns=['start_time', 'end_time', 'entropy'])

# Run with the defaults from the signature: window_size=2.0, step_size=0.5,
# bins=10 (timestamps in df_merged were converted to seconds by the merge step).
entropy_df = compute_entropy_over_time(df_merged)

# check it worked: show the first few windows
print(entropy_df.head())

     start_time      end_time   entropy
0  1.706261e+09  1.706261e+09  3.489089
1  1.706261e+09  1.706261e+09  3.360848
2  1.706261e+09  1.706261e+09  2.654981
3  1.706261e+09  1.706261e+09  2.052651
4  1.706261e+09  1.706261e+09  2.584189
