In [1]:
import os, sqlite3
import pandas as pd
import numpy as np
from zoneinfo import ZoneInfo
from google.protobuf.json_format import MessageToDict
# import sys; sys.path.insert(0, "gen")
from gen import messages_pb2  # generated by setup.sh

# Eye-Tracking

In [2]:
data_path = "/store/kruu/eye_tracking/training_data/010/Scenario 3/ET/010_scenario_3_gaze_data_fusion.tsv"
# Tab-separated gaze export. low_memory=False makes pandas infer every column's
# dtype from the whole file at once rather than chunk by chunk; chunked inference
# is what produces mixed-dtype columns and pandas' DtypeWarning.
# NOTE(review): the warning echoed under this cell looks like that DtypeWarning - confirm.
df_eye_tracking = pd.read_csv(data_path, sep="\t", low_memory=False)

  df_eye_tracking = pd.read_csv(data_path, sep="\t")


In [None]:
def slice_between_events(df, start="ScreenRecordingStart", end="ScreenRecordingEnd", *, include_bounds=False):
    """Return the rows of ``df`` located between two marker events.

    The markers are looked up in the ``"Event"`` column and the slice is taken
    by row position, so the result keeps the original index labels.

    Parameters
    ----------
    df : DataFrame with an ``"Event"`` column.
    start, end : values of ``"Event"`` marking the first and last row of the span.
    include_bounds : when true the two marker rows are part of the result,
        otherwise only the rows strictly between them are returned.

    Raises
    ------
    ValueError
        If either marker does not occur exactly once, or if ``end`` is not
        positioned after ``start``.
    """
    events = df["Event"]
    start_hits = np.flatnonzero(events.eq(start))
    end_hits = np.flatnonzero(events.eq(end))

    n_start, n_end = len(start_hits), len(end_hits)
    if not (n_start == 1 and n_end == 1):
        raise ValueError(f"Expected exactly one '{start}' and one '{end}' "
                         f"(got {n_start} and {n_end}).")

    first, last = int(start_hits[0]), int(end_hits[0])
    if last <= first:
        raise ValueError(f"'{end}' occurs before '{start}' (positions {last} <= {first}).")

    # Shrink the window by one row on each side unless the markers are wanted.
    pad = 0 if include_bounds else 1
    return df.iloc[first + pad:last + 1 - pad]
    
# Keep only the rows strictly between the screen-recording markers and, in the
# same step, discard the two "Mouse position ... [DACS px]" columns.
df_eye_tracking_sliced = (
    slice_between_events(df_eye_tracking, include_bounds=False)
    .drop(columns=["Mouse position X [DACS px]", "Mouse position Y [DACS px]"])
)

In [4]:
TZ = ZoneInfo("Europe/Zagreb")

# "Recording date" carries a trailing dot in this export (e.g. "4.9.2025."); strip it before parsing.
date_str = df_eye_tracking_sliced["Recording date"].astype(str).str.replace(r"\.$", "", regex=True)

# Wall-clock start of the recording, parsed per row and localised to TZ.
# Unparsable values become NaT (errors="coerce").
base_start = pd.to_datetime(
        date_str + " " + df_eye_tracking_sliced["Recording start time"].astype(str),
        format="%d.%m.%Y %H:%M:%S.%f",
        errors="coerce",
    ).dt.tz_localize(TZ, ambiguous="infer", nonexistent="shift_forward")

# Per-sample offset from the recording start; non-numeric values become NaT.
offset = pd.to_timedelta(pd.to_numeric(df_eye_tracking_sliced["Recording timestamp [ms]"], errors="coerce"), unit="ms")

# Absolute sample time as a naive UTC timestamp.
sample_utc = (base_start + offset).dt.tz_convert("UTC").dt.tz_localize(None)

# Milliseconds since the Unix epoch. Floor-dividing a timedelta by 1 ms is
# independent of the datetime storage resolution and turns NaT into a missing
# value, so rows that failed to parse end up as <NA> in the nullable Int64
# column instead of a meaningless integer derived from NaT's internal sentinel.
df_eye_tracking_sliced["epoch_ms"] = (
    (sample_utc - pd.Timestamp("1970-01-01")) // pd.Timedelta(milliseconds=1)
).astype("Int64")

In [5]:
# Inspect the sliced eye-tracking frame, which now carries the derived `epoch_ms` column.
df_eye_tracking_sliced

Unnamed: 0,Recording timestamp [ms],Computer timestamp [ms],Sensor,Participant name,Recording date,Recording start time,Timeline name,Event,Event value,Gaze point X [DACS px],...,Eye openness filtered [mm],Gaze point left X [DACS mm],Gaze point left Y [DACS mm],Gaze point right X [DACS mm],Gaze point right Y [DACS mm],Eye movement type,Eye movement event duration [ms],Fixation point X [DACS px],Fixation point Y [DACS px],epoch_ms
28349,217103,26496749,,10,4.9.2025.,15:53:24.795,Timeline1,ScreenRecordingStart,Screen Recording,,...,,,,,,EyesNotFound,4233.0,,,1756994221898
28350,217105,26496752,Eye Tracker,10,4.9.2025.,15:53:24.795,Timeline1,,,,...,,,,,,EyesNotFound,4233.0,,,1756994221900
28351,217114,26496760,Eye Tracker,10,4.9.2025.,15:53:24.795,Timeline1,,,,...,,,,,,EyesNotFound,4233.0,,,1756994221909
28352,217122,26496768,Eye Tracker,10,4.9.2025.,15:53:24.795,Timeline1,,,,...,,,,,,EyesNotFound,4233.0,,,1756994221917
28353,217130,26496777,Eye Tracker,10,4.9.2025.,15:53:24.795,Timeline1,,,,...,,,,,,EyesNotFound,4233.0,,,1756994221925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
468298,3860648,30140294,Eye Tracker,10,4.9.2025.,15:53:24.795,Timeline1,,,,...,,,,,,EyesNotFound,962.0,,,1756997865443
468299,3860650,30140297,Mouse,10,4.9.2025.,15:53:24.795,Timeline1,,,,...,,,,,,EyesNotFound,962.0,,,1756997865445
468300,3860656,30140303,Eye Tracker,10,4.9.2025.,15:53:24.795,Timeline1,,,,...,,,,,,EyesNotFound,962.0,,,1756997865451
468301,3860665,30140311,Eye Tracker,10,4.9.2025.,15:53:24.795,Timeline1,,,,...,,,,,,EyesNotFound,962.0,,,1756997865460


# Events db for mouse tracking

In [15]:
from gen import messages_pb2
from google.protobuf.descriptor import FieldDescriptor as FD

# Map FieldDescriptor's numeric TYPE_* constants to short readable names: the
# attribute name with its "TYPE_" prefix removed and lower-cased
# (so FD.TYPE_MESSAGE maps to "message").
TYPE = {getattr(FD, n): n.replace("TYPE_","").lower() for n in dir(FD) if n.startswith("TYPE_")}

def describe(desc, prefix="", _ancestors=()):
    """Print a flattened, dotted listing of the fields reachable from a message descriptor.

    One line is printed per field as ``<dotted path>: <cardinality><type>``.
    Message-typed fields are followed by the listing of their own fields, with
    the field name appended to the path. After the fields, one line is printed
    per ``oneof`` group naming its member fields.

    Parameters
    ----------
    desc : protobuf message descriptor (anything exposing ``full_name``,
        ``fields`` and ``oneofs``).
    prefix : dotted path prepended to every printed field name.
    _ancestors : internal - full names of the messages currently being
        expanded. A field whose message type is already on this path is
        printed once and marked ``(recursive: ...)`` rather than expanded, so
        self-referential or mutually recursive schemas cannot recurse forever.
    """
    cardinality = {FD.LABEL_OPTIONAL: "", FD.LABEL_REQUIRED: "required ", FD.LABEL_REPEATED: "repeated "}
    path = _ancestors + (desc.full_name,)

    # fields
    for f in desc.fields:
        card = cardinality[f.label]
        if f.type == FD.TYPE_MESSAGE:
            child = f.message_type
            if child.full_name in path:
                # Cycle in the schema: report the field but do not descend again.
                print(f"{prefix}{f.name}: {card}message (recursive: {child.full_name})")
                continue
            print(f"{prefix}{f.name}: {card}message")
            describe(child, prefix=f"{prefix}{f.name}.", _ancestors=path)
        else:
            print(f"{prefix}{f.name}: {card}{TYPE[f.type]}")
    # oneofs
    for o in desc.oneofs:
        print(f"{prefix}(oneof) {o.name}: " + ", ".join(f.name for f in o.fields))

# Top-level Event payload schema: print every field of messages_pb2.Event,
# descending into message-typed fields.
describe(messages_pb2.Event.DESCRIPTOR)

# Quick & dirty: just the names of Event's own (top-level) fields, as one list
print([f.name for f in messages_pb2.Event.DESCRIPTOR.fields])

timestamp: message
timestamp.seconds: int64
timestamp.nanos: int32
track: message
track.track_number: uint32
track.target_identification: string
track.position: message
track.position.lat_deg: double
track.position.lon_deg: double
track.velocity: message
track.velocity.Vx_ms: double
track.velocity.Vy_ms: double
track.call_sign: string
track.flight_level_m: double
track.calculated_altitude_m: double
track.calculated_rate_of_climb_ftmin: double
flight_plan: message
flight_plan.uuid: string
flight_plan.fixm: string
trajectory: message
trajectory.flight_plan_uuid: string
trajectory.points: repeated message
trajectory.points.lat_deg: double
trajectory.points.lon_deg: double
trajectory.points.altitude_m: double
trajectory.points.flight_level: double
trajectory.points.ground_speed_ms: double
trajectory.points.indicated_airspeed_ms: double
trajectory.points.true_airspeed_ms: double
trajectory.points.mach: double
trajectory.points.true_heading: double
trajectory.points.vertical_rate: double
tra

In [27]:
TZ = ZoneInfo("Europe/Zagreb")

def load_mouse_positions(db_path, start_epoch_ms, end_epoch_ms, limit=None, batch=20_000):
    """Load the mouse positions stored in an events database for a time window.

    Rows of the ``events`` table whose ``epoch_ms`` lies in
    ``[start_epoch_ms, end_epoch_ms]`` (both inclusive) are read in ``epoch_ms``
    order and each ``payload`` is parsed as a ``messages_pb2.Event``.

    Parameters
    ----------
    db_path : path of the SQLite file; it is opened read-only and immutable.
    start_epoch_ms, end_epoch_ms : inclusive bounds of the query window.
    limit : maximum number of table rows to scan, counted before rows without
        a mouse position are discarded. ``None`` scans the whole window; a
        value <= 0 scans nothing.
    batch : number of rows requested per ``fetchmany`` call.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``epoch_ms``, ``"Mouse position X"`` and
        ``"Mouse position Y"`` (the last two as nullable ``Int32``), sorted by
        ``epoch_ms``. Rows whose payload has no ``asd_event.mouse_position``
        are dropped; the index is the row's position among the scanned rows,
        so it has gaps where rows were dropped.
    """
    max_rows = None if limit is None else int(limit)

    sql = ('SELECT id, epoch_ms, payload FROM "events" '
           'WHERE epoch_ms BETWEEN ? AND ? '
           'ORDER BY epoch_ms')

    ids, epochs, xs, ys = [], [], [], []

    # Read-only, immutable: faster & avoids locks
    con = sqlite3.connect(f"file:{db_path}?mode=ro&immutable=1", uri=True)
    try:
        con.text_factory = bytes
        # Read-only speed PRAGMAs (best-effort; safe if ignored)
        con.execute("PRAGMA query_only=ON")
        con.execute("PRAGMA mmap_size=268435456")        # 256 MiB
        con.execute("PRAGMA temp_store=MEMORY")

        cur = con.execute(sql, (start_epoch_ms, end_epoch_ms))

        while max_rows is None or len(ids) < max_rows:
            # Never fetch more rows than the limit still allows.
            want = batch if max_rows is None else min(batch, max_rows - len(ids))
            rows = cur.fetchmany(want)
            if not rows:
                break
            for id_, ms, blob in rows:
                ev = messages_pb2.Event()
                ev.ParseFromString(blob)

                ae = getattr(ev, "asd_event", None)
                if ae is not None and ae.HasField("mouse_position"):
                    # proto3 scalars default to 0; we only read them when the message exists
                    x = int(ae.mouse_position.x)
                    y = int(ae.mouse_position.y)
                else:
                    x = y = None

                ids.append(int(id_))
                epochs.append(int(ms))
                xs.append(x)
                ys.append(y)
    finally:
        # Release the database handle even when the query or a payload parse fails.
        con.close()

    df = pd.DataFrame({
        "id": ids,
        "epoch_ms": epochs,
        "Mouse position X": pd.Series(xs, dtype="Int32"),
        "Mouse position Y": pd.Series(ys, dtype="Int32"),
    }).sort_values("epoch_ms").reset_index(drop=True)

    # Keep only the rows that actually carried a mouse position.
    df = df.dropna()

    return df


In [28]:
DB = "/store/kruu/eye_tracking/training_data/010/Scenario 3/simulator/010_scenario_3/polaris-events-2025-09-04T15_04_14.db"
# Query the events DB over exactly the epoch range covered by the sliced eye-tracking data.
df_mouse = load_mouse_positions(
    DB,
    start_epoch_ms=int(df_eye_tracking_sliced["epoch_ms"].min()),
    end_epoch_ms=int(df_eye_tracking_sliced["epoch_ms"].max()),
)
# df_mouse = load_mouse_positions(DB, 1756995761392, 1756995771444) # for task '010_3_5_1'

In [29]:
# How often each distinct (x, y) mouse position occurs among the loaded samples.
df_mouse[["Mouse position X", "Mouse position Y"]].value_counts()

Mouse position X  Mouse position Y
1811              1012                5
1679              1196                5
1221              1815                4
1732              1433                4
1954              943                 4
                                     ..
1778              1339                1
                  1251                1
                  1221                1
                  1206                1
                  1474                1
Name: count, Length: 22297, dtype: int64

In [30]:
# Number of non-missing values in each mouse-position column.
df_mouse[["Mouse position X", "Mouse position Y"]].notna().sum()

Mouse position X    24535
Mouse position Y    24535
dtype: int64

In [31]:
# Display the mouse samples. load_mouse_positions already calls dropna() before
# returning, so this call is not expected to remove any further rows.
df_mouse.dropna()

Unnamed: 0,id,epoch_ms,Mouse position X,Mouse position Y
13,500,1756994222789,2290,1069
16,503,1756994222989,2101,1085
18,505,1756994223090,1818,1100
20,507,1756994223190,1813,1100
25,512,1756994223497,1845,1089
...,...,...,...,...
126687,127174,1756997839919,3454,743
126688,127175,1756997840372,3257,566
126689,127176,1756997840522,3204,522
126690,127177,1756997840622,3203,521


# Merge ET + mouse: Full Union time

In [None]:
TZ = ZoneInfo("Europe/Zagreb")
TOL = 8  # ms - largest |time difference| at which a mouse sample is attached to an eye-tracking row

# merge_asof needs non-null keys of the same dtype, sorted ascending, on both
# sides. A stable sort keeps rows that share a timestamp in their original order.
dfe = (
    df_eye_tracking_sliced
    .dropna(subset=["epoch_ms"])          # rows without a usable timestamp cannot be aligned
    .astype({"epoch_ms": "int64"})
    .sort_values("epoch_ms", kind="stable")
)
dfm = (
    df_mouse
    .astype({"epoch_ms": "int64"})
    .sort_values("epoch_ms", kind="stable")
)

# Left as-of join: every eye-tracking row is kept and receives the mouse sample
# nearest in time, provided it lies within TOL ms; otherwise the mouse columns stay missing.
timeline = pd.merge_asof(dfe, dfm, on="epoch_ms", direction="nearest", tolerance=TOL)

timeline["ts_utc"] = pd.to_datetime(timeline["epoch_ms"], unit="ms", utc=True)
timeline["ts_cet"] = timeline["ts_utc"].dt.tz_convert(TZ)


In [33]:
# How often each distinct (x, y) mouse position occurs on the merged timeline;
# rows where either mouse column is missing are not counted by value_counts().
timeline[["Mouse position X", "Mouse position Y"]].value_counts()

Mouse position X  Mouse position Y
1954              943                 9
1811              1012                8
1958              981                 6
2132              1502                6
1628              1320                6
                                     ..
2575              1010                1
2571              79                  1
2572              546                 1
2568              469                 1
2569              594                 1
Name: count, Length: 22148, dtype: int64