In [None]:
import os
from pathlib import Path

store_dir = "/store/kruu/eye_tracking/training_data"

****
# Build ET windows
****

In [None]:
from utils.helper import load_and_process_et

features = ['Recording timestamp [ms]', 'epoch_ms', 'Gaze point X [DACS px]', 'Gaze point Y [DACS px]', 'Event']
interpolate_cols = ['Gaze point X [DACS px]', 'Gaze point Y [DACS px]']
fill_columns = ['Gaze point X [DACS px]', 'Gaze point Y [DACS px]']

chunks_et, blinks, atco_task_map = load_and_process_et(store_dir,
                                                            features,
                                                            interpolate_cols,
                                                            fill_columns,
                                                            time_resampling=False,
                                                            fixed_window_ms=15000,
                                                            window_step_ms=5000,
                                                            min_task_presence=0.5) 

Finding tasks for participant 001 Scenario 1
⚠️ Unmatched 'end' for Task 5 at 3459514
⚠️ Unmatched 'start' for Task 3 at 3443448
Finding tasks for participant 001 Scenario 2
⚠️ Unmatched 'end' for Task 5 at 1475678
Finding tasks for participant 001 Scenario 3
Finding tasks for participant 002 Scenario 1
⚠️ Unmatched 'end' for Task 3 at 732128
⚠️ Unmatched 'end' for Task 8 at 1168579
⚠️ Unmatched 'end' for Task 1 at 1407071
⚠️ Unmatched 'start' for Task 12 at 1171948
Finding tasks for participant 002 Scenario 2
⚠️ Unmatched 'end' for Task 7 at 1407679
⚠️ Unmatched 'end' for Task 8 at 3096761
⚠️ Unmatched 'end' for Task 0 at 3174255
⚠️ Unmatched 'start' for Task 1 at 695652
⚠️ Unmatched 'start' for Task 12 at 3785377
⚠️ Unmatched 'start' for Task 13 at 3467085
⚠️ Unmatched 'start' for Task 13 at 3467602
⚠️ Unmatched 'start' for Task 13 at 3467757
⚠️ Unmatched 'start' for Task 6 at 1395939
⚠️ Unmatched 'start' for Task 4 at 3513371
Finding tasks for participant 002 Scenario 3
⚠️ Unmatched

In [90]:
atco_task_map

{'Aircraft requests': 'Task 0',
 'Assume': 'Task 1',
 'Conflict resolution': 'Task 2',
 'Entry conditions': 'Task 3',
 'Entry conflict resolution': 'Task 4',
 'Entry coordination': 'Task 5',
 'Exit conditions': 'Task 6',
 'Exit conflict resolution': 'Task 7',
 'Exit coordination': 'Task 8',
 'Non-conformance resolution': 'Task 9',
 'QoS': 'Task 10',
 'Return to route': 'Task 11',
 'Transfer': 'Task 12',
 'Zone conflict': 'Task 13'}

****
# Get ASD data
****

In [4]:
import pandas as pd
from utils.task_data_io import list_parquet_files


_, asd_files = list_parquet_files(store_dir)

def load_asd_data(
        file_index: list[dict],
    ) -> dict[str, pd.DataFrame]:
    
        dfs: dict[str, pd.DataFrame] = {}
        
        for item in file_index:
            p = item["path"]
            df = pd.read_parquet(p)

            df = df.copy()
            df["participant_id"] = str(item["participant_id"])
            df["scenario_id"] = str(item["scenario_id"])
            id = f"{df["participant_id"].iloc[0]}_{df["scenario_id"].iloc[0]}"

            dfs[id] = df

        return dfs
    
all_asd_data = load_asd_data(asd_files)

****
# Merge ET + ASD
****

Ideas of features to compute. The challenge: Different window types → different event types → different attributes → no consistent columns for XGBoost.

- Curretn approach: "wide table with many nullable columns" (Warneing, some none == False for boolean features)
- XG_boost can natively handle nan values
- One-Hot Event-Type Indicators (binary flag for different event types)
- Learned encodings though AE or NN ?
- Warning imbalance (oversample rare tasks, undersample idle task, consider focal loss, etc...)
- We can train a separate XGBoost per task group (e.g. “route-editing tasks”, “clearance tasks”, “handover tasks”) using subsets of features most relevant to each group

track_screen_position:
- n_tracks_visible
- n_tracks_newly_visible
- mean/std track_screen_position_x and y (mybe min and may to know if they are clustered in a corner)
- min_distance from gaze to any track_screen_position in window (Watch out for referential)
- boolean gaze_on_any_track (within radiuy R)
- mouse_near_track
- time_since_mouse_near_track

track_label_position:
- n_labels_visible, n_labels_hovered, n_labels_selected, n_labels_on_pip
- hovered_label_ratio = n_labels_hovered / max(n_labels_visible, 1)
- selected_label_ratio
- any_label_hovered, any_label_selected, any_label_on_pip
- gaze_inside_selected_label, gaze_inside_hovered_label, mouse_inside_label (actively manipulating VS passively looking)

pop up:
- n_popups opened / closed (_last_10s)
- popup_{NAME}_active (boolean)
- time_sice_last_popup_any / time_since_last_popup_for_current_track

transfer:
- n_transfer_events(_last_10s)
- per-transfer-type counts (ex: n_transfer_ASSUMED, n_transfer_TRANSFERRED)
- time_since_last_transfer

clearance:
- n_clearances(_last_10s)
- per-clearance-type  coutns (n_clearance_CLIMB, n_clearance_DESCEND, n_clearance_DIRECT, etc...)
- time_since_last_clearance

distance_measurement (not sure if reliable):
- n_distance_measurements_added(_last_10s)
- n_distance_measurements_removed(_last_10s)
- n_distance_measurements_updated(_last_5s)
- n_active_distance_measurements (added but not removed yet)
- counts where first/second kind == flight_id vs lat_lon

sep_tool (not sure if reliable):
- sep_tool_opened(_last30s) boolean
- sep_tool_connected(_last30s)
- sep_tool_currently_open (state)
- time_since_sep_tool_open

route_interaction (not sure if reliable):

speed_vector (probably not very reliable):

keyboard_shortcut (not reliable):

track_mark (not reliable):




In [5]:
def align_asd_to_screen_coords(asd_window: pd.DataFrame, toolbar_height: int = 27) -> pd.DataFrame:
    """
    Shift ASD Y-coordinates,
    i.e. window coordinates where (0, 0) is the top-left of the Polaris window
    including the 27px top bar.

    We do:
        y_window = y_asd + toolbar_height

    Only Y coordinates are shifted; widths/heights are unchanged.
    """
    asd = asd_window.copy()

    y_cols = [
        "track_label_position_y",
        "track_screen_position_y",
        "mouse_position_y",
        "distance_measurement_start_y",
        "distance_measurement_end_y"
    ]

    for col in y_cols:
        if col in asd.columns:
            asd[col] = asd[col] + toolbar_height

    return asd

In [6]:
from collections import defaultdict

# nb_event_per_task = defaultdict(list)
asd_windows = {}

for id, chunk in chunks_et.items():
    
    task_id = id.split("_")[2]
    p_s_id = "_".join(id.split("_")[:2])
    if p_s_id not in all_asd_data.keys():
        continue
    min_epoch, max_epoch = chunk.epoch_ms.min(), chunk.epoch_ms.max()
    window_asd = all_asd_data[p_s_id].query(f"(epoch_ms >= {min_epoch}) and (epoch_ms <= {max_epoch})")
    window_asd = align_asd_to_screen_coords(window_asd, toolbar_height=27) # Getting rid of offset due to toolbar
    asd_windows[id] = window_asd
    # We can directly compute the ASD features here
    # nb_event_per_task[task_id].append(window_asd.event_name.nunique())

## Generic features

In [None]:
from scipy.stats import entropy

ALL_EVENT_TYPES = [
"mouse_position",
"track_screen_position",
"track_label_position",
"popup",
"transfer",
"clearance",
"distance_measurement",
# "speed_vector",
# "sep_tool",
# "route_interaction",
# "keyboard_shortcut",
# "track_mark",
]

def compute_generic_asd_features(window_asd: pd.DataFrame) -> pd.DataFrame:
    """
     Computes generic ASD features for the given time window.
    """
    
    df_generic = pd.DataFrame()
    df_generic["participant_id"] =[ window_asd["participant_id"].max()]
    df_generic["scenario_id"] = [window_asd["scenario_id"].max()]
    df_generic["n_events_total"] = [len(window_asd)]
    df_generic["n_events_unique"] = [window_asd.event_name.nunique()]
    df_generic["events_per_ms"] = len(window_asd) / (window_asd["epoch_ms"].max() - window_asd["epoch_ms"].min() + 1)
    df_generic["events_per_timestamp"] = len(window_asd) / (window_asd["epoch_ms"].nunique()) # several events for a givent timestamp
    df_generic["event_type_entropy"] = entropy(window_asd.event_name.value_counts(normalize=True)) #High entropy = varied actions → possibly complex tasks / Low entropy = repetitive UI (monitoring)
    
    # ONE-HOT of event type in the window + count per event type
    present_types = set(window_asd.event_name.unique())

    for ev in ALL_EVENT_TYPES:
        # df_generic[f"event_{ev}_present"] = [1 if ev in present_types else 0]
        df_generic[f"event_{ev}_count"] = [(window_asd.event_name == ev).sum()]

    return df_generic

import random

key = random.choice(list(asd_windows.keys()))
generic_features  = compute_generic_asd_features(asd_windows[key])
while generic_features['event_clearance_count'].max() <= 1:
    key = random.choice(list(asd_windows.keys()))
    generic_features  = compute_generic_asd_features(asd_windows[key])
    
print(key)
generic_features    

# Example of transfer
# 015_2_1_52

# Example of popup
# 005_2_-1_171
# 008_2_-1_193

# Example of clearance
# 023_3_-1_535
# 018_1_5_13
# 023_3_12_124
# 023_3_-1_95

023_3_-1_95


Unnamed: 0,participant_id,scenario_id,n_events_total,n_events_unique,events_per_ms,events_per_timestamp,event_type_entropy,event_mouse_position_count,event_track_screen_position_count,event_track_label_position_count,event_popup_count,event_transfer_count,event_clearance_count,event_distance_measurement_count
0,23,3,383,5,0.027163,3.113821,1.171346,109,105,161,5,0,3,0


## Track screen and label features

In [72]:
import numpy as np

def compute_flight_lifecycle_features(window_asd: pd.DataFrame, mode = str) -> pd.DataFrame:
    """
    Compute number of flights that appear, disappear, and persist
    over the course of the window, based on track_screen_position_flight_track_number.
    """

    df_tracks = window_asd[window_asd["event_name"] == f"track_{mode}_position"].copy()

# Handle missing values
    if f"track_{mode}_position_flight_track_number" not in df_tracks.columns:
        return {
            f"n_flights_{mode}_ever": 0,
            f"n_flights_{mode}_appear": 0,
            f"n_flights_{mode}_disappear": 0,
            f"n_flights_{mode}_persist": 0,
            f"n_flights_{mode}_transient": 0,
        }

    df_tracks = df_tracks.dropna(subset=[f"track_{mode}_position_flight_track_number"])
    if df_tracks.empty:
        print("col empty")
        return {
            "n_flights_ever": 0,
            "n_flights_appear": 0,
            "n_flights_disappear": 0,
            "n_flights_persist": 0,
            "n_flights_transient": 0,
        }
        
    epochs = np.sort(df_tracks["epoch_ms"].unique())

    # Set of flights for each epoch
    flights_by_epoch = (
        df_tracks
        .groupby("epoch_ms")[f"track_{mode}_position_flight_track_number"]
        .apply(lambda s: set(s))
    )

    first_flights = flights_by_epoch.iloc[0]
    last_flights = flights_by_epoch.iloc[-1]

    # All flights seen at least once in the window
    flights_ever = set().union(*flights_by_epoch.tolist())

    # Appear: seen sometime, but not in the first timestamp
    flights_appear = flights_ever - first_flights

    # Disappear: seen sometime, but not in the last timestamp
    flights_disappear = flights_ever - last_flights

    # Persist: present both at start and end
    flights_persist = first_flights & last_flights

    # Transient: appear or disappear inside the window (not present both at start and end)
    flights_transient = flights_ever - (first_flights | last_flights)

    out = {
        f"n_flights_{mode}_ever": len(flights_ever),
        f"n_flights_{mode}_appear": len(flights_appear),
        f"n_flights_{mode}_disappear": len(flights_disappear),
        f"n_flights_{mode}_persist": len(flights_persist),
        f"n_flights_{mode}_transient": len(flights_transient),
    }

    return out

compute_flight_lifecycle_features(asd_windows[key], mode="label")


{'n_flights_label_ever': 21,
 'n_flights_label_appear': 20,
 'n_flights_label_disappear': 0,
 'n_flights_label_persist': 1,
 'n_flights_label_transient': 0}

In [73]:
def debug_mouse_hover_alignment(asd_window: pd.DataFrame, time_tolerance_ms: int = 0):
    """
    Debug: for label rows with hovered == 1, check if the mouse is inside
    the label bbox at (roughly) the same epoch_ms.

    time_tolerance_ms:
        0  -> require exact same epoch_ms
        >0 -> allow nearest mouse within ± tolerance
    """
    
    label_cols = ['epoch_ms',
       'track_label_position_x', 'track_label_position_y',
       'track_label_position_width', 'track_label_position_height',
       'track_label_position_visible', 'track_label_position_hovered',
       'track_label_position_selected', 'track_label_position_on_pip',
       'track_label_position_track_number',
       'track_label_position_flight_track_number']
    mouse_cols = ['epoch_ms', 'mouse_position_x', 'mouse_position_y']
    
    labels = asd_window.query("event_name == 'track_label_position'")[label_cols].copy()
    mouse = asd_window.query("event_name == 'mouse_position'")[mouse_cols].copy()

    labels = labels[labels["track_label_position_hovered"] == 1].copy()
    if labels.empty:
        print("No hovered labels in this window.")
        return

    if time_tolerance_ms == 0:
        # Merge on exact epoch_ms
        merged = pd.merge(
            labels,
            mouse[["epoch_ms", "mouse_position_x", "mouse_position_y"]],
            on="epoch_ms",
            how="inner",
            suffixes=("_label", "_mouse"),
        )
    else:
        # Approximate match: nearest mouse within ±time_tolerance_ms
        labels_sorted = labels.sort_values("epoch_ms").reset_index(drop=True)
        mouse_sorted  = mouse.sort_values("epoch_ms").reset_index(drop=True)
        mouse_sorted = mouse_sorted.rename(columns={"epoch_ms": "mouse_epoch_ms"})

        # Use merge_asof to get nearest mouse at or before label time
        merged = pd.merge_asof(
            labels_sorted,
            mouse_sorted[["mouse_epoch_ms", "mouse_position_x", "mouse_position_y"]],
            left_on="epoch_ms",
            right_on="mouse_epoch_ms",
            direction="nearest",
        )

        # Filter out matches that are too far in time
        merged["time_diff_ms"] = (merged["epoch_ms"] - merged["mouse_epoch_ms"]).abs()
        merged = merged[merged["time_diff_ms"] <= time_tolerance_ms]

    if merged.empty:
        print("No matching mouse samples for hovered labels.")
        return

    x0 = merged["track_label_position_x"]
    y0 = merged["track_label_position_y"]
    w  = merged["track_label_position_width"]
    h  = merged["track_label_position_height"]

    mx = merged["mouse_position_x"]
    my = merged["mouse_position_y"]

    # position = top right corner, with increasing y when descending
    inside = (mx >= x0) & (mx <= x0 + w) & (my >= y0) & (my <= y0 + h)
    merged["mouse_inside_label"] = inside

    n = len(merged)
    n_inside = inside.sum()
    print(f"Hovered labels with matching mouse samples: {n}")
    print(f"Mouse inside label bbox: {n_inside} ({n_inside / n:.1%})")

    # Show a few examples
    print("\nSample rows where hovered=1 but mouse_inside_label=False:")
    print(
        merged.loc[~inside, [
            "epoch_ms",
            "track_label_position_x", "track_label_position_y",
            "track_label_position_width", "track_label_position_height",
            "mouse_position_x", "mouse_position_y",
        ]].head(5)
    )

    return merged

debug_mouse_hover_alignment(asd_windows[key], time_tolerance_ms=0)


Hovered labels with matching mouse samples: 19
Mouse inside label bbox: 10 (52.6%)

Sample rows where hovered=1 but mouse_inside_label=False:
        epoch_ms  track_label_position_x  track_label_position_y  \
1  1758281716250                    84.0                   -25.0   
3  1758281716813                   -47.0                   -65.0   
5  1758281720003                   -47.0                   -65.0   
6  1758281720003                  1670.0                   860.0   
7  1758281720110                    84.0                   -25.0   

   track_label_position_width  track_label_position_height  mouse_position_x  \
1                        15.0                         14.0            2004.0   
3                        15.0                         14.0            1708.0   
5                        15.0                         14.0            1789.0   
6                       101.0                         86.0            1789.0   
7                        15.0                    

Unnamed: 0,epoch_ms,track_label_position_x,track_label_position_y,track_label_position_width,track_label_position_height,track_label_position_visible,track_label_position_hovered,track_label_position_selected,track_label_position_on_pip,track_label_position_track_number,track_label_position_flight_track_number,mouse_position_x,mouse_position_y,mouse_inside_label
0,1758281715842,1679.0,855.0,101.0,86.0,1.0,1.0,0.0,0.0,0.0,23.0,1765.0,914.0,True
1,1758281716250,84.0,-25.0,15.0,14.0,0.0,1.0,0.0,1.0,0.0,12.0,2004.0,1066.0,False
2,1758281716250,2002.0,1006.0,101.0,69.0,1.0,1.0,0.0,0.0,0.0,12.0,2004.0,1066.0,True
3,1758281716813,-47.0,-65.0,15.0,14.0,0.0,1.0,0.0,1.0,0.0,23.0,1708.0,862.0,False
4,1758281716813,1673.0,859.0,101.0,86.0,1.0,1.0,0.0,0.0,0.0,23.0,1708.0,862.0,True
5,1758281720003,-47.0,-65.0,15.0,14.0,0.0,1.0,0.0,1.0,0.0,23.0,1789.0,961.0,False
6,1758281720003,1670.0,860.0,101.0,86.0,1.0,1.0,0.0,0.0,0.0,23.0,1789.0,961.0,False
7,1758281720110,84.0,-25.0,15.0,14.0,0.0,1.0,0.0,1.0,0.0,12.0,2017.0,1051.0,False
8,1758281720110,1998.0,997.0,101.0,69.0,1.0,1.0,0.0,0.0,0.0,12.0,2017.0,1051.0,True
9,1758281720461,-98.0,73.0,15.0,14.0,0.0,1.0,0.0,1.0,0.0,30.0,1848.0,1071.0,False


In [74]:
import pandas as pd
import numpy as np

def debug_gaze_label_alignment(
    eye_df: pd.DataFrame,
    asd_df: pd.DataFrame,
    gaze_res=(1920, 1080),   # eye tracker resolution
    asd_res=(3840, 2160),    # Polaris window resolution
    toolbar_height=27,
    time_tolerance_ms=50,    # allow nearest gaze within ±50 ms of label event
):
    """
    Debug alignment of transformed gaze with label positions.

    For each label event where selected==1 or hovered==1:
      - find nearest gaze sample in time (within ±time_tolerance_ms)
      - check if gaze is inside that label's bbox

    Prints proportions and a few mismatching examples.
    """

    gaze_w, gaze_h = gaze_res
    asd_w, asd_h = asd_res

    # 1) Prepare and transform gaze to ASD/window coordinates
    eye = eye_df.copy()
    eye = eye.rename(columns={
        "Gaze point X [DACS px]": "gaze_x",
        "Gaze point Y [DACS px]": "gaze_y",
    })

    # basic cleaning
    eye = eye.sort_values("epoch_ms").reset_index(drop=True)
    valid_mask = eye["gaze_x"].notna() & eye["gaze_y"].notna()

    if "Blink" in eye.columns:
        valid_mask &= ~eye["Blink"].astype(bool)
    if "Loss of Attention" in eye.columns:
        valid_mask &= ~eye["Loss of Attention"].astype(bool)

    eye_valid = eye.loc[valid_mask].copy()

    # scale from HD -> 4K
    eye_valid["gaze_x_asd"] = eye_valid["gaze_x"] * (asd_w / gaze_w)
    eye_valid["gaze_y_asd"] = eye_valid["gaze_y"] * (asd_h / gaze_h)

    # gaze in window coordinates (Polaris window includes toolbar at top)
    # labels will be shifted by +toolbar_height on y
    # we expect labels to live in y ∈ [toolbar_height, asd_h)
    eye_valid = eye_valid[
        (eye_valid["gaze_x_asd"].between(0, asd_w)) &
        (eye_valid["gaze_y_asd"].between(0, asd_h))
    ].copy()

    if eye_valid.empty:
        print("No valid gaze samples after filtering.")
        return

    # 2) Prepare label events (apply +toolbar_height to Y)
    labels = asd_df[asd_df["event_name"] == "track_label_position"].copy()
    if labels.empty:
        print("No track_label_position events in ASD.")
        return

    labels["y_window"] = labels["track_label_position_y"] + toolbar_height
    labels["x_window"] = labels["track_label_position_x"]
    labels["w"] = labels["track_label_position_width"]
    labels["h"] = labels["track_label_position_height"]

    # optional: drop labels completely offscreen
    labels = labels[
        labels["x_window"].notna() & labels["y_window"].notna() &
        labels["w"].notna() & labels["h"].notna()
    ].copy()

    if "track_label_position_selected" in labels.columns:
        labels["is_selected"] = labels["track_label_position_selected"].fillna(0).astype(bool)
    else:
        labels["is_selected"] = False

    if "track_label_position_hovered" in labels.columns:
        labels["is_hovered"] = labels["track_label_position_hovered"].fillna(0).astype(bool)
    else:
        labels["is_hovered"] = False

    labels_sel = labels[labels["is_selected"]].copy()
    labels_hov = labels[labels["is_hovered"]].copy()

    if labels_sel.empty and labels_hov.empty:
        print("No selected or hovered labels found.")
        return

    # 3) Function to check alignment for a subset (selected or hovered)
    def _check_subset(subset: pd.DataFrame, kind: str):
        if subset.empty:
            print(f"\nNo {kind} labels to check.")
            return

        labels_sorted = subset.sort_values("epoch_ms").reset_index(drop=True)
        eye_sorted = eye_valid.sort_values("epoch_ms").reset_index(drop=True)

        # rename eye epoch to keep it
        eye_sorted = eye_sorted.rename(columns={"epoch_ms": "gaze_epoch_ms"})
        labels_sorted["epoch_ms"] = labels_sorted["epoch_ms"].astype("int64")
        eye_sorted["gaze_epoch_ms"] = eye_sorted["gaze_epoch_ms"].astype("int64")

        merged = pd.merge_asof(
            labels_sorted,
            eye_sorted[["gaze_epoch_ms", "gaze_x_asd", "gaze_y_asd"]],
            left_on="epoch_ms",
            right_on="gaze_epoch_ms",
            direction="nearest",
        )

        # drop rows where we didn't find any gaze
        merged = merged.dropna(subset=["gaze_epoch_ms"]).copy()
        if merged.empty:
            print(f"\nNo gaze samples matched {kind} labels.")
            return

        merged["time_diff_ms"] = (merged["epoch_ms"] - merged["gaze_epoch_ms"]).abs()
        merged = merged[merged["time_diff_ms"] <= time_tolerance_ms].copy()
        if merged.empty:
            print(f"\nNo {kind} labels within ±{time_tolerance_ms} ms of any gaze sample.")
            return

        # Check if gaze is inside label bbox
        x0 = merged["x_window"]
        y0 = merged["y_window"]
        w  = merged["w"]
        h  = merged["h"]

        gx = merged["gaze_x_asd"]
        gy = merged["gaze_y_asd"]

        inside = (gx >= x0) & (gx <= x0 + w) & (gy >= y0) & (gy <= y0 + h)
        merged["gaze_inside_label"] = inside

        n = len(merged)
        n_inside = inside.sum()
        print(f"\n[{kind.upper()}] matched labels: {n}")
        print(f"[{kind.upper()}] gaze inside label: {n_inside} ({n_inside / n:.1%})")

        # show a few failures for inspection
        print(f"\nExamples where {kind}==1 but gaze_inside_label=False:")
        bad = merged.loc[~inside, [
            "epoch_ms", "gaze_epoch_ms", "time_diff_ms",
            "x_window", "y_window", "w", "h",
            "gaze_x_asd", "gaze_y_asd",
        ]]
        print(bad.head(5))

    # 4) Run checks
    _check_subset(labels_sel, kind="selected")
    _check_subset(labels_hov, kind="hovered")

debug_gaze_label_alignment(chunks_et[key], asd_windows[key])



[SELECTED] matched labels: 9
[SELECTED] gaze inside label: 0 (0.0%)

Examples where selected==1 but gaze_inside_label=False:
        epoch_ms  gaze_epoch_ms  time_diff_ms  x_window  y_window      w  \
0  1758281718380  1758281718378             2     -47.0     -38.0   15.0   
1  1758281718380  1758281718378             2    1673.0     886.0  245.0   
2  1758281718380  1758281718378             2    1673.0     886.0  245.0   
3  1758281718380  1758281718378             2     -47.0     -38.0   15.0   
4  1758281719596  1758281719595             1    1673.0     886.0  245.0   

       h  gaze_x_asd  gaze_y_asd  
0   14.0      1974.0       768.0  
1  103.0      1974.0       768.0  
2  103.0      1974.0       768.0  
3   14.0      1974.0       768.0  
4  103.0      1556.0      1046.0  

[HOVERED] matched labels: 25
[HOVERED] gaze inside label: 1 (4.0%)

Examples where hovered==1 but gaze_inside_label=False:
        epoch_ms  gaze_epoch_ms  time_diff_ms  x_window  y_window      w  \
0  1758

In [75]:
def compute_track_position_features(window_asd: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate features for track_screen_position and track_label_position
    events within a single time window.

    Returns a 1-row DataFrame with:
      - counts of label/track events
      - number of unique labels/tracks
      - visibility / hovered / selected / on_pip stats
      - basic spatial stats (x/y, width/height, area)
    """
    features = {}
    features["participant_id"] = window_asd["participant_id"].max()
    features["scenario_id"] = window_asd["scenario_id"].max()

    # --- TRACK SCREEN POSITION FEATURES ---
    
    # Basically tells for one given timestamp, what are the flight displayed
    # 'track_screen_position_track_number' is always null
    
    scr = window_asd[window_asd["event_name"] == "track_screen_position"]
    
    for key, val in compute_flight_lifecycle_features(scr, mode = "screen").items():
        features[key] = val

    if "track_screen_position_visible" in scr.columns:
        vis = scr["track_screen_position_visible"]
        features["track_screen_n_visible"] = (vis == 1).sum()
        features["track_screen_visible_ratio"] = (
            features["track_screen_n_visible"] / max(len(scr), 1)
        )
    else:
        features["track_screen_n_visible"] = 0
        features["track_screen_visible_ratio"] = 0.0

    # Spatial stats for track_screen (x, y)
    for axis in ["x", "y"]:
        col = f"track_screen_position_{axis}"
        if col in scr.columns:
            vals = scr[col].dropna()
            features[f"track_screen_{axis}_mean"] = vals.mean() if len(vals) > 0 else np.nan
            features[f"track_screen_{axis}_std"] = vals.std() if len(vals) > 0 else np.nan
            features[f"track_screen_{axis}_min"] = vals.min() if len(vals) > 0 else np.nan
            features[f"track_screen_{axis}_max"] = vals.max() if len(vals) > 0 else np.nan
        else:
            features[f"track_screen_{axis}_mean"] = np.nan
            features[f"track_screen_{axis}_std"] = np.nan
            features[f"track_screen_{axis}_min"] = np.nan
            features[f"track_screen_{axis}_max"] = np.nan

    # --- TRACK LABEL POSITION FEATURES ---
    lab = window_asd[window_asd["event_name"] == "track_label_position"]
    
    for key, val in compute_flight_lifecycle_features(lab, mode = "label").items():
        features[key] = val

    # Visibility / hovered / selected / on_pip counts & ratios (Nan = False)
    def _count_bool(col_name: str) -> int:
        if col_name in lab.columns:
            return (lab[col_name] == 1).sum()
        return 0

    features["track_label_n_visible"] = _count_bool("track_label_position_visible")
    features["track_label_n_hovered"] = _count_bool("track_label_position_hovered")
    features["track_label_n_selected"] = _count_bool("track_label_position_selected")
    features["track_label_n_on_pip"] = _count_bool("track_label_position_on_pip")

    vis_count = max(features["track_label_n_visible"], 1)

    features["track_label_hovered_ratio"] = features["track_label_n_hovered"] / vis_count
    features["track_label_selected_ratio"] = features["track_label_n_selected"] / vis_count
    features["track_label_on_pip_ratio"] = features["track_label_n_on_pip"] / vis_count

    # Spatial stats for labels (x, y, width, height, area)
    for axis in ["x", "y"]:
        col = f"track_label_position_{axis}"
        if col in lab.columns:
            vals = lab[col].dropna()
            features[f"track_label_{axis}_mean"] = vals.mean() if len(vals) > 0 else np.nan
            features[f"track_label_{axis}_std"] = vals.std() if len(vals) > 0 else np.nan
            features[f"track_label_{axis}_min"] = vals.min() if len(vals) > 0 else np.nan
            features[f"track_label_{axis}_max"] = vals.max() if len(vals) > 0 else np.nan
        else:
            features[f"track_label_{axis}_mean"] = np.nan
            features[f"track_label_{axis}_std"] = np.nan
            features[f"track_label_{axis}_min"] = np.nan
            features[f"track_label_{axis}_max"] = np.nan

    # Width / height / area
    if {"track_label_position_width", "track_label_position_height"}.issubset(lab.columns):
        w = lab["track_label_position_width"].astype(float)
        h = lab["track_label_position_height"].astype(float)
        area = w * h

        features["track_label_width_mean"] = w.dropna().mean() if w.notna().any() else np.nan
        features["track_label_height_mean"] = h.dropna().mean() if h.notna().any() else np.nan
        features["track_label_area_mean"] = area.dropna().mean() if area.notna().any() else np.nan
        features["track_label_area_total"] = area.dropna().sum() if area.notna().any() else 0.0
    else:
        features["track_label_width_mean"] = np.nan
        features["track_label_height_mean"] = np.nan
        features["track_label_area_mean"] = np.nan
        features["track_label_area_total"] = 0.0
    
    
    return pd.DataFrame([features])

compute_track_position_features(asd_windows[key])
    

Unnamed: 0,participant_id,scenario_id,n_flights_screen_ever,n_flights_screen_appear,n_flights_screen_disappear,n_flights_screen_persist,n_flights_screen_transient,track_screen_n_visible,track_screen_visible_ratio,track_screen_x_mean,...,track_label_x_min,track_label_x_max,track_label_y_mean,track_label_y_std,track_label_y_min,track_label_y_max,track_label_width_mean,track_label_height_mean,track_label_area_mean,track_label_area_total
0,23,3,21,0,0,21,0,105,1.0,1881.104762,...,-109.0,3119.0,813.440994,576.969322,-65.0,1837.0,110.416149,71.78882,9468.881988,1524490.0


In [76]:
import numpy as np
import pandas as pd
from collections import defaultdict


def compute_gaze_mouse_label_track_features(
    eye_df_window: pd.DataFrame,
    asd_window: pd.DataFrame,
    radius_R: float = 100.0,
    label_dwell_threshold_ms: float = 200.0,
    gaze_res: tuple = (1920, 1080),   # (width, height) in px
    asd_res: tuple = (3840, 2160),    # (width, height) in px
    toolbar_height = 27 # in px
) -> pd.DataFrame:
    """
    Compute gaze/mouse vs label/track features for a single time window,
    accounting for:
      - different coordinate systems (gaze_res vs asd_res)
      - time alignment using epoch_ms
      - moving labels/tracks over time
      - out-of-bounds gaze/ASD coordinates
      - upper toolbar outside ASD coordinate system

    Features:
      - time_on_labels_ms
      - frac_time_on_labels
      - n_labels_looked_at (≥ label_dwell_threshold_ms)
      - mean_dist_gaze_label
      - mean_dist_gaze_track
      - gaze_inside_selected_label_ratio
      - gaze_inside_hovered_label_ratio
      - mouse_inside_label_ratio
      - gaze_near_track_ratio (min dist <= radius_R)
      - mouse_near_track_ratio (min dist <= radius_R)
    """

    gaze_w, gaze_h = gaze_res
    asd_w, asd_h = asd_res

    # ---------------- 1. Prepare eye data (epoch_ms + scale to ASD resolution) ----------------
    eye = eye_df_window.copy().rename(
        columns={
            "Gaze point X [DACS px]": "gaze_x",
            "Gaze point Y [DACS px]": "gaze_y",
        }
    )
    if "epoch_ms" not in eye.columns:
        raise ValueError("eye_df_window must contain 'epoch_ms'")

    eye = eye.sort_values("epoch_ms").reset_index(drop=True)

    # Scale gaze from its native resolution into ASD resolution
    eye["gaze_x_scaled"] = eye["gaze_x"] * (asd_w / gaze_w)
    eye["gaze_y_scaled"] = eye["gaze_y"] * (asd_h / gaze_h)

    # We can exlude blink and loss of attention from valid mask if needed
    valid_mask = eye["gaze_x_scaled"].notna() & eye["gaze_y_scaled"].notna()
    eye_valid = eye[valid_mask].copy()

    # Polaris area (outside = toolbar area or out of the screen)
    x_min, y_min, x_max, y_max = 0, toolbar_height, asd_w, asd_h - toolbar_height 
    in_polaris = (
        (eye_valid["gaze_x_scaled"] >= x_min) & (eye_valid["gaze_x_scaled"] <= x_max) &
        (eye_valid["gaze_y_scaled"] >= y_min) & (eye_valid["gaze_y_scaled"] <= y_max)
    )
    
    eye_polaris = eye_valid.loc[in_polaris].copy()
    eye_toolbar = eye_valid.loc[~in_polaris].copy()
    n_valid = len(eye_valid)
    frac_gaze_toolbar = len(eye_toolbar) / max(len(eye_valid), 1)
    
    print("Gaze X min/max:", eye_polaris["gaze_x_scaled"].min(),
                        eye_polaris["gaze_x_scaled"].max())
    print("Gaze Y min/max:", eye_polaris["gaze_y_scaled"].min(),
                            eye_polaris["gaze_y_scaled"].max())

    if n_valid == 0:
        return pd.DataFrame([{
            "frac_gaze_toolbar": 0.0,
            "time_on_labels_ms": 0.0,
            "frac_time_on_labels": 0.0,
            "n_labels_looked_at": 0,
            "mean_dist_gaze_label": np.nan,
            "mean_dist_gaze_track": np.nan,
            "gaze_inside_label_ratio": 0.0,
            "gaze_inside_selected_label_ratio": 0.0,
            "gaze_inside_hovered_label_ratio": 0.0,
            "mouse_inside_label_ratio": 0.0,
            "gaze_near_track_ratio": 0.0,
            "mouse_near_track_ratio": 0.0,
        }])

    eye_polaris = eye_polaris.sort_values("epoch_ms").reset_index(drop=True)
    ts = eye_polaris["epoch_ms"].values.astype(float)
    if len(ts) > 1:
        dt = np.diff(ts)
        last_dt = np.median(dt)
        dt = np.concatenate([dt, [last_dt]])
    else:
        dt = np.array([0.0])
    eye_polaris.loc[:, "dt_ms"] = dt
    total_time_ms = eye_polaris["dt_ms"].sum()

    # ---------------- 2. Label state over time ----------------
    labels_all = asd_window[asd_window["event_name"] == "track_label_position"].copy()

    # Basic geometry presence
    geom_mask = (
        labels_all["track_label_position_x"].notna() &
        labels_all["track_label_position_y"].notna() &
        labels_all["track_label_position_width"].notna() &
        labels_all["track_label_position_height"].notna()
    )
    labels_all = labels_all[geom_mask].copy()

    # Filter labels that are completely off-screen
    if not labels_all.empty:
        x = labels_all["track_label_position_x"]
        y = labels_all["track_label_position_y"]
        w = labels_all["track_label_position_width"]
        h = labels_all["track_label_position_height"]

        # Keep labels whose bounding box at least partially overlaps the ASD screen
        overlap_mask = ~(
            (x + w < 0) | (x > asd_w) |
            (y + h < 0) | (y > asd_h)
        )
        labels_all = labels_all[overlap_mask].copy()
    
    print("Label X min/max:", labels_all["track_label_position_x"].min(),
                          labels_all["track_label_position_x"].max())
    print("Label Y min/max:", labels_all["track_label_position_y"].min(),
                            labels_all["track_label_position_y"].max())

    label_state_epochs = np.array([], dtype="int64")
    label_state = {}

    if not labels_all.empty:
        labels_all = labels_all.sort_values("epoch_ms")
        label_state_epochs = labels_all["epoch_ms"].unique().astype("int64")

        for ep in label_state_epochs:
            labs_ep = labels_all[labels_all["epoch_ms"] == ep]
            
            # We assume here that position x/y are the bottom left corner
            rects = labs_ep[[
                "track_label_position_x",
                "track_label_position_y",
                "track_label_position_width",
                "track_label_position_height",
            ]].values.astype(float)

            cx = rects[:, 0] + rects[:, 2] / 2.0
            cy = rects[:, 1] + rects[:, 3] / 2.0

            selected = labs_ep.get(
                "track_label_position_selected",
                pd.Series(False, index=labs_ep.index)
            ).fillna(0).astype(bool).values

            hovered = labs_ep.get(
                "track_label_position_hovered",
                pd.Series(False, index=labs_ep.index)
            ).fillna(0).astype(bool).values

            if "track_label_position_track_number" in labs_ep.columns:
                label_ids = labs_ep["track_label_position_track_number"].values
            else:
                label_ids = np.arange(len(labs_ep))

            label_state[int(ep)] = {
                "rects": rects,
                "centers": np.vstack([cx, cy]).T,
                "selected": selected,
                "hovered": hovered,
                "ids": label_ids,
            }

    def get_label_state_at(t_ms: int):
        if label_state_epochs.size == 0:
            return None
        idx = np.searchsorted(label_state_epochs, t_ms, side="right") - 1
        if idx < 0:
            return None
        ep = int(label_state_epochs[idx])
        return label_state.get(ep, None)

    # ---------------- 3. Track state over time (in ASD coords) ----------------
    tracks_all = asd_window[asd_window["event_name"] == "track_screen_position"].copy()
    if {"track_screen_position_x", "track_screen_position_y"}.issubset(tracks_all.columns):
        tracks_all = tracks_all.dropna(subset=["track_screen_position_x", "track_screen_position_y"]).copy()
    else:
        tracks_all = tracks_all.iloc[0:0].copy()

    # Filter tracks that are clearly outside the ASD screen
    if not tracks_all.empty:
        tx = tracks_all["track_screen_position_x"]
        ty = tracks_all["track_screen_position_y"]
        on_screen = (
            (tx >= -radius_R) & (tx <= asd_w + radius_R) &
            (ty >= -radius_R) & (ty <= asd_h + radius_R)
        )
        tracks_all = tracks_all[on_screen].copy()

    track_state_epochs = np.array([], dtype="int64")
    track_state = {}

    if not tracks_all.empty:
        tracks_all = tracks_all.sort_values("epoch_ms")
        track_state_epochs = tracks_all["epoch_ms"].unique().astype("int64")

        for ep in track_state_epochs:
            tr_ep = tracks_all[tracks_all["epoch_ms"] == ep]
            pts = tr_ep[["track_screen_position_x", "track_screen_position_y"]].values.astype(float)
            track_state[int(ep)] = {"points": pts}

    def get_track_state_at(t_ms: int):
        if track_state_epochs.size == 0:
            return None
        idx = np.searchsorted(track_state_epochs, t_ms, side="right") - 1
        if idx < 0:
            return None
        ep = int(track_state_epochs[idx])
        return track_state.get(ep, None)

    # ---------------- 4. Mouse events (already in ASD coords) ----------------
    mouse_all = asd_window[asd_window["event_name"] == "mouse_position"].copy()
    if {"mouse_position_x", "mouse_position_y"}.issubset(mouse_all.columns):
        mouse_all = mouse_all.dropna(subset=["mouse_position_x", "mouse_position_y"]).copy()
    else:
        mouse_all = mouse_all.iloc[0:0].copy()

    # Filter mouse positions to within screen bounds (optional)
    if not mouse_all.empty:
        mx = mouse_all["mouse_position_x"]
        my = mouse_all["mouse_position_y"]
        in_screen = (
            (mx >= 0) & (mx <= asd_w) &
            (my >= 0) & (my <= asd_h)
        )
        mouse_all = mouse_all[in_screen].copy()

    mouse_all = mouse_all.sort_values("epoch_ms").reset_index(drop=True)

    # ---------------- 5. Iterate over gaze samples ----------------
    time_on_labels_ms = 0.0
    labels_dwell_ms = defaultdict(float)

    gaze_inside_label_count = 0
    gaze_inside_selected_count = 0
    gaze_inside_hovered_count = 0
    gaze_near_track_count = 0

    gaze_min_dist_label = []
    gaze_min_dist_track = []

    gaze_x = eye_polaris["gaze_x_scaled"].values.astype(float)
    gaze_y = eye_polaris["gaze_y_scaled"].values.astype(float)
    gaze_ts = eye_polaris["epoch_ms"].values.astype("int64")
    dt_ms = eye_polaris["dt_ms"].values.astype(float)

    for gx, gy, t_ms, dt in zip(gaze_x, gaze_y, gaze_ts, dt_ms):
        # For each gaze ts, get the label state (can be several)
        lab_state = get_label_state_at(int(t_ms))
        inside_any_label = False
        inside_selected_label = False
        inside_hovered_label = False

        # ----- labels -----
        # Ts where we actually have at least one label displayed
        if lab_state is not None:
            rects = lab_state["rects"]
            centers = lab_state["centers"]
            selected = lab_state["selected"]
            hovered = lab_state["hovered"]
            ids = lab_state["ids"]

            dx = centers[:, 0] - gx
            dy = centers[:, 1] - gy
            dists = np.sqrt(dx * dx + dy * dy)
            gaze_min_dist_label.append(float(dists.min()))

            x0 = rects[:, 0]
            y0 = rects[:, 1]
            w = rects[:, 2]
            h = rects[:, 3]
            inside = (gx >= x0) & (gx <= x0 + w) & (gy >= y0) & (gy <= y0 + h)

            if inside.any():
                inside_any_label = True
                # For each label that contains the gaze, add dwell time dt
                for flag, lbl_id in zip(inside, ids):
                    if flag:
                        labels_dwell_ms[lbl_id] += dt

                inside_selected_label = (inside & selected).any()
                inside_hovered_label = (inside & hovered).any()
        else:
            gaze_min_dist_label.append(np.nan)

        # ----- tracks -----
        tr_state = get_track_state_at(int(t_ms))
        if tr_state is not None:
            pts = tr_state["points"]
            dx_t = pts[:, 0] - gx
            dy_t = pts[:, 1] - gy
            dists_t = np.sqrt(dx_t * dx_t + dy_t * dy_t)
            min_dist_t = float(dists_t.min())
            gaze_min_dist_track.append(min_dist_t)
            if min_dist_t <= radius_R:
                gaze_near_track_count += 1
        else:
            gaze_min_dist_track.append(np.nan)

        # Increment once for that specific timestamp, even though there could be several boxes containing the gaze
        if inside_any_label:
            time_on_labels_ms += dt
            gaze_inside_label_count += 1
        if inside_selected_label:
            gaze_inside_selected_count += 1
        if inside_hovered_label:
            gaze_inside_hovered_count += 1

    # ---------------- 6. Mouse-based features ----------------
    mouse_inside_label_count = 0
    mouse_near_track_count = 0
    n_mouse = len(mouse_all)

    for _, row in mouse_all.iterrows():
        mx = float(row["mouse_position_x"])
        my = float(row["mouse_position_y"])
        t_ms = int(row["epoch_ms"])

        lab_state = get_label_state_at(t_ms)
        if lab_state is not None:
            rects = lab_state["rects"]
            x0 = rects[:, 0]
            y0 = rects[:, 1]
            w = rects[:, 2]
            h = rects[:, 3]
            inside = (mx >= x0) & (mx <= x0 + w) & (my >= y0) & (my <= y0 + h)
            if inside.any():
                mouse_inside_label_count += 1

        tr_state = get_track_state_at(t_ms)
        if tr_state is not None:
            pts = tr_state["points"]
            dx_t = pts[:, 0] - mx
            dy_t = pts[:, 1] - my
            dists_t = np.sqrt(dx_t * dx_t + dy_t * dy_t)
            if dists_t.min() <= radius_R:
                mouse_near_track_count += 1

    # ---------------- 7. Aggregate to final features ----------------
    time_on_labels_ms = float(time_on_labels_ms)
    frac_time_on_labels = float(time_on_labels_ms / total_time_ms) if total_time_ms > 0 else 0.0

    n_labels_looked_at = sum(
        dwell >= label_dwell_threshold_ms for dwell in labels_dwell_ms.values()
    )

    mean_dist_gaze_label = float(np.nanmean(gaze_min_dist_label)) if len(gaze_min_dist_label) > 0 else np.nan
    mean_dist_gaze_track = float(np.nanmean(gaze_min_dist_track)) if len(gaze_min_dist_track) > 0 else np.nan
    
    # Number of timestamps where the gaze was in one displayed label / number of timestamps with valid displayed labels
    gaze_inside_label_ratio = (
        gaze_inside_label_count / n_valid if n_valid > 0 else 0.0
    )

    # fraction of gaze samples where gaze was inside any label
    gaze_inside_selected_ratio = (
        gaze_inside_selected_count / n_valid if n_valid > 0 else 0.0 
    )
    
    # fraction of gaze samples where gaze was inside any hovered label
    gaze_inside_hovered_ratio = (
        gaze_inside_hovered_count / n_valid if n_valid > 0 else 0.0
    )

    # fraction of gaze samples where gaze was inside any selected label
    mouse_inside_label_ratio = (
        mouse_inside_label_count / n_mouse if n_mouse > 0 else 0.0
    )
    
    # Number of timestamps where the gaze was near a track displayed / number of timestamps where we have displayed tracks
    gaze_near_track_ratio = (
        gaze_near_track_count / n_valid if n_valid > 0 else 0.0
    )
    
    # Number of timestamps where the mouse was near a track displayed / number of timestamps where we observed a mouse movement
    mouse_near_track_ratio = (
        mouse_near_track_count / n_mouse if n_mouse > 0 else 0.0
    )

    out = {
        "frac_gaze_toolbar": frac_gaze_toolbar,
        "time_on_labels_ms": time_on_labels_ms,
        "frac_time_on_labels": frac_time_on_labels,
        "n_labels_looked_at": n_labels_looked_at,
        "mean_dist_gaze_label": mean_dist_gaze_label,
        "mean_dist_gaze_track": mean_dist_gaze_track,
        "gaze_inside_label_ratio": gaze_inside_label_ratio,
        "gaze_inside_selected_label_ratio": gaze_inside_selected_ratio,
        "gaze_inside_hovered_label_ratio": gaze_inside_hovered_ratio,
        "mouse_inside_label_ratio": mouse_inside_label_ratio,
        "gaze_near_track_ratio": gaze_near_track_ratio,
        "mouse_near_track_ratio": mouse_near_track_ratio,
    }

    return pd.DataFrame([out])


compute_gaze_mouse_label_track_features(chunks_et[key], asd_windows[key])

Gaze X min/max: 2.0 2456.0
Gaze Y min/max: 384.0 1980.0
Label X min/max: 70.0 3119.0
Label Y min/max: 47.0 1837.0


Unnamed: 0,frac_gaze_toolbar,time_on_labels_ms,frac_time_on_labels,n_labels_looked_at,mean_dist_gaze_label,mean_dist_gaze_track,gaze_inside_label_ratio,gaze_inside_selected_label_ratio,gaze_inside_hovered_label_ratio,mouse_inside_label_ratio,gaze_near_track_ratio,mouse_near_track_ratio
0,0.005175,232.0,0.015467,1,511.724713,169.858453,0.018111,0.000647,0.005175,0.53211,0.141009,0.174312


# Transfer features

## WARNING: ASD TRANSFER CONTAINS BOTH ASSUME AND TRANSFER TASK TYPES (1 and 12)

In [77]:
TRANSFER_TYPES = [
    'TRANSFER', 'ASSUME', 'FORCE_ASSUME', 'RELEASE', 'REJECT_TRANSFER', 
    'REQUEST_TRANSFER', 'CANCEL_TRANSFER', 'ACTIVATE_NEXT_SECTOR', 'FORCE_ACT', 
    'DECONTROL', 'TRANSFER_TO_NEXT_SECTOR', 'FORCE_RELEASE', 'ENABLE_AUTO_CONTROL',
    'TRANSFER_TO_ANY', 'MANUAL_OUTBOUND', 'MANUAL_INBOUND'
]

def compute_transfer_features_window(asd_window: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate transfer events over a time window into a single feature row.

    For each transfer type T in TRANSFER_TYPES, compute:
      - transfer_type_<T>_count
      - transfer_type_<T>_present  (0/1)

    Returns a 1-row DataFrame.
    """
    df_t = asd_window[asd_window["event_name"] == "transfer"].copy()

    # If no transfer events, just return zeros
    if df_t.empty:
        data = {}
        for t in TRANSFER_TYPES:
            base = f"transfer_type_{t}"
            data[f"{base}_count"] = 0
            data[f"{base}_present"] = 0
        return pd.DataFrame([data])

    # Count occurrences per type in this window
    counts = df_t["transfer_type_name"].value_counts().to_dict()

    features = {}
    for t in TRANSFER_TYPES:
        base = f"transfer_type_{t}"
        c = counts.get(t, 0)
        features[f"{base}_count"] = c
        features[f"{base}_present"] = 1 if c > 0 else 0

    return pd.DataFrame([features])

compute_transfer_features_window(asd_windows[key])

Unnamed: 0,transfer_type_TRANSFER_count,transfer_type_TRANSFER_present,transfer_type_ASSUME_count,transfer_type_ASSUME_present,transfer_type_FORCE_ASSUME_count,transfer_type_FORCE_ASSUME_present,transfer_type_RELEASE_count,transfer_type_RELEASE_present,transfer_type_REJECT_TRANSFER_count,transfer_type_REJECT_TRANSFER_present,...,transfer_type_FORCE_RELEASE_count,transfer_type_FORCE_RELEASE_present,transfer_type_ENABLE_AUTO_CONTROL_count,transfer_type_ENABLE_AUTO_CONTROL_present,transfer_type_TRANSFER_TO_ANY_count,transfer_type_TRANSFER_TO_ANY_present,transfer_type_MANUAL_OUTBOUND_count,transfer_type_MANUAL_OUTBOUND_present,transfer_type_MANUAL_INBOUND_count,transfer_type_MANUAL_INBOUND_present
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Popup features

Could be verz interesting as it corresponds to actual ATCO intentions

In [78]:
# One popup window is defined by the tuple (popup_name, popup_flight_track_number)
def compute_popup_durations(df_popup):
    """
    Compute durations for popup *windows*, where a window is identified by:
        (popup_name, popup_flight_track_number)

    Returns a list of tuples:
        (popup_name, popup_flight_track_number, duration_ms)
    """
    df_popup = df_popup.sort_values("epoch_ms")
    durations = []
    
    #(popup_name, flight_track) -> open_time_ms
    open_time = {}
    for _, row in df_popup.iterrows():
        name = row.popup_name
        flight_track = row.popup_flight_track_number
        t = row.epoch_ms
        
        key = (name, flight_track)

        if row.popup_opened == 1: 
            open_time[key] = t

        elif row.popup_opened == 0 and key in open_time:
            durations.append((name, flight_track, t - open_time[key]))
            del open_time[key]

    return durations

compute_popup_durations(asd_windows[key])

[('CFLMenu', 23.0, 1423), ('CFLMenu', 12.0, 1418)]

In [79]:
def compute_popup_features_window(asd_window: pd.DataFrame) -> pd.DataFrame:
    """Computes features related to popups:
    
    - n_popups_open
    - n_popups_close
    - popup_dwell_time (how long a popup stays open) -> mean, total, longest, open/close rythm
    - popup count per flight (average, max)
    - revisit rate of a same popup (average, max)
    - inter-popup time (average, max)
    - any_popup_opened (bool)
    - popup_overlap (at the same time)
    
    Returns a 1-row dataframe
    """
    
    df_p = asd_window[asd_window["event_name"] == "popup"].copy()
    features = {}
    
    if df_p.empty:
        features['n_popup_open'] = 0
        features['n_popup_close'] = 0
        features["popup_any"] = 0
        features["popup_overlap"] = 0
        features[f'popup_dwell_total_ms'] = 0
        features[f'popup_dwell_mean_ms'] = 0
        features[f'popup_dwell_max_ms'] = 0
        features[f'popup_per_flight_mean'] = 0
        features[f'popup_per_flight_max'] = 0
        features["popup_revisit_count"] = 0
        features["popup_inter_time_mean_ms"] = 0
        features["popup_inter_time_median_ms"] = 0
        features["popup_inter_time_std_ms"] = 0
        return pd.DataFrame([features])
    
    df_p = df_p.sort_values(['epoch_ms'])
    
    # Basic counts
    features['n_popup_open'] = (df_p.popup_opened == 1).sum()
    features['n_popup_close'] = (df_p.popup_opened == 0).sum()
    features["popup_any"] = 1
    features['popup_overlap'] = int(((df_p.popup_opened.replace(to_replace=0, value=-1).cumsum()) > 2).any()) # Count if there are two consecutive open without a closing between
    
    # Dwell time
    durations = compute_popup_durations(df_p)
    if durations:
        total = sum(d for _,_,d in durations)
        features['popup_dwell_total_ms'] = total
        features['popup_dwell_mean_ms'] = total / len(durations)
        features['popup_dwell_max_ms'] = max(d for _,_,d in durations)
    else:
        features['popup_dwell_total_ms'] = 0
        features['popup_dwell_mean_ms'] = 0
        features['popup_dwell_max_ms'] = 0
    
    # flight features
    nb_popup_per_flight = df_p.query('popup_opened == 1').groupby(['popup_flight_track_number']).size()
    features[f'popup_per_flight_mean'] = nb_popup_per_flight.mean()
    features[f'popup_per_flight_max'] = nb_popup_per_flight.max()
    
    # Revisit count
    revisit_count = 0
    # last open/close state: key = (popup_name, popup_flight_track_number)
    last_state = {}
    for _, row in df_p.iterrows():
        key = (row["popup_name"], row["popup_flight_track_number"])
        opened = int(row["popup_opened"])
        
        # revisit: previous state = closed (0), now opened (1)
        if key in last_state:
            if last_state[key] == 0 and opened == 1:
                revisit_count += 1
        last_state[key] = opened
    
    features["popup_revisit_count"] = revisit_count
    
    # Inter-popup time
    deltas = df_p["epoch_ms"].diff().dropna()
    if len(deltas) > 0:
        features["popup_inter_time_mean_ms"] = float(deltas.mean())
        features["popup_inter_time_median_ms"] = float(deltas.median())
        features["popup_inter_time_std_ms"] = float(deltas.std())
    else:
        features["popup_inter_time_mean_ms"] = 0.0
        features["popup_inter_time_median_ms"] = 0.0
        features["popup_inter_time_std_ms"] = 0.0
    
    return pd.DataFrame([features])

compute_popup_features_window(asd_windows[key])

Unnamed: 0,n_popup_open,n_popup_close,popup_any,popup_overlap,popup_dwell_total_ms,popup_dwell_mean_ms,popup_dwell_max_ms,popup_per_flight_mean,popup_per_flight_max,popup_revisit_count,popup_inter_time_mean_ms,popup_inter_time_median_ms,popup_inter_time_std_ms
0,2,3,1,0,2841,1420.5,1423,1.0,1,0,2228.5,2131.0,946.833495


# Clearance 

In [80]:
CLEARANCE_TYPES = ['cleared-flight-level', 'cleared-speed', 'direct-to',
                   'heading', 'route-clearance']

def compute_clearance_features_window(asd_window: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate clearance events over a time window into a single feature row.

    Inputs:
      - asd_window: ASD events for this time window (full frame, mixed event_name)

    Output:
      - 1-row DataFrame with features like:
          clearance_count
          clearance_unique_flights
          clearance_type_heading_count, clearance_type_heading_present, ...
          clearance_inter_event_mean_ms, ...
    """

    df = asd_window[asd_window["event_name"] == "clearance"].copy()
    features = {}

    # No clearances in this window
    if df.empty:
        features["clearance_count"] = 0
        features["clearance_unique_flights"] = 0
        features["clearance_any"] = 0

        # type features
        for ctype in CLEARANCE_TYPES:
            base = f"clearance_type_{ctype}"
            features[f"{base}_count"] = 0
            features[f"{base}_present"] = 0

        # timing features
        features["clearance_inter_event_mean_ms"] = 0.0
        features["clearance_inter_event_median_ms"] = 0.0
        features["clearance_inter_event_std_ms"] = 0.0

        return pd.DataFrame([features])

    # Sort chronologically
    df = df.sort_values("epoch_ms").reset_index(drop=True)

    # Basic counts
    features["clearance_count"] = len(df)
    features["clearance_any"] = 1

    # Flight-centric: use flight_track_number if available, else track_number
    flight_col = "clearance_flight_track_number"
    if flight_col not in df.columns or df[flight_col].isna().all():
        flight_col = "clearance_track_number"
    features["clearance_unique_flights"] = df[flight_col].nunique()

    # Per-type counts
    type_counts = df["clearance_type"].value_counts().to_dict()
    for ctype in CLEARANCE_TYPES:
        base = f"clearance_type_{ctype}"
        c = type_counts.get(ctype, 0)
        features[f"{base}_count"] = c
        features[f"{base}_present"] = 1 if c > 0 else 0

    # Timing features
    deltas = df["epoch_ms"].diff().dropna()
    if len(deltas) > 0:
        features["clearance_inter_event_mean_ms"] = float(deltas.mean())
        features["clearance_inter_event_median_ms"] = float(deltas.median())
        features["clearance_inter_event_std_ms"] = float(deltas.std())
    else:
        features["clearance_inter_event_mean_ms"] = 0.0
        features["clearance_inter_event_median_ms"] = 0.0
        features["clearance_inter_event_std_ms"] = 0.0

    # Optional: clearances per flight distribution
    per_flight_counts = df.groupby(flight_col).size()
    features["clearance_max_per_flight"] = int(per_flight_counts.max())
    features["clearance_mean_per_flight"] = float(per_flight_counts.mean())

    return pd.DataFrame([features])
    
compute_clearance_features_window(asd_windows[key])

Unnamed: 0,clearance_count,clearance_any,clearance_unique_flights,clearance_type_cleared-flight-level_count,clearance_type_cleared-flight-level_present,clearance_type_cleared-speed_count,clearance_type_cleared-speed_present,clearance_type_direct-to_count,clearance_type_direct-to_present,clearance_type_heading_count,clearance_type_heading_present,clearance_type_route-clearance_count,clearance_type_route-clearance_present,clearance_inter_event_mean_ms,clearance_inter_event_median_ms,clearance_inter_event_std_ms,clearance_max_per_flight,clearance_mean_per_flight
0,3,1,3,3,1,0,0,0,0,0,0,0,0,4457.0,4457.0,275.771645,1,1.0


# Questions for Gunnar:
- What is the referential for X/y values ? It is different fomr the gaze at it might be as high as 5000
- Why the mouse events are so rare ? Are they only refring to clicks, and not position ?
- track_screen = a/c location on the screen ? Can they have a visibility = 0 ?
- Why do we have more track labels than track screens ? not necessarily with the same track number ?
- Why transmitting the track label information if visible = 0?
- Why do we have the exact same label information but for 2 different timestamps ? It means that the bale is still diplayed as it is form one timestamp to another ?
- Can we assume that the label window for one aircraft doesn't really move over 15s ?
- Why for a same epoch we can observe several instance of track label refering to a same flight number ?
- Some events are badly recorded ? 
