In [None]:
%load_ext dotenv
%dotenv

In [None]:
from os import environ
from pathlib import Path
from pprint import pprint
from tqdm import tqdm
from tempfile import TemporaryDirectory
from subprocess import run
from concurrent.futures import ThreadPoolExecutor, as_completed

import polars as pl

from eott_dataset import *

# Environment expected by this notebook (normally provided through the dotenv
# cell above):
# %env set EOTT_DATASET_PATH
# %env set GAZEFILTER_BIN_PATH

# Base name of the gazefilter command-line binary.
bin_name = "gfvio"

# Dataset root as resolved by the eott_dataset package
# (presumably from EOTT_DATASET_PATH — confirm against get_dataset_root).
ds_dir = get_dataset_root()
print("dataset:", ds_dir)

# Directory holding the gazefilter binaries; a missing variable raises KeyError.
gf_dir = Path(environ["GAZEFILTER_BIN_PATH"])
# NOTE(review): the ".exe" suffix is hard-coded, so only a Windows-style binary
# name is ever produced here.
exe_path = str((gf_dir / bin_name).with_suffix(".exe"))
print("gazefilter:", gf_dir)

In [None]:
# Open the dataset rooted at `ds_dir`; the bare `ds` on the last line makes the
# notebook display the object's repr as the cell output.
ds = EyeTyperDataset(ds_dir)
ds

In [None]:
def mouse_calibration_points(ds: EyeTyperDataset, pid: int, record: int):
    """Build a lazy table of weighted mouse positions for one recording.

    The result is a polars LazyFrame (callers `.collect()` it) with columns:

    * ``id`` - the ``frame`` value of the timeline rows whose ``source`` is
      ``"mouse"``;
    * ``x``, ``y`` - the ``x``/``y`` fields of the ``mouse`` struct column;
    * ``w`` - ``1.0`` for rows whose ``event`` equals ``"click"``, ``0.5``
      for every other event.

    The two sources are glued together column-wise, i.e. rows are paired by
    position (assumes both scans yield the mouse events in the same order —
    TODO confirm). Rows containing any null are dropped.
    """
    # Timeline entries of this recording that originate from the mouse.
    # (A variant that subtracted the first "log" offset from "offset" was
    # left disabled in the original code.)
    timeline = ds.scan("timeline").filter(pid=pid, record=record)
    frame_ids = timeline.filter(source="mouse").select(id=pl.col("frame"))

    cursor = pl.col("mouse")
    is_click = pl.col("event") == "click"
    weight = pl.when(is_click).then(pl.lit(1.0)).otherwise(pl.lit(0.5))

    points = (
        ds.scan("mouse")
        .filter(pid=pid, record=record)
        .select(
            x=cursor.struct.field("x"),
            y=cursor.struct.field("y"),
            w=weight,
        )
    )

    combined = pl.concat([frame_ids, points], how="horizontal")
    return combined.drop_nulls()


def pair(dtype) -> pl.Struct:
    """Return a struct dtype with a ``left`` and a ``right`` field of `dtype`."""
    fields = [
        pl.Field("left", dtype),
        pl.Field("right", dtype),
    ]
    return pl.Struct(fields)


def pair_struct(**kwargs) -> pl.Struct:
    """Return a left/right pair whose sides are structs built from `kwargs`.

    The keyword arguments are handed to ``pl.Struct`` as a field-name to
    dtype mapping; the resulting struct is then used for both sides.
    """
    side_dtype = pl.Struct(kwargs)
    return pair(side_dtype)


def gazefilter_dataframe_json(path: Path):
    """Lazily read a gazefilter NDJSON file into a typed, renamed LazyFrame.

    Processing happens in three steps:

    1. every JSON key is scanned with an explicit wide dtype (no schema
       inference);
    2. keys are renamed to short column names and cast to compact dtypes
       (fixed-size arrays, 32-bit floats, 8-bit event codes, durations with
       a millisecond time unit);
    3. fixed-size arrays are unpacked into structs with named fields
       (``x``/``y``, ``x``/``y``/``s``, ``x``/``y``/``z`` or
       ``roll``/``pitch``/``yaw``), including the ``left``/``right`` pairs
       and every element of ``faceshape``.

    A row counter column named ``index`` is added at the end.

    Args:
        path: location of the ``.jsonl`` file written by gazefilter.

    Returns:
        A polars LazyFrame; nothing is read until it is collected.
    """
    # Field-name sets used when arrays are unpacked into structs.
    xy = ["x", "y"]
    xys = ["x", "y", "s"]
    xyz = ["x", "y", "z"]
    angles = ["roll", "pitch", "yaw"]

    f64_list = pl.List(pl.Float64)
    i64_list = pl.List(pl.Int64)

    # One row per column: (output name, JSON key, dtype while scanning,
    # dtype after casting). The order defines the output column order.
    spec = [
        ("timestamp", "trackTime", pl.Float64, pl.Duration("ms")),
        ("tracking", "trackEvent", pl.Int64, pl.Int8),
        ("offset", "frameOffset", i64_list, pl.Array(pl.Int32, 2)),
        ("origin", "frameOrigin", i64_list, pl.Array(pl.UInt32, 2)),
        ("focals", "cameraFocal", f64_list, pl.Array(pl.Float32, 2)),
        ("screen", "screenOrigin", i64_list, pl.Array(pl.UInt32, 2)),
        ("target", "faceTarget", f64_list, pl.Array(pl.Float32, 3)),
        ("position", "facePosition", f64_list, pl.Array(pl.Float32, 3)),
        ("rotation", "faceRotation", f64_list, pl.Array(pl.Float32, 3)),
        ("eyes", "eyeTarget", pair(f64_list), pair(pl.Array(pl.Float32, 3))),
        ("ear", "eyeAspectRatio", pair(pl.Float64), pair(pl.Float32)),
        ("pupilconf", "pupilConfidence", pair(pl.Float64), pair(pl.Float32)),
        ("pupils", "pupilCenter", pair(f64_list), pair(pl.Array(pl.Float32, 2))),
        ("calibconf", "calibConfidence", pair(pl.Float64), pair(pl.Float32)),
        ("calibshift", "calibShift", f64_list, pl.Array(pl.Float32, 3)),
        ("calibedge", "calibEdge", f64_list, pl.Array(pl.Float32, 3)),
        ("gazing", "gazeEvent", pl.Int64, pl.Int8),
        ("gazedur", "gazeDuration", pl.Float64, pl.Duration("ms")),
        ("gazepoint", "gazePoint", pair(f64_list), pair(pl.Array(pl.Float32, 2))),
        ("gazebest", "gazeBestPoint", f64_list, pl.Array(pl.Float32, 2)),
        ("fixation", "fixationPoint", f64_list, pl.Array(pl.Float32, 2)),
        (
            "faceshape",
            "faceShape",
            pl.List(f64_list),
            pl.List(pl.Array(pl.Float32, 2)),
        ),
    ]

    raw = pl.scan_ndjson(
        path,
        schema={source: parsed for _, source, parsed, _ in spec},
        infer_schema_length=0,
    )

    typed = raw.select(
        **{name: pl.col(source).cast(final) for name, source, _, final in spec}
    )

    def to_struct(name, fields):
        # Unpack a fixed-size array column into a struct, keeping its name.
        return pl.col(name).arr.to_struct(fields=fields)

    def sides_to_struct(name, fields):
        # Same unpacking, applied to both sides of a left/right pair column.
        column = pl.col(name)
        return pl.struct(
            left=column.struct.field("left").arr.to_struct(fields=fields),
            right=column.struct.field("right").arr.to_struct(fields=fields),
        ).alias(name)

    shaped = typed.with_columns(
        to_struct("offset", xy),
        to_struct("origin", xy),
        to_struct("focals", xy),
        to_struct("screen", xy),
        to_struct("target", xys),
        to_struct("position", xyz),
        to_struct("rotation", angles),
        sides_to_struct("eyes", xys),
        sides_to_struct("pupils", xy),
        to_struct("calibshift", xyz),
        to_struct("calibedge", xyz),
        sides_to_struct("gazepoint", xy),
        to_struct("gazebest", xy),
        to_struct("fixation", xy),
        # Each list element is itself a 2-element array -> struct of x/y.
        pl.col("faceshape").list.eval(pl.element().arr.to_struct(fields=xy)),
    )
    return shaped.with_row_index("index")


def run_gazefilter(src: Path, dst: Path):
    """Run the gazefilter executable on one video and wait for it to finish.

    Args:
        src: input video file.
        dst: file the tool should write its output to (``--output``).

    Returns:
        The ``CompletedProcess`` from ``subprocess.run``. The exit status is
        not checked here (``check`` is left at its default), so a failing run
        does not raise; inspect ``returncode`` if that matters.
    """
    command = [
        exe_path,
        str(src),
        "--output",
        str(dst),
        "--ignore-timestamps",
    ]
    return run(command)


def parse_stem(s: str) -> tuple[int, int, int]:
    """Split a ``P<pid>_R<record>_A<aux>`` stem into its three integers.

    The first character of every ``_``-separated part is discarded without
    being inspected; the remainder must parse as an integer.

    Raises:
        ValueError: if there are not exactly three parts or a part is not
            numeric after its first character.
    """
    pid, record, aux = (int(part[1:]) for part in s.split("_"))
    return pid, record, aux


def unparse_stem(pid: int, record: int, aux: int) -> str:
    """Format the identifiers of a recording as a ``P<pid>_R<record>_A<aux>`` stem.

    The participant id is zero-padded to at least two digits; ``record`` and
    ``aux`` are written as-is. This is the inverse of ``parse_stem``.
    """
    stem = f"P{pid:>02d}_R{record}_A{aux}"
    return stem


def unpack_webcam_files(lf: pl.LazyFrame, dst_path: Path):
    """Write every webcam payload in `lf` to an ``.mp4`` file and yield its path.

    This is a generator: nothing happens until it is iterated. On the first
    iteration the ``pid``, ``record``, ``aux`` and ``file`` columns are
    collected in one go; each row's ``file`` bytes are then written to
    ``<dst_path>/P<pid>_R<record>_A<aux>.mp4`` right before that path is
    yielded.

    Args:
        lf: lazy frame providing the four columns named above.
        dst_path: existing directory that receives the video files.

    Yields:
        The path of each file after it has been written.
    """
    assert dst_path.is_dir()

    rows = lf.select("pid", "record", "aux", "file").collect()
    for pid, record, aux, payload in rows.iter_rows():
        target = (dst_path / unparse_stem(pid, record, aux)).with_suffix(".mp4")
        target.write_bytes(payload)
        yield target


def get_output_path(root: Path, name: str):
    """Return ``root/name`` with its final suffix replaced by ``.jsonl``.

    A `name` without a suffix simply gets ``.jsonl`` appended.
    """
    candidate = root / name
    return candidate.with_suffix(".jsonl")

In [None]:
# Run gazefilter over every webcam recording, one participant at a time, and
# merge all per-video outputs into a single parquet file in the dataset root.
lf = ds.scan("webcam")

# Number of non-null `record` values in the webcam table; used as the progress
# bar total (one tick per processed video).
total = lf.select("record").count().collect()[0, 0]
assert isinstance(total, int)

with (
    tqdm(total=total, desc="gazefilter") as progress,
    # delete=False: the directory with the gazefilter outputs is left on disk
    # after this block (the `delete` argument needs Python 3.12+).
    TemporaryDirectory(prefix="eott_gazefilter_", delete=False) as output_dir,
):
    output_dir = Path(output_dir)

    # Participant ids, taken from the "form" table in their original order.
    q = ds.scan("form").select("pid").unique(maintain_order=True)
    pids: list[int] = q.collect()["pid"].to_list()

    for pid in pids:
        dir_prefix = f"eott_videos_p{pid:>02d}"

        # The videos of one participant live in a scratch directory that is
        # removed when the block exits; the executor is listed last, so it is
        # shut down (all jobs finished) before the directory is deleted.
        with (
            TemporaryDirectory(prefix=dir_prefix, delete=True) as video_dir,
            ThreadPoolExecutor(max_workers=8) as executor,
        ):
            video_dir = Path(video_dir)
            q = lf.filter(pid=pid)

            # unpack_webcam_files is a generator: each video is written to
            # disk right before its gazefilter job is submitted.
            futures = [
                executor.submit(
                    run_gazefilter, path, get_output_path(output_dir, path.name)
                )
                for path in unpack_webcam_files(q, video_dir)
            ]

            for future in as_completed(futures):
                # Re-raises any exception from the worker thread.
                # NOTE(review): run_gazefilter calls subprocess.run without
                # check=True, so a non-zero exit status is not reported here.
                future.result()
                progress.update()
        # break

    # Load every output file lazily and tag its rows with the identifiers
    # encoded in the file name.
    lfs: list[pl.LazyFrame] = []
    for path in output_dir.glob("*.jsonl"):
        pid, record, aux = parse_stem(path.stem)
        # NOTE: from here on `lf` no longer refers to the webcam scan.
        lf = gazefilter_dataframe_json(path)
        lf = lf.select(
            pl.lit(pid, pl.UInt8).alias("pid"),
            pl.lit(record, pl.UInt8).alias("record"),
            pl.lit(aux, pl.UInt8).alias("aux"),
            pl.all()
        )
        lfs.append(lf)

    # Materialise everything in one frame ordered by recording and row index.
    lf = pl.concat(lfs).sort("pid", "record", "aux", "index")
    df = lf.collect()

# Persist the merged results next to the dataset tables.
df.write_parquet(ds_dir / "gazefilter.parquet", compression="lz4")

In [None]:
# Display the merged gazefilter results collected by the previous cell.
df

In [None]:
# Export one sample recording (webcam video plus mouse calibration targets)
# into a folder under the user's Documents directory.
output_dir = Path("~/Documents/eott_samples").expanduser()
# Make sure the destination exists: nothing below is shown to create missing
# parent directories, so a fresh machine would otherwise fail on the first write.
output_dir.mkdir(parents=True, exist_ok=True)

pid, record = 10, 3
# Common file stem; each export below only swaps the suffix.
path = output_dir / f"{pid:02d}_{record}"

ds.extract_webcam_recording(path.with_suffix(".mp4"), pid=pid, record=record, aux=0)
(
    mouse_calibration_points(ds, pid=pid, record=record)
    .collect()
    .write_csv(path.with_suffix(".targets.csv"))
)