# MultiplEYE preprocessing

In [1]:
from pathlib import Path

data_dir = Path("../data/pilot-hr-1-zh")
stim_dir = Path("../data/stimuli_MultiplEYE_HR_CH_Zurich_1_2025")
output_dir = Path("output")
output_dir.mkdir(exist_ok=True)

## EDF to ASC

Use the `edf2asc` binary from the [EyeLink Developers Kit](https://www.sr-research.com/support/thread-13.html) to convert EDF files to ASC files.

To discuss:
- We probably can't distribute the binary due to licensing issues. (But we might be able to distribute a Docker image?)
- There is already an [issue](https://github.com/aeye-lab/pymovements/issues/509) to integrate this into `pymovements`.
- The `-input` option is unnecessary, but currently required by `parse_eyelink()` in `pymovements`.
- The `-ftime` option would make handling 2kHz ASC files easier, but `pymovements` doesn't support floating point timestamps yet (https://github.com/aeye-lab/pymovements/pull/739).

In [2]:
import subprocess

edf = data_dir / "ch1hr007.edf"
subprocess.run(["./edf2asc", edf, "-input", "-ftime", "-p", output_dir, "-y"])


EDF2ASC: EyeLink EDF file -> ASCII (text) file translator
EDF2ASC version 4.2.762.0 Linux   standalone Jul 20 2023 
(c)1995-2023 by SR Research, last modified Jul 20 2023

processing file ../data/pilot-hr-1-zh/ch1hr007.edf 
loadEvents = 1
| DATE: Wed Jun 19 17:49:30 2024                                              |
| TYPE: EDF_FILE BINARY EVENT SAMPLE TAGGED                                   |
| VERSION: EYELINK II 1                                                       |
| SOURCE: EYELINK CL                                                          |
| EYELINK II CL v6.14 Mar  6 2020 (EyeLink Portable Duo)                      |
| CAMERA: EyeLink USBCAM Version 1.01                                         |
| SERIAL NUMBER: CLU-DBE08                                                    |
| CAMERA_CONFIG: DBE08200.SCD                                                 |
| RECORDED BY libeyelink.py                                                   |

missing sample at 819712.500000 
missin

CompletedProcess(args=['./edf2asc', PosixPath('../data/pilot-hr-1-zh/ch1hr007.edf'), '-input', '-ftime', '-p', PosixPath('output'), '-y'], returncode=255)

ample at 2675320.500000 
missing sample at 2689794.500000 
missing sample at 2692863.500000 
missing sample at 2720722.500000 
missing sample at 2822118.500000 
missing sample at 3045886.500000 
missing sample at 3106620.500000 
missing sample at 3188916.500000 
missing sample at 3206301.500000 
missing sample at 3391546.500000 
missing sample at 3420093.500000 
missing sample at 3426127.500000 
missing sample at 3578011.500000 
missing sample at 3602479.500000 
missing sample at 3645916.500000 
missing sample at 3664998.500000 
missing sample at 3683847.500000 
missing sample at 3787312.500000 
missing sample at 3818034.500000 
missing sample at 3859301.500000 
missing sample at 3864391.500000 
missing sample at 3905822.500000 
missing sample at 3981997.500000 
Converted successfully: 26935 events, 3220492 samples, 404 blocks.


## ASC to sample-level CSV

Convert the ASC files to CSV files (one for each page) where each row is a sample.

### Parse ASC file

In [6]:
import csv

import polars as pl
import pymovements as pm

asc = output_dir / "ch1hr007.asc"

experiment = pm.Experiment(
    sampling_rate=2000,
    screen_width_px=1275,
    screen_height_px=916,
    screen_width_cm=37,
    screen_height_cm=28,
    distance_cm=60,
)
data = pm.gaze.from_asc(
    asc,
    patterns=[
        r"start_recording_(?P<trial>(?:PRACTICE_)?trial_\d+)_(?P<screen>.+)",
        {"pattern": r"stop_recording_", "column": "trial", "value": None},
        {"pattern": r"stop_recording_", "column": "screen", "value": None},
        {
            "pattern": r"start_recording_(?:PRACTICE_)?trial_\d+_page_\d+",
            "column": "activity",
            "value": "reading",
        },
        {
            "pattern": r"start_recording_(?:PRACTICE_)?trial_\d+_question_\d+",
            "column": "activity",
            "value": "question",
        },
        {
            "pattern": r"start_recording_(?:PRACTICE_)?trial_\d+_(familiarity_rating_screen_\d+|subject_difficulty_screen)",
            "column": "activity",
            "value": "rating",
        },
        {"pattern": r"stop_recording_", "column": "activity", "value": None},
        {
            "pattern": r"start_recording_PRACTICE_trial_",
            "column": "practice",
            "value": True,
        },
        {
            "pattern": r"start_recording_trial_",
            "column": "practice",
            "value": False,
        },
        {"pattern": r"stop_recording_", "column": "practice", "value": None},
    ],
    trial_columns=["trial", "screen"],
    experiment=experiment,
)
data.frame

time,pupil,practice,trial,activity,screen,pixel
f64,f64,bool,str,str,str,list[f64]
709376.0,206.0,,,,,"[102.2, 101.4]"
709376.5,210.0,,,,,"[104.1, 99.8]"
709377.0,206.0,,,,,"[99.1, 106.5]"
709377.5,204.0,,,,,"[85.2, 103.3]"
709378.0,202.0,,,,,"[98.2, 106.7]"
709378.5,209.0,,,,,"[100.2, 91.7]"
709379.0,203.0,,,,,"[97.9, 96.4]"
709379.5,206.0,,,,,"[109.3, 103.4]"
709380.0,203.0,,,,,"[93.6, 105.4]"
709380.5,204.0,,,,,"[94.6, 103.7]"


### Map trial numbers to stimulus IDs

In [7]:
stimulus_ids = {}
with open(data_dir / "logfiles" / "completed_stimuli.csv") as f:
    reader = csv.DictReader(f)
    for i, row in enumerate(reader):
        stimulus_ids[f"trial_{i + 1}"] = row["stimulus_id"]

df = data.frame.with_columns(
    pl.col("trial").replace(stimulus_ids).alias("stimulus_id")
)
df

time,pupil,practice,trial,activity,screen,pixel,stimulus_id
f64,f64,bool,str,str,str,list[f64],str
709376.0,206.0,,,,,"[102.2, 101.4]",
709376.5,210.0,,,,,"[104.1, 99.8]",
709377.0,206.0,,,,,"[99.1, 106.5]",
709377.5,204.0,,,,,"[85.2, 103.3]",
709378.0,202.0,,,,,"[98.2, 106.7]",
709378.5,209.0,,,,,"[100.2, 91.7]",
709379.0,203.0,,,,,"[97.9, 96.4]",
709379.5,206.0,,,,,"[109.3, 103.4]",
709380.0,203.0,,,,,"[93.6, 105.4]",
709380.5,204.0,,,,,"[94.6, 103.7]",


### Write separate CSVs for each page

Convert [x, y] pixel column to separate pixel_x and piyel_y columns. This is necessary because polars does not support nested values when exporting CSV.

In [8]:
df = df.select(
    [
        pl.all().exclude("pixel"),
        pl.col("pixel").list.get(0).alias("pixel_x"),
        pl.col("pixel").list.get(1).alias("pixel_y"),
    ]
)
df

time,pupil,practice,trial,activity,screen,stimulus_id,pixel_x,pixel_y
f64,f64,bool,str,str,str,str,f64,f64
709376.0,206.0,,,,,,102.2,101.4
709376.5,210.0,,,,,,104.1,99.8
709377.0,206.0,,,,,,99.1,106.5
709377.5,204.0,,,,,,85.2,103.3
709378.0,202.0,,,,,,98.2,106.7
709378.5,209.0,,,,,,100.2,91.7
709379.0,203.0,,,,,,97.9,96.4
709379.5,206.0,,,,,,109.3,103.4
709380.0,203.0,,,,,,93.6,105.4
709380.5,204.0,,,,,,94.6,103.7


Split data into CSV files.

In [11]:
csv_dir = output_dir / "007"
csv_dir.mkdir(exist_ok=True)

stimulus_screen_practice = df[["stimulus_id", "screen", "practice"]].unique()
for stimulus_id, screen, practice in stimulus_screen_practice.iter_rows():
    if stimulus_id is not None:
        screen_df = df.filter((pl.col("stimulus_id") == stimulus_id) & (pl.col("screen") == screen))
        screen_df = screen_df.select([
            pl.col("time"),
            pl.col("pixel_x"),
            pl.col("pixel_y"),
            pl.col("pupil"),
        ])
        if practice:
            screen_df.write_csv(csv_dir / f"{stimulus_id}__{screen}.csv")
        else:
            screen_df.write_csv(csv_dir / f"stimulus_{stimulus_id}__{screen}.csv")