In [1]:
import pandas as pd
import numpy as np
import os

import json
from pathlib import Path

os.chdir('/cluster/home/kruu/git/eye_tracking/')
# os.chdir('/home/kruu/git_folder/eye_tracking/')
from utils.data_processing import EyeTrackingProcessor

import torch
from torch.utils.data import DataLoader
from models.lstm_classifier import LSTMClassifier, train_classifier, split_by_participant, export_to_onnx
from utils.dataset import GazeMouseDataset

****
# Import Datasets
****

In [2]:
# data_path = "/store/kruu/eye_tracking"
data_path = "/cluster/home/kruu/store/eye_tracking"
files_list = os.listdir(data_path)
files_list = [os.path.join(data_path, file) for file in files_list]

tasks = ['Task 1', 'Task 2', 'Task 3', 'Task 4', 'Task 5', 'Task 6']
features = ['Recording timestamp', 'Gaze point X', 'Gaze point Y', 'Mouse position X', 'Mouse position Y', 'Event', 'Participant name']
interpolate_col = ['Recording timestamp', 'Gaze point X', 'Gaze point Y', 'Mouse position X', 'Mouse position Y', 'Blink']

processor = EyeTrackingProcessor()
all_data = processor.load_data(files_list)
dataset = processor.get_features(all_data, tasks, features)
dataset, blinks = processor.detect_blinks(dataset)

# Fixed Time step resampling
dataset_time_resampled = processor.resample_tasks_fixed_time(dataset, interpolate_col, timestep = 0.01)
dataset_time_resampled.Blink = (dataset_time_resampled.Blink > 0.5) #Transform interpolated data
# dataset_time_resampled = processor.pad_tasks(dataset_time_resampled) # No need to pad here
dataset_time_resampled["id"] = dataset_time_resampled["Participant name"].astype(str) + "_" + dataset_time_resampled["Task_id"].astype(str) + "_" + dataset_time_resampled["Task_execution"].astype(str)

  df = pd.read_csv(path, sep='\t')
  df = pd.read_csv(path, sep='\t')
  df = pd.read_csv(path, sep='\t')
  df = pd.read_csv(path, sep='\t')


In [3]:
dataset_time_resampled["Mouse position X"] = dataset_time_resampled["Mouse position X"].ffill().bfill()
dataset_time_resampled["Mouse position Y"] = dataset_time_resampled["Mouse position Y"].ffill().bfill()
dataset_time_resampled["Gaze point X"] = dataset_time_resampled["Gaze point X"].ffill().bfill()
dataset_time_resampled["Gaze point Y"] = dataset_time_resampled["Gaze point Y"].ffill().bfill()

In [4]:
# add information about movements
dataset_time_resampled["Gaze X delta"] = dataset_time_resampled.groupby("id")["Gaze point X"].diff().fillna(0)
dataset_time_resampled["Gaze Y delta"] = dataset_time_resampled.groupby("id")["Gaze point Y"].diff().fillna(0)
dataset_time_resampled["Mouse X delta"] = dataset_time_resampled.groupby("id")["Mouse position X"].diff().fillna(0)
dataset_time_resampled["Mouse Y delta"] = dataset_time_resampled.groupby("id")["Mouse position Y"].diff().fillna(0)


****
# Classifier
****

In [None]:
features = ['Relative timestamp', 'Gaze point X', 'Gaze point Y', 'Mouse position X', 'Mouse position Y', 'Gaze X delta', 'Gaze Y delta', 'Mouse X delta', 'Mouse Y delta','Blink']

input_dim = len(features)
hidden_dim = 128
num_classes = 6
num_layers = 1
batch_size = 32
lr = 0.001
num_epochs = 100


train_df, val_df, test_df = split_by_participant(dataset_time_resampled, val_split=0.2, test_split=0.1)

model, mean, std, best_ckpt_path = train_classifier(train_df,
                                    val_df,
                                    features,
                                    batch_size=batch_size,
                                    hidden_dim=hidden_dim,
                                    num_layers=num_layers,
                                    learning_rate=lr,
                                    num_epochs=num_epochs)

export_dir = Path(best_ckpt_path).parent.parent
onnx_path = os.path.join(export_dir,"best_lstm_classifier.onnx")

export_to_onnx(ckpt_path=best_ckpt_path, 
               export_path=onnx_path,
               input_dim=input_dim, 
               hidden_dim=hidden_dim,
               num_classes=num_classes,
               num_layers=num_layers,
               sequence_len=10000)

metadata = {
    "features": features,
    "mean": {k: float(v) for k, v in mean.to_dict().items()},
    "std": {k: float(v) for k, v in std.to_dict().items()},
    "train_ids": train_df["id"],
    "val_ids": val_df["id"],
    "test_ids": test_df["id"]
}

# Drop duplicates to get unique sequence-level IDs
metadata["train_ids"] = metadata["train_ids"].drop_duplicates().tolist()
metadata["val_ids"] = metadata["val_ids"].drop_duplicates().tolist()
metadata["test_ids"] = metadata["test_ids"].drop_duplicates().tolist()

# Save to JSON
metadata_path = os.path.join(export_dir,"model_metadata.json")
with open(metadata_path, "w") as f:
    json.dump(metadata, f, indent=4)

print(f"✅ Metadata saved to {metadata_path}")

/raid/persistent_scratch/kruu/venvs/aware/lib/python3.12/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /raid/persistent_scratch/kruu/venvs/aware/lib/python ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mkruu[0m ([33mzhaw_zav[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/raid/persistent_scratch/kruu/venvs/aware/lib/python3.12/site-packages/pytorch_lightning/core/optimizer.py:378: Found unsupported keys in the optimizer configuration: {'gradient_clip_val'}

  | Name        | Type             | Params | Mode 
---------------------------------------------------------
0 | conv1       | Conv1d           | 2.0 K  | train
1 | relu        | ReLU             | 0      | train
2 | conv2       | Conv1d           | 24.7 K | train
3 | dropout_cnn | Dropout          | 0      | train
4 | lstm        | LSTM             | 264 K  | train
5 | dropout     | Dropout          | 0      | train
6 | fc          | Linear           | 1.5 K  | train
7 | criterion   | CrossEntropyLoss | 0      | train
---------------------------------------------------------
292 K     Trainable params
0         Non-trainable params
292 K     Total params
1.170     Total estimated model params size (MB)
8         Modules in train mode
0         Modules in e

Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/raid/persistent_scratch/kruu/venvs/aware/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


                                                                           

/raid/persistent_scratch/kruu/venvs/aware/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Epoch 99: 100%|██████████| 15/15 [00:04<00:00,  3.04it/s, v_num=nio2, train_loss_step=0.831, val_loss=2.340, train_loss_epoch=0.676]

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 99: 100%|██████████| 15/15 [00:04<00:00,  3.03it/s, v_num=nio2, train_loss_step=0.831, val_loss=2.340, train_loss_epoch=0.676]




✅ Model exported to GazeMouse_Classification/bwkjnio2/best_lstm_classifier.onnx
✅ Metadata saved to GazeMouse_Classification/bwkjnio2/model_metadata.json


****
# Analysis
****

In [None]:
from models.lstm_classifier import evaluate_onnx_model

metadata_path = '/cluster/home/kruu/git/eye_tracking/GazeMouse_Classification/bwkjnio2/model_metadata.json'
onnx_path = '/cluster/home/kruu/git/eye_tracking/GazeMouse_Classification/bwkjnio2/best_lstm_classifier.onnx'


with open(metadata_path, "r") as f:
    metadata = json.load(f)

res = evaluate_onnx_model(onnx_path, 
                          dataset_time_resampled.query(f"id in {metadata["test_ids"]}"),
                          metadata["features"],
                          metadata["mean"], 
                          metadata["std"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['Relative timestamp'] = dataset.groupby('id')['Recording timestamp'].transform(lambda x: x - x.min())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset[features] = (dataset[features] - self.mean) / self.std
[1;31m2025-04-07 14:56:50.319011796 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 701746, index: 2, mask: {3, 43, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.[m
[1;31m2025-04-07 14