# MPIIGaze Dataset Evaluation

This notebook evaluates the existing eye-tracking model on the MPIIGaze dataset with error output in degrees of visual angle for direct comparison with research literature.

## Dataset Overview
- **MPIIGaze**: 213,659 images from 15 participants during natural laptop use
- **Evaluation**: Leave-one-person-out cross-validation
- **Metric**: Mean angular error in degrees
- **Benchmarks**: Reported state-of-the-art errors range from 4.3° to 10.8°, depending on the evaluation protocol

## Setup and Dependencies

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from scipy.io import loadmat
import urllib.request
import tarfile
import json
from pathlib import Path
import cv2
from sklearn.model_selection import LeaveOneOut
import pickle

## 1. Download and Extract MPIIGaze Dataset

In [None]:
# Local workspace that holds the downloaded archive and its extracted contents
data_dir = Path("./mpiigaze_data")
data_dir.mkdir(exist_ok=True)

# Remote archive location and the local paths derived from the workspace
dataset_url = "http://datasets.d2.mpi-inf.mpg.de/MPIIGaze/MPIIGaze.tar.gz"
dataset_file = data_dir.joinpath("MPIIGaze.tar.gz")
extract_dir = data_dir.joinpath("MPIIGaze")

# Announce the source and the expected size before the download cell runs
print("Downloading MPIIGaze dataset from " + dataset_url)
print("File size: ~2.1 GB - this may take a few minutes...")

In [None]:
# Download the dataset if not already present.
# The archive is written to a temporary ".part" file and only renamed to its
# final name once the transfer has finished, so an interrupted download is
# never mistaken for a complete archive on the next run.
if not dataset_file.exists():
    print("Downloading MPIIGaze dataset...")
    partial_file = dataset_file.with_name(dataset_file.name + ".part")
    try:
        urllib.request.urlretrieve(dataset_url, partial_file)
        partial_file.replace(dataset_file)
    finally:
        # After a successful rename the partial file no longer exists; after a
        # failure (or Ctrl-C) this removes the truncated leftover.
        if partial_file.exists():
            partial_file.unlink()
    print(f"Download complete: {dataset_file}")
else:
    print(f"Dataset already downloaded: {dataset_file}")

# Extract the dataset if not already extracted
if not extract_dir.exists():
    print("Extracting dataset...")
    with tarfile.open(dataset_file, 'r:gz') as tar:
        # The archive is fetched over plain HTTP, so treat it as untrusted:
        # use the "data" extraction filter where this Python version offers it
        # (it rejects members that would be written outside data_dir).
        if hasattr(tarfile, "data_filter"):
            tar.extractall(data_dir, filter="data")
        else:
            tar.extractall(data_dir)
    print(f"Extraction complete: {extract_dir}")
else:
    print(f"Dataset already extracted: {extract_dir}")

In [None]:
# Explore the dataset structure: print an indented tree of the extracted
# archive, showing at most five files per directory.
print("Dataset structure:")
for root, dirs, files in os.walk(extract_dir):
    # os.walk yields entries in arbitrary order; sorting in place makes both
    # the traversal and the five-file preview reproducible between runs.
    dirs.sort()
    files.sort()

    level = root.replace(str(extract_dir), '').count(os.sep)
    indent = ' ' * 2 * level
    print(f"{indent}{os.path.basename(root)}/")
    subindent = ' ' * 2 * (level + 1)
    for file in files[:5]:  # Limit to first 5 files per directory
        print(f"{subindent}{file}")
    if len(files) > 5:
        print(f"{subindent}... and {len(files) - 5} more files")
    if level > 3:  # Limit depth for readability
        # Prune in place so os.walk skips only this subtree. A `break` here
        # would abort the whole traversal and hide every remaining sibling.
        dirs[:] = []

## 2. Explore MPIIGaze Data Format

In [None]:
# Locate the evaluation subset and enumerate the subject directories inside it
evaluation_dir = extract_dir / "Evaluation Subset"
if not evaluation_dir.exists():
    print("Evaluation subset not found, looking for main data directory...")
    # Fall back to listing whatever sits at the top of the extracted archive
    for item in extract_dir.iterdir():
        print(f"Found: {item.name}")
else:
    print(f"Found evaluation subset at: {evaluation_dir}")

    # Each sub-directory is treated as one subject; keep the names sorted
    subjects = sorted(d.name for d in evaluation_dir.iterdir() if d.is_dir())
    print(f"Found {len(subjects)} subjects: {subjects}")

In [None]:
# Inspect the directory layout of the first subject
if evaluation_dir.exists() and subjects:
    sample_subject = subjects[0]
    subject_dir = evaluation_dir / sample_subject

    print(f"Examining subject {sample_subject} structure:")
    for item in subject_dir.iterdir():
        print(f"  {item.name}")
        if not item.is_dir():
            continue
        # Summarise each sub-directory: entry count plus up to three names
        files = list(item.iterdir())
        print(f"    Contains {len(files)} files")
        if len(files) > 0:
            print(f"    Sample files: {[f.name for f in files[:3]]}")

In [None]:
# Load and examine sample data files
def explore_subject_data(subject_id, base_dir=None):
    """
    Explore the data structure for a given subject

    Args:
        subject_id: Name of the subject's directory.
        base_dir: Directory that contains the subject directories. When
            omitted, the notebook-level ``evaluation_dir`` is used.

    Returns:
        dict keyed by the name of each entry directly inside the subject
        directory. Sub-directories map to the number of entries they contain;
        .mat/.json/.txt files (suffix matched case-insensitively) map to the
        string "metadata". Any other file is left out.
    """
    root_dir = evaluation_dir if base_dir is None else Path(base_dir)
    subject_dir = root_dir / subject_id

    # Look for different data types
    data_types = {}

    # Sorted so the resulting dict has a reproducible key order
    for item in sorted(subject_dir.iterdir()):
        if item.is_dir():
            data_types[item.name] = sum(1 for _ in item.iterdir())
        elif item.suffix.lower() in ('.mat', '.json', '.txt'):
            data_types[item.name] = "metadata"

    return data_types

# Summarise the first subject's folder, but only when subjects were found
if evaluation_dir.exists() and subjects:
    sample_data = explore_subject_data(subjects[0])
    print(f"Data structure for {subjects[0]}:")
    for key in sample_data:
        value = sample_data[key]
        print(f"  {key}: {value}")

## 3. Load Sample Images and Annotations

In [None]:
# Function to load MPIIGaze data for a subject
def load_subject_data(subject_id, data_type="left", base_dir=None):
    """
    Load data for a specific subject
    data_type: 'left', 'right', or 'normalized' (if available)

    Args:
        subject_id: Name of the subject's directory.
        data_type: Preferred image sub-directory. It is tried first; if it is
            missing, the other known directory names are tried in order.
        base_dir: Directory that contains the subject directories. When
            omitted, the notebook-level ``evaluation_dir`` is used.

    Returns:
        (image_files, annotation_files): two sorted lists of paths, or
        (None, None) when no image directory is found for the subject.
    """
    root_dir = evaluation_dir if base_dir is None else Path(base_dir)
    subject_dir = root_dir / subject_id

    # Try to find image data directory: the requested data_type takes priority,
    # the remaining known names act as fallbacks.
    possible_dirs = ["left", "right", "normalized", "images", "data"]
    preferred = [data_type] if data_type else []
    candidates = preferred + [name for name in possible_dirs if name != data_type]
    image_dir = None

    for dirname in candidates:
        candidate_dir = subject_dir / dirname
        # is_dir() rather than exists(): a plain file with a matching name
        # cannot serve as the image directory.
        if candidate_dir.is_dir():
            image_dir = candidate_dir
            print(f"Found image directory: {dirname}")
            break

    if image_dir is None:
        print(f"Available directories in {subject_dir}:")
        for item in subject_dir.iterdir():
            print(f"  {item.name}")
        return None, None

    # Load images (suffix compared case-insensitively so ".JPG" is included)
    image_files = sorted(
        f for f in image_dir.iterdir()
        if f.suffix.lower() in ('.png', '.jpg', '.jpeg')
    )
    print(f"Found {len(image_files)} images")

    # Look for annotation files
    annotation_files = sorted(
        f for f in subject_dir.iterdir()
        if f.suffix.lower() in ('.mat', '.json', '.txt')
    )
    print(f"Found annotation files: {[f.name for f in annotation_files]}")

    return image_files, annotation_files

# Test with first subject
# Define both results up front so cells that later check `sample_images` do not
# raise NameError when the evaluation subset is missing or has no subjects.
sample_images, sample_annotations = None, None
if evaluation_dir.exists() and subjects:
    sample_images, sample_annotations = load_subject_data(subjects[0])

In [None]:
# Load and display sample images (up to six, in a 2x3 grid)
if sample_images:
    # Load first few images
    fig, axes = plt.subplots(2, 3, figsize=(12, 8))
    axes = axes.flatten()

    # Switch every panel off first so panels without an image stay blank
    for ax in axes:
        ax.axis('off')

    img = None  # last image that decoded successfully; summarised below
    for ax, img_path in zip(axes, sample_images):
        loaded = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
        if loaded is None:
            # cv2.imread returns None instead of raising for unreadable files
            ax.set_title(f"{img_path.name}\n(could not be read)")
            continue
        img = loaded
        ax.imshow(img, cmap='gray')
        ax.set_title(f"{img_path.name}\nShape: {img.shape}")

    # Set the figure title before tight_layout so the layout pass can take it
    # into account (NOTE(review): depends on the matplotlib version - confirm).
    plt.suptitle(f"Sample images from {subjects[0]}")
    plt.tight_layout()
    plt.show()

    if img is not None:
        print(f"Image shape: {img.shape}")
        print(f"Image dtype: {img.dtype}")
        print(f"Image range: {img.min()} to {img.max()}")
    else:
        print("None of the sample images could be read")

In [None]:
# Load and examine annotation data
def load_annotations(subject_id, base_dir=None):
    """
    Load annotation data for a subject

    Args:
        subject_id: Name of the subject's directory.
        base_dir: Directory that contains the subject directories. When
            omitted, the notebook-level ``evaluation_dir`` is used.

    Returns:
        dict mapping each successfully loaded .mat file name to a dict of its
        variables, with the loader's ``__``-prefixed metadata keys removed.
        Files that fail to load are reported via print and skipped.
    """
    root_dir = evaluation_dir if base_dir is None else Path(base_dir)
    subject_dir = root_dir / subject_id

    # Look for .mat files (common format for MPIIGaze); sorted so the result
    # and the printed log have a reproducible order.
    mat_files = sorted(f for f in subject_dir.iterdir() if f.suffix.lower() == '.mat')

    annotations = {}
    for mat_file in mat_files:
        try:
            data = loadmat(str(mat_file))
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not stop the
            # exploration of the remaining ones.
            print(f"Error loading {mat_file.name}: {e}")
            continue

        # Filter out MATLAB metadata
        clean_data = {k: v for k, v in data.items() if not k.startswith('__')}
        annotations[mat_file.name] = clean_data
        print(f"Loaded {mat_file.name} with keys: {list(clean_data.keys())}")

    return annotations

# Load the first subject's annotations and print a summary of every variable
if evaluation_dir.exists() and subjects:
    sample_annotations = load_annotations(subjects[0])

    # Examine the structure of annotations
    for filename, data in sample_annotations.items():
        print(f"\n{filename}:")
        for key, value in data.items():
            if not isinstance(value, np.ndarray):
                # Non-array entries are shown with their type and value
                print(f"  {key}: {type(value)} - {value}")
                continue
            print(f"  {key}: shape {value.shape}, dtype {value.dtype}")
            if value.size >= 20:
                # Large arrays: show only the first five flattened values
                print(f"    Sample values: {value.flat[:5]}...")
            else:
                print(f"    {value}")

## 4. Load Existing Model

In [None]:
# First, let's check what we need from the original analysis notebook
import sys

# Add parent directory to path - only once, so re-running this cell does not
# keep appending duplicate entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

try:
    # Try to import the et_util modules
    import et_util.dataset_utils as dataset_utils
    import et_util.embedding_preprocessing as embed_pre
    import et_util.model_layers as model_layers
    from et_util import experiment_utils
    from et_util.custom_loss import normalized_weighted_euc_dist
except ImportError as e:
    print(f"Could not import et_util modules: {e}")
    print("Will define required components locally")
else:
    # Reached only when every import above succeeded
    print("Successfully imported et_util modules")

In [None]:
# Define the model architecture components locally if needed
import keras
from keras import ops
import keras_hub

# Hyper-parameters carried over from the original model
MAX_TARGETS = 288
# The three values below should match the original model
EMBEDDING_DIM = 200
RIDGE_REGULARIZATION = 0.001
BACKBONE = "densenet"

# Echo the active settings, one per line
print("Model configuration:")
print(
    f"  Embedding dim: {EMBEDDING_DIM}",
    f"  Ridge regularization: {RIDGE_REGULARIZATION}",
    f"  Backbone: {BACKBONE}",
    sep="\n",
)

In [None]:
# Locate the trained weights; if they are missing, list likely candidates
model_weights_path = "./full_model.weights.h5"
if not os.path.exists(model_weights_path):
    print(f"Model weights not found at: {model_weights_path}")
    print("Looking for weights file in current directory...")
    for entry in os.listdir("."):
        # Skip anything that neither mentions "weight" nor ends in .h5
        if "weight" not in entry.lower() and not entry.endswith(".h5"):
            continue
        print(f"  Found: {entry}")
else:
    print(f"Found model weights at: {model_weights_path}")

## Status Update

At this point, we have:
1. ✅ Set up the notebook structure
2. ✅ Created code to download the MPIIGaze dataset
3. ✅ Started exploring the data format

The next steps depend on the actual dataset structure once it has been downloaded. The remaining work is to:
- Complete the dataset exploration
- Adapt the existing model architecture
- Implement the evaluation pipeline
- Calculate visual angle errors

Run the notebook from the top to begin the evaluation process.