In [None]:
# imports
from pathlib import Path
import pandas as pd
import re
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os
from sklearn.preprocessing import LabelEncoder

In [None]:
# prevent my poor mac from overheating: cap the OpenMP thread count at 4
# NOTE(review): this cell runs after numpy/sklearn were imported in the first cell;
# such libraries may read OMP_NUM_THREADS only when they are loaded - confirm the
# cap actually applies, or move this assignment above those imports.
os.environ["OMP_NUM_THREADS"] = "4" 

In [None]:
# load image metadata
# Images are looked up exactly one directory level below this root.
DATA_ROOT = Path("~/Documents/00_210/data/columbia_gaze_dataset").expanduser()

rows = []

# Regex for filename parsing:
#   <subject>_<distance>m_<head pose>P_<vertical gaze>V_<horizontal gaze>H.jpg
# Head pose and both gaze angles may carry a leading minus sign.
pattern = re.compile(
    r"(?P<subject>\d+)_"
    r"(?P<distance>\d+)m_"
    r"(?P<head_pose>-?\d+)P_"
    r"(?P<gaze_v>-?\d+)V_"
    r"(?P<gaze_h>-?\d+)H\.jpg"
)

# iterdir()/glob() yield entries in arbitrary, filesystem-dependent order.
# Sorting both levels makes the row order of `df` (and anything derived from
# it, such as a seeded shuffle or split) reproducible across runs and machines.
for subject_dir in sorted(DATA_ROOT.iterdir()):
    if not subject_dir.is_dir():
        continue

    for img_path in sorted(subject_dir.glob("*.jpg")):
        # fullmatch: the whole filename must follow the naming scheme,
        # not merely start with it (e.g. "..._0H.jpg.jpg" is rejected).
        match = pattern.fullmatch(img_path.name)
        if not match:
            continue  # skip unexpected filenames

        meta = match.groupdict()

        rows.append({
            "path": str(img_path),
            "filename": img_path.name,
            "subject": meta["subject"],  # kept as the raw digit string from the filename
            "distance_m": int(meta["distance"]),
            "head_pose_deg": int(meta["head_pose"]),
            "gaze_vertical_deg": int(meta["gaze_v"]),
            "gaze_horizontal_deg": int(meta["gaze_h"]),
        })

# Naming the columns keeps the schema stable even when no file matched.
df = pd.DataFrame(
    rows,
    columns=[
        "path",
        "filename",
        "subject",
        "distance_m",
        "head_pose_deg",
        "gaze_vertical_deg",
        "gaze_horizontal_deg",
    ],
)

In [None]:
# create labels based on degrees
def create_labels(row):
    '''
    Map one image's gaze angles to a coarse direction label.

    Reads "gaze_horizontal_deg" and "gaze_vertical_deg" from row and returns
    one of "straight", "left", "right", "up" or "down". The axis with the
    larger absolute angle decides; equal magnitudes go to the horizontal axis.
    '''
    horizontal = row["gaze_horizontal_deg"]
    vertical = row["gaze_vertical_deg"]

    # both angles are zero -> straight
    if horizontal == 0 and vertical == 0:
        return "straight"

    # vertical wins only when it is strictly larger in magnitude
    if abs(vertical) > abs(horizontal):
        if vertical < 0:
            return "down"
        return "up"

    # horizontal dominates, or tie -> horizontal wins
    # (gaze is steadier in horizontal axis)
    if horizontal < 0:
        return "left"
    return "right"

# Apply create_labels to every row (axis=1) and store the result in a new "label" column.
df["label"] = df.apply(create_labels, axis=1)

In [None]:
# load images and corresponding labels
def load_data(path_to_data):
    '''Load images stored in per-class sub-folders together with their labels.

    Every visible sub-directory of path_to_data is treated as one class: its
    name becomes the label of each image file found directly inside it.
    Hidden entries (names starting with ".", e.g. .DS_Store or
    .ipynb_checkpoints) and anything that is not a regular file are skipped.
    Folders and files are visited in sorted order so the result is reproducible.

    Parameters:
    path_to_data (str): Path to the folder that contains one sub-folder per class

    Returns:
    images (np.ndarray): float32 array of shape (N, H, W, 3), e.g. (N, 64, 64, 3)
        for 64x64 inputs. No resizing is done, so all images must share one size.
    labels (np.ndarray): array of shape (N,) holding the class-folder name of
        each image, aligned with images

    '''

    # initialize lists to store data
    labels = []
    images = []

    # iterate over class directories (sorted: os.listdir order is arbitrary)
    for label in sorted(os.listdir(path_to_data)):
        # construct path into subfolder
        label_path = os.path.join(path_to_data, label)

        # ensure visible directories only; a hidden folder is not a class
        if label.startswith(".") or not os.path.isdir(label_path):
            continue

        # iterate over images
        for img_file in sorted(os.listdir(label_path)):
            # construct path to each image; needed for loading
            img_path = os.path.join(label_path, img_file)

            # hidden files and nested folders are not images
            if img_file.startswith(".") or not os.path.isfile(img_path):
                continue

            # Decode as RGB and convert to a float32 (H, W, 3) array. The
            # context manager closes the underlying file handle right away.
            with Image.open(img_path) as img:
                img_array = np.asarray(img.convert("RGB"), dtype=np.float32)

            # store data
            labels.append(label)
            images.append(img_array)

    return np.array(images), np.array(labels)

In [None]:
# NOTE(review): same value as the DATA_ROOT assigned in the metadata-loading cell
# earlier in this notebook; consider defining it once in a single config cell.
DATA_ROOT = Path("~/Documents/00_210/data/columbia_gaze_dataset").expanduser()