In [None]:
# imports
from pathlib import Path
import pandas as pd
import re
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator, array_to_img
import albumentations as A
from tensorflow.keras.utils import Sequence
from tqdm import tqdm  # for progress bar


In [2]:
# Limit OMP_NUM_THREADS to 4 so heavy numeric work does not overheat the laptop.
# NOTE(review): this cell runs after the import cell; libraries that read the variable
# when they are first loaded may not pick it up -- confirm, or move it above the imports.
os.environ.update({"OMP_NUM_THREADS": "4"})

In [3]:
# Load image metadata by parsing the dataset's file names.
DATA_ROOT = Path("~/Documents/00_210/data/columbia_gaze_dataset").expanduser()

rows = []

# Regex for filename parsing:
# <subject>_<distance>m_<head pose>P_<vertical gaze>V_<horizontal gaze>H.jpg
pattern = re.compile(
    r"(?P<subject>\d+)_"
    r"(?P<distance>\d+)m_"
    r"(?P<head_pose>-?\d+)P_"
    r"(?P<gaze_v>-?\d+)V_"
    r"(?P<gaze_h>-?\d+)H\.jpg"
)

# iterdir() and glob() yield entries in arbitrary, filesystem-dependent order.
# Sorting both levels makes the row order of `df` reproducible, which matters
# because later cells slice the frame by position.
for subject_dir in sorted(DATA_ROOT.iterdir()):
    if not subject_dir.is_dir():
        continue

    for img_path in sorted(subject_dir.glob("*.jpg")):
        # fullmatch: the whole file name must follow the layout, not just a prefix
        match = pattern.fullmatch(img_path.name)
        if match is None:
            continue  # skip unexpected filenames

        meta = match.groupdict()

        rows.append({
            "path": str(img_path),
            "filename": img_path.name,
            "subject": meta["subject"],  # kept as a string; it doubles as a folder name
            "distance_m": int(meta["distance"]),
            "head_pose_deg": int(meta["head_pose"]),
            "gaze_vertical_deg": int(meta["gaze_v"]),
            "gaze_horizontal_deg": int(meta["gaze_h"]),
        })

# Explicit columns keep the schema intact even when no image matched,
# so downstream cells fail on "no rows" rather than on a missing column.
df = pd.DataFrame(
    rows,
    columns=[
        "path",
        "filename",
        "subject",
        "distance_m",
        "head_pose_deg",
        "gaze_vertical_deg",
        "gaze_horizontal_deg",
    ],
)

In [4]:
# create labels based on degrees
def create_labels(row):
    '''
    Map one image's gaze angles onto a coarse intent class.

    Reads "gaze_horizontal_deg" and "gaze_vertical_deg" from `row` and
    returns "straight", "left", "right", "down" or "up".
    '''
    horizontal = row["gaze_horizontal_deg"]
    vertical = row["gaze_vertical_deg"]

    # both angles zero -> straight ahead
    if vertical == 0 and horizontal == 0:
        return "straight"

    # the vertical class is chosen only when its angle is strictly larger
    if abs(vertical) > abs(horizontal):
        if vertical < 0:
            return "down"
        return "up"

    # horizontal dominates, or the two magnitudes tie -> horizontal wins
    # (gaze is steadier in horizontal axis)
    if horizontal < 0:
        return "left"
    return "right"

# Add a "label" column to df in place; axis=1 passes each row to create_labels.
df["label"] = df.apply(create_labels, axis=1)

In [8]:
def load_data(df, target_size=None):
    '''Load 2D images and their corresponding labels from a DataFrame

    Every image is decoded and held in memory at once, so pass a
    `target_size` when the source images are large.

    Parameters:
    df (pd.DataFrame): DataFrame with 'path' and 'label' columns
    target_size (tuple or None): (height, width) each image is resized to
        while loading. None (the default) keeps the original resolution.

    Returns:
    images (np.ndarray): A numpy array of shape (N, H, W, 3), provided all
        loaded images share the same size
    labels (np.ndarray): A numpy array of shape (N)
    '''

    images = []

    # Iterate the column directly; building a Series per row is not needed here.
    for img_path in df['path']:
        # target_size=None is load_img's own default, i.e. no resizing
        img = load_img(img_path, target_size=target_size)
        images.append(img_to_array(img))

    # tolist() first so the label array gets the same dtype a plain list would give
    labels = df['label'].tolist()

    return np.array(images), np.array(labels)

In [None]:
# Disabled: loading every full-resolution image into memory at once crashes the kernel.
# images, labels = load_data(df)
# print(f"Image shape: {images.shape}")  # (N, 3456, 5184, 3)

: 

In [None]:
import os
import numpy as np
from pathlib import Path
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img
from tqdm import tqdm  # for progress bar

# Define your augmentation strategy
augmentor = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.1,
    zoom_range=0.15,
    # Mirroring turns a left gaze into a right gaze, but augment_and_save copies the
    # source row's label and gaze angles unchanged, so flipped copies would be
    # mislabelled. Keep this off unless the labels are mirrored as well.
    horizontal_flip=False,
    brightness_range=[0.7, 1.3],
    fill_mode='nearest'
)

# Create output directory
AUGMENTED_ROOT = Path("~/Documents/00_210/data/columbia_gaze_dataset_augmented").expanduser()
AUGMENTED_ROOT.mkdir(parents=True, exist_ok=True)

def _save_rgb_array(pixels, destination):
    '''Write an image array holding 0-255 intensities to `destination` without rescaling.'''
    # array_to_img defaults to scale=True, which stretches every image so that its
    # min/max become 0/255. That alters the saved "original" copies and largely
    # cancels brightness shifts, so scaling is switched off. The clip guards the
    # uint8 cast that scale=False performs.
    array_to_img(np.clip(pixels, 0, 255), scale=False).save(destination)


def augment_and_save(df, num_augmentations_per_image=5, target_size=(512, 512)):
    '''
    Augment images and save them, preserving subject folder structure

    For each row, the (optionally resized) source image is written as
    "<name>_original.jpg", followed by "<name>_aug1.jpg" ... "<name>_augN.jpg"
    drawn from the module-level `augmentor`. Files go to
    AUGMENTED_ROOT/<subject>/.

    Each output row copies the source row's metadata unchanged (labels and
    gaze angles included), so `augmentor` must only apply transforms that
    leave those values valid.

    Parameters:
    df (pd.DataFrame): DataFrame with image metadata including 'path', 'subject', 'filename'
    num_augmentations_per_image (int): How many augmented versions to create per original
    target_size (tuple): (height, width) to resize images to (or None for original size)

    Returns:
    augmented_df (pd.DataFrame): One row per written file, with the source
        metadata plus 'augmentation_type' and 'augmentation_index'
    '''

    augmented_rows = []

    for _, row in tqdm(df.iterrows(), total=len(df), desc="Augmenting images"):
        # A falsy target_size (None or an empty tuple) means "keep the original size"
        img = load_img(row['path'], target_size=target_size or None)

        # flow() expects a batch, so add a leading axis: (1, H, W, 3)
        img_batch = np.expand_dims(img_to_array(img), axis=0)

        # Create subject directory in augmented dataset
        subject_dir = AUGMENTED_ROOT / row['subject']
        subject_dir.mkdir(parents=True, exist_ok=True)

        source_metadata = row.to_dict()
        # stem drops only the final extension
        stem = Path(row['filename']).stem

        # Save original image first (optional, but maintains all originals)
        original_filename = f"{stem}_original.jpg"
        original_path = subject_dir / original_filename
        _save_rgb_array(img_batch[0], original_path)

        augmented_rows.append({
            **source_metadata,  # keep all original metadata
            'path': str(original_path),
            'filename': original_filename,
            'augmentation_type': 'original',
            'augmentation_index': 0
        })

        # Generate augmented versions
        aug_iter = augmentor.flow(img_batch, batch_size=1)

        for aug_number in range(1, num_augmentations_per_image + 1):
            augmented_img = next(aug_iter)[0]  # get one augmented image

            # Create filename that preserves original info
            aug_filename = f"{stem}_aug{aug_number}.jpg"
            aug_path = subject_dir / aug_filename

            _save_rgb_array(augmented_img, aug_path)

            augmented_rows.append({
                **source_metadata,  # keep all original metadata (subject, gaze angles, etc.)
                'path': str(aug_path),
                'filename': aug_filename,
                'augmentation_type': 'augmented',
                'augmentation_index': aug_number
            })

    return pd.DataFrame(augmented_rows)

# Run augmentation over the whole metadata frame and report how much it grew
original_count = len(df)
print(f"Original dataset size: {original_count}")
augmented_df = augment_and_save(df, num_augmentations_per_image=5, target_size=(512, 512))

augmented_count = len(augmented_df)
print(f"\nAugmented dataset size: {augmented_count}")
print(f"Images per original: {augmented_count / original_count}")

# Write the augmented metadata as a CSV in the augmented dataset root
metadata_path = AUGMENTED_ROOT / "metadata.csv"
augmented_df.to_csv(metadata_path, index=False)
print(f"\nMetadata saved to: {metadata_path}")

In [None]:
# define augmentation strategy
# Plain assignments, deliberately without trailing commas: `name = value,`
# binds a 1-tuple such as (15,) instead of the value itself.

# Geometric augmentations
rotation_range = 15  # rotate images randomly up to 15 degrees
width_shift_range = 0.15  # shift horizontally by 15%
height_shift_range = 0.15  # shift vertically by 15%
shear_range = 0.1  # shear transformation
zoom_range = 0.15  # zoom in/out randomly
# Mirroring swaps left and right gaze, so a flipped image no longer matches its
# "left"/"right" label unless the label is mirrored too.
horizontal_flip = False

# Photometric augmentations (for lighting variations)
brightness_range = [0.7, 1.3]  # darken or brighten

fill_mode = 'nearest'  # how to fill in newly created pixels

In [None]:
import albumentations as A
from tensorflow.keras.utils import Sequence

class AugmentedDataGenerator(Sequence):
    '''Keras Sequence yielding (images, one-hot labels) batches read from disk.

    Images are decoded with PIL and labels are one-hot encoded with NumPy, so
    the class relies only on names the notebook's import cell provides.

    Parameters:
    df (pd.DataFrame): DataFrame with 'path' and 'label' columns
    batch_size (int): Number of rows per batch (the last batch may be smaller)
    target_size (tuple): (width, height) every image is resized to
    augment (bool): Apply the albumentations pipeline to each image
    classes (sequence or None): Class names in index order. None uses the
        sorted unique labels of `df`. Pass the same sequence to every
        generator of a run so they are guaranteed to share one encoding.
    '''

    def __init__(self, df, batch_size=32, target_size=(512, 512), augment=True, classes=None):
        # Initialise the Keras base class (newer Keras versions warn when this is skipped)
        super().__init__()

        self.df = df
        self.batch_size = batch_size
        self.target_size = target_size
        self.augment = augment
        self.indices = np.arange(len(df))

        # Label encoding. Sorting makes the class -> index mapping independent of
        # row order; first-appearance order could differ between the train and
        # validation frames and silently misalign their label indices.
        if classes is None:
            classes = sorted(df['label'].unique())
        self.label_map = {label: idx for idx, label in enumerate(classes)}
        self.num_classes = len(self.label_map)

        # Advanced augmentations for in-the-wild scenarios.
        # No horizontal flip: mirroring swaps left/right gaze while the label
        # attached to the row stays the same.
        self.transform = A.Compose([
            A.RandomBrightnessContrast(p=0.5),
            A.HueSaturationValue(p=0.3),
            A.GaussianBlur(blur_limit=(3, 7), p=0.3),
            A.GaussNoise(p=0.2),
            A.RandomGamma(p=0.3),
            A.CLAHE(p=0.2),  # better for varying lighting
            A.Rotate(limit=15, p=0.5),
            A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.15, rotate_limit=15, p=0.5),
            A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.3),  # simulate occlusions
        ]) if augment else None

    def __len__(self):
        '''Number of batches per epoch, counting a final partial batch.'''
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, idx):
        '''Return batch `idx` as (images scaled to [0, 1], one-hot labels).'''
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_df = self.df.iloc[batch_indices]

        images = []
        labels = []

        for _, row in batch_df.iterrows():
            # Load image as RGB and resize; PIL's resize takes (width, height)
            with Image.open(row['path']) as handle:
                resized = handle.convert("RGB").resize(self.target_size, Image.BILINEAR)
            # np.array copies, giving the transforms a writable uint8 array
            img = np.array(resized, dtype=np.uint8)

            # Apply augmentation
            if self.transform:
                img = self.transform(image=img)['image']

            # Normalize to [0, 1]; float32 halves the batch's memory versus float64
            img = img.astype(np.float32) / 255.0

            images.append(img)
            labels.append(self.label_map[row['label']])

        # One-hot encode by picking rows of an identity matrix
        one_hot = np.eye(self.num_classes, dtype=np.float32)[np.asarray(labels, dtype=int)]

        return np.array(images), one_hot

    def on_epoch_end(self):
        '''Reshuffle the row order so the next epoch sees different batches.'''
        np.random.shuffle(self.indices)

# Usage: the first 80% of rows train with augmentation, the remaining 20% validate without it
split_at = int(0.8 * len(df))
train_gen = AugmentedDataGenerator(df.iloc[:split_at], augment=True)
val_gen = AugmentedDataGenerator(df.iloc[split_at:], augment=False)

# NOTE(review): `model` is not defined in any visible cell -- confirm where it is built.
model.fit(train_gen, validation_data=val_gen, epochs=20)