# GIQ Feature Extraction on Kaggle

This notebook runs the feature extraction pipeline for the GIQ benchmark.
It assumes the data (renderings and splits) is available as a Kaggle dataset.

In [None]:
# Install the extra libraries quietly (-q); presumably they are needed by the
# featurizers imported from src.models.featurizers later on -- TODO confirm.
!pip install -q timm transformers
# mitsuba and open3d are likely not needed for feature extraction, only for rendering

In [None]:
import sys
import os
from pathlib import Path

# --- CONFIGURATION ---

# Path to the cloned repository (or where the code resides).
# If you uploaded the code as a dataset, point to it here.
# If you are cloning it in the notebook, use /kaggle/working/repo_name
REPO_PATH = Path("/kaggle/working/giq-project")  # Adjust this!

# Path to the GIQ data root: the directory that directly contains the
# 'renderings' and 'splits' folders used by the later cells.
# On Kaggle, this is usually in /kaggle/input/...
DATA_ROOT = Path("/kaggle/input/probing-vlm-dataset") # Adjust this!

# Output directory for features (created right away if it does not exist)
OUTPUT_ROOT = Path("/kaggle/working/features")
OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)

# ---------------------

if not REPO_PATH.exists():
    # Clone the repo if it doesn't exist (e.g. if not uploaded as dataset).
    # IPython substitutes {REPO_PATH} into the shell command as the clone target.
    !git clone https://github.com/phongviet/Probing-the-Geometric-Intelligence-of-Vision-Language-Models.git {REPO_PATH}
    # Note: If the repo is private or you have local changes, you might need to upload the code instead.

# Put the repository root (not its src/ subfolder) at the front of sys.path so
# that the `import src....` statements in later cells resolve to this checkout.
if str(REPO_PATH) not in sys.path:
    sys.path.insert(0, str(REPO_PATH))

print(f"Repo path: {REPO_PATH}")
print(f"Data path: {DATA_ROOT}")

In [None]:
# Monkey-patch split loading in src.data.base so that it reads from our DATA_ROOT.
# The original code assumes the data lives at REPO_ROOT/data/giq,
# so requests for split files have to be redirected to DATA_ROOT.

import src.data.base

# We can't easily change REPO_ROOT because it's used to construct paths like REPO_ROOT / "data" / ...
# Instead, we can monkey-patch the functions that use it, or ensure DATA_ROOT has the expected structure.

# Let's look at how load_split works:
# path = REPO_ROOT / "data" / "giq" / "splits" / f"{split}_shapes.json"

# If we set REPO_ROOT such that REPO_ROOT / "data" / "giq" == DATA_ROOT, it works.
# So REPO_ROOT would have to be DATA_ROOT.parent.parent.

# However, that only holds when DATA_ROOT itself ends in .../data/giq (the usual
# local layout); a Kaggle input path like the one configured above does not.
# So let's try to trick it.

class MockPath(type(Path("."))):
    """Concrete-path subclass sketched as a hook for rerouting ``REPO_ROOT / ...``.

    As written it changes nothing: the ``/`` operator simply delegates to the
    parent class. It is not referenced elsewhere in the visible cells.
    """

    def __truediv__(self, other):
        # Placeholder hook: a request for REPO_ROOT / "data" / "giq" could be
        # redirected to DATA_ROOT here, but that is hard to intercept perfectly
        # with Path objects, so the join is passed through unchanged.
        joined = super().__truediv__(other)
        return joined

# Easier approach: Redefine load_split in src.data.base
def custom_load_split(split: str):
    """Load the split definition ``<DATA_ROOT>/splits/<split>_shapes.json``.

    Replacement for ``src.data.base.load_split`` that reads from ``DATA_ROOT``
    instead of a path derived from the repository root.

    Args:
        split: Split name used to build the file name (e.g. ``"test"``).

    Returns:
        The parsed JSON content of the split file (presumably a list of shape
        ids -- confirm against how GIQBase consumes it).

    Raises:
        FileNotFoundError: If the split file does not exist.
    """
    import json

    path = DATA_ROOT / "splits" / f"{split}_shapes.json"
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        raw = path.read_text(encoding="utf-8")
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Split file not found: {path}") from err
    return json.loads(raw)

# Rebind the module attribute so that code resolving src.data.base.load_split at
# call time gets the DATA_ROOT-based loader. Anything that imported load_split
# by name before this cell ran would still hold the old function.
src.data.base.load_split = custom_load_split
print("Patched load_split to use DATA_ROOT")


In [None]:
import argparse
import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm

from src.data.base import GIQBase
from src.models.featurizers import (
    CLIPFeaturizer,
    DINOv3Featurizer,
    SigLIP2Featurizer,
)

# Re-implement ExtractionDataset to accept explicit renderings_root
class ExtractionDataset(GIQBase):
    """
    Dataset that iterates over all views of all not-yet-processed shapes in a split.

    Shapes that already have a ``{shape_id}.npz`` file in ``output_dir`` are
    left out, so an interrupted extraction can be resumed.

    Returns: {"image": <result of _load_image>, "shape_id": str, "view_idx": int}
    (the image is presumably the transformed tensor -- confirm in GIQBase).

    Args:
        split: Split name, forwarded to ``GIQBase``.
        transform: Image transform, forwarded to ``GIQBase``.
        output_dir: Directory scanned for ``*.npz`` files of finished shapes.
            ``None`` disables the skipping.
        limit: Maximum number of pending shapes to enumerate. ``None`` means
            no limit; ``0`` or a negative value selects no shapes at all.
        renderings_root: Renderings location, forwarded to ``GIQBase``.
        n_views: Number of views enumerated per shape (indices
            ``0 .. n_views - 1``). Defaults to the 20 views used so far.
    """

    def __init__(
        self,
        split: str,
        transform=None,
        output_dir: Path | None = None,
        limit: int | None = None,
        renderings_root: Path | None = None,
        n_views: int = 20,
    ):
        super().__init__(split=split, transform=transform, renderings_root=renderings_root)
        self.n_views = n_views

        # Shapes whose output file already exists; the output format is
        # assumed to be {shape_id}.npz.
        processed_shapes = set()
        if output_dir is not None:
            processed_shapes = {p.stem for p in output_dir.glob("*.npz")}

        print(f"Found {len(processed_shapes)} already processed shapes. Skipping them.")

        pending = [sid for sid in self.shape_ids if sid not in processed_shapes]

        # Truncate before building samples so that limit=0 really yields an
        # empty dataset (checking after the first append let one shape through).
        if limit is not None and len(pending) >= limit:
            pending = pending[: max(limit, 0)]
            print(f"Stopping after {len(pending)} shapes due to --limit")

        # One sample per (shape, view), grouped by shape in split order.
        self.samples = [
            (sid, view_idx) for sid in pending for view_idx in range(self.n_views)
        ]

    def __len__(self) -> int:
        """Return the number of (shape, view) samples."""
        return len(self.samples)

    def __getitem__(self, idx: int):
        """Load the image for sample ``idx`` and return it with its identifiers."""
        shape_id, view_idx = self.samples[idx]
        img = self._load_image(shape_id, view_idx)
        return {
            "image": img,
            "shape_id": shape_id,
            "view_idx": view_idx,
        }

In [None]:
# --- PARAMETERS ---
MODEL_TYPE = "clip"   # choices: ["clip", "siglip2", "dinov3"]
MODEL_NAME = None     # Optional override of the default checkpoint for MODEL_TYPE
SPLIT = "test"        # choices: ["train", "val", "test"]
BATCH_SIZE = 32
NUM_WORKERS = 2
LIMIT = None          # Set to integer to test on few shapes
FP16 = True           # Run the featurizer under float16 autocast (only applied on CUDA)

# ------------------

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Pick the featurizer class and its default checkpoint; MODEL_NAME overrides the default.
if MODEL_TYPE == "clip":
    model_name = MODEL_NAME or "openai/clip-vit-base-patch16"
    featurizer = CLIPFeaturizer(model_name=model_name, device=device)
elif MODEL_TYPE == "siglip2":
    model_name = MODEL_NAME or "google/siglip2-base-patch16-224"
    featurizer = SigLIP2Featurizer(model_name=model_name, device=device)
elif MODEL_TYPE == "dinov3":
    # NOTE(review): confirm that this default id resolves for DINOv3Featurizer.
    model_name = MODEL_NAME or "facebook/dinov3-base"
    featurizer = DINOv3Featurizer(model_name=model_name, device=device)
else:
    raise ValueError(f"Unknown model: {MODEL_TYPE}")

# "/" in a model id would create nested directories, so flatten it.
model_safe_name = model_name.replace("/", "__")
output_dir = OUTPUT_ROOT / model_safe_name / SPLIT
output_dir.mkdir(parents=True, exist_ok=True)
print(f"Saving features to: {output_dir}")

renderings_path = DATA_ROOT / "renderings"

transform = featurizer.get_transform()
# Passing output_dir lets the dataset skip shapes that already have an .npz file.
dataset = ExtractionDataset(
    split=SPLIT,
    transform=transform,
    output_dir=output_dir,
    limit=LIMIT,
    renderings_root=renderings_path,
)

# shuffle=False keeps all views of a shape adjacent, so the extraction loop can
# write each shape as soon as its last view has been seen.
loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=(device == "cuda"),  # pinned host memory only helps CUDA transfers
)

# Report the shapes actually queued: dataset.shape_ids also contains shapes that
# were skipped as already processed or cut off by LIMIT.
n_pending_shapes = len(dataset) // dataset.n_views
print(
    f"Extracting features for {len(dataset)} images "
    f"({n_pending_shapes} shapes x {dataset.n_views} views)..."
)

In [None]:
# Run Extraction


def _save_shape_features(path, buf):
    """Stack one shape's buffered per-view features in view order and write them to ``path``."""
    order = np.argsort(buf["views"])
    np.savez_compressed(
        path,
        global_features=np.stack(buf["global"])[order],
        local_features=np.stack(buf["local"])[order],
    )


# Number of views that make a shape complete; taken from the dataset so that it
# cannot drift from the number of views the dataset actually enumerates.
views_per_shape = dataset.n_views
use_autocast = FP16 and "cuda" in device

shape_buffers = {}  # shape_id -> {'global': [], 'local': [], 'views': []}

for batch in tqdm(loader):
    images = batch["image"].to(device)
    shape_ids = batch["shape_id"]
    view_idxs = batch["view_idx"]

    with torch.no_grad():
        if use_autocast:
            with torch.autocast(device_type="cuda", dtype=torch.float16):
                features = featurizer(images)
        else:
            features = featurizer(images)

    # Move to CPU
    # NOTE(review): under autocast these arrays may be float16 -- confirm that
    # downstream consumers expect that dtype.
    global_feats = features["global"].cpu().numpy()  # [B, D]
    local_feats = features["local"].cpu().numpy()  # [B, N, D]

    for i, (sid, raw_view) in enumerate(zip(shape_ids, view_idxs)):
        vid = int(raw_view)

        # Never overwrite a shape that already has an output file.
        save_path = output_dir / f"{sid}.npz"
        if save_path.exists():
            continue

        buf = shape_buffers.setdefault(sid, {"global": [], "local": [], "views": []})
        buf["global"].append(global_feats[i])
        buf["local"].append(local_feats[i])
        buf["views"].append(vid)

        # Once every view of the shape has arrived, write it out and free the buffer.
        if len(buf["views"]) == views_per_shape:
            _save_shape_features(save_path, buf)
            del shape_buffers[sid]

# Flush any remaining (shapes for which fewer views than expected were seen)
for sid, buf in shape_buffers.items():
    if buf["views"]:
        print(f"Warning: Incomplete views for {sid} (found {len(buf['views'])}). Saving anyway.")
        _save_shape_features(output_dir / f"{sid}.npz", buf)

In [None]:
# Validate output
# List the output directory once; the count includes files from earlier runs.
npz_files = list(output_dir.glob('*.npz'))
print(f"Processed {len(npz_files)} files.")
if npz_files:
    sample_file = npz_files[0]
    # np.load returns an NpzFile that keeps the archive open; close it when done.
    with np.load(sample_file) as data:
        print(f"Sample {sample_file.name}: Global {data['global_features'].shape}, Local {data['local_features'].shape}")