# Extracting Image Embeddings using DINOv3

In [None]:
# Move up to project root directory (parent directory) for module imports
import os

# The imports further down load the `src` package relative to the working
# directory. Only step up when `src` is not already visible from here, so that
# re-running this cell (or starting the kernel at the project root) does not
# keep walking further up the directory tree.
if not os.path.isdir("src"):
    os.chdir("../")

# Current working directory should now be project root
print("Current working directory:", os.getcwd())

In [None]:
# Imports
import torch

from src.embeddings.dino import (
    extract_dino_embeddings,
    get_dinov3_preprocessor,
    load_dinov3_models,
)

In [None]:
# Device: use the CUDA device when torch reports one, otherwise the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [None]:
# Load DINO Model
# load_dinov3_models is called with the selected device and must return exactly
# two values for this unpacking to succeed; only the first is kept (as
# `dino_model`), the second is discarded via `_`.
# NOTE(review): what the discarded second value is cannot be told from this
# notebook — confirm in src.embeddings.dino.
dino_model, _ = load_dinov3_models(device=DEVICE)
# Preprocessor obtained with default arguments; it is handed to
# extract_dino_embeddings together with the model in the cells below.
dino_processor = get_dinov3_preprocessor()

In [None]:
# Dataset root and the directory that receives the embedding files.
# Both keep a trailing slash because later cells build paths by plain string
# concatenation (e.g. DATA_DIR + "train2017/").
DATA_DIR = "coco_data/"
OUTPUT_DIR = DATA_DIR + "embeddings/"
# Create the output directory up front; exist_ok makes re-running the cell safe.
os.makedirs(OUTPUT_DIR, exist_ok=True)

BATCH_SIZE = 64
# Must be 0 on Windows: detect the platform instead of requiring a manual edit.
# os.name is "nt" on Windows; every other platform keeps the original 4.
NUM_WORKERS = 0 if os.name == "nt" else 4

In [None]:
# Embed Train Set
# Input image folder and output file for the train split.
train_image_dir = DATA_DIR + "train2017/"
train_output_path = OUTPUT_DIR + "train_dino_embeddings.pt"

# Model, preprocessor, batching and device all come from the cells above.
# NUM_WORKERS should be 0 on Windows.
extract_dino_embeddings(
    image_dir=train_image_dir,
    output_path=train_output_path,
    dino_model=dino_model,
    dino_processor=dino_processor,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    device=DEVICE,
)

In [None]:
# Embed Validation Set
# Input image folder and output file for the validation split.
val_image_dir = DATA_DIR + "val2017/"
val_output_path = OUTPUT_DIR + "val_dino_embeddings.pt"

# Model, preprocessor, batching and device all come from the cells above.
# NUM_WORKERS should be 0 on Windows.
extract_dino_embeddings(
    image_dir=val_image_dir,
    output_path=val_output_path,
    dino_model=dino_model,
    dino_processor=dino_processor,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    device=DEVICE,
)

In [None]:
# Embed Test Set
# We use val2014 as test set.
# NOTE(review): COCO's 2017 splits appear to re-partition the 2014 images, so
# val2014 may overlap with train2017 — confirm the overlap is handled elsewhere.
test_image_dir = DATA_DIR + "val2014/"
test_output_path = OUTPUT_DIR + "test_dino_embeddings.pt"

# Model, preprocessor, batching and device all come from the cells above.
# NUM_WORKERS should be 0 on Windows.
extract_dino_embeddings(
    image_dir=test_image_dir,
    output_path=test_output_path,
    dino_model=dino_model,
    dino_processor=dino_processor,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    device=DEVICE,
)