# Extracting Image Embeddings using DINOv3

In [1]:
# Make the project root the working directory so that `src.*` imports and the
# relative data paths used by the later cells resolve.
import os


def enter_project_root(marker: str = "src") -> str:
    """Move up one directory unless the working directory is already the project root.

    The project root is recognised by the presence of the ``marker`` directory
    (the ``src`` package that the import cell loads from). The check keeps this
    cell safe to re-run: an unconditional ``os.chdir("../")`` climbs one more
    level on every execution, after which the imports and the relative data
    paths no longer resolve.

    Args:
        marker: Name of a directory that exists directly inside the project root.

    Returns:
        The working directory after the (possible) move.
    """
    if not os.path.isdir(marker):
        os.chdir("../")
    return os.getcwd()


# Current working directory should now be project root
print("Current working directory:", enter_project_root())

Current working directory: c:\Users\Ryan Lee\Desktop\50.040 Natural Language Processing\gpt2-image-captioning


In [2]:
# Imports
import torch

from src.embeddings.dino import (
    extract_dino_embeddings,
    get_dinov3_preprocessor,
    load_dinov3_models,
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Device: use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [4]:
# Load DINO Model
# The loader is unpacked into two values; only the first one (the model) is
# used in this notebook, the second is discarded.
dino_model, _ = load_dinov3_models(
    "./dino_weights",
    device=DEVICE,
)

# Preprocessor that is passed to the extraction calls together with the model.
dino_processor = get_dinov3_preprocessor()

Loading DINOv3 model from './dino_weights' on device: cuda...


Using cache found in C:\Users\Ryan Lee/.cache\torch\hub\facebookresearch_dinov3_main


In [5]:
# Dataset location and output folder (both relative to the working directory).
DATA_DIR = "coco_data/"
OUTPUT_DIR = f"{DATA_DIR}embeddings/"

# Settings forwarded to every extraction call below.
BATCH_SIZE = 64
# Set to 0 on Windows.
# NOTE(review): the recorded outputs show Windows paths while this is 4 — confirm
# which value was actually used for the saved run.
NUM_WORKERS = 4

# Create the embeddings folder up front; an already existing folder is accepted.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# Embed Train Set
# Images are read from the train2017 folder under DATA_DIR; the result is
# written to train_dino_embeddings.pt under OUTPUT_DIR.
extract_dino_embeddings(
    dino_model=dino_model,
    dino_processor=dino_processor,
    image_dir=f"{DATA_DIR}train2017/",
    output_path=f"{OUTPUT_DIR}train_dino_embeddings.pt",
    device=DEVICE,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,  # Set to 0 on Windows
)

In [None]:
# Embed Validation Set
# Images are read from the val2017 folder under DATA_DIR; the result is
# written to val_dino_embeddings.pt under OUTPUT_DIR.
extract_dino_embeddings(
    dino_model=dino_model,
    dino_processor=dino_processor,
    image_dir=f"{DATA_DIR}val2017/",
    output_path=f"{OUTPUT_DIR}val_dino_embeddings.pt",
    device=DEVICE,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,  # Set to 0 on Windows
)

In [8]:
# Embed Test Set
# Images are read from the val2014 folder under DATA_DIR; the result is
# written to test_dino_embeddings.pt under OUTPUT_DIR.
# NOTE(review): in the official COCO releases the val2014 images are
# redistributed across train2017 and val2017 — confirm that the test images
# are filtered against the training set downstream to avoid leakage.
extract_dino_embeddings(
    dino_model=dino_model,
    dino_processor=dino_processor,
    image_dir=f"{DATA_DIR}val2014/",  # We use val2014 as test set
    output_path=f"{OUTPUT_DIR}test_dino_embeddings.pt",
    device=DEVICE,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,  # Set to 0 on Windows
)

Starting DINO embedding extraction for 40504 images...


DINO Embedding Extraction: 100%|██████████| 633/633 [24:58<00:00,  2.37s/it]


Saving 40504 embeddings to coco_data/embeddings/test_dino_embeddings.pt...
