# Extracting Image Embeddings using CLIP

In [None]:
# Move up to project root directory (parent directory) for module imports
import os


def ensure_project_root(marker="src"):
    """Make the project root the current working directory.

    The notebook is expected to start one level below the project root. The
    directory change is skipped when ``marker`` already exists in the current
    working directory, so re-running this cell (or starting the kernel at the
    project root) does not climb above the root.

    Args:
        marker: Name of a directory expected directly under the project root.
            Defaults to "src", the package this notebook imports from.

    Returns:
        The current working directory after the (possible) change.
    """
    # Only step up when we are not already at the root; an unconditional
    # chdir moves one level higher on every re-execution of the cell.
    if not os.path.isdir(marker):
        os.chdir("../")
    return os.getcwd()


# Current working directory should now be project root
print("Current working directory:", ensure_project_root())

In [None]:
# Imports
import torch

from src.embeddings.clip import extract_clip_embeddings, load_clip_model

In [None]:
# Device: use CUDA when torch reports it as available, otherwise the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [None]:
# Load CLIP Model and Processor
# The loader is called with DEVICE as its `device` argument and its result is
# unpacked into two names, which the embedding cells below pass on unchanged.
# NOTE(review): presumably the model is placed on DEVICE by the loader —
# confirm in src.embeddings.clip.
clip_model, clip_processor = load_clip_model(device=DEVICE)

In [None]:
# Paths (relative to the current working directory)
DATA_DIR = "coco_data/"
OUTPUT_DIR = f"{DATA_DIR}embeddings/"

# Create the output directory up front; an existing directory is not an error
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Settings shared by every extract_clip_embeddings call below
BATCH_SIZE = 64
NUM_WORKERS = 4  # Set to 0 on Windows

In [None]:
# Embed Train Set
# Passes train2017/ under DATA_DIR as the image directory and
# train_clip_embeddings.pt under OUTPUT_DIR as the output path.
extract_clip_embeddings(
    clip_model=clip_model,
    clip_processor=clip_processor,
    image_dir=f"{DATA_DIR}train2017/",
    output_path=f"{OUTPUT_DIR}train_clip_embeddings.pt",
    num_workers=NUM_WORKERS,  # Set to 0 on Windows
    batch_size=BATCH_SIZE,
)

In [None]:
# Embed Validation Set
# Passes val2017/ under DATA_DIR as the image directory and
# val_clip_embeddings.pt under OUTPUT_DIR as the output path.
extract_clip_embeddings(
    clip_model=clip_model,
    clip_processor=clip_processor,
    image_dir=f"{DATA_DIR}val2017/",
    output_path=f"{OUTPUT_DIR}val_clip_embeddings.pt",
    num_workers=NUM_WORKERS,  # Set to 0 on Windows
    batch_size=BATCH_SIZE,
)

In [None]:
# Embed Test Set
# Passes test2017/ under DATA_DIR as the image directory and
# test_clip_embeddings.pt under OUTPUT_DIR as the output path.
extract_clip_embeddings(
    clip_model=clip_model,
    clip_processor=clip_processor,
    image_dir=f"{DATA_DIR}test2017/",
    output_path=f"{OUTPUT_DIR}test_clip_embeddings.pt",
    num_workers=NUM_WORKERS,  # Set to 0 on Windows
    batch_size=BATCH_SIZE,
)