In [1]:
import os, json
from PIL import Image
from tqdm import tqdm
import numpy as np
import torch
import clip

device = "cuda" if torch.backends.mps.is_available() else "cpu"

/opt/amdgpu/share/libdrm/amdgpu.ids: No such file or directory


# IMAGES DOWNLOAD

In [2]:
!mkdir -p coco

In [3]:
!curl -L "http://images.cocodataset.org/zips/train2017.zip" -o coco/train2017.zip
!unzip -q coco/train2017.zip -d coco

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 18441M 100 18441M   0     0 13120k     0   0:23:59  0:23:59 --:--:-- 20282k


# EMBED USING CLIP VIT/B32

In [4]:
model, preprocess = clip.load("ViT-B/32", device=device)

In [5]:
IMAGE_DIR = "data/coco/train2017"
IMG_JSON = "data/image_clip_b32.json"

In [6]:
results = []
def embed_batch(images):
    # Preprocess each image. stack into a batch
    imgs = torch.stack([preprocess(img).to(device) for img in images])

    with torch.no_grad():
        vec = model.encode_image(imgs).float()

    # norm for cosine similarity
    vec = vec / vec.norm(dim=-1, keepdim=True)

    return vec.cpu().numpy()


In [7]:
files = sorted(os.listdir(IMAGE_DIR))
batch_imgs = []
batch_paths = []

for fname in tqdm(files):
    if not fname.endswith(".jpg"):
        continue

    img_path = os.path.join(IMAGE_DIR, fname)

    try:
        img = Image.open(img_path).convert("RGB")
        batch_imgs.append(img)
        batch_paths.append(img_path)

        if len(batch_imgs) == 32:
            vecs = embed_batch(batch_imgs)
            for p, v in zip(batch_paths, vecs):
                results.append({
                    "image_path": p,
                    "embedding": v.tolist()
                })
            batch_imgs, batch_paths = [], []  # reset

    except Exception as e:
        print("E:", fname, e)

# Process final partial batch
if batch_imgs:
    vecs = embed_batch(batch_imgs)
    for p, v in zip(batch_paths, vecs):
        results.append({
            "image_path": p,
            "embedding": v.tolist()
        })

FileNotFoundError: [Errno 2] No such file or directory: 'data/coco/train2017'

In [None]:
with open(IMG_JSON, "w") as f:
    json.dump(results, f)

print("Saved:", IMG_JSON)
print("Total images embedded:", len(results))
