## Setup

In [1]:
import os
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
from tqdm import tqdm
import numpy as np

## Load Pre-trained ResNet18 as Feature Extractor

In [8]:
from torchvision.models import resnet18, ResNet18_Weights

# Choose the compute device in this cell so the notebook survives
# "Restart Kernel -> Run All": every later cell that calls `.to(device)`
# relies on this name. Falls back to the CPU when CUDA is not available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Default pre-trained ResNet18 weights; `weights` is kept as a separate name
# because the preprocessing cell derives its transforms from it.
weights = ResNet18_Weights.DEFAULT
model = resnet18(weights=weights)

# Swap the final fully connected layer for a pass-through so the forward pass
# returns the features that would otherwise be fed to the classifier.
model.fc = torch.nn.Identity()

# Move parameters to the chosen device and switch to inference behaviour
# (eval mode). The bare `model.eval()` as last expression also displays the
# architecture as the cell output.
model = model.to(device)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

## Image Preprocessing (ResNet18 Standard)

In [10]:
# Build the preprocessing pipeline that ships with the selected weights.
# `transform` and `preprocess` are two names bound to the same object;
# `embed_image` uses `transform`.
transform = preprocess = weights.transforms()

## Embed a Single Image

In [11]:
def embed_image(img_path):
    """Return the model's feature vector for a single image file.

    Parameters
    ----------
    img_path : str or path-like
        Location of an image file that PIL can open.

    Returns
    -------
    numpy.ndarray
        The model output for the image, with singleton dimensions removed and
        copied to host memory.

    Raises
    ------
    Exception
        Errors from opening/decoding the file, from the transform, or from the
        forward pass are not handled here and propagate to the caller.
    """
    # Image.open is lazy and keeps the underlying file open. The context
    # manager closes the handle deterministically, including when decoding
    # fails. convert() returns a new, fully loaded image, so `rgb` remains
    # usable after the file has been closed.
    with Image.open(img_path) as source_image:
        rgb = source_image.convert("RGB")

    # Add a leading batch dimension of size 1 and move the tensor to the
    # device the model lives on.
    batch = transform(rgb).unsqueeze(0).to(device)

    # No gradients are needed for feature extraction.
    with torch.no_grad():
        embedding = model(batch)

    return embedding.squeeze().cpu().numpy()  # shape: (512,)

## Embed the Entire Caltech-101 Dataset

In [13]:
dataset_path = "caltech101"
embeddings = []
image_ids = []


def _listing_order(entry_name):
    """Sort key: case-insensitive first, exact name as a tie-breaker."""
    return (entry_name.lower(), entry_name)


# os.listdir returns entries in an arbitrary, filesystem-dependent order.
# Sorting makes row i of `embeddings` / `image_ids` refer to the same image on
# every run and on every platform.
for class_name in sorted(os.listdir(dataset_path), key=_listing_order):
    class_dir = os.path.join(dataset_path, class_name)
    if not os.path.isdir(class_dir):
        # Stray files next to the class folders are ignored.
        continue

    for img_name in tqdm(sorted(os.listdir(class_dir), key=_listing_order), desc=class_name):
        img_path = os.path.join(class_dir, img_name)

        try:
            vec = embed_image(img_path)
        except Exception as e:
            # Deliberate best-effort: report the unreadable entry and keep
            # going instead of aborting the whole run.
            print(f"Skipping {img_path}: {e}")
        else:
            # Append to both lists together so they stay index-aligned.
            embeddings.append(vec)
            image_ids.append(img_path)

accordion: 100%|███████████████████████████████████████████████████████████████████████| 55/55 [00:03<00:00, 18.07it/s]
airplanes: 100%|█████████████████████████████████████████████████████████████████████| 800/800 [00:26<00:00, 30.29it/s]
anchor: 100%|██████████████████████████████████████████████████████████████████████████| 42/42 [00:01<00:00, 33.67it/s]
ant: 100%|█████████████████████████████████████████████████████████████████████████████| 42/42 [00:01<00:00, 28.54it/s]
BACKGROUND_Google: 100%|█████████████████████████████████████████████████████████████| 468/468 [00:17<00:00, 26.18it/s]
barrel: 100%|██████████████████████████████████████████████████████████████████████████| 47/47 [00:01<00:00, 37.88it/s]
bass: 100%|████████████████████████████████████████████████████████████████████████████| 54/54 [00:01<00:00, 35.09it/s]
beaver: 100%|██████████████████████████████████████████████████████████████████████████| 46/46 [00:01<00:00, 26.27it/s]
binocular: 100%|████████████████████████

## Save to Disk

In [14]:
import numpy as np

# Fail fast with a readable message instead of a cryptic np.vstack error when
# nothing was embedded (e.g. every image was skipped).
if len(embeddings) == 0:
    raise ValueError("No embeddings to save: no image was embedded successfully.")

# Row i of the embedding matrix must describe image_ids[i]; refuse to write
# misaligned files.
if len(embeddings) != len(image_ids):
    raise ValueError(
        f"Mismatched lengths: {len(embeddings)} embeddings vs {len(image_ids)} image ids."
    )

embeddings = np.vstack(embeddings)  # (N, 512)
image_ids = np.array(image_ids)

# Persist both arrays side by side; they are index-aligned.
np.save("caltech101_embeddings.npy", embeddings)
np.save("caltech101_image_ids.npy", image_ids)

## Load Later

In [15]:
# Read both saved arrays back from disk, embeddings first and image ids second.
embeddings2, image_ids2 = (
    np.load(npy_file)
    for npy_file in ("caltech101_embeddings.npy", "caltech101_image_ids.npy")
)

# Quick sanity check, one value per line:
#   embeddings shape (N, 512), ids shape (N,), then the first image id.
print(embeddings2.shape, image_ids2.shape, image_ids2[0], sep="\n")

(9145, 512)
(9145,)
caltech101\accordion\image_0001.jpg


## (Optional but Recommended) Normalize for Cosine Similarity

In [16]:
from sklearn.preprocessing import normalize

# Scale every row to unit L2 norm so that the dot product of two rows equals
# their cosine similarity. This rebinds `embeddings`; the .npy file written
# earlier still holds the un-normalized vectors.
embeddings = normalize(embeddings, norm="l2", axis=1)