In [1]:
import pandas as pd
import numpy as np
from PIL import Image
from torchvision import models, transforms
import faiss
import torch

device = torch.device("cpu")

data = [
        ['img/tiger.jpg', 'Animal'],
        ['img/donkey.jpg', 'Animal'],
        ['img/vehicle.jpg', 'Vehicle'],
        ['img/landscape.jpg', 'Landscape']
       ]

df = pd.DataFrame(data, columns=['image_path', 'category'])

In [2]:
df.head()

Unnamed: 0,image_path,category
0,img/tiger.jpg,Animal
1,img/donkey.jpg,Animal
2,img/vehicle.jpg,Vehicle
3,img/landscape.jpg,Landscape


In [3]:
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def load_image(image_path):
    image = Image.open(image_path).convert('RGB')
    image = preprocess(image)
    image = image.unsqueeze(0).to(device)
    return image

model = models.resnet50(pretrained=True)
model = model.to(device)
model.eval()



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [4]:
def get_image_embedding(image_tensor):
    with torch.no_grad():
        embedding = model(image_tensor)
    return embedding.cpu().numpy()

In [5]:
embeddings = np.vstack([get_image_embedding(load_image(path)) for path in df['image_path']])

In [6]:
embeddings.shape

(4, 1000)

In [8]:
vector_dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(vector_dimension)
index.add(embeddings)

In [9]:
search_image_path = 'img/landscape2.jpg'
search_image = load_image(search_image_path)
search_vector = get_image_embedding(search_image)

search_vector = np.array([search_vector.squeeze()])

D, I = index.search(search_vector, k=4)

print(D, I)

[[1770.3181 6114.4766 7326.0547 7768.5684]] [[3 1 0 2]]


In [10]:
top_indices = I[0] 

top_results = df.iloc[top_indices]

top_results['distance'] = D[0]

print(top_results)

          image_path   category     distance
3  img/landscape.jpg  Landscape  1770.318115
1     img/donkey.jpg     Animal  6114.476562
0      img/tiger.jpg     Animal  7326.054688
2    img/vehicle.jpg    Vehicle  7768.568359


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['distance'] = D[0]
