In [1]:
import torch
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import faiss
import numpy as np
import os

#load the model and processor
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
processor = AutoImageProcessor.from_pretrained('facebook/dinov2-small')
model = AutoModel.from_pretrained('facebook/dinov2-small').to(device)

#Populate the images variable with all the images in the dataset folder
images = []
for root, dirs, files in os.walk('data/images'):
    for file in files:
        if file.endswith('jpg'):
            images.append(root  + '/'+ file)

# print(images)

#Define a function that normalizes embeddings and add them to the index
def add_vector_to_index(embedding, index):
    #convert embedding to numpy
    vector = embedding.detach().cpu().numpy()
    #Convert to float32 numpy
    vector = np.float32(vector)
    #Normalize vector: important to avoid wrong results when searching
    faiss.normalize_L2(vector)
    #Add to index
    index.add(vector)

#Create Faiss index using FlatL2 type with 384 dimensions as this
#is the number of dimensions of the features
index = faiss.IndexFlatL2(384)

import time
t0 = time.time()
for image_path in images:
    # print(image_path)
    img = Image.open(image_path).convert('RGB')
    with torch.no_grad():
        inputs = processor(images=img, return_tensors="pt").to(device)
        outputs = model(**inputs)
    features = outputs.last_hidden_state
    add_vector_to_index( features.mean(dim=1), index)

print('Extraction done in :', time.time()-t0)

#Store the index locally
faiss.write_index(index,"vector.index")

  from .autonotebook import tqdm as notebook_tqdm


['data/images//add2d174e53d0c63552c.jpg', 'data/images//72fe5d5b6a12834cda03.jpg', 'data/images//cb27f682c1cb289571da.jpg', 'data/images//98439ec9a98040de1991.jpg', 'data/images//f221c938f2711b2f4260.jpg', 'data/images//dafa20781731fe6fa720.jpg', 'data/images//1c056e915ad8b386eac9.jpg', 'data/images//719d77f147b8aee6f7a9.jpg', 'data/images//7d19fea8cae123bf7af0.jpg', 'data/images//cb34d8a9ece005be5cf1.jpg', 'data/images//6488ac919bd872862bc9.jpg', 'data/images//cdc0bf46880f6151381e.jpg', 'data/images//dce57a744e3da763fe2c.jpg', 'data/images//683a3477033eea60b32f.jpg', 'data/images//83368da6b7ef5eb107fe.jpg', 'data/images//2244a254951d7c43250c.jpg', 'data/images//0f0dceb5fefc17a24eed.jpg', 'data/images//5f5fe5d0d2993bc76288.jpg', 'data/images//e7a24f2d7b64923acb75.jpg', 'data/images//65d672094540ac1ef551.jpg', 'data/images//67b7f85cc015294b7004.jpg', 'data/images//eeca7e684921a07ff930.jpg', 'data/images//7cb6fe64cb2d22737b3c.jpg', 'data/images//59121b3d2c74c52a9c65.jpg', 'data/images//9

In [3]:
import faiss
import numpy as np
import torch
from transformers import AutoImageProcessor, AutoModel
from PIL import Image

#input image
image = Image.open('/home/hieutm8/Projects/DEEPFAKE_SIMILARITY_SEARCH/data/images/0ae050ad67e48ebad7f5.jpg')

#Load the model and processor
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
processor = AutoImageProcessor.from_pretrained('facebook/dinov2-small')
model = AutoModel.from_pretrained('facebook/dinov2-small').to(device)

#Extract the features
with torch.no_grad():
    inputs = processor(images=image, return_tensors="pt").to(device)
    outputs = model(**inputs)

#Normalize the features before search
embeddings = outputs.last_hidden_state
embeddings = embeddings.mean(dim=1)
vector = embeddings.detach().cpu().numpy()
vector = np.float32(vector)
faiss.normalize_L2(vector)

#Read the index file and perform search of top-3 images
# index = faiss.read_index("vector.index")
d,i = index.search(vector,3)
print('distances:', d, 'indexes:', i)

distances: [[0.         0.00604684 0.00885429]] indexes: [[196  13 120]]
