In [1]:
%pip install faiss-cpu

Note: you may need to restart the kernel to use updated packages.


In [None]:
import faiss
import numpy as np
from PIL import Image

In [None]:
def extract_pixel_blocks(image_path,block_size):
    image = Image.open(image_path).convert('RGB')
    image_np = np.array(image)

    height,width,_ = image_np.shape

    pixel_blocks = []
    for i in range(0,height - block_size[0] + 1, block_size[0]):
        for j in range(0,width - block_size[1]+1, block_size[1]):
            block = image_np[i:i + block_size[0],j:j + block_size[1],:].flatten()
            pixel_blocks.append(block)

    return np.array(pixel_blocks,dtype=np.float32)

In [None]:
input_path = r'/content/DALL·E-2.jpeg'
pixel_blocks = extract_pixel_blocks(input_path, (3, 4))

In [None]:
d = pixel_blocks.shape[1]
index = faiss.IndexFlatL2(d)
index.add(pixel_blocks)

In [None]:
query_block = pixel_blocks[0].reshape(1, -1)
k = 4
distances, indices = index.search(query_block, k)

In [None]:
print(f"Indices of nearest neighbors: {indices}")
print(f"Distances to nearest neighbors: {distances}")

Indices of nearest neighbors: [[    0 14759 11752 67581]]
Distances to nearest neighbors: [[    0. 17262. 18087. 19349.]]


In [None]:
nlist = 100
quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFFlat(quantizer, d, nlist)
index.train(pixel_blocks)
index.add(pixel_blocks)


index.nprobe = 10
distances, indices = index.search(query_block, k)
print(f"Indices of nearest neighbors: {indices}")
print(f"Distances to nearest neighbors: {distances}")

Indices of nearest neighbors: [[    0 14759 11752 67581]]
Distances to nearest neighbors: [[    0. 17262. 18087. 19349.]]


In [None]:
import time

start_time = time.time()
index.train(pixel_blocks)
index.add(pixel_blocks)
print(f"Indexing time: {time.time() - start_time} seconds")

start_time = time.time()
distances, indices = index.search(query_block, k)
print(f"Search time: {time.time() - start_time} seconds")


index_flat = faiss.IndexFlatL2(d)
index_flat.add(pixel_blocks)
start_time = time.time()
bf_distances, bf_indices = index_flat.search(query_block, k)
print(f"Brute-force search time: {time.time() - start_time} seconds")

print(f"FAISS indices: {indices}")
print(f"Brute-force indices: {bf_indices}")


Indexing time: 0.20741677284240723 seconds
Search time: 0.0008058547973632812 seconds
Brute-force search time: 0.009924888610839844 seconds
FAISS indices: [[     0  87296  14759 102055]]
Brute-force indices: [[    0 14759 11752 67581]]


Approximate Search:
FAISS has support for approximate nearest neighbor search. Instead of searching the whole dataset, it narrows down the search to the most promising sections of data (similar to what a KD-tree or Ball-tree does). This is done by organizing data into cells or clusters and searching only within the nearest clusters, making it faster.

Product Quantization (PQ):
FAISS also uses Product Quantization to compress the data into smaller chunks, which makes searching even faster. This compression loses a bit of accuracy but speeds up the search dramatically—perfect when you care about speed over exact accuracy.