In [2]:
import cv2
import numpy as np
import faiss
import pickle
import requests
import os

In [31]:
FAISS_INDEX_PATH = "../data/faiss_index.bin"
FAISS_METADATA_PATH = "../data/faiss_metadata.pkl"
DIMENSIONS = 128  # Descriptor dimensions (e.g., SIFT: 128)

In [None]:
def get_image_from_url(url):
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Could not fetch image. Status code: {response.status_code}")
        return
    return cv2.imdecode(np.frombuffer(response.content,dtype=np.int8),cv2.IMREAD_COLOR)

In [33]:
def load_faiss_index():
    try:
        # Check if the file exists and is not empty
        if os.path.exists(FAISS_INDEX_PATH) and os.path.getsize(FAISS_INDEX_PATH) > 0:
            index = faiss.read_index(FAISS_INDEX_PATH)
            with open(FAISS_METADATA_PATH, "rb") as f:
                metadata = pickle.load(f)
            product_ids = metadata["product_ids"]
            return index, product_ids
        else:
            raise FileNotFoundError("Index file is empty or doesn't exist.")
    except FileNotFoundError:
        # Create a new index with L2 distance metric if the file is missing or empty
        index = faiss.IndexFlatL2(DIMENSIONS)
        return index, []

# Save Faiss index and metadata
def save_faiss_index(index, product_ids):
    faiss.write_index(index, FAISS_INDEX_PATH)
    metadata = {"product_ids": product_ids}
    with open(FAISS_METADATA_PATH, "wb") as f:
        pickle.dump(metadata, f)
    print("Faiss index and metadata saved.")

### CREATE HISTOGRAM DESCRIPTOR FOR IMAGES

In [None]:
def compute_descriptors(image, detector):
    keypoints, descriptors = detector.detectAndCompute(image, None)
    return descriptors

def build_visual_vocabulary_faiss(descriptors_list, num_words):
    all_descriptors = np.vstack(descriptors_list).astype('float32')  # Combine all descriptors
    dimension = all_descriptors.shape[1]
    
    kmeans = faiss.Kmeans(d=dimension, k=num_words, niter=20, verbose=True, gpu=False)
    kmeans.train(all_descriptors)
    return kmeans

def compute_histogram_faiss(descriptors, kmeans):
    _, words = kmeans.index.search(descriptors.astype('float32'), 1)  # Assign descriptors to visual words
    histogram, _ = np.histogram(words, bins=np.arange(kmeans.k + 1), range=(0, kmeans.k))
    return histogram / np.linalg.norm(histogram)  # Normalize histogram

### UPDATE FIASS MODEL WITH NEW INPUTS

In [6]:
def add_descriptor_to_faiss(new_images):
    index, product_ids = load_faiss_index()
    sift = cv2.SIFT_create()
    kmeans = build_visual_vocabulary_faiss(descriptors_list, num_words)
    # Extract descriptors from new images
    descriptors_list = []
    for product_id, image_path in new_images:
        image = get_image_from_url(image_path)
        if image is None:
            print(f"Error loading image: {image_path}")
            continue
        histo_descriptor = compute_histogram_faiss(sift, kmeans)
        if histo_descriptor is not None:
            descriptors_list.append(histo_descriptor)
            product_ids.extend([product_id])
        else:
            print(f"No descriptors found for image: {image_path}")

    if descriptors_list:
        # Combine descriptors into a single matrix
        combined_descriptors = np.vstack(descriptors_list).astype(np.float32)

        # Add descriptors to Faiss index
        index.add(combined_descriptors)

        # Save updated index and metadata
        save_faiss_index(index, product_ids)
        print(f"Added {len(descriptors_list)} images to Faiss index.")
    else:
        print("No valid descriptors to add.")

# Search for similar images


In [None]:
def search_similar_images(url, k=5):
    index, product_ids = load_faiss_index()
    sift = cv2.SIFT_create()
    
    # Extract descriptors from the query image
    query_image = get_image_from_url(url)
    _, query_descriptors = sift.detectAndCompute(query_image, None)

    if query_descriptors is not None:
        # Search the index
        query_descriptors = query_descriptors.astype(np.float32)
        distances, indices = index.search(query_descriptors, k)

        # Map indices to product IDs
        results = {}
        for i in range(len(indices)):
            for j, idx in enumerate(indices[i]):
                if idx < len(product_ids):
                    product_id = product_ids[idx]
                    results[product_id] = results.get(product_id, 0) + 1

        # Sort and return most frequent matches
        sorted_results = sorted(results.items(), key=lambda x: x[1], reverse=True)
        print(f"Top {k} matches: {sorted_results[:k]}")
    else:
        print("No descriptors found for the query image.")

In [None]:
new_images = [
        (300001, "https://m.media-amazon.com/images/I/31UISB90sYL._AC_UL320_.jpg"),
        (300002, "https://m.media-amazon.com/images/I/51JFb7FctDL._AC_UL320_.jpg"),
        (300003, "https://m.media-amazon.com/images/I/61ZzcguzB1L._AC_UL320_.jpg"),
        (300004, "https://m.media-amazon.com/images/I/41O8pnwGG+L._AC_UL320_.jpg"),
        (300005, "https://m.media-amazon.com/images/I/41e6oqY5-ZL._AC_UL320_.jpg"),
        (300006, "https://m.media-amazon.com/images/I/81ZZPvIWnYL._AC_UL320_.jpg"),
        (300007, "https://m.media-amazon.com/images/I/51PVY5idbhL._AC_UL320_.jpg"),
        (300008, "https://m.media-amazon.com/images/I/51yB+3-eJwL._AC_UL320_.jpg"),
        (300009,"https://m.media-amazon.com/images/I/71e1SSUhZeL._AC_UL320_.jpg"),
    ]

sift = cv2.SIFT_create()
num_words = 100
dimension = num_words
add_descriptor_to_faiss(new_images)


    # Search for similar images
# search_similar_images("query_image.jpg", k=5)

Faiss index and metadata saved.
Added 9 images to Faiss index.


In [46]:
search_similar_images("https://m.media-amazon.com/images/I/6161gqzzYlL._AC_UL320_.jpg", k=5)

Top 5 matches: [(300002, 92), (300006, 66), (300007, 41), (300008, 41), (300005, 31)]


In [11]:
with open("../data/faiss_index.bin", "wb") as f:
    data = b"Hello, world!"  # Example binary data (string converted to bytes)
    f.write(data)

In [5]:
print(np.vstack(np.array([[1,2,3,4],[2,3,4,6]])).astype(np.float32))

[[1. 2. 3. 4.]
 [2. 3. 4. 6.]]
