# In [None]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity

# Paths to the dataset and query images (trailing slash required: file names
# are appended by plain string concatenation further down).
dataset_path = "path/to/dataset/folder/"
query_path = "path/to/query/folder/"

# SIFT detector/descriptor. OpenCV >= 4.4 ships SIFT in the main module as
# cv2.SIFT_create(); older builds only expose it through the opencv-contrib
# module cv2.xfeatures2d, so fall back to that when the main factory is absent.
if hasattr(cv2, "SIFT_create"):
    sift = cv2.SIFT_create()
else:
    sift = cv2.xfeatures2d.SIFT_create()

# Number of K-means clusters, i.e. the size of the visual vocabulary and the
# length of every bag-of-features histogram.
k = 100

# Load the dataset images (1.jpg ... 1000.jpg) and extract SIFT features.
# dataset_images[n] and dataset_features[n] always describe the same image.
dataset_images = []
dataset_features = []
for i in range(1, 1001):
    img_path = dataset_path + str(i) + ".jpg"
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        raise FileNotFoundError("Could not read dataset image: " + img_path)
    img = cv2.resize(img, (300, 300))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kp, desc = sift.detectAndCompute(gray, None)
    if desc is None:
        # No keypoints were detected. Store an empty descriptor matrix so the
        # entry stays aligned with dataset_images and can still be stacked.
        desc = np.empty((0, sift.descriptorSize()), dtype=np.float32)
    dataset_images.append(img)
    dataset_features.append(desc)

# Keep a handle on the per-image descriptor arrays: the histograms below need
# one entry per image, so this list must survive the stacking step.
dataset_descriptors = dataset_features

# Stack all the feature descriptors vertically and cluster them with K-means.
# Images without keypoints (descriptors None or empty) contribute nothing.
dataset_features = np.vstack(
    [desc for desc in dataset_descriptors if desc is not None and len(desc) > 0]
)
# n_init and random_state are pinned so the visual vocabulary is reproducible
# across runs and scikit-learn versions.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(dataset_features)

# Compute one BoF histogram per dataset image: assign each of the image's
# descriptors to its nearest cluster and count the assignments per cluster.
# Index n of dataset_histograms corresponds to index n of the descriptor list.
dataset_histograms = []
for desc in dataset_descriptors:
    if desc is None or len(desc) == 0:
        # Image without descriptors: all-zero histogram keeps the alignment
        hist = np.zeros(k, dtype=np.intp)
    else:
        labels = kmeans.predict(desc)
        hist = np.bincount(labels, minlength=k)
    dataset_histograms.append(hist)

# Load the query images (query1.jpg ... query3.jpg) and compute their BoF
# histograms against the K-means vocabulary fitted on the dataset.
# query_images[n] and query_histograms[n] describe the same query.
query_images = []
query_histograms = []
for i in range(1, 4):
    img_path = query_path + "query" + str(i) + ".jpg"
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        raise FileNotFoundError("Could not read query image: " + img_path)
    img = cv2.resize(img, (300, 300))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kp, desc = sift.detectAndCompute(gray, None)
    if desc is None or len(desc) == 0:
        # No keypoints detected: there is nothing to assign to clusters, so
        # the histogram is all zeros (kmeans.predict cannot take None).
        hist = np.zeros(k, dtype=np.intp)
    else:
        labels = kmeans.predict(desc)
        # One count per cluster id in [0, k)
        hist = np.bincount(labels, minlength=k)
    query_images.append(img)
    query_histograms.append(hist)

# Cosine similarity of every query histogram against all dataset histograms.
# similarities[q] is a 1-D array holding one score per dataset histogram.
similarities = [
    cosine_similarity(query_hist.reshape(1, -1), dataset_histograms)[0]
    for query_hist in query_histograms
]

# For each query, order the dataset images by descending similarity score,
# keep the best three, and show them alongside the query image.
for i, query_img in enumerate(query_images):
    similarity_scores = similarities[i]
    descending_order = np.argsort(similarity_scores)[::-1]
    ranked_indices = descending_order[:3]
    ranked_images = [dataset_images[idx] for idx in ranked_indices]

    # One window for the query, one numbered window per retrieved image
    cv2.imshow("Query Image", query_img)
    for j, ranked_img in enumerate(ranked_images):
        cv2.imshow("Similar Image {}".format(j + 1), ranked_img)
    # Block until a key is pressed, then close the windows before the next query
    cv2.waitKey(0)
    cv2.destroyAllWindows()
