In [1]:
import os
import cv2 as cv
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from glob import glob
import pickle
# --- Constants ---
IMG_WIDTH, IMG_HEIGHT = 224, 224  # ResNet50 default input size
EMBEDDINGS_FILE = "db_embeddings.pkl"




In [2]:
# ---------------------------
# 1. Load pretrained model
# ---------------------------
# Exclude top layers to get embeddings
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')





In [3]:
def extract_resnet_features(img_path, model):
    img = cv.imread(img_path)
    img_resized = cv.resize(img, (IMG_WIDTH, IMG_HEIGHT))
    img_array = img_resized.astype(np.float32)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)  # preprocess for ResNet50
    features = model.predict(img_array)
    features = features.flatten()
    # Normalize to unit vector (optional, helps with cosine similarity)
    features = features / np.linalg.norm(features)
    return features

# --- Precompute embeddings for database images ---
def build_database_embeddings(database_dir="image.orig"):
    database_files = sorted(glob(os.path.join(database_dir, "*.jpg")))
    db_features_list = []

    for file in database_files:
        features = extract_resnet_features(file, model)
        db_features_list.append(features)

    db_features = np.array(db_features_list)
    # Save embeddings and file paths
    with open(EMBEDDINGS_FILE, "wb") as f:
        pickle.dump({"features": db_features, "paths": database_files}, f)
    print(f"Saved {len(database_files)} embeddings to {EMBEDDINGS_FILE}")

In [27]:
build_database_embeddings("image.orig")

Saved 1000 embeddings to db_embeddings.pkl


In [9]:
def retrieval():
    # --- Load precomputed embeddings ---
    if not os.path.exists(EMBEDDINGS_FILE):
        print("Embeddings not found! Run build_database_embeddings() first.")
        return

    with open(EMBEDDINGS_FILE, "rb") as f:
        data = pickle.load(f)
    db_features = data["features"]
    database_files = data["paths"]

    # --- Choose query image ---
    print("1: beach\n2: mountain\n3: food\n4: dinosaur\n5: flower\n6: horse\n7: elephant")
    choice = input("Type the number to choose a category: ")

    query_file_map = {
        '1': 'beach.jpg',
        '2': 'mountain.jpg',
        '3': 'food.jpg',
        '4': 'dinosaur.jpg',
        '5': 'flower.jpg',
        '6': 'horse.jpg',
        '7': 'elephant.jpg'
    }

    if choice not in query_file_map:
        print("Invalid choice")
        return

    src_path = os.path.join("image.query", query_file_map[choice])
    query_features = extract_resnet_features(src_path, model)
    print(f"You chose: {query_file_map[choice]}")

    cv.imshow("Query", cv.resize(cv.imread(src_path), (256, 256)))

    # --- Compute distances ---
    distances = np.linalg.norm(db_features - query_features, axis=1)
    closest_idx = np.argmin(distances)
    closest_file = database_files[closest_idx]

    print(f"The most similar image is {closest_file} with distance {distances[closest_idx]:.4f}")

    closest_img = cv.imread(closest_file)
    cv.imshow("Closest Match", cv.resize(closest_img, (256, 256)))
    cv.waitKey(0)
    cv.destroyAllWindows()

    exit

# --- Example usage ---
# 1) Run this once to compute and save embeddings
# build_database_embeddings("image.orig")

# 2) Then run retrieval as many times as you want
retrieval()

1: beach
2: mountain
3: food
4: dinosaur
5: flower
6: horse
7: elephant


You chose: dinosaur.jpg
The most similar image is image.orig\428.jpg with distance 0.7173
