In [3]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def load_dataset_from_folders(root_dir, target_size=(224,224)):
    """
    Load dataset from categorical folder structure:
    root_dir/class1/*.jpg
    root_dir/class2/*.jpg
    Returns:
        images: numpy array of shape (N, H, W, C)
        labels: numpy array of shape (N,)
        class_names: mapping index -> class
    """
    images, labels = [], []
    class_names = sorted(os.listdir(root_dir))
    class_to_idx = {cls: i for i, cls in enumerate(class_names)}
    
    for cls in class_names:
        cls_path = os.path.join(root_dir, cls)
        if not os.path.isdir(cls_path):
            continue
        for fname in os.listdir(cls_path):
            if fname.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
                img_path = os.path.join(cls_path, fname)
                img = load_img(img_path, target_size=target_size)
                img = img_to_array(img) / 255.0
                images.append(img)
                labels.append(class_to_idx[cls])
    
    return np.array(images), np.array(labels), class_names


In [5]:
import tensorflow.keras
import tensorflow as tf

from tensorflow.keras.models import Model
import numpy as np

# Load pre-trained SqueezeNet model without top (fully connected) layers
pure_model = tf.keras.applications.MobileNet(weights='imagenet', include_top=True, input_shape = (224,224,3))
pure_model.summary()

In [7]:
from tensorflow.keras.layers import GlobalAveragePooling2D,Dense
embedding_size = 1024

# Load pre-trained SqueezeNet model without top (fully connected) layers
base_model = tf.keras.applications.MobileNet(weights='imagenet', include_top=False, input_shape = (224,224,3))

#Remove the last layer of MobileNet
base_model_output = base_model.layers[-1].output

x = base_model.output

# Step 3: Adjust the architecture for retrieval
x = GlobalAveragePooling2D()(x)

# Add the final Dense layer with embedding_size
embeddings = Dense(embedding_size)(x)

# Add L2 normalization layer


class L2Normalization(tf.keras.layers.Layer):
    def __init__(self, axis=-1, epsilon=1e-12, **kwargs):
        super(L2Normalization, self).__init__(**kwargs)
        self.axis = axis
        self.epsilon = epsilon

    def call(self, inputs):
        return tf.math.l2_normalize(inputs, axis=self.axis, epsilon=self.epsilon)
    
    def get_config(self):
        config = super(L2Normalization, self).get_config()
        config.update({'axis': self.axis, 'epsilon': self.epsilon})
        return config

    # Apply L2 normalization
    
embeddings = tf.keras.layers.Dense(embedding_size)(x)
embeddings = L2Normalization()(embeddings)


feature_extraction_model1 = Model(inputs=base_model.input, outputs=embeddings)

feature_extraction_model1.summary()




In [8]:
from tensorflow.keras.applications import EfficientNetB0
# Load the base model
base_model2 = EfficientNetB0(weights='imagenet', include_top=False,input_shape = (224,224,3))
base_model2.summary()

In [10]:
from tensorflow.keras.layers import GlobalAveragePooling2D, Concatenate
from tensorflow.keras.models import Model

# Extract intermediate layers
layer4 = base_model2.get_layer('block2a_expand_activation').output
layer5 = base_model2.get_layer('block4a_expand_activation').output
layer6 = base_model2.get_layer('block6a_expand_activation').output
last_layer = base_model2.get_layer('top_activation').output  # Usually the last conv layer before classification

# Apply Global Average Pooling to each layer
layer4 = GlobalAveragePooling2D()(layer4)
layer5 = GlobalAveragePooling2D()(layer5)
layer6 = GlobalAveragePooling2D()(layer6)
last_layer = GlobalAveragePooling2D()(last_layer)  # 1280 features

# Concatenate all features
concatenated_features2 = Concatenate()([layer4, layer5, layer6, last_layer])

# Create the feature extraction model
feature_extraction_model2 = Model(inputs=base_model2.input, outputs=concatenated_features2)

# Check the model summary
feature_extraction_model2.summary()


In [11]:
#obtain file names 
import os
# path to the datasets
db_root_dir = 'C:\\Data Drive\\Datasets\\corel-1k\\dataset\\training_set'


In [15]:
query_root_dir = "C:\\Data Drive\\Datasets\\corel-1k\\dataset\\test_set"   # <-- put your query images here

In [17]:
def extract_and_save_features(model, images, labels, save_path=None):
    """Extract features from a model, optionally save to NPZ."""
    features = model.predict(images, batch_size=32, verbose=1)
    if save_path:
        np.savez(save_path, features=features, labels=labels)
        print(f"✅ Saved features to {save_path}")
    return features, labels



In [19]:

# Database set
db_images, db_labels, class_names = load_dataset_from_folders(db_root_dir)
print("DB loaded:", db_images.shape, db_labels.shape, "Classes:", len(class_names))

# Query set
query_images, query_labels, _ = load_dataset_from_folders(query_root_dir)
print("Queries loaded:", query_images.shape, query_labels.shape)


DB loaded: (900, 224, 224, 3) (900,) Classes: 10
Queries loaded: (100, 224, 224, 3) (100,)


In [21]:
# MobileNet
db_feats_mobilenet, _ = extract_and_save_features(feature_extraction_model1, db_images, db_labels, "mobilenet_features.npz")
query_feats_mobilenet, _ = extract_and_save_features(feature_extraction_model1, query_images, query_labels)

# EfficientNet
db_feats_efficientnet, _ = extract_and_save_features(feature_extraction_model2, db_images, db_labels, "efficientnet_features.npz")
query_feats_efficientnet, _ = extract_and_save_features(feature_extraction_model2, query_images, query_labels)

# Combined
db_feats_combined = np.concatenate([db_feats_mobilenet, db_feats_efficientnet], axis=1)
query_feats_combined = np.concatenate([query_feats_mobilenet, query_feats_efficientnet], axis=1)
np.savez("combined_features.npz", features=db_feats_combined, labels=db_labels)


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 737ms/step
✅ Saved features to mobilenet_features.npz
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 503ms/step
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 1s/step
✅ Saved features to efficientnet_features.npz
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 834ms/step


In [27]:
import time
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, roc_curve, auc

# -------------------
# Retrieval Function
# -------------------
def run_retrieval_with_queries(features_db, labels_db, features_query, labels_query, algo="brute", metric="euclidean", k=90):
    start = time.time()
    nbrs = NearestNeighbors(n_neighbors=k, algorithm="brute" if algo=="bruteforce" else algo, metric=metric)
    nbrs.fit(features_db)

    distances, indices = nbrs.kneighbors(features_query)
    scores = -distances  # higher is better

    mAPs, recalls, precisions, f1s = [], [], [], []
    for i in range(len(labels_query)):
        retrieved_labels = labels_db[indices[i]]
        q_label = labels_query[i]

        # binary relevance
        relevant = (retrieved_labels == q_label).astype(int)

        total_relevant = np.sum(labels_db == q_label)
        if total_relevant == 0:
            continue

        # Precision@k, Recall@k
        prec = np.sum(relevant) / len(relevant)
        rec = np.sum(relevant) / total_relevant
        f1 = (2 * prec * rec) / (prec + rec + 1e-8)

        # Average Precision
        ap = average_precision_score(relevant, scores[i])

        precisions.append(prec)
        recalls.append(rec)
        f1s.append(f1)
        mAPs.append(ap)

    end = time.time()
    art = (end - start) * 1000 / len(labels_query)  # ms per query

    return np.mean(mAPs), np.mean(recalls), np.mean(f1s), art, scores, indices


# -------------------
# Curve Plotting
# -------------------
def plot_curves(query_labels, scores, indices, db_labels, title, outdir, prefix, k=90):
    y_true, y_score = [], []
    for i in range(len(query_labels)):
        retrieved_labels = db_labels[indices[i]]
        rels = (retrieved_labels == query_labels[i]).astype(int)
        y_true.extend(rels)
        y_score.extend(scores[i])

    # Precision-Recall
    precision, recall, _ = precision_recall_curve(y_true, y_score)
    plt.figure()
    plt.plot(recall, precision, label="PR curve")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall: {title}")
    plt.legend()
    plt.savefig(f"{outdir}/{prefix}_PR.png")
    plt.close()

    # ROC
    fpr, tpr, _ = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)
    plt.figure()
    plt.plot(fpr, tpr, label=f"ROC curve (AUC={roc_auc:.2f})")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title(f"ROC: {title}")
    plt.legend()
    plt.savefig(f"{outdir}/{prefix}_ROC.png")
    plt.close()


# -------------------
# Run Experiments
# -------------------
outdir = "results"
cutoffs = [1, 5, 10, 20, 50]
k_curve = 90

results = []

for model_name, db_feats, query_feats in [
    ("MobileNet", db_feats_mobilenet, query_feats_mobilenet),
    ("EfficientNetB0-Intermediate", db_feats_efficientnet, query_feats_efficientnet),
    ("Combined", db_feats_combined, query_feats_combined)
]:
    for algo in ["bruteforce", "ball_tree", "kd_tree"]:
        print(f"\nRunning {model_name} with {algo}...")

        mAP, mAR, F1, art, scores, indices = run_retrieval_with_queries(
            db_feats, db_labels, query_feats, query_labels,
            algo=algo, metric="euclidean", k=k_curve
        )

        # Store results
        results.append([model_name, algo, mAP, mAR, F1, art])
        print(f"  mAP={mAP:.4f}, Recall={mAR:.4f}, F1={F1:.4f}, ART={art:.2f} ms/query")

        # Plot curves
        plot_curves(query_labels, scores, indices, db_labels, model_name, outdir, f"{model_name}_{algo}", k=k_curve)

# Save summary
summary_df = pd.DataFrame(results, columns=["Model", "Algorithm", "mAP", "mAR", "F1", "ART(ms)"])
summary_df.to_csv("retrieval_summary.csv", index=False)
print("\nSaved retrieval_summary.csv")



Running MobileNet with bruteforce...
  mAP=0.9012, Recall=0.7889, F1=0.7889, ART=4.49 ms/query

Running MobileNet with ball_tree...
  mAP=0.9012, Recall=0.7889, F1=0.7889, ART=3.22 ms/query

Running MobileNet with kd_tree...
  mAP=0.9012, Recall=0.7889, F1=0.7889, ART=4.14 ms/query

Running EfficientNetB0-Intermediate with bruteforce...




  mAP=0.4804, Recall=0.3340, F1=0.3340, ART=5.74 ms/query

Running EfficientNetB0-Intermediate with ball_tree...




  mAP=0.4804, Recall=0.3340, F1=0.3340, ART=6.33 ms/query

Running EfficientNetB0-Intermediate with kd_tree...




  mAP=0.4804, Recall=0.3340, F1=0.3340, ART=10.50 ms/query

Running Combined with bruteforce...
  mAP=0.9013, Recall=0.7900, F1=0.7900, ART=4.65 ms/query

Running Combined with ball_tree...
  mAP=0.9013, Recall=0.7900, F1=0.7900, ART=13.02 ms/query

Running Combined with kd_tree...
  mAP=0.9013, Recall=0.7900, F1=0.7900, ART=20.68 ms/query

Saved retrieval_summary.csv


In [28]:
import time
from tensorflow.keras.applications import MobileNet  
from tensorflow.keras.applications.mobilenet import preprocess_input as mobilenet_preprocess
from tensorflow.keras.applications.efficientnet import preprocess_input as efficientnet_preprocess
import time
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree
# Paths
features_dir = "C:\\Users\\asus"

outdir = "C:\\Users\\asus\\results"
k=5
for algo in ["bruteforce", "ball_tree", "kd_tree"]:
    print(f"Running MobileNet with {algo}...")
    mAP, mAR, F1, art, scores, indices = run_retrieval_with_queries(
        db_feats_mobilenet, db_labels, query_feats_mobilenet, query_labels,
        algo=algo, metric="euclidean", k=k
    )
    plot_curves(query_labels, scores, indices, db_labels, "MobileNet", outdir, f"mobilenet_{algo}", k=k)

    print(f"Running EfficientNet with {algo}...")
    mAP, mAR, F1, art, scores, indices = run_retrieval_with_queries(
        db_feats_efficientnet, db_labels, query_feats_efficientnet, query_labels,
        algo=algo, metric="euclidean", k=k
    )
    plot_curves(query_labels, scores, indices, db_labels, "EfficientNet", outdir, f"efficientnet_{algo}", k=k)

    print(f"Running Combined with {algo}...")
    mAP, mAR, F1, art, scores, indices = run_retrieval_with_queries(
        db_feats_combined, db_labels, query_feats_combined, query_labels,
        algo=algo, metric="euclidean", k=k
    )
    plot_curves(query_labels, scores, indices, db_labels, "Combined", outdir, f"combined_{algo}", k=k)


Running MobileNet with bruteforce...




Running EfficientNet with bruteforce...




Running Combined with bruteforce...




Running MobileNet with ball_tree...




Running EfficientNet with ball_tree...




Running Combined with ball_tree...




Running MobileNet with kd_tree...




Running EfficientNet with kd_tree...




Running Combined with kd_tree...


