In [120]:
import cv2 as cv
import numpy as np
from glob import glob
import tensorflow	as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Model
import os, random
from tensorflow.nn import softmax

In [121]:
files = []

folder_path = "image.orig"

for item in os.listdir(folder_path):
    item_path = os.path.join(folder_path, item)  # Get full path
    if item.split(".")[1] == "jpg":
        files += [f"{item}"]

print(files)

['0.jpg', '1.jpg', '10.jpg', '100.jpg', '101.jpg', '102.jpg', '103.jpg', '104.jpg', '105.jpg', '106.jpg', '107.jpg', '108.jpg', '109.jpg', '11.jpg', '110.jpg', '111.jpg', '112.jpg', '113.jpg', '114.jpg', '115.jpg', '116.jpg', '117.jpg', '118.jpg', '119.jpg', '12.jpg', '120.jpg', '121.jpg', '122.jpg', '123.jpg', '124.jpg', '125.jpg', '126.jpg', '127.jpg', '128.jpg', '129.jpg', '13.jpg', '130.jpg', '131.jpg', '132.jpg', '133.jpg', '134.jpg', '135.jpg', '136.jpg', '137.jpg', '138.jpg', '139.jpg', '14.jpg', '140.jpg', '141.jpg', '142.jpg', '143.jpg', '144.jpg', '145.jpg', '146.jpg', '147.jpg', '148.jpg', '149.jpg', '15.jpg', '150.jpg', '151.jpg', '152.jpg', '153.jpg', '154.jpg', '155.jpg', '156.jpg', '157.jpg', '158.jpg', '159.jpg', '16.jpg', '160.jpg', '161.jpg', '162.jpg', '163.jpg', '164.jpg', '165.jpg', '166.jpg', '167.jpg', '168.jpg', '169.jpg', '17.jpg', '170.jpg', '171.jpg', '172.jpg', '173.jpg', '174.jpg', '175.jpg', '176.jpg', '177.jpg', '178.jpg', '179.jpg', '18.jpg', '180.jpg', 

In [122]:

images = []
labels = ["africa", "beach", "ancient", "bus", "dinosaur", "elephant",  "flower"    , "horse", "mountain", "food"]
train_labels = []

# Target size 
IMG_HEIGHT = 128
IMG_WIDTH = 128

for file in files:
    img = cv.imread("image.orig/" + file)
    if img is not None:
        img = cv.resize(img, (IMG_WIDTH, IMG_HEIGHT))  # Resize to 32x32
        images.append(img)

for i in range(len(labels)):
    train_labels += [i] * 100



In [123]:
# --- Helper functions for augmentation ---
def rotate_image(img, angle_range=(-15, 15)):
    angle = np.random.uniform(*angle_range)
    h, w = img.shape[:2]
    M = cv.getRotationMatrix2D((w/2, h/2), angle, 1)
    return cv.warpAffine(img, M, (w, h))

def random_flip(img):
    if np.random.rand() > 0.5:
        img = cv.flip(img, 1)  # horizontal flip
    return img

def random_brightness(img, factor=0.2):
    # Ensure image is 3-channel uint8
    if len(img.shape) != 3 or img.shape[2] != 3:
        img = cv.cvtColor(img, cv.COLOR_GRAY2BGR)

    img = img.astype(np.uint8)
    hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
    h, s, v = cv.split(hsv)
    # Ensure v is uint8
    v = np.clip(v.astype(np.float32) * (1 + np.random.uniform(-factor, factor)), 0, 255).astype(np.uint8)
    hsv = cv.merge([h, s, v])
    img_out = cv.cvtColor(hsv, cv.COLOR_HSV2BGR)
    return img_out

def augment_image(img):
    img = rotate_image(img)
    img = random_flip(img)
    img = random_brightness(img)
    return img

# --- Shuffle dataset ---
combined = list(zip(images, train_labels))
random.shuffle(combined)
images, train_labels = zip(*combined)
images = list(images)
train_labels = list(train_labels)

# --- Split into training and testing sets ---
split_ratio = 0.8
split_index = int(len(images) * split_ratio)

original_train_images = images[:split_index]
train_labels_final = train_labels[:split_index]

test_images = np.array(images[split_index:])
test_labels_final = np.array(train_labels[split_index:])

# --- Augment training images to increase dataset size ---
augmented_images = []
augmented_labels = []

AUG_PER_IMAGE = 3  # number of augmented copies per original image

for img, label in zip(original_train_images, train_labels_final):
    augmented_images.append(img)          # keep original
    augmented_labels.append(label)
    for _ in range(AUG_PER_IMAGE):
        aug_img = augment_image(img)
        augmented_images.append(aug_img)
        augmented_labels.append(label)

# Convert to NumPy arrays and normalize
train_images = np.array(augmented_images).astype(np.float32) / 255.0
train_labels_final = np.array(augmented_labels)

# Test set remains unchanged and normalized
test_images = test_images.astype(np.float32) / 255.0

print(f"Training set after augmentation: {len(train_images)} images")
print(f"Testing set: {len(test_images)} images")

Training set after augmentation: 3200 images
Testing set: 200 images


In [124]:
model = models.Sequential([
    layers.Conv2D(4, (3,3), activation='relu', input_shape=(128,128,3)),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(8, (5,5), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(16, (5 ,5), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(16, (5 ,5), activation='relu'),
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(128)
])

In [125]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model.fit(train_images, train_labels_final, epochs=10, batch_size=32, 
                    validation_data=(test_images, test_labels_final))

Epoch 1/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 30ms/step - accuracy: 0.0687 - loss: 3.9729 - val_accuracy: 0.1500 - val_loss: 2.2488
Epoch 2/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.1313 - loss: 2.6071 - val_accuracy: 0.2550 - val_loss: 2.0970
Epoch 3/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - accuracy: 0.2033 - loss: 2.2902 - val_accuracy: 0.3850 - val_loss: 1.9083
Epoch 4/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - accuracy: 0.2420 - loss: 2.1185 - val_accuracy: 0.3850 - val_loss: 1.7847
Epoch 5/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - accuracy: 0.3232 - loss: 1.9176 - val_accuracy: 0.4250 - val_loss: 1.6696
Epoch 6/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.3386 - loss: 1.8246 - val_accuracy: 0.4300 - val_loss: 1.6077
Epoch 7/10
[1m100/100

<keras.src.callbacks.history.History at 0x1995ba2e4d0>

In [117]:
def retrieval(model):
    # --- Choose query image ---
    print("1: beach\n2: mountain\n3: food\n4: dinosaur\n5: flower\n6: horse\n7: elephant")
    choice = input("Type the number to choose a category: ")

    query_file_map = {
        '1': 'beach.jpg',
        '2': 'mountain.jpg',
        '3': 'food.jpg',
        '4': 'dinosaur.jpg',
        '5': 'flower.jpg',
        '6': 'horse.jpg',
        '7': 'elephant.jpg'
    }

    if choice not in query_file_map:
        print("Invalid choice")
        return

    src_path = os.path.join("image.query", query_file_map[choice])
    src_input = cv.imread(src_path)
    src_input_resized = cv.resize(src_input, (IMG_WIDTH, IMG_HEIGHT))
    src_input_resized = src_input_resized.astype(np.float32) / 255.0
    src_input_resized = np.expand_dims(src_input_resized, axis=0)  # batch dimension

    print(f"You chose: {query_file_map[choice]}")

    cv.imshow("Query", cv.resize(src_input, (256, 256)))

    # --- Load database images ---
    database_dir = "image.orig"
    database_files = sorted(glob(os.path.join(database_dir, "*.jpg")))

    db_images = []
    for file in database_files:
        img = cv.imread(file)
        img = cv.resize(img, (IMG_WIDTH, IMG_HEIGHT))
        img = img.astype(np.float32) / 255.0
        db_images.append(img)
    db_images = np.array(db_images)

    # --- Predict logits ---
    query_logit = model.predict(src_input_resized)  # shape: (1, 10)
    db_logits = model.predict(db_images)            # shape: (num_images, 10)

    # --- Compute Euclidean distances ---
    distances = np.linalg.norm(db_logits - query_logit, axis=1)
    closest_idx = np.argmin(distances)
    closest_file = database_files[closest_idx]

    print(f"The most similar image is {closest_file} with distance {distances[closest_idx]:.4f}")

    # --- Show result ---
    closest_img = cv.imread(closest_file)
    cv.imshow("Closest Match", cv.resize(closest_img, (256, 256)))
    cv.waitKey(0)
    cv.destroyAllWindows()
    exit
    
retrieval(model)
exit

1: beach
2: mountain
3: food
4: dinosaur
5: flower
6: horse
7: elephant


You chose: beach.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
The most similar image is image.orig\264.jpg with distance 15.7288


<IPython.core.autocall.ZMQExitAutocall at 0x1992ed19c10>