# Siamese network


In [122]:
import random
import cv2
import numpy as np
import PIL
import os
import sys
import glob
from PIL import Image

from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Lambda
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split

In [None]:
# import kagglehub

# # Download latest version
# path = kagglehub.dataset_download("jessicali9530/caltech256")

# print("Path to dataset files:", path)

In [82]:
def imshow(a, size=1.0):
    """Display an OpenCV-style (BGR / BGRA / grayscale) image in the notebook.

    Args:
        a: Image as a NumPy array. Arrays whose maximum is <= 1.0 are treated
            as normalized and scaled by 255; other non-uint8 arrays are
            clipped to [0, 255].
        size: Scale factor applied to both width and height before display.
    """
    # if image was normalized
    if a.max() <= 1.0:
        # Clip before scaling: negative values (e.g. a mean-centred image)
        # would otherwise wrap around when cast to uint8.
        a = (np.clip(a, 0.0, 1.0) * 255).astype("uint8")

    # Clip and convert the image to uint8
    elif a.dtype != np.uint8:
        a = np.clip(a, 0, 255).astype("uint8")

    # Resize the image if a size factor is provided
    if size != 1.0:
        new_dim = (int(a.shape[1] * size), int(a.shape[0] * size))
        a = cv2.resize(a, new_dim, interpolation=cv2.INTER_AREA)

    # Convert color format if needed (OpenCV stores channels as BGR, PIL expects RGB)
    if a.ndim == 3:
        if a.shape[2] == 4:
            a = cv2.cvtColor(a, cv2.COLOR_BGRA2RGBA)
        elif a.shape[2] == 1:
            # Single-channel image with an explicit channel axis: drop the
            # axis so it is shown as grayscale instead of failing in cvtColor.
            a = a[:, :, 0]
        else:
            a = cv2.cvtColor(a, cv2.COLOR_BGR2RGB)

    # Display the image
    display(PIL.Image.fromarray(a))

1. load data 

In [74]:
# Root directory of the data set; read_images() looks for "*.jpg" files inside its sub-folders.
base_dir = "Data"

def read_images(base_dir, number_of_folders):
    """Load the JPEG images from the first ``number_of_folders`` class folders.

    Args:
        base_dir: Directory whose sub-directories each hold the images of one
            class.
        number_of_folders: Maximum number of class folders to read, taken in
            sorted name order.

    Returns:
        A tuple ``(images, labels)`` of two equally long lists: the images as
        returned by ``cv2.imread`` and, for each image, the first three
        characters of its folder name.
    """
    # os.listdir() returns entries in arbitrary order, so sort them to make
    # the selected folders reproducible; stray files are not class folders
    # and must not count towards number_of_folders.
    folder_names = sorted(
        name
        for name in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, name))
    )[:number_of_folders]

    images = []
    labels = []
    for folder in folder_names:
        label = folder[:3]
        image_files = sorted(glob.glob(os.path.join(base_dir, folder, "*.jpg")))
        for image_file in image_files:
            image = cv2.imread(image_file)
            if image is None:
                # cv2.imread signals an unreadable/corrupt file by returning
                # None instead of raising; skip it so later steps do not crash.
                continue
            images.append(image)
            labels.append(label)
    return images, labels

# Load the images (and their folder-derived labels) from up to 10 entries of base_dir.
images, labels = read_images(base_dir, 10)

2. preprocessing
- normalizing 0 - 1 or mean center
- resizing or cropping (plain resizing may cause stretching) to a size of 256x256
- augmentations if necessary

In [53]:
def resize_crop(image, size):
    """Center-crop ``image`` to the target aspect ratio, then resize it.

    Args:
        image: Image array whose first two axes are (height, width).
        size: Target ``(height, width)``.

    Returns:
        A new array whose first two axes equal ``size``. The input is never
        returned itself.
    """
    target_height, target_width = size
    original_height, original_width = image.shape[:2]

    # Compare the aspect ratios by cross-multiplication so the comparison is
    # exact integer arithmetic rather than a float equality test.
    scaled_original = original_width * target_height
    scaled_target = target_width * original_height

    if scaled_original > scaled_target:
        # The image is too wide: keep the full height and crop the width to
        # the target aspect ratio (not merely to a square).
        new_width = max(1, round(original_height * target_width / target_height))
        crop_x = (original_width - new_width) // 2
        cropped_image = image[:, crop_x:crop_x + new_width]
    elif scaled_original < scaled_target:
        # The image is too narrow: keep the full width and crop the height.
        new_height = max(1, round(original_width * target_height / target_width))
        crop_y = (original_height - new_height) // 2
        cropped_image = image[crop_y:crop_y + new_height, :]
    else:
        cropped_image = image

    if cropped_image.shape[:2] == (target_height, target_width):
        # Already the requested size: no interpolation needed, just hand back
        # an independent copy.
        return cropped_image.copy()

    # cv2.resize expects the destination size as (width, height).
    return cv2.resize(cropped_image, (target_width, target_height))


In [87]:
# i = 258
# imshow(images[i])
# image_cropped = resize_crop(images[i], (256, 256))
# imshow(image_cropped)

In [88]:
def preprocess(image, size=(256, 256), blur_kernel=(5, 5)):
    """Normalize, crop/resize and lightly blur a single image.

    Args:
        image: Image array with values on a 0-255 scale.
        size: Target ``(height, width)`` passed to ``resize_crop``.
        blur_kernel: Gaussian kernel size ``(width, height)``.

    Returns:
        A float32 array with values scaled to the 0-1 range and spatial size
        ``size``.
    """
    # Dividing a uint8 array by a Python float yields float64, which doubles
    # the memory of the whole data set; float32 is sufficient here.
    image = image.astype(np.float32) / 255.0
    image = resize_crop(image, size)
    # sigma=0 lets OpenCV derive the standard deviation from the kernel size.
    image = cv2.GaussianBlur(image, blur_kernel, 0)

    return image


In [93]:
# i = 1102
# image = images[i]
# image_processed = preprocess(image)
# imshow(image)
# imshow(image_processed)

In [94]:
# Replace every raw image with its preprocessed version (order, and therefore label alignment, is kept).
images = [preprocess(image) for image in images]

3. Create image pairs: a pair gets label 1 if both images show the same type of object and label 0 otherwise. For example, a pair of two cats (both taken from the same folder) gets label 1.

In [101]:
# prepare the pairs of images, choose randomly, label for the pair is 1 if the images are from the same class, 0 otherwise
# maybe a good idea to ensure that the number of pairs with label 1 is equal to the number of pairs with label 0, or other ratio


In [113]:
def prepare_pairs(images, labels, num_pairs=1000):
    """Build a shuffled, roughly balanced set of same-class / different-class pairs.

    Args:
        images: Sequence of equally shaped image arrays.
        labels: Class label of each image, aligned with ``images``.
        num_pairs: Total number of pairs; half (rounded down) are positive.

    Returns:
        A tuple ``(pairs, pair_labels)``. ``pairs`` has shape
        ``(num_pairs, 2, *image_shape)`` and ``pair_labels`` holds 1 for pairs
        drawn from the same class and 0 for pairs from two different classes.

    Raises:
        ValueError: If positive pairs are requested but no class has at least
            two images, or negative pairs are requested with fewer than two
            classes.
    """
    images = np.asarray(images)
    labels = np.asarray(labels)

    # Map each label to the *indices* of its images; sampling indices avoids
    # holding thousands of image tuples in Python lists.
    label_to_indices = {}
    for index, label in enumerate(labels):
        label_to_indices.setdefault(label, []).append(index)

    # generate more or less the same number of positive and negative pairs
    num_positive_pairs = num_pairs // 2
    num_negative_pairs = num_pairs - num_positive_pairs

    all_labels = list(label_to_indices)
    # A positive pair needs two distinct images, so single-image classes
    # cannot be used for them.
    positive_labels = [
        label for label in all_labels if len(label_to_indices[label]) >= 2
    ]

    if num_positive_pairs > 0 and not positive_labels:
        raise ValueError(
            "Cannot build positive pairs: no class contains at least two images."
        )
    if num_negative_pairs > 0 and len(all_labels) < 2:
        raise ValueError(
            "Cannot build negative pairs: at least two classes are required."
        )

    samples = []  # (first image index, second image index, pair label)

    for _ in range(num_positive_pairs):
        label = random.choice(positive_labels)
        first, second = random.sample(label_to_indices[label], 2)  # two images from the same class
        samples.append((first, second, 1))

    for _ in range(num_negative_pairs):
        label1, label2 = random.sample(all_labels, 2)  # two different classes
        first = random.choice(label_to_indices[label1])
        second = random.choice(label_to_indices[label2])
        samples.append((first, second, 0))

    if not samples:
        # Nothing requested: return correctly shaped empty arrays.
        return (
            np.empty((0, 2) + images.shape[1:], dtype=images.dtype),
            np.empty((0,), dtype=int),
        )

    # shuffle the pairs
    random.shuffle(samples)

    index_pairs = np.array([(first, second) for first, second, _ in samples])
    pair_labels = np.array([pair_label for _, _, pair_label in samples])

    # A single integer-array lookup gathers every pair at once:
    # (num_pairs, 2) indices -> (num_pairs, 2, *image_shape) images.
    return images[index_pairs], pair_labels



In [114]:
# Draw 1000 labelled image pairs (label 1 = same class, 0 = different classes).
pairs, pair_labels = prepare_pairs(images, labels, num_pairs=1000)

In [115]:
# pairs[0]
# for i in range(5):
#     imshow(pairs[i][0])
#     imshow(pairs[i][1])
#     print(pair_labels[i])


numpy.ndarray

4. siamese model 

In [118]:
# base model that will be used to extract features from the images, creates a feature vector of size 128
def build_base_network(input_shape):
    """Create the shared convolutional encoder of the siamese model.

    Three ReLU convolution stages with "same" padding (64 filters of 7x7,
    128 of 5x5, 256 of 3x3), each followed by 2x2 max pooling, feed a
    flattened representation into a 128-unit ReLU dense layer.

    Args:
        input_shape: Shape of one input image, without the batch axis.

    Returns:
        A Keras ``Model`` mapping an image to a 128-dimensional feature vector.
    """
    # (number of filters, kernel size) for each convolution stage, in order.
    conv_stages = ((64, (7, 7)), (128, (5, 5)), (256, (3, 3)))

    image_in = Input(shape=input_shape)

    features = image_in
    for n_filters, kernel_size in conv_stages:
        features = Conv2D(n_filters, kernel_size, activation="relu", padding="same")(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    flat = Flatten()(features)
    embedding = Dense(128, activation="relu")(flat)  # 128-dimensional feature vector

    return Model(image_in, embedding)

# full siamese network that takes two images as input and outputs a binary classification
def build_siamese_network(input_shape):
    """Assemble the two-input siamese classifier.

    Both inputs are encoded by the same base network (shared weights); the
    element-wise absolute difference of the two encodings is passed to a
    single sigmoid unit.

    Args:
        input_shape: Shape of one input image, without the batch axis.

    Returns:
        A Keras ``Model`` taking ``[image_a, image_b]`` and producing one
        sigmoid output per pair.
    """
    shared_encoder = build_base_network(input_shape)

    left_in = Input(shape=input_shape)
    right_in = Input(shape=input_shape)

    # The very same encoder instance is applied to both branches.
    left_vec = shared_encoder(left_in)
    right_vec = shared_encoder(right_in)

    # Element-wise |a - b| between the two encoded vectors.
    abs_difference = Lambda(lambda pair: K.abs(pair[0] - pair[1]))([left_vec, right_vec])

    similarity = Dense(1, activation="sigmoid")(abs_difference)

    return Model(inputs=[left_in, right_in], outputs=similarity)


In [119]:
# Per-image input shape handed to the network builders (height, width, channels).
input_shape = (256, 256, 3) 
siamese_model = build_siamese_network(input_shape)
# Single sigmoid output -> binary cross-entropy loss; accuracy is tracked as a metric.
siamese_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])





In [120]:
# Hold out 20% of the pairs for validation; the fixed seed keeps the split reproducible.
train_pairs, val_pairs, train_labels, val_labels = train_test_split(
    pairs, pair_labels, test_size=0.2, random_state=42
)

# Axis 1 of the pair arrays selects the first / second image of every pair.
# Indexing that axis directly replaces four Python-level loops that copied
# the images one by one.
train_pairs_1, train_pairs_2 = train_pairs[:, 0], train_pairs[:, 1]
val_pairs_1, val_pairs_2 = val_pairs[:, 0], val_pairs[:, 1]


In [121]:
# Stop training once the validation loss has not improved for 5 consecutive
# epochs and roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor="val_loss", 
    patience=5,         
    restore_best_weights=True  
)

# Train on the two image branches; the validation pairs are evaluated after
# every epoch and drive the early-stopping callback.
history = siamese_model.fit(
    [train_pairs_1, train_pairs_2],
    train_labels,
    validation_data=([val_pairs_1, val_pairs_2], val_labels),
    batch_size=32,
    epochs=10,
    callbacks=[early_stopping]  
)


Epoch 1/10
[1m 1/25[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:02[0m 15s/step - accuracy: 0.6562 - loss: 0.6901

KeyboardInterrupt: 