In [1]:
import os
import cv2
import random
import numpy as np
import glob

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from PIL import Image as PILImage
from PIL.ExifTags import TAGS

from IPython.display import display

import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from keras_resnet.models import ResNet18 
from tensorflow.keras.models import Model
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, GlobalAveragePooling2D
from tensorflow.keras import regularizers

from keras.layers import Layer

from tensorflow.keras.models import load_model

2025-01-29 15:55:32.113204: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-29 15:55:32.210179: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-29 15:55:32.239388: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-29 15:55:32.449232: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Number of triplets per batch; passed to DataGenerator as batch_size.
BATCH_SIZE = 32
# Target size handed to cv2.resize inside read_image.
IMAGE_SIZE = (256, 256)

In [3]:
def imshow(a, size=1.0):
    """Render an image array inline in the notebook.

    Pixel values are clamped to [0, 255] and cast to uint8 before display.

    Args:
        a: Image array to show.
        size: Scale factor applied to both axes; 1.0 shows the image as-is.
    """
    frame = a.clip(0, 255).astype("uint8")

    if size != 1.0:
        height, width = frame.shape[0], frame.shape[1]
        # The target size is given to cv2.resize as (width, height).
        scaled_dim = (int(width * size), int(height * size))
        frame = cv2.resize(frame, scaled_dim, interpolation=cv2.INTER_AREA)

    display(PILImage.fromarray(frame))

In [4]:
def get_label(file_path):
    """Return the integer class label encoded in an image's parent directory name.

    The parent directory name is split on "." and its second-to-last piece is
    parsed as an integer, e.g. ``.../12.some_name/img.jpg`` -> ``12``.

    Args:
        file_path: Path of an image file stored inside a class directory.

    Returns:
        The label as an ``int``.

    Raises:
        IndexError: If the parent directory name contains no ".".
        ValueError: If the extracted piece is not an integer.
    """
    # os.path understands the platform's separators, so backslash-separated
    # paths (as glob returns on Windows) work as well as "/"-separated ones.
    parent_dir = os.path.basename(os.path.dirname(file_path))
    return int(parent_dir.split(".")[-2])

In [5]:
# Swap in a different read_image function if the model needs other preprocessing.

def read_image(file_path):
    """Load an image from disk, resize it to ``IMAGE_SIZE`` and return it as RGB.

    Args:
        file_path: Path of the image file to read.

    Returns:
        The resized image array with channels in RGB order.

    Raises:
        OSError: If OpenCV could not read or decode the file.
    """
    img = cv2.imread(file_path)
    # cv2.imread reports failure by returning None rather than raising; fail
    # here with the offending path instead of a cryptic error inside cv2.resize.
    if img is None:
        raise OSError(f"Could not read image: {file_path}")

    img = cv2.resize(img, IMAGE_SIZE)
    # OpenCV loads images in BGR channel order; convert to RGB.
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [6]:
def generate_triplets(file_paths, labels):
    """Build one (anchor, positive, negative) path triplet per usable image.

    Every image is used as an anchor once. Its positive is a different image
    with the same label; its negative is a random image drawn from a randomly
    chosen other label. Images whose label occurs only once cannot have a
    positive and are skipped as anchors (they can still be drawn as negatives).

    Args:
        file_paths: Sequence of image paths.
        labels: Sequence of labels, aligned index-by-index with ``file_paths``.

    Returns:
        A shuffled list of ``(anchor_path, positive_path, negative_path)``
        tuples. Empty if ``file_paths`` is empty.

    Raises:
        ValueError: If images are given but fewer than two distinct labels
            exist, so no negative can be chosen.
    """
    if len(file_paths) == 0:
        return []

    label_to_indices = {}
    for idx, label in enumerate(labels):
        label_to_indices.setdefault(label, []).append(idx)

    # Built once: the set of labels does not change while sampling.
    all_labels = list(label_to_indices)
    if len(all_labels) < 2:
        raise ValueError(
            "generate_triplets needs at least two distinct labels to pick negatives"
        )

    triplets = []
    for anchor_idx in range(len(file_paths)):
        anchor_label = labels[anchor_idx]
        same_class = label_to_indices[anchor_label]

        # A class with a single image has no valid positive; re-drawing until
        # "positive != anchor" would never terminate, so skip this anchor.
        if len(same_class) < 2:
            continue

        # Select a positive image (same label, different index).
        positive_idx = random.choice(same_class)
        while positive_idx == anchor_idx:
            positive_idx = random.choice(same_class)

        # Select a negative label uniformly among the other labels. At least
        # two labels exist, so this re-draw loop always terminates.
        negative_label = random.choice(all_labels)
        while negative_label == anchor_label:
            negative_label = random.choice(all_labels)
        negative_idx = random.choice(label_to_indices[negative_label])

        triplets.append(
            (file_paths[anchor_idx], file_paths[positive_idx], file_paths[negative_idx])
        )

    random.shuffle(triplets)
    return triplets

In [7]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that serves batches of image triplets.

    Each batch is ``((anchors, positives, negatives), dummy_targets)``. The
    images are loaded with ``read_image`` and scaled to [0, 1]. The targets are
    all zeros and only exist because Keras expects a ``y`` value.

    Args:
        triplets: List of ``(anchor_path, positive_path, negative_path)``.
        batch_size: Number of triplets per batch.
        image_size: Stored on the instance but not used for loading;
            ``read_image`` decides the actual image size.
        **kwargs: Forwarded to ``Sequence.__init__``.
    """

    def __init__(self, triplets, batch_size, image_size, **kwargs):
        super().__init__(**kwargs)
        self.triplets = triplets
        self.batch_size = batch_size
        self.image_size = image_size

    def __len__(self):
        # Floor division: a trailing partial batch is dropped.
        return len(self.triplets) // self.batch_size

    @staticmethod
    def _load_batch(paths):
        """Read the given image paths into one float32 array scaled to [0, 1]."""
        # float32 halves the memory of the float64 array that dividing uint8
        # pixels by 255.0 would otherwise produce.
        return np.asarray([read_image(path) for path in paths], dtype=np.float32) / 255.0

    def __getitem__(self, index):
        # Get batch of triplets
        start = index * self.batch_size
        batch_triplets = self.triplets[start : start + self.batch_size]

        anchors = self._load_batch([triplet[0] for triplet in batch_triplets])
        positives = self._load_batch([triplet[1] for triplet in batch_triplets])
        negatives = self._load_batch([triplet[2] for triplet in batch_triplets])

        # Size the dummy targets from the batch actually loaded so inputs and
        # targets always have the same number of rows.
        dummy_targets = np.zeros((len(batch_triplets), 1), dtype=np.float32)
        return (anchors, positives, negatives), dummy_targets
        

In [8]:
# The trailing "*" matches every sub-directory of ../Data_Final; get_label
# later reads each image's label from its parent directory name.
data_folder = os.path.abspath(os.path.join(os.getcwd(), "..", "Data_Final/*"))

# glob returns matches in arbitrary, filesystem-dependent order. Sorting makes
# the seeded split below pick the same files on every run and machine.
image_files = sorted(glob.glob(os.path.join(data_folder, "*.jpg"), recursive=True))

labels = [get_label(file_path) for file_path in image_files]

# Hold out 10% for validation, keeping the label proportions of the full set.
train_x, val_x, train_y, val_y = train_test_split(
    image_files, labels, test_size=0.1, random_state=42, stratify=labels
)

print(len(image_files))

11496


In [9]:
# generate_triplets samples with Python's `random` module; seed it so the
# triplets (and therefore the training run) are reproducible.
random.seed(42)

train_triplets = generate_triplets(train_x, train_y)
val_triplets = generate_triplets(val_x, val_y)

# Derive the image shape from IMAGE_SIZE instead of repeating the literal, so
# the two cannot drift apart.
INPUT_SHAPE = IMAGE_SIZE + (3,)

train_gen = DataGenerator(train_triplets, batch_size=BATCH_SIZE, image_size=INPUT_SHAPE)
val_gen = DataGenerator(val_triplets, batch_size=BATCH_SIZE, image_size=INPUT_SHAPE)

In [10]:
class L2Normalization(Layer):
    """Layer that rescales its input to unit L2 norm along ``axis``.

    Args:
        axis: Axis to normalize along. Defaults to 1, the feature axis of a
            ``(batch, features)`` tensor.
        **kwargs: Forwarded to the base ``Layer``.
    """

    def __init__(self, axis=1, **kwargs):
        super().__init__(**kwargs)
        self.axis = axis

    def call(self, inputs):
        return tf.math.l2_normalize(inputs, axis=self.axis)

    def compute_output_shape(self, input_shape):
        # Normalization does not change the shape.
        return input_shape

    def get_config(self):
        # Include `axis` so a saved model can rebuild the layer identically.
        config = super().get_config()
        config.update({"axis": self.axis})
        return config

In [11]:
def build_embedding_network(input_shape=(256, 256, 3), embedding_dim=512):
    """Build the CNN that maps an image to a unit-length embedding vector.

    The network stacks four convolutional stages (each ending in max pooling),
    a 1x1 bottleneck convolution, global average pooling, dropout, and a dense
    projection whose output is L2-normalized.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        embedding_dim: Number of units in the output embedding.

    Returns:
        A Keras model named "EmbeddingNetwork".
    """
    inputs = layers.Input(shape=input_shape)

    # Feature extraction block
    x = layers.Conv2D(32, (7, 7), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((3, 3), strides=2, padding='same')(x)

    # Second block
    x = layers.Conv2D(64, (5, 5), strides=2, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((3, 3), strides=2, padding='same')(x)

    # Third block
    x = layers.Conv2D(128, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(128, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2), strides=2, padding='same')(x)

    # Feature extraction block 4
    x = layers.Conv2D(256, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(256, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2), strides=2, padding='same')(x)

    # Bottleneck and global pooling
    x = layers.Conv2D(512, (1, 1), activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = layers.BatchNormalization()(x)
    x = layers.GlobalAveragePooling2D()(x)

    # Dropout has to come before the normalization: during training it zeroes
    # units and rescales the rest, so applying it to the normalized vector
    # would yield embeddings that are not unit length, while the triplet loss
    # converts dot products to distances assuming they are.
    x = layers.Dropout(0.5)(x)

    # Dense projection to the embedding space; normalization is the last step
    # so the model output is unit length in both training and inference.
    x = layers.Dense(embedding_dim, activation='relu')(x)
    x = L2Normalization()(x)

    model = models.Model(inputs, x, name="EmbeddingNetwork")
    return model


# Instantiate the embedding network and print its layer-by-layer summary.
embedding_model = build_embedding_network(input_shape=(256, 256, 3), embedding_dim=512)
embedding_model.summary()

I0000 00:00:1738162535.685222  268710 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738162535.924217  268710 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738162535.924306  268710 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738162535.928658  268710 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738162535.928771  268710 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

In [12]:
def build_siamese_model_with_resnet(embedding_model, input_shape=(256, 256, 3)):
    """Wrap an embedding network into a three-input triplet model.

    The same ``embedding_model`` (shared weights) embeds the anchor, positive
    and negative images. The three embeddings are concatenated along the
    feature axis into a single output so that a loss function can split them
    apart again.

    Args:
        embedding_model: Model mapping one image to an embedding vector.
        input_shape: Shape of one input image, without the batch dimension.

    Returns:
        A Keras model with inputs ``[anchor, positive, negative]`` and one
        output holding the three embeddings side by side.
    """
    # Inputs for anchor, positive, and negative images
    anchor_input = layers.Input(name="anchor", shape=input_shape)
    positive_input = layers.Input(name="positive", shape=input_shape)
    negative_input = layers.Input(name="negative", shape=input_shape)

    # Pass each input through the embedding network
    anchor_embedding = embedding_model(anchor_input)
    positive_embedding = embedding_model(positive_input)
    negative_embedding = embedding_model(negative_input)

    # Built-in Concatenate rather than a Lambda around a Python lambda: same
    # result, but it can be saved and reloaded like any standard layer.
    embeddings = layers.Concatenate(axis=1)(
        [anchor_embedding, positive_embedding, negative_embedding]
    )

    # Combine embeddings into a Siamese model
    siamese_model = models.Model(
        inputs=[anchor_input, positive_input, negative_input],
        outputs=embeddings
    )

    return siamese_model

# Use the embedding model of your choice; embedding models first trained on classification sometimes work better.
siamese_model = build_siamese_model_with_resnet(embedding_model)
siamese_model.summary()

In [13]:
def triplet_loss(y_true, y_pred, margin=0.4):
    """Mean hinge triplet loss on dot-product-based distances.

    Args:
        y_true: Ignored; present only to match the Keras loss signature.
        y_pred: Anchor, positive and negative embeddings concatenated along
            axis 1 in that order.
        margin: Amount by which the negative distance should exceed the
            positive distance before a triplet stops contributing.

    Returns:
        Scalar tensor: the batch mean of
        ``max(pos_dist - neg_dist + margin, 0)``.
    """
    def _distance_to(anchor_emb, other_emb):
        # Distance is defined as one minus the row-wise dot product.
        return 1 - tf.reduce_sum(anchor_emb * other_emb, axis=1)

    # Undo the concatenation: three equal-width slices along the feature axis.
    anchor, positive, negative = tf.split(y_pred, num_or_size_splits=3, axis=1)

    pos_dist = _distance_to(anchor, positive)
    neg_dist = _distance_to(anchor, negative)

    per_triplet = tf.maximum(pos_dist - neg_dist + margin, 0.0)
    return tf.reduce_mean(per_triplet)

In [14]:
# The model output already contains all three embeddings, so triplet_loss
# computes the loss from y_pred alone and ignores the generator's targets.
siamese_model.compile(optimizer='adam', loss=triplet_loss)

# Stop once val_loss has not improved for 5 epochs and restore the best weights seen.
early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Train for at most 50 epochs, validating on val_gen after each epoch.
siamese_model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=50,
    callbacks=[early]
)

Epoch 1/50


I0000 00:00:1738162544.629862  268873 service.cc:146] XLA service 0x7f9dec005a20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1738162544.629937  268873 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2025-01-29 15:55:44.883930: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-01-29 15:55:46.166325: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907
I0000 00:00:1738162557.827649  268873 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


KeyboardInterrupt: 

In [31]:
# Save only the embedding network, not the three-input siamese wrapper.
embedding_model.save("resnet_exp_quick.keras")