In [1]:
import os
import cv2
import random
import numpy as np
import glob

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from PIL import Image as PILImage
from PIL.ExifTags import TAGS

from IPython.display import display

import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from keras_resnet.models import ResNet18 
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input
from tensorflow.keras import regularizers

from keras.layers import Layer

from tensorflow.keras.models import load_model

2025-01-28 17:27:55.821101: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-28 17:27:55.949975: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-28 17:27:55.991164: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-28 17:27:56.251014: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Number of triplets per batch. Every triplet carries three images (anchor,
# positive, negative), so one step feeds 3 * BATCH_SIZE images to the network.
# NOTE(review): the training run recorded further down in this notebook ended
# with a GPU RESOURCE_EXHAUSTED error at this value - consider lowering it on
# GPUs with little memory.
BATCH_SIZE = 32
# Target image size in pixels.
# NOTE(review): presumably (height, width); the cells visible here hard-code
# 256 instead of reading this constant - confirm the intended use.
IMAGE_SIZE = (256, 256)

In [3]:
def imshow(a, size=1.0):
    """Render an image array inline in the notebook.

    Pixel values are clamped to [0, 255] and converted to uint8 first. When
    `size` differs from 1.0, both dimensions are multiplied by that factor
    (truncated to int) and the image is resized with cv2.INTER_AREA before it
    is handed to PIL for display.
    """
    pixels = a.clip(0, 255).astype("uint8")

    if size != 1.0:
        rows, cols = pixels.shape[0], pixels.shape[1]
        # cv2.resize expects the target size as (width, height).
        scaled_dim = (int(cols * size), int(rows * size))
        pixels = cv2.resize(pixels, scaled_dim, interpolation=cv2.INTER_AREA)

    display(PILImage.fromarray(pixels))

In [4]:
def get_label(file_path):
    """Return the integer class label encoded in an image's parent directory.

    The name of the directory that contains the file is split on "." and the
    second-to-last piece is parsed as an integer, e.g.
    ``Data_Final/12.cat/img.jpg`` -> ``12``.

    Args:
        file_path: path to an image file.

    Returns:
        The label as an int.

    Raises:
        IndexError: if the parent directory name contains no ".".
        ValueError: if the selected piece is not a valid integer.
    """
    # os.path honours the separator(s) of the running platform, so paths that
    # glob returns with backslashes on Windows are handled as well; a
    # hard-coded "/" split is not.
    directory_name = os.path.basename(os.path.dirname(file_path))
    return int(directory_name.split(".")[-2])

In [5]:
def resize_crop(image, image_size=(256, 256)):
    """Center-crop `image` to the target aspect ratio, then resize it.

    Cropping first means the resize never stretches the picture. The crop is
    computed from the target aspect ratio, so non-square targets work too.

    Args:
        image: array whose first two dimensions are (height, width).
        image_size: target (height, width) in pixels.

    Returns:
        The result of ``cv2.resize`` applied to the centered crop.
    """
    target_height, target_width = image_size
    original_height, original_width = image.shape[:2]

    original_aspect = original_width / original_height
    target_aspect = target_width / target_height

    if original_aspect > target_aspect:
        # Too wide: keep the full height and trim the left/right borders.
        new_width = int(round(original_height * target_aspect))
        new_width = min(original_width, max(1, new_width))
        crop_x = (original_width - new_width) // 2
        cropped_image = image[:, crop_x:crop_x + new_width]
    elif original_aspect < target_aspect:
        # Too tall: keep the full width and trim the top/bottom borders.
        new_height = int(round(original_width / target_aspect))
        new_height = min(original_height, max(1, new_height))
        crop_y = (original_height - new_height) // 2
        cropped_image = image[crop_y:crop_y + new_height, :]
    else:
        cropped_image = image

    # cv2.resize takes the destination size as (width, height).
    resized_image = cv2.resize(cropped_image, (target_width, target_height))
    return resized_image

def preprocess(image):
    """Prepare a loaded image for the network.

    Steps, in order: divide the pixel values by 255.0, run `resize_crop` with
    its default target size, then apply a 5x5 Gaussian blur (sigma argument 0).
    """
    scaled = image / 255.0
    resized = resize_crop(scaled)
    blurred = cv2.GaussianBlur(resized, (5, 5), 0)
    return blurred

def read_image(image_file):
    """Load an image from disk with OpenCV and run it through `preprocess`.

    Args:
        image_file: path of the image to load.

    Returns:
        Whatever `preprocess` returns for the loaded image.

    Raises:
        OSError: if OpenCV could not load the file (cv2.imread returned None),
            e.g. because it is missing or cannot be decoded.
    """
    image = cv2.imread(image_file)
    if image is None:
        # cv2.imread signals failure with None instead of raising; fail here
        # with the offending path rather than with a TypeError further down.
        raise OSError(f"Could not read image file: {image_file!r}")
    return preprocess(image)

In [6]:
def generate_triplets(file_paths, labels):
    """Build shuffled (anchor, positive, negative) path triplets.

    Every file is used as an anchor once. Its positive is a random *other*
    file with the same label and its negative is a random file drawn from a
    randomly chosen different label. Files whose label occurs only once cannot
    have a positive and are skipped as anchors (they can still be picked as
    negatives).

    Args:
        file_paths: sequence of image paths.
        labels: sequence of hashable labels, aligned with `file_paths`.

    Returns:
        A list of ``(anchor_path, positive_path, negative_path)`` tuples in
        random order. Empty input yields an empty list.

    Raises:
        ValueError: if the two sequences differ in length, or if the data is
            non-empty but contains fewer than two distinct labels.
    """
    if len(file_paths) != len(labels):
        raise ValueError(
            f"file_paths and labels must have the same length, "
            f"got {len(file_paths)} and {len(labels)}"
        )

    label_to_indices = {}
    for idx, label in enumerate(labels):
        label_to_indices.setdefault(label, []).append(idx)

    if len(file_paths) > 0 and len(label_to_indices) < 2:
        raise ValueError("At least two distinct labels are required to pick negatives")

    # Candidate negative labels, built once per anchor label instead of once
    # per anchor.
    negative_label_pool = {}

    triplets = []
    for anchor_idx, anchor_label in enumerate(labels):
        same_label_indices = label_to_indices[anchor_label]
        if len(same_label_indices) < 2:
            # No other sample of this class exists; searching for a positive
            # different from the anchor would never terminate.
            continue

        # Select a positive image (same label, different file)
        positive_idx = random.choice(same_label_indices)
        while positive_idx == anchor_idx:
            positive_idx = random.choice(same_label_indices)

        # Select a negative image (different label)
        if anchor_label not in negative_label_pool:
            negative_label_pool[anchor_label] = [
                other for other in label_to_indices if other != anchor_label
            ]
        negative_label = random.choice(negative_label_pool[anchor_label])
        negative_idx = random.choice(label_to_indices[negative_label])

        triplets.append(
            (file_paths[anchor_idx], file_paths[positive_idx], file_paths[negative_idx])
        )

    random.shuffle(triplets)
    return triplets

In [7]:
class DataGenerator(Sequence):
    """Keras `Sequence` that serves batches of triplet images.

    Each item is ``((anchors, positives, negatives), dummy_targets)``. The
    targets are zeros with one row per triplet in the batch.

    Args:
        triplets: sequence of (anchor_path, positive_path, negative_path).
        batch_size: number of triplets per batch; must be positive.
        image_size: stored on the instance. The loading code below does not
            read it; image dimensions are decided by `read_image`.
        **kwargs: forwarded unchanged to `Sequence.__init__`.

    Raises:
        ValueError: if `batch_size` is not positive.
    """

    def __init__(self, triplets, batch_size, image_size, **kwargs):
        super().__init__(**kwargs)
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.triplets = triplets
        self.batch_size = batch_size
        self.image_size = image_size

    def __len__(self):
        """Number of full batches; a trailing partial batch is dropped."""
        return len(self.triplets) // self.batch_size

    @staticmethod
    def _load_images(paths):
        """Read the given image files and stack them into one float32 array."""
        # read_image -> preprocess already divides the pixels by 255, so no
        # further scaling is applied here (dividing again would squash every
        # value into [0, 1/255]). float32 halves the memory of the float64
        # arrays that the division in preprocess produces.
        return np.array([read_image(path) for path in paths], dtype=np.float32)

    def __getitem__(self, index):
        """Load and return the batch of triplets at position `index`."""
        start = index * self.batch_size
        batch_triplets = self.triplets[start:start + self.batch_size]

        anchor_paths, positive_paths, negative_paths = [], [], []
        for anchor_path, positive_path, negative_path in batch_triplets:
            anchor_paths.append(anchor_path)
            positive_paths.append(positive_path)
            negative_paths.append(negative_path)

        anchors = self._load_images(anchor_paths)
        positives = self._load_images(positive_paths)
        negatives = self._load_images(negative_paths)

        # One dummy target row per triplet actually present in this batch.
        dummy_targets = np.zeros((len(batch_triplets), 1), dtype=np.float32)
        return (anchors, positives, negatives), dummy_targets
        

In [8]:
# Expected layout: Data_Final/<class directory>/<image>.jpg
data_folder = "Data_Final/*"
image_pattern = os.path.join(data_folder, "*.jpg")

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the seeded split below pick the same files on every run and machine.
image_files = sorted(glob.glob(image_pattern))
if not image_files:
    raise FileNotFoundError(f"No images matched the pattern {image_pattern!r}")

# Class labels are parsed from each image's parent directory name.
labels = [get_label(file_path) for file_path in image_files]

# Hold out 10% of the images for validation, keeping class proportions.
train_x, val_x, train_y, val_y = train_test_split(
    image_files, labels, test_size=0.1, random_state=42, stratify=labels
)

In [9]:
# Build the triplets for the training split first, then for the validation split.
train_triplets, val_triplets = (
    generate_triplets(split_paths, split_labels)
    for split_paths, split_labels in ((train_x, train_y), (val_x, val_y))
)

# Wrap each triplet list in a batch generator with identical settings.
train_gen, val_gen = (
    DataGenerator(split_triplets, batch_size=BATCH_SIZE, image_size=(256, 256, 3))
    for split_triplets in (train_triplets, val_triplets)
)

In [10]:
class L2Normalization(Layer):
    """Layer that L2-normalizes its input along axis 1."""

    def call(self, inputs):
        unit_length = tf.math.l2_normalize(inputs, axis=1)
        return unit_length

In [11]:
# def build_embedding_network(input_shape=(256, 256, 3), embedding_dim=512):
#     inputs = layers.Input(shape=input_shape)

#     # Feature extraction block
#     x = layers.Conv2D(32, (7, 7), strides=2, padding='same', activation='relu')(inputs)
#     x = layers.BatchNormalization()(x)
#     x = layers.MaxPooling2D((3, 3), strides=2, padding='same')(x)

#     # Second block
#     x = layers.Conv2D(64, (5, 5), strides=2, padding='same', activation='relu')(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.MaxPooling2D((3, 3), strides=2, padding='same')(x)

#     # Third block
#     x = layers.Conv2D(128, (3, 3), strides=1, padding='same', activation='relu')(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.Conv2D(128, (3, 3), strides=1, padding='same', activation='relu')(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.MaxPooling2D((2, 2), strides=2, padding='same')(x)
    
#     # Feature extraction block 4 (Additional deeper block)
#     x = layers.Conv2D(256, (3, 3), strides=1, padding='same', activation='relu')(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.Conv2D(256, (3, 3), strides=1, padding='same', activation='relu')(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.MaxPooling2D((2, 2), strides=2, padding='same')(x)
    
#      # Feature extraction block 5 (Even deeper block)
#     x = layers.Conv2D(512, (3, 3), strides=1, padding='same', activation='relu')(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.Conv2D(512, (3, 3), strides=1, padding='same', activation='relu')(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.MaxPooling2D((2, 2), strides=2, padding='same')(x)

#     # Bottleneck and global pooling
#     x = layers.Conv2D(1024, (1, 1), activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
#     x = layers.BatchNormalization()(x)
#     x = layers.GlobalAveragePooling2D()(x)

#     # Dense layers for embedding
#     x = layers.Dense(embedding_dim, activation='relu')(x)
#     x = L2Normalization()(x)

#     # Add dropout to prevent overfitting
#     x = layers.Dropout(0.5)(x)

#     # Create the model
#     model = models.Model(inputs, x, name="EmbeddingNetwork")
#     return model

# Build the model
#embedding_model = load_model("Models/embedding_extractor_custom_big.keras",
#                             custom_objects={"L2Normalization": L2Normalization})
#embedding_model.summary()

In [12]:
# Load the previously saved embedding network and print its layer summary.
embedding_model_path = "Models/embedding_model_julka.keras"
embedding_model = load_model(embedding_model_path)
embedding_model.summary()

I0000 00:00:1738081684.897750    1325 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738081685.142527    1325 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738081685.142647    1325 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738081685.145654    1325 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738081685.145765    1325 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

In [13]:
def build_siamese_model_with_resnet(embedding_model, input_shape=(256, 256, 3)):
    """Wrap an embedding network in a three-input triplet model.

    The same `embedding_model` instance is applied to the anchor, positive and
    negative inputs, so all three branches share its weights. The three
    embeddings are concatenated along axis 1 into a single output, in the
    order anchor, positive, negative.

    Args:
        embedding_model: callable Keras model mapping an image batch to
            embeddings.
        input_shape: shape of one input image, without the batch dimension.

    Returns:
        A Keras model with inputs named "anchor", "positive" and "negative"
        and the concatenated embeddings as its output.
    """
    branch_names = ("anchor", "positive", "negative")

    # Inputs for anchor, positive, and negative images
    branch_inputs = [
        layers.Input(name=branch_name, shape=input_shape)
        for branch_name in branch_names
    ]

    # Pass each input through the shared embedding network
    branch_embeddings = [embedding_model(branch_input) for branch_input in branch_inputs]

    # Built-in Concatenate instead of a Lambda around tf.concat: same result,
    # but the model can be saved and reloaded without deserializing arbitrary
    # Python code.
    embeddings = layers.Concatenate(axis=1)(branch_embeddings)

    # Combine embeddings into a Siamese model
    siamese_model = models.Model(inputs=branch_inputs, outputs=embeddings)
    return siamese_model

# Wrap the loaded embedding network in the three-input triplet model and print
# its layer summary.
siamese_model = build_siamese_model_with_resnet(embedding_model)
siamese_model.summary()

In [14]:
def triplet_loss(y_true, y_pred, margin=0.2):
    """Triplet margin loss on dot-product based distances.

    `y_pred` is split along axis 1 into three equal parts: anchor, positive
    and negative embeddings. The distance between two embeddings is
    ``1 - dot(u, v)``, and the loss is the batch mean of
    ``max(d(anchor, positive) - d(anchor, negative) + margin, 0)``.
    `y_true` is not used.

    NOTE(review): ``1 - dot`` is a cosine distance only for unit-length
    embeddings - confirm the embedding model normalizes its output.
    """
    anchor_emb, positive_emb, negative_emb = tf.split(
        y_pred, num_or_size_splits=3, axis=1
    )

    def _dot_distance(left, right):
        # Similarity is the row-wise dot product; distance is one minus it.
        return 1 - tf.reduce_sum(left * right, axis=1)

    positive_distance = _dot_distance(anchor_emb, positive_emb)
    negative_distance = _dot_distance(anchor_emb, negative_emb)

    # Hinge: only triplets that violate the margin contribute.
    hinge = tf.maximum(positive_distance - negative_distance + margin, 0.0)
    return tf.reduce_mean(hinge)

In [15]:
# Adam with a small learning rate; the custom triplet loss ignores the targets.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
siamese_model.compile(loss=triplet_loss, optimizer=adam_optimizer)

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    restore_best_weights=True,
)

# Train the model
siamese_model.fit(train_gen, validation_data=val_gen, epochs=50, callbacks=[early])

Epoch 1/50


I0000 00:00:1738081698.450921    1745 service.cc:146] XLA service 0x7f7c78019040 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1738081698.450963    1745 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2025-01-28 17:28:18.600144: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-01-28 17:28:19.399142: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907
2025-01-28 17:28:20.721299: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.02GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2025-01-28 17:28:20.747028: W tensorflow/core/framework/op_kernel.cc:1840] O

UnknownError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/runpy.py", line 86, in _run_code

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 712, in start

  File "/home/marroj/.local/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/asyncio/base_events.py", line 1906, in _run_once

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 499, in process_one

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 730, in execute_request

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 383, in do_execute

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 528, in run_cell

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes

  File "/home/marroj/miniconda3/envs/CV2/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code

  File "/tmp/ipykernel_1325/2497158694.py", line 6, in <module>

  File "/home/marroj/.local/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/marroj/.local/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 320, in fit

  File "/home/marroj/.local/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

Failed to determine best cudnn convolution algorithm for:
%cudnn-conv-bias-activation.45 = (f32[32,128,256,256]{3,2,1,0}, u8[0]{0}) custom-call(f32[32,3,256,256]{3,2,1,0} %transpose.76, f32[128,3,3,3]{3,2,1,0} %transpose.77, f32[128]{0} %arg5.6), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", metadata={op_type="Conv2D" op_name="functional_1/functional_1_1/conv2d_85_1/convolution" source_file="/home/marroj/.local/lib/python3.10/site-packages/tensorflow/python/framework/ops.py" source_line=1177}, backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false}

Original error: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 1090519040 bytes.

To ignore this failure and try to use a fallback algorithm (which may have suboptimal performance), use XLA_FLAGS=--xla_gpu_strict_conv_algorithm_picker=false.  Please also file a bug for the root cause of failing autotuning.
	 [[{{node StatefulPartitionedCall}}]] [Op:__inference_one_step_on_iterator_6692]

In [16]:
#siamese_model.save("siamese_resnet_model.keras")

In [18]:
# Save only the embedding network (the sub-model shared by the three branches),
# not the siamese wrapper.
# NOTE(review): the training output recorded in this notebook ends in a GPU
# out-of-memory error - confirm a successful training run preceded this save,
# otherwise the file holds the weights exactly as they were loaded.
embedding_model.save("Models/contrastive_triplet.keras")