In [15]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import math
import cv2
import PIL
import os
import sys
import glob
import random

from PIL import Image as PILImage
from PIL.ExifTags import TAGS

from IPython.display import display

import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50

In [16]:
def imshow(a, size=1.0):
    # Clip and convert the image to uint8
    a = a.clip(0, 255).astype("uint8")
    
    # Resize the image if a size factor is provided
    if size != 1.0:
        new_dim = (int(a.shape[1] * size), int(a.shape[0] * size))
        a = cv2.resize(a, new_dim, interpolation=cv2.INTER_AREA)
    
    # Convert color format if needed
    # if a.ndim == 3:
    #     if a.shape[2] == 4:
    #         a = cv2.cvtColor(a, cv2.COLOR_BGRA2RGBA)
    #     else:
    #         a = cv2.cvtColor(a, cv2.COLOR_BGR2RGB)
    
    # Display the image
    display(PIL.Image.fromarray(a))

In [17]:
def preproccess(image):
    image = cv2.resize(image, (256, 256))
    return image

In [18]:
data_folder = "Data/00*"
image_files = glob.glob(os.path.join(data_folder, "*.jpg"), recursive=True)


data_x = []
data_y = []

for image_file in image_files:
    image = cv2.imread(image_file)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = preproccess(image)
    
    label = image_file.split("/")[-2]
    label = label.split(".")[-2]
    label = int(label)
    
    data_x.append(image)
    data_y.append(label)

print(len(data_x), len(data_y))

1151 1151


In [19]:
def preprocess(image, label):
    # Perform preprocessing on the image
    image = tf.image.resize(image, [256, 256])  # Resize to 256x256
    image = tf.cast(image, tf.float32) / 255.0  # Normalize to [0, 1]
    return image, label

# Augmentation function
def augment(image, label):
    # Apply augmentations in [0, 1]
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    image = tf.image.rot90(image, k=tf.random.uniform([], 0, 4, dtype=tf.int32))
    image = tf.image.random_brightness(image, max_delta=0.2)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    image = tf.image.random_hue(image, max_delta=0.1)
    image = tf.image.random_saturation(image, lower=0.9, upper=1.1)
    noise = tf.random.normal(shape=tf.shape(image), mean=0.0, stddev=0.02, dtype=tf.float32)
    image = tf.add(image, noise)
    image = tf.clip_by_value(image, 0.0, 1.0)  # Ensure valid pixel range
    return image, label

# Data loading function
def load_data(filepaths, labels, batch_size=32):
    def parse_function(filepath, label):
        image = tf.io.read_file(filepath)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize(image, [256, 256])  # Ensure consistent shape
        return image, label

    # Create dataset
    dataset = tf.data.Dataset.from_tensor_slices((filepaths, labels))
    dataset = dataset.map(parse_function, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.map(augment, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.shuffle(buffer_size=1000).batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

    return dataset

In [20]:
train_x, val_x, train_y, val_y = train_test_split(
    data_x, data_y, test_size=0.2, random_state=42, stratify=data_y
)

print(f"Training set size: {len(train_x)}")
print(f"Validation set size: {len(val_x)}")

Training set size: 920
Validation set size: 231


In [21]:
def generate_triplets(images, labels, num_triplets=10000):
    label_to_indices = {label: np.where(np.array(labels) == label)[0] for label in np.unique(labels)}
    
    triplets = []
    for _ in range(num_triplets):
        # Select an anchor image and its label
        anchor_idx = random.choice(range(len(images)))
        anchor_label = labels[anchor_idx]

        # Select a positive image (same label)
        positive_idx = random.choice(label_to_indices[anchor_label])
        while positive_idx == anchor_idx:
            positive_idx = random.choice(label_to_indices[anchor_label])

        # Select a negative image (different label)
        negative_label = random.choice([l for l in label_to_indices.keys() if l != anchor_label])
        negative_idx = random.choice(label_to_indices[negative_label])

        triplets.append((images[anchor_idx], images[positive_idx], images[negative_idx]))
    
    return triplets

In [22]:
def visualize_triplets(triplets):
    for anchor, positive, negative in triplets[:5]:
        print("Anchor:")
        imshow(anchor)
        print("Positive:")
        imshow(positive)
        print("Negative:")
        imshow(negative)
        


In [23]:
train_triplets = generate_triplets(train_x, train_y, num_triplets=1000)

In [24]:
def create_triplet_dataset(triplets, batch_size=32):
    # Separate the triplets into anchors, positives, and negatives
    anchors = [triplet[0] for triplet in triplets]
    positives = [triplet[1] for triplet in triplets]
    negatives = [triplet[2] for triplet in triplets]

    # Convert to TensorFlow tensors
    anchors = tf.convert_to_tensor(anchors, dtype=tf.float32) / 255.0  # Normalize images
    positives = tf.convert_to_tensor(positives, dtype=tf.float32) / 255.0
    negatives = tf.convert_to_tensor(negatives, dtype=tf.float32) / 255.0

    # Create a dataset from the triplets
    dataset = tf.data.Dataset.from_tensor_slices((anchors, positives, negatives))
    dataset = dataset.shuffle(buffer_size=len(triplets))  # Shuffle for randomness
    dataset = dataset.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

    return dataset

# Example usage
batch_size = 32
train_dataset = create_triplet_dataset(train_triplets, batch_size=batch_size)

2025-01-24 18:16:53.591035: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 786432000 exceeds 10% of free system memory.
2025-01-24 18:18:04.090668: W external/local_tsl/tsl/framework/bfc_allocator.cc:482] Allocator (GPU_0_bfc) ran out of memory trying to allocate 750.00MiB (rounded to 786432000)requested by op RealDiv
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2025-01-24 18:18:04.090903: I external/local_tsl/tsl/framework/bfc_allocator.cc:1039] BFCAllocator dump for GPU_0_bfc
2025-01-24 18:18:04.091267: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (256): 	Total Chunks: 21, Chunks in use: 21. 5.2KiB allocated for chunks. 5.2KiB in use in bin. 489B client-requested in use in bin.
2025-01-24 18:18:04.091367: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (512): 	Total C

ResourceExhaustedError: {{function_node __wrapped__RealDiv_device_/job:localhost/replica:0/task:0/device:GPU:0}} failed to allocate memory [Op:RealDiv] name: 

In [12]:
def build_embedding_network(input_shape=(256, 256, 3)):
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),  # Embedding dimension
        layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1))  # Normalize embeddings
    ])
    return model

embedding_model = build_embedding_network()
embedding_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
def build_siamese_model(embedding_model, input_shape=(256, 256, 3)):
    # Inputs for anchor, positive, and negative images
    anchor_input = layers.Input(name="anchor", shape=input_shape)
    positive_input = layers.Input(name="positive", shape=input_shape)
    negative_input = layers.Input(name="negative", shape=input_shape)

    # Pass each input through the embedding network
    anchor_embedding = embedding_model(anchor_input)
    positive_embedding = embedding_model(positive_input)
    negative_embedding = embedding_model(negative_input)

    # Combine embeddings into a Siamese model
    siamese_model = models.Model(
        inputs=[anchor_input, positive_input, negative_input],
        outputs=[anchor_embedding, positive_embedding, negative_embedding]
    )

    return siamese_model

siamese_model = build_siamese_model(embedding_model)
siamese_model.summary()

For now lets use ready use resnet

In [None]:
def build_resnet_embedding_network(input_shape=(256, 256, 3), embedding_dim=128):
    # Load pre-trained ResNet50 without the top layer
    base_model = ResNet50(weights="imagenet", include_top=False, input_shape=input_shape)
    
    # Freeze the base model's weights (optional, for fine-tuning later)
    base_model.trainable = False
    
    # Add custom layers for embedding extraction
    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(embedding_dim, activation='relu'),  # Embedding dimension
        layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1))  # L2 normalize embeddings
    ])
    
    return model

# Create the embedding network
embedding_model = build_resnet_embedding_network()
embedding_model.summary()

In [None]:
def build_siamese_model_with_resnet(embedding_model, input_shape=(256, 256, 3)):
    # Inputs for anchor, positive, and negative images
    anchor_input = layers.Input(name="anchor", shape=input_shape)
    positive_input = layers.Input(name="positive", shape=input_shape)
    negative_input = layers.Input(name="negative", shape=input_shape)

    # Pass each input through the embedding network
    anchor_embedding = embedding_model(anchor_input)
    positive_embedding = embedding_model(positive_input)
    negative_embedding = embedding_model(negative_input)

    # Combine embeddings into a Siamese model
    siamese_model = models.Model(
        inputs=[anchor_input, positive_input, negative_input],
        outputs=[anchor_embedding, positive_embedding, negative_embedding]
    )

    return siamese_model

siamese_model = build_siamese_model_with_resnet(embedding_model)
siamese_model.summary()