In [1]:
import os
import cv2
import random
import numpy as np
import glob

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from PIL import Image as PILImage
from PIL.ExifTags import TAGS

from IPython.display import display

import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from keras_resnet.models import ResNet18 
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input
from tensorflow.keras import regularizers

from keras.layers import Layer

2025-01-27 14:15:10.881915: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-27 14:15:11.001124: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-27 14:15:11.080723: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-27 14:15:11.485019: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
BATCH_SIZE = 32
IMAGE_SIZE = (256, 256)

In [3]:
def imshow(a, size=1.0):
    # Clip and convert the image to uint8
    a = a.clip(0, 255).astype("uint8")
    
    # Resize the image if a size factor is provided
    if size != 1.0:
        new_dim = (int(a.shape[1] * size), int(a.shape[0] * size))
        a = cv2.resize(a, new_dim, interpolation=cv2.INTER_AREA)
    
    
    # Display the image
    display(PILImage.fromarray(a))

In [4]:
def get_label(file_path):
    label = file_path.split("/")[-2]
    label = label.split(".")[-2]
    label = int(label)
    return label

In [5]:
def read_image(file_path):
    img = cv2.imread(file_path)
    img = cv2.resize(img, IMAGE_SIZE)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

In [6]:
class DataGenerator(Sequence):
    def __init__(self, img_files, labels, batch_size, image_size, augment = False, **kwargs):
        super().__init__(**kwargs)
        self.img_files = img_files
        self.labels = labels
        self.batch_size = batch_size
        self.image_size = image_size
        self.augment = augment
      
    def __len__(self):
        return len(self.img_files) // self.batch_size

    def __getitem__(self, index):
        start = index * self.batch_size
        end = start + self.batch_size
        batch_images = self.img_files[start:end]
        batch_labels = self.labels[start:end]
        
        imgs, labels = [], []
        
        for i in range(len(batch_images)):
            img = read_image(batch_images[i])
            
            label = batch_labels[i]
            
            if self.augment:
                img_aug = self.default_augmentations(img)
                img_aug = img_aug / 255.0
                
                imgs.append(img_aug)
                labels.append(label)
            
            img = img / 255.0
            
            imgs.append(img)
            labels.append(label)
        
        imgs = np.array(imgs)
        labels = np.array(labels)
     
        return imgs, tf.keras.utils.to_categorical(labels, num_classes=264)

    def default_augmentations(self, img):
        # Define small augmentations manually and apply them directly to the image
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        img = tf.image.random_brightness(img, max_delta=0.1)  # Adjust brightness
        img = tf.image.random_contrast(img, lower=0.9, upper=1.1)  # Adjust contrast
        img = tf.image.random_saturation(img, lower=0.9, upper=1.1)  # Adjust saturation
        img = tf.image.rot90(img, k=np.random.randint(0, 4))  # Random 90° rotations
        
        img = tf.cast(img, tf.float32)
        img = img / 255.0
    
        return img

In [7]:
data_folder = "Data/*"

image_files = glob.glob(os.path.join(data_folder, "*.jpg"), recursive=True)

labels = [get_label(file_path) for file_path in image_files]

print("Number of images found: ", len(image_files))
print("Number of labels found: ", len(labels))
print("Number of unique labels: ", len(set(labels)))

Number of images found:  31128
Number of labels found:  31128
Number of unique labels:  263


In [8]:
train_x, val_x, train_y, val_y = train_test_split(image_files, labels, test_size=0.2, random_state=42, stratify=labels)

val_x, test_x, val_y, test_y = train_test_split(val_x, val_y, test_size=0.5, random_state=42, stratify=val_y)

train_gen = DataGenerator(train_x, train_y, BATCH_SIZE, IMAGE_SIZE, augment=True)
val_gen = DataGenerator(val_x, val_y, BATCH_SIZE * 2, IMAGE_SIZE)
test_gen = DataGenerator(test_x, test_y, BATCH_SIZE * 2, IMAGE_SIZE)

In [9]:
class L2Normalization(Layer):
    def call(self, inputs):
        return tf.math.l2_normalize(inputs, axis=1)

In [10]:
def build_embedding_network(input_shape=(256, 256, 3), embedding_dim=512):
    inputs = layers.Input(shape=input_shape)

    # Feature extraction block
    x = layers.Conv2D(32, (7, 7), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((3, 3), strides=2, padding='same')(x)

    # Second block
    x = layers.Conv2D(64, (5, 5), strides=2, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((3, 3), strides=2, padding='same')(x)

    # Third block
    x = layers.Conv2D(128, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(128, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2), strides=2, padding='same')(x)
    
    # Feature extraction block 4 (Additional deeper block)
    x = layers.Conv2D(256, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(256, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2), strides=2, padding='same')(x)
    
    # Bottleneck and global pooling
    x = layers.Conv2D(256, (1, 1), activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = layers.BatchNormalization()(x)
    x = layers.GlobalAveragePooling2D()(x)

    # Dense layers for embedding
    x = layers.Dense(embedding_dim, activation='relu')(x)
    x = L2Normalization()(x)

    # Add dropout to prevent overfitting
    x = layers.Dropout(0.5)(x)

    # Create the model
    model = models.Model(inputs, x, name="EmbeddingNetwork")
    return model

# Build the model
embedding_model = build_embedding_network()
embedding_model.summary()

I0000 00:00:1737983716.148627  392115 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1737983716.724049  392115 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1737983716.724225  392115 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1737983716.740347  392115 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1737983716.740607  392115 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

In [11]:
def build_label_predictor(embedding_model, num_classes, input_shape=(256,256,3)):
    embedding_model = embedding_model
    
    inputs = Input(shape=input_shape)
    embedding = embedding_model(inputs)
    outputs = layers.Dense(num_classes, activation='softmax')(embedding)
    
    model = models.Model(inputs, outputs, name="LabelPredictor")
    return model

predictor_model = build_label_predictor(embedding_model, 264)
predictor_model.summary()

In [12]:
predictor_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy',tf.keras.metrics.AUC(), tf.keras.metrics.TopKCategoricalAccuracy(k=3) ])

early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

predictor_model.fit(train_gen, validation_data=val_gen, epochs=50, callbacks=[early])

Epoch 1/50


I0000 00:00:1737983729.279227  392446 service.cc:146] XLA service 0x7efbbc001d30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1737983729.279569  392446 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2025-01-27 14:15:29.758146: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-01-27 14:15:30.910957: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907





I0000 00:00:1737983751.904454  392446 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m778/778[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m935s[0m 1s/step - accuracy: 0.0381 - auc: 0.5689 - loss: 5.8639 - top_k_categorical_accuracy: 0.0744 - val_accuracy: 0.0273 - val_auc: 0.5939 - val_loss: 5.4651 - val_top_k_categorical_accuracy: 0.0674
Epoch 2/50
[1m778/778[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m734s[0m 943ms/step - accuracy: 0.0592 - auc: 0.6617 - loss: 5.1835 - top_k_categorical_accuracy: 0.1064 - val_accuracy: 0.0602 - val_auc: 0.6801 - val_loss: 5.1395 - val_top_k_categorical_accuracy: 0.1074
Epoch 3/50
[1m778/778[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m640s[0m 822ms/step - accuracy: 0.0641 - auc: 0.6965 - loss: 5.0643 - top_k_categorical_accuracy: 0.1174 - val_accuracy: 0.0677 - val_auc: 0.7042 - val_loss: 5.0102 - val_top_k_categorical_accuracy: 0.1185
Epoch 4/50
[1m778/778[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m644s[0m 828ms/step - accuracy: 0.0736 - auc: 0.7228 - loss: 4.9432 - top_k_categorical_accuracy: 0.1321 - val_accur

<keras.src.callbacks.history.History at 0x7efc956a1b10>

In [15]:
test_loss, test_acc, _,_ = predictor_model.evaluate(test_gen)

print("Test accuracy: {:.2f}%".format(test_acc * 100))
print("Test loss: {:.2f}".format(test_loss))

[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 569ms/step - accuracy: 0.2355 - auc: 0.8693 - loss: 3.6920 - top_k_categorical_accuracy: 0.3733
Test accuracy: 25.78%
Test loss: 3.64


In [16]:
embedding_model.save("Models/embedding_extractor_custom_big.keras")