In [17]:
import os
import cv2
import random
import numpy as np
import glob

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from PIL import Image as PILImage
from PIL.ExifTags import TAGS

from IPython.display import display

import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from keras_resnet.models import ResNet18 
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input
from tensorflow.keras import regularizers

from keras.layers import Layer

In [18]:
BATCH_SIZE = 32
IMAGE_SIZE = (256, 256)

In [19]:
def imshow(a, size=1.0):
    # Clip and convert the image to uint8
    a = a.clip(0, 255).astype("uint8")
    
    # Resize the image if a size factor is provided
    if size != 1.0:
        new_dim = (int(a.shape[1] * size), int(a.shape[0] * size))
        a = cv2.resize(a, new_dim, interpolation=cv2.INTER_AREA)
    
    
    # Display the image
    display(PILImage.fromarray(a))

In [20]:
def get_label(file_path):
    label = file_path.split("/")[-2]
    label = label.split(".")[-2]
    label = int(label)
    return label

In [21]:
def read_image(file_path):
    img = cv2.imread(file_path)
    img = cv2.resize(img, IMAGE_SIZE)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

In [22]:
class DataGenerator(Sequence):
    def __init__(self, img_files, labels, batch_size, image_size, augment = False, **kwargs):
        super().__init__(**kwargs)
        self.img_files = img_files
        self.labels = labels
        self.batch_size = batch_size
        self.image_size = image_size
        self.augment = augment
      
    def __len__(self):
        return len(self.img_files) // self.batch_size

    def __getitem__(self, index):
        start = index * self.batch_size
        end = start + self.batch_size
        batch_images = self.img_files[start:end]
        batch_labels = self.labels[start:end]
        
        imgs, labels = [], []
        
        for i in range(len(batch_images)):
            img = read_image(batch_images[i])
            
            label = batch_labels[i]
            
            if self.augment:
                img_aug = self.default_augmentations(img)
                img_aug = img_aug / 255.0
                
                imgs.append(img_aug)
                labels.append(label)
            
            img = img / 255.0
            
            imgs.append(img)
            labels.append(label)
        
        imgs = np.array(imgs)
        labels = np.array(labels)
     
        return imgs, tf.keras.utils.to_categorical(labels, num_classes=10)

    def default_augmentations(self, img):
        # Define small augmentations manually and apply them directly to the image
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        img = tf.image.random_brightness(img, max_delta=0.1)  # Adjust brightness
        img = tf.image.random_contrast(img, lower=0.9, upper=1.1)  # Adjust contrast
        img = tf.image.random_saturation(img, lower=0.9, upper=1.1)  # Adjust saturation
        img = tf.image.rot90(img, k=np.random.randint(0, 4))  # Random 90° rotations
        
        img = tf.cast(img, tf.float32)
        img = img / 255.0
    
        return img

In [23]:
data_folder = "Data/00*"

image_files = glob.glob(os.path.join(data_folder, "*.jpg"), recursive=True)

labels = [get_label(file_path) for file_path in image_files]

In [24]:
train_x, val_x, train_y, val_y = train_test_split(image_files, labels, test_size=0.2, random_state=42)

val_x, test_x, val_y, test_y = train_test_split(val_x, val_y, test_size=0.5, random_state=42)

train_gen = DataGenerator(train_x, train_y, BATCH_SIZE, IMAGE_SIZE, augment=True)
val_gen = DataGenerator(val_x, val_y, BATCH_SIZE * 2, IMAGE_SIZE)
test_gen = DataGenerator(test_x, test_y, BATCH_SIZE * 2, IMAGE_SIZE)

In [25]:
class L2Normalization(Layer):
    def call(self, inputs):
        return tf.math.l2_normalize(inputs, axis=1)

In [26]:
def build_embedding_network(input_shape=(256, 256, 3), embedding_dim=512):
    inputs = layers.Input(shape=input_shape)

    # Feature extraction block
    x = layers.Conv2D(32, (7, 7), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((3, 3), strides=2, padding='same')(x)

    # Second block
    x = layers.Conv2D(64, (5, 5), strides=2, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((3, 3), strides=2, padding='same')(x)

    # Third block
    x = layers.Conv2D(128, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(128, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2), strides=2, padding='same')(x)
    
    # Feature extraction block 4 (Additional deeper block)
    x = layers.Conv2D(256, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(256, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2), strides=2, padding='same')(x)
    
    # Bottleneck and global pooling
    x = layers.Conv2D(256, (1, 1), activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = layers.BatchNormalization()(x)
    x = layers.GlobalAveragePooling2D()(x)

    # Dense layers for embedding
    x = layers.Dense(embedding_dim, activation='relu')(x)
    x = L2Normalization()(x)

    # Add dropout to prevent overfitting
    x = layers.Dropout(0.5)(x)

    # Create the model
    model = models.Model(inputs, x, name="EmbeddingNetwork")
    return model

# Build the model
embedding_model = build_embedding_network()
embedding_model.summary()

In [27]:
def build_label_predictor(embedding_model, num_classes, input_shape=(256,256,3)):
    embedding_model = embedding_model
    
    inputs = Input(shape=input_shape)
    embedding = embedding_model(inputs)
    outputs = layers.Dense(num_classes, activation='softmax')(embedding)
    
    model = models.Model(inputs, outputs, name="LabelPredictor")
    return model

predictor_model = build_label_predictor(embedding_model, 10)
predictor_model.summary()

In [28]:
predictor_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy',tf.keras.metrics.AUC(), tf.keras.metrics.TopKCategoricalAccuracy(k=3) ])

early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

predictor_model.fit(train_gen, validation_data=val_gen, epochs=50, callbacks=[early])

Epoch 1/50







[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 1s/step - accuracy: 0.2033 - auc_1: 0.6301 - loss: 4.4137 - top_k_categorical_accuracy: 0.4397 - val_accuracy: 0.2344 - val_auc_1: 0.5648 - val_loss: 3.4793 - val_top_k_categorical_accuracy: 0.3906
Epoch 2/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 980ms/step - accuracy: 0.3249 - auc_1: 0.7416 - loss: 3.0618 - top_k_categorical_accuracy: 0.6034 - val_accuracy: 0.1875 - val_auc_1: 0.5728 - val_loss: 2.7714 - val_top_k_categorical_accuracy: 0.3750
Epoch 3/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 966ms/step - accuracy: 0.3293 - auc_1: 0.7506 - loss: 2.4350 - top_k_categorical_accuracy: 0.6048 - val_accuracy: 0.1562 - val_auc_1: 0.5783 - val_loss: 2.4756 - val_top_k_categorical_accuracy: 0.3438
Epoch 4/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 924ms/step - accuracy: 0.3812 - auc_1: 0.7877 - loss: 2.1008 - top_k_categorical_accuracy: 0.6496 - val_acc

<keras.src.callbacks.history.History at 0x7fd758619f00>

In [31]:
test_loss, test_acc, _,_ = predictor_model.evaluate(test_gen)

print("Test accuracy: {:.2f}%".format(test_acc * 100))
print("Test loss: {:.2f}".format(test_loss))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 837ms/step - accuracy: 0.2812 - auc_1: 0.7249 - loss: 2.0665 - top_k_categorical_accuracy: 0.5781
Test accuracy: 28.12%
Test loss: 2.07


In [None]:
embedding_model.save("Models/embedding_extractor_custom.keras")

: 