In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, initializers, callbacks
from tensorflow.keras.callbacks import ModelCheckpoint

2025-04-01 17:20:18.336027: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-01 17:20:18.345491: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743508218.358547   29601 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743508218.362541   29601 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-01 17:20:18.375482: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
# Load Dataset
def preprocess_image(image, label):
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    image = image / 255.0  # Normalize
    return image, label

In [3]:
# Build Custom VGG16 Model
def build_vgg16(num_classes):
    model = models.Sequential([
        # Block 1
        layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                      kernel_initializer=initializers.GlorotUniform(), input_shape=(IMG_SIZE, IMG_SIZE, 3)),
        layers.BatchNormalization(),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),

        # Block 2
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),

        # Block 3
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),

        # Block 4
        layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),

        # Block 5
        layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),

        # Fully Connected Layers
        layers.Flatten(),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

In [4]:
# Hyperparameters
IMG_SIZE = 64  # Tiny ImageNet image size
BATCH_SIZE = 64
EPOCHS = 100  # Increased to 100
NUM_CLASSES = 200  
LEARNING_RATE = 1e-4  # Adjusted for better training stability

In [5]:
train_dataset = tf.keras.utils.image_dataset_from_directory(
    "tiny-imagenet-200/train",
    label_mode="int",
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE
).map(preprocess_image).shuffle(1000)

Found 100000 files belonging to 200 classes.


I0000 00:00:1743508224.128340   29601 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4143 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [6]:
val_dataset = tf.keras.utils.image_dataset_from_directory(
    "tiny-imagenet-200/val",  # Structured validation folder
    label_mode="int",
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE
).map(preprocess_image)

Found 10000 files belonging to 1 classes.


In [8]:
# Compile Model
model = build_vgg16(NUM_CLASSES)

# compile with adam
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

# compile with sgd
#lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
#    initial_learning_rate=0.01, decay_steps=10000, decay_rate=0.9
#)
#optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
#model.compile(optimizer=tf.keras.optimizers.SGD(momentum=0.9, learning_rate=0.01),
#            loss="sparse_categorical_crossentropy",
#              metrics=["accuracy"])

In [9]:
# Learning Rate Scheduler
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-6)

In [10]:
# Early Stopping
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [11]:
# Define the callback to save the model after each epoch
checkpoint_callback = ModelCheckpoint(
    "cnn_model_{epoch:02d}.keras",  # Save model with epoch number in filename and .keras extension
    save_best_only=True,  # Set to True to save only the best model (based on validation loss/accuracy)
    save_freq='epoch',  # Save after every epoch
    verbose=1  # Print a message when the model is saved
)

In [None]:
# Train Model
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    verbose=1,
    batch_size=BATCH_SIZE,
    callbacks=[early_stopping, checkpoint_callback]  # Include the checkpoint callback
)

Epoch 1/100


I0000 00:00:1743508822.051350   29709 service.cc:148] XLA service 0x7a8cb8002540 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743508822.051371   29709 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4050 Laptop GPU, Compute Capability 8.9
2025-04-01 17:30:22.158542: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1743508822.796941   29709 cuda_dnn.cc:529] Loaded cuDNN version 90300







[1m   1/1563[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7:25:08[0m 17s/step - accuracy: 0.0000e+00 - loss: 6.9562

I0000 00:00:1743508833.854821   29709 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1083/1563[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m49s[0m 103ms/step - accuracy: 0.0245 - loss: 5.2886




[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step - accuracy: 0.0315 - loss: 5.1552










Epoch 1: val_loss improved from inf to 6.72190, saving model to cnn_model_01.keras
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 117ms/step - accuracy: 0.0315 - loss: 5.1549 - val_accuracy: 0.0017 - val_loss: 6.7219
Epoch 2/100
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - accuracy: 0.1176 - loss: 4.1238
Epoch 2: val_loss did not improve from 6.72190
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 109ms/step - accuracy: 0.1176 - loss: 4.1237 - val_accuracy: 0.0111 - val_loss: 7.1339
Epoch 3/100
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step - accuracy: 0.1893 - loss: 3.6338
Epoch 3: val_loss did not improve from 6.72190
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 110ms/step - accuracy: 0.1893 - loss: 3.6338 - val_accuracy: 0.0066 - val_loss: 7.8415
Epoch 4/100
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step - accuracy

In [None]:
# Save Final Model
model.save("cnn_model.h5")
print("✅ Final model saved as 'cnn_model.h5'")