In [126]:
# Import TensorFlow, Keras components, and other utilities.
# - os: Used to set TensorFlow's C++ log level before TensorFlow is imported.
# - numpy as np: NumPy, the array library the dataset loader returns its data in.
# - tensorflow as tf: The core TensorFlow library.
# - tensorflow.keras: TensorFlow's high-level API for building and training models.
# - layers: Module containing standard neural network layers (Conv2D, Dense, etc.).
# - models: Module for creating models (Sequential, Functional API).
# - datasets: Module containing built-in datasets like CIFAR-100.
# - optimizers: Module containing optimization algorithms (Adam, SGD, etc.).
# - losses: Module containing loss functions (SparseCategoricalCrossentropy, etc.).
# - l2: L2 weight-penalty regularizer.

import os

# Suppress TensorFlow informational messages.
# TF_CPP_MIN_LOG_LEVEL is a *minimum* level:
#   0 = log everything, 1 = hide INFO, 2 = hide INFO + WARNING, 3 = hide INFO + WARNING + ERROR.
# It must be set BEFORE `import tensorflow`: assigning it afterwards (as this cell
# previously did) is expected to have no effect on the already-loaded native library.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, datasets, optimizers, losses
from tensorflow.keras.regularizers import l2

print("Libraries imported successfully.")
print(f"TensorFlow Version: {tf.__version__}")
print(f"Keras Version: {keras.__version__}")

# Check for GPU availability
gpu_devices = tf.config.list_physical_devices('GPU')
if gpu_devices:
    print(f"GPU available: {gpu_devices}")
    # Optional: grow GPU memory on demand instead of reserving all of it up front.
    try:
        for gpu in gpu_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth configured.")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized; when the
        # cell is re-run in a live kernel this is reported rather than raised.
        print(e)
else:
    print("GPU not available, using CPU.")

Libraries imported successfully.
TensorFlow Version: 2.16.2
Keras Version: 3.9.0
GPU available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU memory growth configured.


In [118]:
# Configuration and Hyperparameters
# - BATCH_SIZE: Number of images processed in one training step.
# - LEARNING_RATE: Controls the step size during optimization.
# - NUM_EPOCHS: How many times the entire training dataset is passed through the model.
# - NUM_CLASSES: CIFAR-100 has 100 distinct image categories.
# - INPUT_SHAPE: The dimensions of each input image (Height, Width, Channels).
# - L2_LAMBDA: Strength of the L2 weight penalty passed to `l2(...)` on the Conv2D/Dense kernels.
# - SEED: Seed for the random number generators, so weight initialization, shuffling and
#   dropout start from the same state on every Restart & Run All.

BATCH_SIZE = 64          # Number of images per batch
LEARNING_RATE = 0.001    # Learning rate for the optimizer
NUM_EPOCHS = 10          # Number of times to iterate over the entire dataset
NUM_CLASSES = 100        # CIFAR-100 has 100 classes
INPUT_SHAPE = (32, 32, 3) # CIFAR images are 32x32 pixels with 3 color channels (RGB)
L2_LAMBDA = 1e-4         # L2 regularization strength
SEED = 42                # Random seed for reproducibility

# Seeds Python's `random`, NumPy and the TensorFlow backend in one call.
# NOTE(review): bit-exact reproducibility on GPU may additionally require
# deterministic ops to be enabled; this only fixes the RNG starting state.
keras.utils.set_random_seed(SEED)

print("Configuration:")
print(f"  Batch Size: {BATCH_SIZE}")
print(f"  Learning Rate: {LEARNING_RATE}")
print(f"  Number of Epochs: {NUM_EPOCHS}")
print(f"  Number of Classes: {NUM_CLASSES}")
print(f"  Input Shape: {INPUT_SHAPE}")
print(f"  L2 Lambda: {L2_LAMBDA}")
print(f"  Random Seed: {SEED}")

Configuration:
  Batch Size: 64
  Learning Rate: 0.001
  Number of Epochs: 10
  Number of Classes: 100
  Input Shape: (32, 32, 3)


In [120]:
# Fetch CIFAR-100 through `tf.keras.datasets.cifar100`.
# `load_data()` hands back two (images, labels) pairs of NumPy arrays: train and test.
# - Images (`x_train`, `x_test`): shape (num_samples, 32, 32, 3), pixel values in [0, 255].
# - Labels (`y_train`, `y_test`): shape (num_samples, 1), integer class ids from 0 to 99.

print("Loading CIFAR-100 dataset...")
train_split, test_split = datasets.cifar100.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
print("Dataset loaded successfully.")

# Array shapes for every split.
print(f"  x_train shape: {x_train.shape}") # (50000, 32, 32, 3)
print(f"  y_train shape: {y_train.shape}") # (50000, 1)
print(f"  x_test shape: {x_test.shape}")   # (10000, 32, 32, 3)
print(f"  y_test shape: {y_test.shape}")   # (10000, 1)

# Sample counts.
print(f"  Number of training samples: {x_train.shape[0]}")
print(f"  Number of test samples: {x_test.shape[0]}")

# Raw dtypes and value range, before any preprocessing.
print(f"  Image data type: {x_train.dtype}") # uint8
print(f"  Label data type: {y_train.dtype}") # int64
print(f"  Min/Max pixel values: {x_train.min()}/{x_train.max()}") # 0/255

Loading CIFAR-100 dataset...
Dataset loaded successfully.
  x_train shape: (50000, 32, 32, 3)
  y_train shape: (50000, 1)
  x_test shape: (10000, 32, 32, 3)
  y_test shape: (10000, 1)
  Number of training samples: 50000
  Number of test samples: 10000
  Image data type: uint8
  Label data type: int64
  Min/Max pixel values: 0/255


In [122]:
# Prepare the data for training:
# - Convert Image Type: Change image data type from `uint8` to `float32` for calculations.
# - Normalize Pixels: Scale pixel values from the range [0, 255] to [0, 1]. This helps stabilize training.
#   Alternatively, you could scale to [-1, 1] by dividing by 127.5 and subtracting 1.
# - Labels: The labels are already integers (0-99), which is the format expected by
#   `SparseCategoricalCrossentropy` loss. No changes needed for `y_train`, `y_test`.


def scale_pixels(images):
    """Return `images` as float32 scaled to [0, 1].

    Only raw `uint8` input is divided by 255. Anything else is assumed to have
    been normalized by an earlier run of this cell and is merely cast to
    float32. This keeps the cell idempotent: re-running it no longer divides
    already-scaled pixels by 255 a second time.
    """
    if images.dtype == np.uint8:
        return images.astype('float32') / 255.0
    return images.astype('float32', copy=False)


# Convert to float32 and normalize pixel values to the range [0, 1]
x_train = scale_pixels(x_train)
x_test = scale_pixels(x_test)

print(f"  x_train data type after conversion: {x_train.dtype}") # float32
print(f"  Min/Max pixel values after normalization: {x_train.min():.1f}/{x_train.max():.1f}") # 0.0/1.0

# Note: Labels y_train and y_test remain as integer arrays of shape (N, 1)
print(f"  y_train shape remains: {y_train.shape}")

  x_train data type after conversion: float32
  Min/Max pixel values after normalization: 0.0/1.0
  y_train shape remains: (50000, 1)


In [128]:
# Building the CNN using the `keras.Sequential` model, stacking layers linearly.
# - layers.Input: Declares the input shape. Keras 3 prefers an explicit Input layer
#   over passing `input_shape=` to the first Conv2D.
# - layers.Conv2D: 2D convolution layer (L2-regularized kernel, no built-in activation).
# - layers.BatchNormalization: Normalizes the pre-activation outputs of the preceding layer.
# - layers.Activation: ReLU applied after batch normalization.
# - layers.MaxPooling2D: Max pooling layer; each 2x2 pool halves height and width.
# - layers.Flatten: Converts 3D features to 1D vector.
# - layers.Dense: Fully connected layer.
# - layers.Dropout: Applies dropout regularization.
# - Final Dense Layer: Has `NUM_CLASSES` units and activation='softmax'. This makes the model output probabilities for each class.


def conv_block(filters):
    """Return the layers of one Conv2D -> BatchNorm -> ReLU -> MaxPool stage.

    Args:
        filters: Number of output channels of the 3x3, 'same'-padded convolution.

    Returns:
        A list of four freshly constructed Keras layers, in application order.
    """
    return [
        layers.Conv2D(filters, kernel_size=(3, 3), padding='same',
                      kernel_regularizer=l2(L2_LAMBDA)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
    ]


print("Defining the Keras Sequential model (with Batch Norm, L2, Softmax output)...")

model = models.Sequential([
    layers.Input(shape=INPUT_SHAPE),

    # Feature extractor: three conv stages, spatial size halves at each one.
    *conv_block(32),    # Output shape: (None, 16, 16, 32)
    *conv_block(64),    # Output shape: (None, 8, 8, 64)
    *conv_block(128),   # Output shape: (None, 4, 4, 128)

    # Classifier Head
    layers.Flatten(),   # Output shape: (None, 4*4*128 = 2048)
    layers.Dense(512, kernel_regularizer=l2(L2_LAMBDA)),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Dropout(0.5),  # Dropout for regularization (kept after the activation)
    # Softmax output: values are class probabilities, so the loss must NOT use from_logits=True.
    layers.Dense(NUM_CLASSES, activation='softmax'),  # Output shape: (None, NUM_CLASSES)
])

print("Model defined successfully with Batch Norm, L2, and Softmax output.")

# Print a summary of the model's layers and parameters
model.summary()

Defining the Keras Sequential model (with Batch Norm, L2, Softmax output)...
Model defined successfully with Batch Norm, L2, and Softmax output.


In [130]:
# Wire the model up for training.
# - loss: SparseCategoricalCrossentropy, i.e. multi-class cross-entropy that takes
#   integer labels (0-99) directly, no one-hot encoding required.
# - optimizer: Adam, stepping at LEARNING_RATE.
# - metrics: ['accuracy'] so classification accuracy is reported alongside the loss.

# Loss on probabilities: the model's last layer already applies softmax, so the
# loss is left at its default of treating predictions as probabilities
# (from_logits=False), spelled out here to make that coupling visible.
loss_fn = losses.SparseCategoricalCrossentropy(from_logits=False)

# Optimizer
optimizer = optimizers.Adam(learning_rate=LEARNING_RATE)

# Attach optimizer, loss and metrics to the model
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

print("Model compiled successfully")

Model compiled successfully


In [132]:
# Training the model using the `model.fit()` method.
#
# `train_dataset` / `test_dataset` were previously referenced without being defined
# anywhere in the notebook, so this cell failed on a fresh kernel.
# They are built here from the preprocessed arrays as `tf.data` pipelines:
# - from_tensor_slices: one (image, label) pair per element.
# - shuffle (train only): full-dataset buffer, reshuffled every epoch.
# - batch: BATCH_SIZE elements per step; the last partial batch is kept
#   (50000/64 -> 782 steps, 10000/64 -> 157 steps).
# - prefetch: overlaps input preparation with training.
# NOTE(review): the original pipeline cell is not available; confirm that no
# augmentation or other transformation was applied there.

train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=len(x_train), reshuffle_each_iteration=True)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

print(f"\nStarting training for {NUM_EPOCHS} epochs...")

# Keep the returned History object so per-epoch metrics can be inspected or plotted later.
history = model.fit(
    train_dataset,
    epochs=NUM_EPOCHS,
    # The test set doubles as validation data here, so the final test score is
    # not a fully held-out estimate.
    validation_data=test_dataset
)
print("Training finished!")


Starting training for 10 epochs...
Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 44ms/step - accuracy: 0.1387 - loss: 3.9824 - val_accuracy: 0.2715 - val_loss: 3.1300
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 41ms/step - accuracy: 0.3345 - loss: 2.7775 - val_accuracy: 0.3196 - val_loss: 2.8908
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 41ms/step - accuracy: 0.4109 - loss: 2.4431 - val_accuracy: 0.4180 - val_loss: 2.4425
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 43ms/step - accuracy: 0.4647 - loss: 2.2210 - val_accuracy: 0.3429 - val_loss: 2.9127
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 43ms/step - accuracy: 0.5064 - loss: 2.0592 - val_accuracy: 0.3872 - val_loss: 2.6904
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 43ms/step - accuracy: 0.5451 - loss: 1.9518 - val_accuracy: 0.4354

In [134]:
# Final scoring of the trained model with `model.evaluate()`.
# - Input: `test_dataset`, which must already exist in the kernel (it is not created in this cell).
# - Output: the loss followed by each compiled metric (here: accuracy), computed over the test set.

print("\nEvaluating the model on the test dataset...")

# Run the evaluation pass with a progress bar (verbose=1)
loss, accuracy = model.evaluate(test_dataset, verbose=1)

# Report loss as-is and accuracy as a percentage
print(f"\nTest Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy*100:.2f}%")


Evaluating the model on the test dataset...
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.4525 - loss: 2.6939  

Test Loss: 2.6729
Test Accuracy: 44.96%
