## Learning classic ConvNet architectures (and their building-block layers), following Andrew Ng's Deep Learning Specialization (DLS)

In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, models

In [3]:
# A single 10x10 matrix of standard-normal samples (no batch axis yet).
x = np.random.randn(10, 10)

# Prepend the batch axis: (10, 10) -> (1, 10, 10).
x_expanded = x[np.newaxis, ...]

# Build the Flatten layer and run the batched array through it.
flatten_layer = layers.Flatten()
flattened = flatten_layer(x_expanded)

# Report the shape at every stage of the demo.
for label, stage_shape in (
    ("Original shape:", x.shape),
    ("After expanding:", x_expanded.shape),
    ("Flattened shape:", flattened.shape),
):
    print(label, stage_shape)

Original shape: (10, 10)
After expanding: (1, 10, 10)
Flattened shape: (1, 100)


### LeNet-5

In [None]:
# LeNet-5 written as an ordered list of layers, so the same objects can be
# handed to Sequential here and indexed one by one afterwards.
# Trailing comments give the per-sample output shape of each entry.
lenet_5_layers = [
    layers.Input(shape=(32, 32, 1)),                                    # 32 x 32 x 1
    layers.Conv2D(filters=6, kernel_size=(5, 5), activation='tanh'),    # 28 x 28 x 6
    layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2)),          # 14 x 14 x 6
    layers.Conv2D(filters=16, kernel_size=(5, 5), activation='tanh'),   # 10 x 10 x 16
    layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2)),          #  5 x  5 x 16
    layers.Conv2D(filters=120, kernel_size=(5, 5), activation='tanh'),  #  1 x  1 x 120
    layers.Flatten(),                                                   # 120
    layers.Dense(units=84, activation='tanh'),                          # 84
    layers.Dense(units=10, activation='softmax'),                       # 10 class probabilities
]

# Wrap the list in a Sequential model and print its layer/parameter table.
model = models.Sequential(lenet_5_layers, name="LeNet-5")
model.summary()

Step through the individual layers one at a time to see how each one transforms the shape of its input.

In [None]:
# Input batch: one 32x32 grayscale image. The leading 1 is the batch dimension.
input_batch = np.random.rand(1, 32, 32, 1).astype(np.float32)

# Manual forward pass. layer_outputs[i] holds the output of lenet_5_layers[i];
# slot 0 is the raw input, because entry 0 of the list is the Input placeholder
# rather than a callable layer.
layer_outputs = [None] * len(lenet_5_layers)
layer_outputs[0] = input_batch

print(f"Layer 0: Input Layer | Output Shape: {layer_outputs[0].shape} | Nodes: -")

for idx in range(1, len(lenet_5_layers)):
    layer = lenet_5_layers[idx]
    layer_outputs[idx] = layer(layer_outputs[idx - 1])
    shape = layer_outputs[idx].shape

    # "Nodes" = number of activations the layer emits for one sample.
    if isinstance(layer, (layers.Conv2D, layers.AveragePooling2D)):
        # Feature maps: height * width * channels. Conv layers are counted the
        # same way as pooling layers so the column is comparable row to row
        # (reporting only the filter count would make a conv layer look
        # smaller than the pooling layer that follows it).
        nodes = shape[1] * shape[2] * shape[3]
    elif isinstance(layer, layers.Dense):
        nodes = layer.units
    elif isinstance(layer, layers.Flatten):
        nodes = shape[-1]
    else:
        nodes = "-"

    print(f"Layer {idx}: {layer.__class__.__name__:<20} | Output Shape: {shape} | Nodes: {nodes}")

### AlexNet

In [None]:
# AlexNet-style stack for a single-channel 227x227 input, kept as an ordered
# list so individual entries can be indexed afterwards.
# Trailing comments: list index, then per-sample output shape.
alexnet_layers = [
    layers.Input(shape=(227, 227, 1)),                                                    #  0: 227 x 227 x 1
    layers.Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu'),   #  1: 55 x 55 x 96
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),                                #  2: 27 x 27 x 96
    layers.Conv2D(filters=256, kernel_size=(5, 5), padding='same', activation='relu'),    #  3: 27 x 27 x 256
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),                                #  4: 13 x 13 x 256
    layers.Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu'),    #  5: 13 x 13 x 384
    layers.Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu'),    #  6: 13 x 13 x 384
    layers.Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu'),    #  7: 13 x 13 x 256
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),                                #  8: 6 x 6 x 256
    layers.Flatten(),                                                                     #  9: 9216
    layers.Dense(units=4096, activation='relu'),                                          # 10: 4096
    layers.Dense(units=4096, activation='relu'),                                          # 11: 4096
    layers.Dense(units=1000, activation='softmax'),                                       # 12: 1000 class probabilities
]

# Wrap the list in a Sequential model and print its layer/parameter table.
model = models.Sequential(alexnet_layers, name="AlexNet")
model.summary()

In [None]:
# Simulated grayscale image (1 sample). float32 is used, as in the LeNet-5
# walkthrough: the layers are created without a dtype argument, so (assuming
# the default Keras dtype policy) a float16 input would just be upcast again
# and would only lose input precision.
input_batch = np.random.rand(1, 227, 227, 1).astype(np.float32)

# Manual forward pass with output shape and node count.
# layer_outputs[i] holds the output of alexnet_layers[i]; slot 0 is the raw
# input because entry 0 of the list is the Input placeholder.
layer_outputs = [None] * len(alexnet_layers)
layer_outputs[0] = input_batch

# The header row uses the same format spec as the loop so the columns line up.
print(f"Layer {0:2}: {'Input Layer':<20} | Output Shape: {layer_outputs[0].shape} | Nodes: -")

for idx in range(1, len(alexnet_layers)):
    layer = alexnet_layers[idx]
    layer_outputs[idx] = layer(layer_outputs[idx - 1])
    shape = layer_outputs[idx].shape

    # "Nodes" = number of activations the layer emits for one sample.
    if isinstance(layer, (layers.Conv2D, layers.MaxPooling2D)):
        nodes = shape[1] * shape[2] * shape[3]  # height * width * channels
    elif isinstance(layer, layers.Dense):
        nodes = layer.units
    elif isinstance(layer, layers.Flatten):
        nodes = shape[-1]
    else:
        nodes = "-"

    print(f"Layer {idx:2}: {layer.__class__.__name__:<20} | Output Shape: {shape} | Nodes: {nodes}")


### VGG-16

In [4]:
# VGG-16 layer sequence (simplified for grayscale: input shape (224, 224, 1)).
# The convolutional part is five stages; each stage is a run of 3x3 'same'
# ReLU convolutions followed by one 2x2 / stride-2 max-pool.
#
#   list index   contents
#   0            Input
#   1-3          2 x Conv(64)  + pool
#   4-6          2 x Conv(128) + pool
#   7-10         3 x Conv(256) + pool
#   11-14        3 x Conv(512) + pool
#   15-18        3 x Conv(512) + pool
#   19-22        Flatten, Dense(4096), Dense(4096), Dense(1000, softmax)
vgg16_stages = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]  # (conv count, filters)

vgg16_layers = [layers.Input(shape=(224, 224, 1))]
for conv_count, n_filters in vgg16_stages:
    # Layers are created in list order: the stage's convolutions, then its pool.
    vgg16_layers.extend(
        layers.Conv2D(n_filters, kernel_size=3, padding='same', activation='relu')
        for _ in range(conv_count)
    )
    vgg16_layers.append(layers.MaxPooling2D(pool_size=2, strides=2))

# Classifier head.
vgg16_layers.extend([
    layers.Flatten(),
    layers.Dense(4096, activation='relu'),
    layers.Dense(4096, activation='relu'),
    layers.Dense(1000, activation='softmax'),
])

# Wrap the list in a Sequential model and print its layer/parameter table.
model = models.Sequential(vgg16_layers, name="VGG-16")
model.summary()

2025-03-27 20:05:12.509616: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 411041792 exceeds 10% of free system memory.
2025-03-27 20:05:12.943534: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 411041792 exceeds 10% of free system memory.
2025-03-27 20:05:13.045083: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 411041792 exceeds 10% of free system memory.


In [5]:
# Simulated grayscale input image (1 sample). float32 is used, as in the
# LeNet-5 walkthrough: the layers are created without a dtype argument, so
# (assuming the default Keras dtype policy) a float16 input would just be
# upcast again and would only lose input precision.
input_batch = np.random.rand(1, 224, 224, 1).astype(np.float32)

# Manual forward pass. layer_outputs[i] holds the output of vgg16_layers[i];
# slot 0 is the raw input because entry 0 of the list is the Input placeholder.
layer_outputs = [None] * len(vgg16_layers)
layer_outputs[0] = input_batch

# The header row uses the same format spec as the loop so the columns line up.
print(f"Layer {0:2}: {'Input Layer':<20} | Output Shape: {layer_outputs[0].shape} | Nodes: -")

for idx in range(1, len(vgg16_layers)):
    layer = vgg16_layers[idx]
    layer_outputs[idx] = layer(layer_outputs[idx - 1])
    shape = layer_outputs[idx].shape

    # "Nodes" = number of activations the layer emits for one sample.
    if isinstance(layer, (layers.Conv2D, layers.MaxPooling2D)):
        nodes = shape[1] * shape[2] * shape[3]  # height * width * channels
    elif isinstance(layer, layers.Flatten):
        nodes = shape[-1]
    elif isinstance(layer, layers.Dense):
        nodes = layer.units
    else:
        nodes = "-"

    print(f"Layer {idx:2}: {layer.__class__.__name__:<20} | Output Shape: {shape} | Nodes: {nodes}")

Layer  0: Input Layer           | Output Shape: (1, 224, 224, 1) | Nodes: -
Layer  1: Conv2D               | Output Shape: (1, 224, 224, 64) | Nodes: 3211264
Layer  2: Conv2D               | Output Shape: (1, 224, 224, 64) | Nodes: 3211264
Layer  3: MaxPooling2D         | Output Shape: (1, 112, 112, 64) | Nodes: 802816
Layer  4: Conv2D               | Output Shape: (1, 112, 112, 128) | Nodes: 1605632
Layer  5: Conv2D               | Output Shape: (1, 112, 112, 128) | Nodes: 1605632
Layer  6: MaxPooling2D         | Output Shape: (1, 56, 56, 128) | Nodes: 401408
Layer  7: Conv2D               | Output Shape: (1, 56, 56, 256) | Nodes: 802816
Layer  8: Conv2D               | Output Shape: (1, 56, 56, 256) | Nodes: 802816
Layer  9: Conv2D               | Output Shape: (1, 56, 56, 256) | Nodes: 802816
Layer 10: MaxPooling2D         | Output Shape: (1, 28, 28, 256) | Nodes: 200704
Layer 11: Conv2D               | Output Shape: (1, 28, 28, 512) | Nodes: 401408
Layer 12: Conv2D               | 

### Tiny ResNet (ResNet-18 Style)

In [7]:
def simple_residual_block(x, filters):
    """Apply a two-convolution residual block to ``x``.

    Main path: Conv(3x3, 'same') -> BatchNorm -> ReLU -> Conv(3x3, 'same')
    -> BatchNorm. The shortcut is added to the main path and a final ReLU
    is applied to the sum.

    Args:
        x: Input feature-map tensor with channels on the last axis.
        filters: Number of output channels for both 3x3 convolutions.

    Returns:
        The block's output tensor, with ``filters`` channels on the last axis.
    """
    shortcut = x  # Save input for skip connection

    x = layers.Conv2D(filters, kernel_size=3, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    x = layers.Conv2D(filters, kernel_size=3, padding='same')(x)
    x = layers.BatchNormalization()(x)

    # The Add below needs both operands to have the same channel count. When the
    # incoming tensor has a different number of channels than `filters`,
    # project the shortcut with a 1x1 convolution. When they already match,
    # the shortcut stays an identity and no extra layers are created.
    if shortcut.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, kernel_size=1, padding='same')(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)

    x = layers.Add()([x, shortcut])  # Add skip connection
    x = layers.ReLU()(x)
    return x


In [8]:
def build_mini_resnet(input_shape=(32, 32, 3), num_classes=10):
    """Assemble a small residual classifier as a functional Keras model.

    Layout: a 64-filter 3x3 convolution stem (Conv -> BatchNorm -> ReLU),
    two residual blocks of 64 filters each, global average pooling, and a
    softmax Dense layer.

    Args:
        input_shape: Shape of one input sample (without the batch axis).
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``tf.keras.Model`` named "MiniResNet".
    """
    inputs = layers.Input(shape=input_shape)

    # Stem
    features = layers.Conv2D(64, 3, padding='same')(inputs)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU()(features)

    # Two stacked residual blocks, 64 filters each
    for _ in range(2):
        features = simple_residual_block(features, 64)

    # Classification head
    pooled = layers.GlobalAveragePooling2D()(features)
    outputs = layers.Dense(num_classes, activation='softmax')(pooled)

    return tf.keras.Model(inputs, outputs, name="MiniResNet")

# Build the network with its default arguments, attach the optimizer, loss and
# metric, then print the layer/parameter table.
model = build_mini_resnet()
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()