# Lab 4.3: Introduction to Convolutional Neural Networks

## Duration: 45 minutes

## Learning Objectives
By the end of this lab, you will be able to:
- Understand why CNNs are perfect for image data
- Build your first CNN using TensorFlow/Keras
- Compare CNN performance with regular Dense networks
- Use Conv2D, MaxPooling, and Flatten layers
- Apply CNNs to real image classification problems

## Prerequisites
- **Lab 4.1 & 4.2 completed** (TensorFlow basics and deep networks)
- Understanding of neural networks
- Basic knowledge of images as data

## Key Concepts
- **Convolution**: Feature detection using filters/kernels
- **Pooling**: Dimensionality reduction and translation invariance
- **Feature Maps**: How CNNs detect patterns at different scales
- **Spatial Hierarchy**: Learning from simple edges to complex objects
- **Parameter Sharing**: Why CNNs need fewer parameters than Dense layers

## Setup and Introduction

Let's start by understanding why we need CNNs for image data:

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
import warnings
warnings.filterwarnings('ignore')

# Seed NumPy and TensorFlow so repeated runs start from the same random state
np.random.seed(42)
tf.random.set_seed(42)

print("Lab 4.3: Introduction to Convolutional Neural Networks")
print("=" * 60)
print(f"TensorFlow version: {tf.__version__}")

# Fetch MNIST as (train images, train labels), (test images, test labels)
print("\nLoading MNIST dataset...")
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Scale the pixel intensities into [0, 1] as float32
X_train, X_test = (
    split.astype('float32') / 255.0 for split in (X_train, X_test)
)

# Report the shape of every array we just loaded
for description, array in (
    ("Training data", X_train),
    ("Training labels", y_train),
    ("Test data", X_test),
    ("Test labels", y_test),
):
    print(f"{description} shape: {array.shape}")
print(f"Number of classes: {np.unique(y_train).size}")

# Show the first ten training digits in a 2x5 grid, titled with their labels
print("\n📊 Sample MNIST Images:")
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(12, 5))
for ax, digit, label in zip(axes.flat, X_train, y_train):
    ax.imshow(digit, cmap='gray')
    ax.set_title(f'Label: {label}')
    ax.axis('off')
plt.tight_layout()
plt.show()

print("\n✅ Data loaded and ready for CNN experiments!")

In [None]:
# Baseline: a fully connected network trained on flattened MNIST images.
# NOTE(review): a copy of this cell also appears later in the file under the
# "Step 1" heading; running both re-creates and re-trains `dense_model`.
print("The Dense Network Approach:")
print("=" * 40)

# Dense layers take 1-D feature vectors, so collapse each image into one row
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)

print(f"Original image shape: {X_train.shape[1:]}")
print(f"Flattened shape: {X_train_flat.shape[1:]}")
print(f"Total pixels per image: {X_train_flat.shape[1]}")

# Assemble the network layer by layer: 784 inputs -> 128 -> 64 -> 10 classes
dense_model = keras.Sequential(name='Dense_Network')
dense_model.add(layers.Dense(128, activation='relu', input_shape=(784,)))
dense_model.add(layers.Dense(64, activation='relu'))
dense_model.add(layers.Dense(10, activation='softmax'))

# Integer labels -> sparse categorical cross-entropy
dense_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

print("\nDense Model Summary:")
dense_model.summary()

print(f"\nTotal parameters in Dense model: {dense_model.count_params():,}")
print("\n❗ Problems with Dense approach:")
for drawback in (
    "Loses spatial information (pixel relationships)",
    "Treats each pixel independently",
    "Lots of parameters (prone to overfitting)",
    "Not translation invariant",
    "Computationally expensive",
):
    print(f"  • {drawback}")

# Short training run; the test split doubles as the validation set here
print("\nTraining Dense network (this may take a moment...)")
dense_history = dense_model.fit(
    X_train_flat,
    y_train,
    epochs=5,
    batch_size=128,
    validation_data=(X_test_flat, y_test),
    verbose=1,
)

# Validation accuracy after the final epoch is used for the later comparison
dense_accuracy = dense_history.history['val_accuracy'][-1]
print(f"Dense network accuracy: {dense_accuracy:.4f}")

## Step 5: Building Different CNN Architectures

Let's experiment with different CNN designs and see how they perform:

In [None]:
# NOTE: this cell uses `cnn_model`, `X_train_cnn` and `y_train`. `cnn_model`
# and `X_train_cnn` are created by the "The CNN Approach" cell, so that cell
# must have been run before this one.


# Function to visualize feature maps
def visualize_feature_maps(model, image, layer_name):
    """Visualize what a specific layer detects.

    Builds a helper model that maps the input of ``model`` to the output of
    the layer called ``layer_name``, runs ``image`` through it, and plots up
    to 16 of the resulting channels in a 4x4 grid.

    Args:
        model: A built Keras model containing a layer named ``layer_name``.
        image: A single image that can be reshaped to ``(1, 28, 28, 1)``.
        layer_name: Name of the layer whose activations should be shown.

    Returns:
        The array returned by ``predict`` for the chosen layer; its last
        axis indexes the filters that are plotted.
    """
    # Create a model that outputs the feature maps. `model.inputs` is used
    # (rather than `model.input`) because it is the form the Keras guide
    # uses for feature extraction from a Sequential model.
    layer_output = model.get_layer(layer_name).output
    feature_model = keras.Model(inputs=model.inputs, outputs=layer_output)

    # Get feature maps for our image
    image = image.reshape(1, 28, 28, 1)  # Add batch dimension
    feature_maps = feature_model.predict(image, verbose=0)

    # Plot the feature maps
    n_features = min(16, feature_maps.shape[-1])  # Show up to 16 features
    fig, axes = plt.subplots(4, 4, figsize=(12, 10))

    for i in range(n_features):
        ax = axes[i // 4, i % 4]
        ax.imshow(feature_maps[0, :, :, i], cmap='viridis')
        ax.set_title(f'Filter {i+1}')
        ax.axis('off')

    # Remove unused subplots
    for i in range(n_features, 16):
        axes[i // 4, i % 4].remove()

    plt.suptitle(f'Feature Maps from {layer_name}', fontsize=16)
    plt.tight_layout()
    plt.show()

    return feature_maps


# Visualize features for a sample image
sample_image = X_train_cnn[0]  # First training image
print(f"Analyzing image of digit: {y_train[0]}")

# Show original image
plt.figure(figsize=(6, 4))
plt.subplot(1, 2, 1)
plt.imshow(sample_image.squeeze(), cmap='gray')
plt.title('Original Image')
plt.axis('off')

# Show what the model predicts
prediction = cnn_model.predict(sample_image.reshape(1, 28, 28, 1), verbose=0)
predicted_class = np.argmax(prediction[0])
confidence = prediction[0][predicted_class]

plt.subplot(1, 2, 2)
plt.bar(range(10), prediction[0])
plt.title(f'Predictions (Predicted: {predicted_class}, Confidence: {confidence:.2%})')
plt.xlabel('Digit Class')
plt.ylabel('Probability')
plt.xticks(range(10))
plt.tight_layout()
plt.show()

print("\n🔍 Understanding Feature Maps:")
print("Feature maps show what each filter detects:")
print("  • Bright areas = filter strongly activated")
print("  • Dark areas = filter not activated")
print("  • Different filters detect different patterns")

# Visualize first convolutional layer.
# Look the layer up by type instead of the auto-generated name 'conv2d':
# Keras numbers unnamed layers with a running counter, so re-creating the
# model (e.g. re-running its cell) yields names such as 'conv2d_3'.
first_conv_name = next(
    layer.name for layer in cnn_model.layers if isinstance(layer, layers.Conv2D)
)
print("\nFirst Convolutional Layer - Basic Features (edges, corners):")
feature_maps_1 = visualize_feature_maps(cnn_model, sample_image, first_conv_name)

## Step 4: Visualizing CNN Features

Let's see what the CNN actually learns to detect:

In [None]:
# Walk through a small CNN layer by layer and print how each layer changes
# the shape of the data flowing through it.
print("Understanding CNN Layers:")
print("=" * 50)

# Let's build a simple CNN and examine each layer
simple_cnn = keras.Sequential([
    layers.Conv2D(8, (3, 3), activation='relu', input_shape=(28, 28, 1), name='conv1'),
    layers.MaxPooling2D((2, 2), name='pool1'),
    layers.Conv2D(16, (3, 3), activation='relu', name='conv2'),
    layers.MaxPooling2D((2, 2), name='pool2'),
    layers.Flatten(name='flatten'),
    layers.Dense(10, activation='softmax', name='classifier')
])

print("Layer-by-layer analysis:")
print("-" * 30)

# Trace through the network
input_shape = (28, 28, 1)
print(f"Input: {input_shape} → Raw 28x28 grayscale image")

# Propagate the shape through the layers one step at a time. This avoids
# building a throwaway Sequential model for every prefix of the network
# just to read its output shape.
current_shape = (None,) + input_shape  # None stands for the batch dimension
for layer in simple_cnn.layers:
    current_shape = tuple(layer.compute_output_shape(current_shape))
    output_shape = current_shape[1:]  # Remove batch dimension

    # Dispatch on the layer type rather than on substrings of its name, so
    # the description stays correct however the layers are named.
    if isinstance(layer, layers.Conv2D):
        print(f"{layer.name}: {output_shape} → {layer.filters} filters of {layer.kernel_size} detecting features")
    elif isinstance(layer, layers.MaxPooling2D):
        print(f"{layer.name}: {output_shape} → Downsample by {layer.pool_size}, keep strongest signals")
    elif isinstance(layer, layers.Flatten):
        print(f"{layer.name}: {output_shape} → Convert 2D feature maps to 1D vector")
    elif isinstance(layer, layers.Dense):
        print(f"{layer.name}: {output_shape} → Final classification into {layer.units} classes")

print("\n🔍 Key Insights:")
print("  • Each Conv2D layer detects increasingly complex features")
print("  • MaxPooling reduces size while keeping important information")
print("  • Flatten converts spatial features to vector for classification")
print("  • Final Dense layer maps features to class probabilities")

## Step 3: Understanding CNN Components

Let's dive deeper into what makes CNNs work so well:

In [None]:
# Build and train a small CNN on MNIST, then compare it with the dense
# baseline. Relies on `dense_model` and `dense_accuracy` from the dense
# network cell, and on `X_train`, `y_train`, `X_test`, `y_test` from setup.
print("The CNN Approach:")
print("=" * 40)

# For CNNs, we keep the spatial structure.
# Reshape for CNN (add channel dimension) *before* reporting the input
# shape, so the printed shape really is (height, width, channels).
X_train_cnn = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test_cnn = X_test.reshape(X_test.shape[0], 28, 28, 1)

print(f"CNN input shape: {X_train_cnn.shape[1:]} (height, width, channels)")
print("We DON'T flatten the images!")
print(f"CNN data shape: {X_train_cnn.shape}")

# Build a CNN
cnn_model = keras.Sequential([
    # First convolutional block
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),

    # Second convolutional block
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Third convolutional block
    layers.Conv2D(64, (3, 3), activation='relu'),

    # Flatten and classify
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
], name='CNN_Network')

# Labels are integer class ids, hence the sparse cross-entropy loss
cnn_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("\nCNN Model Summary:")
cnn_model.summary()

cnn_params = cnn_model.count_params()
dense_params = dense_model.count_params()

print(f"\nTotal parameters in CNN model: {cnn_params:,}")
print("\n✅ Advantages of CNN approach:")
print("  • Preserves spatial relationships")
print("  • Parameter sharing (same filter across image)")
print("  • Translation invariant")
print("  • Hierarchical feature learning")
print("  • Fewer parameters than Dense")

# Train the CNN with the same epochs and batch size as the dense baseline
print("\nTraining CNN (this will take a moment...)")
cnn_history = cnn_model.fit(
    X_train_cnn, y_train,
    validation_data=(X_test_cnn, y_test),
    epochs=5,
    batch_size=128,
    verbose=1
)

# Validation accuracy after the final epoch
cnn_accuracy = cnn_history.history['val_accuracy'][-1]
print(f"CNN accuracy: {cnn_accuracy:.4f}")

# Compare the results
print("\n" + "=" * 60)
print("COMPARISON RESULTS:")
print("=" * 60)
print(f"Dense Network Accuracy: {dense_accuracy:.4f}")
print(f"CNN Network Accuracy:   {cnn_accuracy:.4f}")
print(f"Improvement: {((cnn_accuracy - dense_accuracy) * 100):.2f} percentage points")
print(f"Parameters - Dense: {dense_params:,}")
print(f"Parameters - CNN:   {cnn_params:,}")

# Only announce the win when the measured numbers support it; training is
# stochastic, so the conclusion should follow from this run's results.
if cnn_accuracy > dense_accuracy and cnn_params < dense_params:
    print("\n🎉 CNN wins with better accuracy AND fewer parameters!")
else:
    print("\nℹ️ This run did not show both better accuracy and fewer parameters "
          "for the CNN - compare the numbers above.")

## Step 2: The CNN Solution - Convolutional Layers

Now let's build a CNN and see why it's so much better for image data:

## Step 1: The Problem with Dense Layers for Images

Let's first see what happens when we use a regular Dense network on image data:

In [None]:
# Step 1 baseline: a fully connected network trained on flattened MNIST images.
# NOTE(review): a copy of this cell also appears earlier in the file, right
# after the setup cell; running both re-creates and re-trains `dense_model`.
print("The Dense Network Approach:")
print("=" * 40)

# Dense layers take 1-D feature vectors, so collapse each image into one row
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)

print(f"Original image shape: {X_train.shape[1:]}")
print(f"Flattened shape: {X_train_flat.shape[1:]}")
print(f"Total pixels per image: {X_train_flat.shape[1]}")

# Assemble the network layer by layer: 784 inputs -> 128 -> 64 -> 10 classes
dense_model = keras.Sequential(name='Dense_Network')
dense_model.add(layers.Dense(128, activation='relu', input_shape=(784,)))
dense_model.add(layers.Dense(64, activation='relu'))
dense_model.add(layers.Dense(10, activation='softmax'))

# Integer labels -> sparse categorical cross-entropy
dense_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

print("\nDense Model Summary:")
dense_model.summary()

print(f"\nTotal parameters in Dense model: {dense_model.count_params():,}")
print("\n❗ Problems with Dense approach:")
for drawback in (
    "Loses spatial information (pixel relationships)",
    "Treats each pixel independently",
    "Lots of parameters (prone to overfitting)",
    "Not translation invariant",
    "Computationally expensive",
):
    print(f"  • {drawback}")

# Short training run; the test split doubles as the validation set here
print("\nTraining Dense network (this may take a moment...)")
dense_history = dense_model.fit(
    X_train_flat,
    y_train,
    epochs=5,
    batch_size=128,
    validation_data=(X_test_flat, y_test),
    verbose=1,
)

# Validation accuracy after the final epoch is used for the later comparison
dense_accuracy = dense_history.history['val_accuracy'][-1]
print(f"Dense network accuracy: {dense_accuracy:.4f}")