# Module 4 — Neural Network Fundamentals & Small CNNs (Expanded)

This notebook teaches:

- basic MLP on MNIST (Keras)
- small CNN on CIFAR-10 (Keras)
- a comparable small CNN (same convolutional stack, but without the dropout layers) trained with a hand-written loop in PyTorch
- visualizing filters and feature maps
- saving and loading models

Designed for classroom demos: cells run quickly (few epochs).

## 1 — Setup (install packages and imports)

In [1]:
# Install necessary packages
!pip -q install -U torch torchvision matplotlib --quiet

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models
print('TF version:', tf.__version__)
import torch
import torchvision
print('Torch version:', torch.__version__)

# fix random seeds for reproducibility
np.random.seed(42)
import random
random.seed(42)
import os
os.environ['PYTHONHASHSEED'] = '42'


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m899.7/899.7 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m594.3/594.3 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m72.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.0/88.0 MB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m954.8/954.8 kB[0m [31m66.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.1/193.1 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m70.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.6/63.6 MB[0m [31m37.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## 2 — MLP on MNIST (Keras) — quick demo

In [None]:
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Use small subsets for demo speed. Each 28x28 image is flattened into a
# 784-element vector and divided by 255.0.
x_train = x_train[:10000].reshape(-1, 28*28) / 255.0
y_train = y_train[:10000]
x_test = x_test[:2000].reshape(-1, 28*28) / 255.0
# Truncate the labels as well so x_test and y_test stay the same length;
# otherwise an evaluate/predict comparison against y_test would mismatch.
y_test = y_test[:2000]

def build_mlp():
    """Build and compile a small fully connected classifier.

    The network takes a flat vector of 28*28 values and stacks
    Dense(256, relu) -> Dropout(0.3) -> Dense(128, relu) -> Dense(10, softmax).

    Returns:
        A Keras ``Sequential`` model compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    model = models.Sequential([
        # `shape` must be a tuple: a bare int is not accepted as a shape by
        # Keras 3, and the tuple form also matches build_small_cnn().
        layers.Input(shape=(28*28,)),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(128, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    # Sparse loss: labels are passed as integer class ids, not one-hot vectors.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

mlp = build_mlp()
mlp.summary()
# validation_split=0.1 asks Keras to hold out a tenth of the training data
# for the val_* metrics plotted below.
history = mlp.fit(x_train, y_train, epochs=5, validation_split=0.1, batch_size=128)

# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='train_acc')
plt.plot(history.history['val_accuracy'], label='val_acc')
plt.xlabel('epoch'); plt.ylabel('accuracy')
plt.legend(); plt.title('MLP accuracy')
# Explicit show() renders the figure and avoids echoing the Text(...) repr
# of the last expression, as the CNN section already does.
plt.show()


## 3 — Small CNN on CIFAR-10 (Keras)

In [None]:
from tensorflow.keras.datasets import cifar10

# NOTE: this rebinds x_train / y_train / x_test / y_test from the MNIST section.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Use subsets for demo speed; cast to float32 and divide by 255.0.
x_train = x_train[:10000].astype('float32') / 255.0
y_train = y_train[:10000]
x_test = x_test[:2000].astype('float32') / 255.0
# Truncate the labels too so x_test and y_test always have matching lengths.
y_test = y_test[:2000]

def build_small_cnn():
    """Build and compile a small convolutional classifier for 32x32x3 inputs.

    Layout: two Conv2D(32) layers, max-pool, Dropout(0.25); two Conv2D(64)
    layers, max-pool; then Flatten -> Dense(128, relu) -> Dropout(0.5) ->
    Dense(10, softmax). All convolutions use 3x3 kernels, ReLU activation
    and 'same' padding.

    Returns:
        A Keras ``Sequential`` model compiled with Adam, sparse categorical
        cross-entropy loss and an accuracy metric.
    """
    # Options shared by all four convolution layers.
    conv_opts = dict(kernel_size=3, activation='relu', padding='same')

    stack = [
        layers.Input(shape=(32, 32, 3)),
        # --- first convolutional stage ---
        layers.Conv2D(32, **conv_opts),
        layers.Conv2D(32, **conv_opts),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        # --- second convolutional stage ---
        layers.Conv2D(64, **conv_opts),
        layers.Conv2D(64, **conv_opts),
        layers.MaxPooling2D(),
        # --- classifier head ---
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(10, activation='softmax'),
    ]

    net = models.Sequential(stack)
    net.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

cnn = build_small_cnn()
cnn.summary()
history = cnn.fit(x_train, y_train, epochs=6, batch_size=128, validation_split=0.1)

# Two panels side by side: loss curves on the left, accuracy curves on the right.
curves = history.history
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(10, 4))

ax_loss.plot(curves['loss'], label='train_loss')
ax_loss.plot(curves['val_loss'], label='val_loss')
ax_loss.legend()

ax_acc.plot(curves['accuracy'], label='train_acc')
ax_acc.plot(curves['val_accuracy'], label='val_acc')
ax_acc.legend()

plt.show()

# Evaluate
print('Evaluate on test subset:')
# Slice the labels to the number of test images actually kept.
test_labels = y_test[:len(x_test)]
print(cnn.evaluate(x_test, test_labels))


## 4 — Visualize filters and feature maps (Keras)

In [None]:
# Visualize first conv layer filters.
# Pick the layer by type rather than by position, so this keeps working if a
# non-convolutional layer is ever placed ahead of the first Conv2D.
first_conv = next(layer for layer in cnn.layers if isinstance(layer, layers.Conv2D))
weights = first_conv.get_weights()[0]  # shape (3,3,3,32)
print('Weights shape:', weights.shape)

# Normalize and plot first 8 filters.
# Each filter is min-max scaled to [0, 1] independently so imshow can render
# its three input channels as an RGB image.
fig, axes = plt.subplots(1, 8, figsize=(12, 3))
for i, ax in enumerate(axes):
    f = weights[:, :, :, i]
    f_min, f_max = f.min(), f.max()
    span = f_max - f_min
    # Guard against a constant filter, which would otherwise divide by zero
    # and produce NaNs.
    f_img = (f - f_min) / span if span > 0 else np.zeros_like(f)
    ax.imshow(f_img)
    ax.axis('off')
plt.suptitle('First conv filters (visualized)')
plt.show()

# Feature map output for one image
from tensorflow.keras import Model
img = x_test[0:1]  # slice (not index) to keep the leading batch dimension

# Select convolution layers by type instead of by substring of the layer name,
# which breaks as soon as a layer is given a custom name.
layer_outputs = [layer.output for layer in cnn.layers if isinstance(layer, layers.Conv2D)]
# `cnn.inputs` (plural) is used because `.input` on a Sequential model is not
# reliably available across Keras versions.
activation_model = Model(inputs=cnn.inputs, outputs=layer_outputs)
activations = activation_model.predict(img)
print('Number of conv layers activations:', len(activations))

# show first 6 feature maps of first conv layer
act = activations[0]
fig, axes = plt.subplots(1, 6, figsize=(12, 2))
for i, ax in enumerate(axes):
    ax.imshow(act[0, :, :, i], cmap='viridis')
    ax.axis('off')
plt.suptitle('Feature maps from first conv layer')
plt.show()


## 5 — Small CNN training loop in PyTorch (quick demo)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

# CIFAR-10 through torchvision; ToTensor() is the only transform applied.
transform = transforms.Compose([transforms.ToTensor()])
cifar_args = dict(root='/tmp/cifar', download=True, transform=transform)
trainset = datasets.CIFAR10(train=True, **cifar_args)
testset = datasets.CIFAR10(train=False, **cifar_args)

# Keep only the first 2000 training and first 500 test samples for speed.
train_subset = Subset(trainset, range(2000))
test_subset = Subset(testset, range(500))

# Shuffle the training batches only; the test order stays fixed.
train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_subset, batch_size=64, shuffle=False)

class SmallCNN(nn.Module):
    """Small convolutional classifier with ten output logits.

    ``conv`` holds two stages of (Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d(2))
    with 32 and then 64 channels; every convolution uses a 3x3 kernel with
    padding 1. ``fc`` flattens the result and applies Linear(64*8*8, 128) ->
    ReLU -> Linear(128, 10). The first Linear layer's size means a 3x32x32
    input is expected (two 2x2 poolings reduce 32 to 8). No softmax is applied;
    ``forward`` returns raw scores.
    """

    def __init__(self):
        super().__init__()

        feature_layers = [
            # stage 1: 3 -> 32 channels
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # stage 2: 32 -> 64 channels
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        head_layers = [
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the convolutional stack, then the classifier head, on ``x``."""
        return self.fc(self.conv(x))

# Use the GPU when PyTorch reports one as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_t = SmallCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_t.parameters(), lr=1e-3)

# training loop
for epoch in range(3):
    model_t.train()
    running_loss = 0.0
    for batch_imgs, batch_labels in train_loader:
        batch_imgs = batch_imgs.to(device)
        batch_labels = batch_labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = criterion(model_t(batch_imgs), batch_labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Reported value is the mean of the per-batch losses for this epoch.
    print(f'Epoch {epoch+1} train loss:', running_loss/len(train_loader))

# evaluation
model_t.eval()
correct, total = 0, 0
# no_grad(): gradients are not needed while scoring the test subset.
with torch.no_grad():
    for batch_imgs, batch_labels in test_loader:
        batch_imgs = batch_imgs.to(device)
        batch_labels = batch_labels.to(device)
        # Predicted class = index of the largest score along dim 1.
        predicted = model_t(batch_imgs).argmax(dim=1)
        hits = predicted == batch_labels
        correct += hits.sum().item()
        total += batch_labels.size(0)
print('PyTorch test accuracy (subset):', correct/total)


## 6 — Save models (Keras & PyTorch)

In [None]:
import os

# Both saves below write into this directory; create it first so they do not
# fail on machines where it does not exist yet.
SAVE_DIR = '/mnt/data'
os.makedirs(SAVE_DIR, exist_ok=True)

# Save Keras model (architecture + weights in a single HDF5 file)
keras_path = os.path.join(SAVE_DIR, 'keras_small_cnn.h5')
cnn.save(keras_path)
print(f'Saved Keras model to {keras_path}')

# Save PyTorch model state dict (parameters only; the SmallCNN class is
# needed again to rebuild the model before loading these weights)
torch_path = os.path.join(SAVE_DIR, 'torch_small_cnn.pth')
torch.save(model_t.state_dict(), torch_path)
print(f'Saved PyTorch model to {torch_path}')


## 7 — Exercises & Instructor Notes

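- Compare MLP vs CNN on visual tasks: as written, the MLP is trained on MNIST and the CNN on CIFAR-10, so first train both on the same dataset (e.g. feed flattened CIFAR-10 images to the MLP), then observe the accuracy differences.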
- Increase dataset size and epochs for better results; use data augmentation.
- Visualize misclassified images and discuss potential causes (label noise, insufficient capacity).
- Optional: convert Keras model to TFLite for edge demo.