# SIGNA Chapter 6 Implementation
In this notebook, we explore the basic architecture of a CNN. Our design will include the following.
1. Kernel/Filter + Bias, producing a Feature Map, which is then passed through a ReLU Activation Function.
2. Pooling Layer with max pooling.
3. Finally, we flatten the pooled output and feed it into a Feed-Forward Network (with 4 inputs, a ReLU layer, and 2 outputs).
4. Optionally, we can apply SoftMax or ArgMax to make the classification output easier to read.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

import lightning as L
from torch.utils.data import TensorDataset, DataLoader

## Designing Our CNN

In [2]:
class myCNN(L.LightningModule):
    """Tiny CNN that sorts small single-channel images into two classes.

    Architecture:
        Part 1: one 3x3 convolution filter (+ bias) followed by ReLU.
        Part 2: 2x2 max pooling with stride 2.
        Part 3: feed-forward network with 4 inputs, one hidden ReLU node
                and 2 outputs.

    ``forward`` returns raw scores (logits); apply softmax/argmax outside
    the model when readable class probabilities are wanted.
    """

    def __init__(self):
        """Create the layers and the loss function."""
        super().__init__()

        # For Part 1
        # The input is black and white, so there is a single input channel
        # (an RGB image would need in_channels=3). out_channels is the number
        # of filters to learn. For a non-square kernel pass a tuple, e.g. (2, 3).
        self.conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)

        # For Part 2
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # For Part 3
        # in_features=4 matches a 6x6 image: 6x6 -> 4x4 after the 3x3
        # convolution -> 2x2 after pooling -> 4 values once flattened.
        self.input_to_hidden = nn.Linear(in_features=4, out_features=1)
        self.hidden_to_output = nn.Linear(in_features=1, out_features=2)

        # Cross-entropy is computed directly on the raw scores from forward()
        self.loss = nn.CrossEntropyLoss()

    def forward(self, input_data):
        """Run images through the network.

        Args:
            input_data: Image tensor shaped [batch, 1, height, width].
                A 3-D tensor is also accepted and is read as
                [batch, height, width] with the channel axis missing.

        Returns:
            Tensor shaped [batch, 2] holding one raw score per class.
        """
        # Conv2d reads a 3-D tensor as an *unbatched* [channels, height, width]
        # image, so a channel-less batch only works while batch size == 1.
        # Inserting the channel axis keeps that case identical and makes
        # larger channel-less batches work instead of raising a channel error.
        if input_data.dim() == 3:
            input_data = input_data.unsqueeze(1)

        # Part 1: convolution (+ bias) then ReLU -> feature map
        output_data = self.conv(input_data)
        output_data = F.relu(output_data)

        # Part 2: max pooling
        output_data = self.pool(output_data)

        # The pooled feature map is still a matrix per image; the linear
        # layers need a vector, so flatten every dimension except batch.
        output_data = torch.flatten(output_data, 1)

        # Part 3: feed-forward network
        output_data = self.input_to_hidden(output_data)
        output_data = F.relu(output_data)
        output_data = self.hidden_to_output(output_data)

        return output_data

    def configure_optimizers(self):
        """Return the optimizer Lightning should use: Adam over all parameters."""
        return Adam(self.parameters(), lr=0.001)

    def training_step(self, batch):
        """Compute and log the cross-entropy loss for one batch.

        Args:
            batch: An (inputs, labels) pair as produced by the DataLoader.

        Returns:
            The loss tensor that Lightning back-propagates.
        """
        inputs, labels = batch
        # Call the module itself (not .forward directly) so module hooks run
        outputs = self(inputs)
        loss = self.loss(outputs, labels)

        # Output training step loss
        self.log("CE Loss: ", loss)
        return loss

## Prepare Basic Training Data

In [3]:
# Two 6x6 black-and-white training images: 0 is a white pixel, 1 is a black pixel.
o_image = [
    [0, 0, 1, 1, 0, 0],
    [0, 1, 0, 0, 1, 0],
    [1, 0, 0, 0, 0, 1],
    [1, 0, 0, 0, 0, 1],
    [0, 1, 0, 0, 1, 0],
    [0, 0, 1, 1, 0, 0],
]

x_image = [
    [1, 0, 0, 0, 0, 1],
    [0, 1, 0, 0, 1, 0],
    [0, 0, 1, 1, 0, 0],
    [0, 0, 1, 1, 0, 0],
    [0, 1, 0, 0, 1, 0],
    [1, 0, 0, 0, 0, 1],
]

# Stack both images into a single float32 tensor of shape [2, 6, 6].
# NOTE(review): no channel axis is added here, so each size-1 batch from the
# loader below is a 3-D tensor of shape [1, 6, 6].
input_images = torch.tensor([o_image, x_image], dtype=torch.float32)

# One target row per image, in the same order: [1, 0] for the O, [0, 1] for the X.
input_labels = torch.tensor([[1.0, 0.0], [0.0, 1.0]], dtype=torch.float32)

# Pair every image with its label, then wrap the pairs in a DataLoader
# (default settings: one sample per batch, original order).
dataset = TensorDataset(input_images, input_labels)
dataloader = DataLoader(dataset)

# Sanity check: print each batch the loader hands out.
for batch_num, batch in enumerate(dataloader):
    images, labels = batch
    print("batch_num:", batch_num)
    print(images)
    print(labels)
    print()

batch_num: 0
tensor([[[0., 0., 1., 1., 0., 0.],
         [0., 1., 0., 0., 1., 0.],
         [1., 0., 0., 0., 0., 1.],
         [1., 0., 0., 0., 0., 1.],
         [0., 1., 0., 0., 1., 0.],
         [0., 0., 1., 1., 0., 0.]]])
tensor([[1., 0.]])

batch_num: 1
tensor([[[1., 0., 0., 0., 0., 1.],
         [0., 1., 0., 0., 1., 0.],
         [0., 0., 1., 1., 0., 0.],
         [0., 0., 1., 1., 0., 0.],
         [0., 1., 0., 0., 1., 0.],
         [1., 0., 0., 0., 0., 1.]]])
tensor([[0., 1.]])



## Training Our CNN: Part 1

In [4]:
# Start from a fresh, untrained network
model = myCNN()

# Let Lightning drive the optimisation loop for 100 epochs
# (one epoch = one pass over every batch in the dataloader)
trainer = L.Trainer(max_epochs=100)
trainer.fit(model=model, train_dataloaders=dataloader)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/usr/local/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

  | Name             | Type             | Params | Mode 
--------------------------------------------------------------
0 | conv             | Conv2d       

Epoch 99: 100%|██████████| 2/2 [00:00<00:00, 269.76it/s, v_num=18]

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 99: 100%|██████████| 2/2 [00:00<00:00, 189.77it/s, v_num=18]


In [5]:
# Evaluate model performance on the training images.
# This is inference only, so autograd is switched off: no computation graph
# is built and the printed tensors carry no grad_fn.
with torch.no_grad():
    for batch_num, (image, label) in enumerate(dataloader):

        # First, run the image through the model to get its raw class scores
        prediction = model(image)

        # Make the prediction easy to read: softmax turns the scores into
        # probabilities, torch.round() trims them to 2 decimals.
        # dim=1 normalises across the class scores within each row, so the
        # two probabilities of every image sum to 1.
        predicted_label = torch.round(torch.softmax(prediction, dim=1), decimals=2)

        # Print the predicted label next to the original label
        # so we see how well our CNN performed.
        print("predicted_label:", predicted_label)
        print("original label:", label)
        print("\n")

predicted_label: tensor([[0.4500, 0.5500]], grad_fn=<RoundBackward1>)
original label: tensor([[1., 0.]])


predicted_label: tensor([[0.3600, 0.6400]], grad_fn=<RoundBackward1>)
original label: tensor([[0., 1.]])




## Training Our CNN: Part 2

In [6]:
# Resume from the checkpoint written by the previous trainer.
# With the default checkpoint settings, "best" simply means "most recent".
checkpoint_callback = trainer.checkpoint_callback
path_to_checkpoint = checkpoint_callback.best_model_path

# max_epochs counts from epoch 0, so resuming a 100-epoch checkpoint
# with max_epochs=700 adds another 600 epochs.
trainer = L.Trainer(max_epochs=700)
trainer.fit(
    model=model,
    train_dataloaders=dataloader,
    ckpt_path=path_to_checkpoint,
)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at /notebooks/lightning_logs/version_18/checkpoints/epoch=99-step=200.ckpt
/usr/local/lib/python3.13/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:366: The dirpath has changed from '/notebooks/lightning_logs/version_18/checkpoints' to '/notebooks/lightning_logs/version_19/checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.

  | Name             | Type             | Params | Mode 
--------------------------------------------------------------
0 | conv             | Conv2d           | 10     | 

Epoch 699: 100%|██████████| 2/2 [00:00<00:00, 260.09it/s, v_num=19]

`Trainer.fit` stopped: `max_epochs=700` reached.


Epoch 699: 100%|██████████| 2/2 [00:00<00:00, 175.59it/s, v_num=19]


In [7]:
# Evaluate model performance.. again, now after the additional training.
# This is inference only, so autograd is switched off: no computation graph
# is built and the printed tensors carry no grad_fn.
with torch.no_grad():
    for batch_num, (image, label) in enumerate(dataloader):

        # First, run the image through the model to get its raw class scores
        prediction = model(image)

        # Make the prediction easy to read: softmax turns the scores into
        # probabilities, torch.round() trims them to 2 decimals.
        # dim=1 normalises across the class scores within each row, so the
        # two probabilities of every image sum to 1.
        predicted_label = torch.round(torch.softmax(prediction, dim=1), decimals=2)

        # Print the predicted label next to the original label
        # so we see how well our CNN performed.
        print("predicted_label:", predicted_label)
        print("original label:", label)
        print("\n")

predicted_label: tensor([[0.9900, 0.0100]], grad_fn=<RoundBackward1>)
original label: tensor([[1., 0.]])


predicted_label: tensor([[0.2300, 0.7700]], grad_fn=<RoundBackward1>)
original label: tensor([[0., 1.]])




## Using Some New Data

In [11]:
# The training O moved one pixel to the left and the training X moved one
# pixel to the right (pixels pushed past the edge are dropped).
shifted_o_image = [[0, 1, 1, 0, 0, 0],
                   [1, 0, 0, 1, 0, 0],
                   [0, 0, 0, 0, 1, 0],
                   [0, 0, 0, 0, 1, 0],
                   [1, 0, 0, 1, 0, 0],
                   [0, 1, 1, 0, 0, 0]]

shifted_x_image = [[0, 1, 0, 0, 0, 0],
                   [0, 0, 1, 0, 0, 1],
                   [0, 0, 0, 1, 1, 0],
                   [0, 0, 0, 1, 1, 0],
                   [0, 0, 1, 0, 0, 1],
                   [0, 1, 0, 0, 0, 0]]

# Create the batch tensor; its shape is [2, 6, 6] = [batch, height, width]
test_images = torch.tensor([shifted_o_image, shifted_x_image]).type(torch.float32)

# CNNs expect input in the format [batch, channels, height, width].
# unsqueeze(1) inserts a new dimension of size 1 at index position 1,
# changing the shape from [2, 6, 6] (channel missing) to [2, 1, 6, 6].
test_images = test_images.unsqueeze(1)  # or test_images[:, None, :, :]

# Now run prediction. Inference only, so autograd is switched off and the
# displayed tensor carries no grad_fn.
with torch.no_grad():
    predictions = model(test_images)

# And generate labels: dim=1 applies softmax across the two class scores of
# each image, so every row of the result sums to 1.
predicted_labels = torch.round(torch.softmax(predictions, dim=1), decimals=2)
predicted_labels

# Note that the correct labels should be
# [1, 0] for shifted O
# [0, 1] for shifted X

tensor([[0.4000, 0.6000],
        [0.2300, 0.7700]], grad_fn=<RoundBackward1>)