# 🏞 Convolutional Neural Network

This notebook is an **unofficial PyTorch implementation** of the excellent [Keras example](https://github.com/davidADSP/Generative_Deep_Learning_2nd_Edition/blob/main/notebooks/02_deeplearning/02_cnn/cnn.ipynb) of a convolutional neural network, originally created by David Foster as part of the companion code for the excellent book [Generative Deep Learning, 2nd Edition](https://www.oreilly.com/library/view/generative-deep-learning/9781098134174/).


_The original code is available [here](https://github.com/davidADSP/Generative_Deep_Learning_2nd_Edition) and is licensed under the Apache License 2.0._
_This implementation is distributed under the Apache License 2.0. See the LICENSE file for details._

In this notebook, we'll walk through the steps required to train your own convolutional neural network (CNN) on the CIFAR-10 dataset using PyTorch.

In [None]:
# IPython magics: reload imported modules automatically before each cell runs,
# so edits to local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os

# Get the working directory and the current notebook directory.
# NOTE(review): exp_dir is only correct if the kernel's working directory is the
# repository root -- confirm how the notebook is launched.
working_dir = os.getcwd()
exp_dir = os.path.join(working_dir, "notebooks/02_deeplearning/02_cnn/")

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

import torch.nn as nn
import torch.optim as optim
from torchinfo import summary

import numpy as np 
import matplotlib.pyplot as plt
import math

from notebooks.utils import display

## 0. Parameters <a name="parameters"></a>

In [None]:
# Number of target classes; sizes the output of the model's final linear layer
NUM_CLASSES: int = 10
# Mini-batch size shared by the training and test data loaders
batch_size: int = 32

## 1. Prepare the Data <a name="prepare"></a>

In [None]:
# Directory where torchvision downloads and caches the dataset files.
# Built with os.path.join (like exp_dir) instead of string concatenation.
data_dir = os.path.join(working_dir, "data")

Note: the labels here are not one-hot encoded; they are kept as integer class indices.

In [None]:
# Define the transformations for the data: ToTensor converts each PIL image
# to a float tensor (no extra normalization is applied here).
transform = transforms.Compose([
    transforms.ToTensor(),
])

# The PyTorch loss function uses the integer labels directly, so there is
# no need to convert them to a one-hot encoding.
trainset = torchvision.datasets.CIFAR10(
    root=data_dir, train=True, download=True, transform=transform
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2
)

# Use data_dir for the test split as well, so both splits share a single
# download location instead of a path relative to the current directory.
testset = torchvision.datasets.CIFAR10(
    root=data_dir, train=False, download=True, transform=transform
)
# No shuffling for evaluation: the order of the test batches stays fixed.
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2
)

In [None]:
# Load the first batch of images: a fresh iterator over the (shuffled) train
# loader yields one (images, labels) pair per call to next().
dataiter = iter(trainloader)
images, lables = next(dataiter)

In [None]:
# Inspect the batch. Only the last expression of a cell is echoed, so the
# type has to be printed explicitly to be visible.
print(type(images))
# Expected to be (batch_size, 3, 32, 32) for CIFAR-10 after ToTensor -- TODO confirm
print(images.shape)

In [None]:
# Show the first ten images of the batch (display is the helper imported from
# notebooks.utils) followed by their integer class labels.
display(images[:10])
print(lables[:10])

## 2. Build the model <a name="build"></a>

In [None]:
class CNNModel(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    The network stacks four 3x3 convolutions with 32, 32, 64 and 64 output
    channels. The second and fourth use stride 2, halving the spatial size
    each time (32 -> 16 -> 8). Every convolution is followed by batch
    normalization and LeakyReLU. The feature map is then flattened and fed
    through a 128-unit linear layer (batch norm + LeakyReLU) and a final
    linear layer that returns raw class logits.

    Args:
        num_classes: Number of output logits. When omitted, the module-level
            ``NUM_CLASSES`` constant is used.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Fall back to the notebook-level constant.
            num_classes = NUM_CLASSES

        # Block 1: 3 -> 32 channels, spatial size preserved (32x32).
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3,
                               stride=1, padding='same')
        self.bn1 = nn.BatchNorm2d(32)

        # Block 2: stride-2 downsampling, 32x32 -> 16x16. padding='same' is
        # only available for stride 1, so the padding is computed explicitly.
        p = self._get_padding_size(32, 2, 3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3,
                               stride=2, padding=p)
        self.bn2 = nn.BatchNorm2d(32)

        # Block 3: 32 -> 64 channels, spatial size preserved (16x16).
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3,
                               stride=1, padding='same')
        self.bn3 = nn.BatchNorm2d(64)

        # Block 4: stride-2 downsampling, 16x16 -> 8x8.
        p = self._get_padding_size(16, 2, 3)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3,
                               stride=2, padding=p)
        self.bn4 = nn.BatchNorm2d(64)

        # Classifier head: the flattened 64 x 8 x 8 feature map -> 128 -> logits.
        self.fc1 = nn.Linear(8 * 8 * 64, 128)
        self.bn5 = nn.BatchNorm1d(128)

        self.fc2 = nn.Linear(128, num_classes)

    @staticmethod
    def _get_padding_size(input_w, stride, kernal_size):
        """Return the per-side padding that emulates 'same' padding.

        The target output width is ``ceil(input_w / stride)``. The total
        padding needed to reach it is split evenly between both sides and
        rounded up, because ``nn.Conv2d`` pads symmetrically.

        Args:
            input_w: Width (or height) of the input feature map.
            stride: Stride of the convolution.
            kernal_size: Size of the (square) convolution kernel.

        Returns:
            Non-negative integer padding to apply on each side.
        """
        output_w = math.ceil(input_w / stride)
        # Clamp at zero: a kernel smaller than the stride needs no padding.
        total_padding = max((output_w - 1) * stride + kernal_size - input_w, 0)
        return math.ceil(total_padding / 2)

    def forward(self, x):
        """Map a batch of images ``(N, 3, 32, 32)`` to logits ``(N, num_classes)``."""
        x = F.leaky_relu(self.bn1(self.conv1(x)))
        x = F.leaky_relu(self.bn2(self.conv2(x)))
        x = F.leaky_relu(self.bn3(self.conv3(x)))
        x = F.leaky_relu(self.bn4(self.conv4(x)))

        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)

        x = F.leaky_relu(self.bn5(self.fc1(x)))

        # No softmax here: CrossEntropyLoss combines the softmax with the
        # loss function, so the model returns raw logits.
        return self.fc2(x)

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Instantiate the network and move its parameters and buffers to that device
model = CNNModel().to(device)
# Names of all entries in the state dict, followed by the layer structure
print(model.state_dict().keys())
print(model)

In [None]:
# torchinfo summary of the model for an input of size (1, 3, 32, 32),
# i.e. a single 3-channel 32x32 image
summary(model, (1, 3, 32, 32))

## 3. Train the model <a name="train"></a>

In [None]:
# Number of full passes over the training data
num_epoch: int = 10
# Learning rate handed to the optimizer
learning_rate: float = 0.0005

In [None]:
# Define the loss function
loss_fn = nn.CrossEntropyLoss()
# CrossEntropyLoss combines the softmax with the loss function, so there is no need to add a softmax layer to the model

In [None]:
# Define the optimizer: Adam over all model parameters with the configured learning rate
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
def fit(model, train_dataloader, optimizer, loss_fn, epochs=10):
    """Train ``model`` for ``epochs`` passes over ``train_dataloader``.

    The model is moved to the GPU when CUDA is available (CPU otherwise) and
    every batch is moved to the same device. After each epoch the mean batch
    loss and the training accuracy are printed.

    Args:
        model: Network that maps a batch of inputs to class logits of shape
            ``(batch, num_classes)``.
        train_dataloader: Iterable yielding ``(images, labels)`` batches,
            where ``labels`` holds integer class indices.
        optimizer: Optimizer built over ``model``'s parameters.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar
            tensor.
        epochs: Number of passes over the data.

    Returns:
        A list with one dict per epoch, each holding the keys ``"loss"``
        (mean of the batch losses) and ``"accuracy"`` (fraction of correctly
        classified training samples).

    Raises:
        ValueError: If ``train_dataloader`` yields no samples.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    history = []
    for i in range(epochs):
        # Training mode: batch norm uses batch statistics and updates its
        # running averages.
        model.train()
        running_loss = 0.0
        # Counted in the loop so the loader does not need to support len().
        num_batches = 0
        num_samples = 0
        correct = 0
        for images, labels in train_dataloader:
            # Move the batch to the same device as the model.
            images, labels = images.to(device), labels.to(device)

            # Standard step: clear old gradients, forward, backward, update.
            optimizer.zero_grad()
            predictions = model(images)
            loss = loss_fn(predictions, labels)
            loss.backward()
            optimizer.step()

            # .item() yields a plain float so no tensors are kept alive.
            running_loss += loss.item()
            num_batches += 1

            # The predicted class is the index of the largest logit.
            _, pred_label = torch.max(predictions, 1)
            num_samples += labels.size(0)
            correct += (pred_label == labels).sum().item()

        if num_samples == 0:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise ValueError("train_dataloader yielded no samples")

        epoch_loss = running_loss / num_batches
        epoch_acc = correct / num_samples
        history.append({"loss": epoch_loss, "accuracy": epoch_acc})
        print(f"Epoch {i+1} / {epochs}: loss= {epoch_loss:.4f}, accuracy= {epoch_acc:.4f}")

    return history

            

In [None]:
# Train the model on the training loader for the configured number of epochs
fit(model, trainloader, optimizer, loss_fn, epochs=num_epoch)

## 4. Evaluation <a name="evaluate"></a>

In [None]:
def evaluate(model, dataloader, loss_fn, device):
    """Compute the mean batch loss and the accuracy of ``model`` on ``dataloader``.

    The model is switched to evaluation mode and gradients are disabled for
    the whole pass.

    Args:
        model: Network that maps a batch of inputs to class logits of shape
            ``(batch, num_classes)``; expected to already live on ``device``.
        dataloader: Iterable yielding ``(images, labels)`` batches, where
            ``labels`` holds integer class indices.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar
            tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(val_loss, val_acc)`` of Python floats: the mean of the batch
        losses and the fraction of correctly classified samples.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    # Evaluation mode: batch norm uses its running statistics.
    model.eval()
    corr_predictions = 0
    total = 0
    total_loss = 0.0
    # Counted in the loop so the loader does not need to support len().
    num_batches = 0
    # Disable gradient tracking; nothing is back-propagated here.
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # .item() converts the 0-dim loss tensor to a Python float, so
            # the result is a plain number rather than a (device) tensor.
            total_loss += loss_fn(outputs, labels).item()
            num_batches += 1

            # The predicted class is the index of the largest logit.
            _, pred_labels = torch.max(outputs, 1)
            corr_predictions += (pred_labels == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError("dataloader yielded no samples")

    val_loss = total_loss / num_batches
    val_acc = corr_predictions / total

    return val_loss, val_acc


In [None]:
# Evaluate on the held-out test split
val_loss, val_acc = evaluate(model, testloader, loss_fn, device)
# float() accepts both a Python number and a 0-dim tensor, so the message
# shows a rounded number rather than a raw tensor repr.
print(f"validation loss: {float(val_loss):.4f}, validation_acc: {float(val_acc):.4f}")

Try individual images

In [None]:
# Human-readable class names; position i is the name for integer label i
_class_names = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]
CLASSES = np.array(_class_names)

# Take the first batch of the test loader for a qualitative check
dataiter = iter(testloader)
images, lables = next(dataiter)

In [None]:
# Make sure inference runs in evaluation mode (batch norm uses its running
# statistics and does not update them) and without building an autograd graph.
model.eval()
with torch.no_grad():
    output = model(images.to(device))

# Index of the largest logit per image, moved to the CPU for NumPy indexing
_, pred_index = torch.max(output.cpu(), 1)
predicted_class = CLASSES[pred_index.numpy()]

# The labels are integer class indices (not one-hot), so they index CLASSES directly
gt_classes = CLASSES[lables.numpy()]

In [None]:
# Never ask for more images than the batch holds
n_to_show = min(10, len(images))
# Sample without replacement so the same image is not shown twice
indices = np.random.choice(len(images), n_to_show, replace=False)

fig = plt.figure(figsize=(15, 3))
fig.subplots_adjust(hspace=0.4, wspace=0.4)

for i, idx in enumerate(indices):
    # Channel-first tensor -> channel-last array, the layout imshow expects
    img = images[idx].permute(1, 2, 0).numpy()
    ax = fig.add_subplot(1, n_to_show, i + 1)
    ax.axis("off")
    # Predicted and actual class names, stacked below the image
    captions = (
        (-0.35, "pred = " + str(predicted_class[idx])),
        (-0.7, "act = " + str(gt_classes[idx])),
    )
    for y_offset, caption in captions:
        ax.text(
            0.5,
            y_offset,
            caption,
            fontsize=10,
            ha="center",
            transform=ax.transAxes,
        )
    ax.imshow(img)