<a href="https://colab.research.google.com/github/thisarli/CNN-layers-from-scratch/blob/main/CNN_layers_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports and Installs

In [1]:
# NOTE: the PyPI name `sklearn` is a deprecated placeholder package; the
# library imported as `sklearn` is distributed as `scikit-learn`.
!pip install -q torch torchvision scikit-learn seaborn

  Preparing metadata (setup.py) ... done
  Building wheel for sklearn (setup.py) ... done


In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

# 2D convolution

In [8]:
class Conv2d(nn.Module):
    """2D convolution layer with variable padding and stride.

    The convolution is computed by unfolding the input into sliding-window
    columns (``F.unfold``), multiplying them with the flattened filters and
    reshaping the result back into an image.

    Args:
        in_channels: Number of channels C of the input.
        out_channels: Number of filters F (channels of the output).
        kernel_size: Int, or a ``(height, width)`` tuple/list.
        stride: Int, or a ``(height, width)`` tuple/list.
        padding: Int, or a ``(height, width)`` tuple/list of zero padding
            applied to both sides of the respective dimension.
        bias: If truthy, the bias ``b`` is added to the output.

    Attributes:
        w: Filters of shape ``[out_channels, in_channels, kernel_x, kernel_y]``
            drawn from a standard normal distribution.
        b: Bias of shape ``[out_channels]``, initialised to zero. It is always
            created (even with ``bias=False``) but only used when ``bias`` is
            truthy.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 bias=True):
        super(Conv2d, self).__init__()

        # NOTE: the ``*_x`` attributes hold element 0 of each argument and the
        # ``*_y`` attributes the last element.  ``F.unfold`` applies element 0
        # to dim 2 (height) and element 1 to dim 3 (width), so ``*_x`` acts on
        # the height and ``*_y`` on the width.
        self.kernel_x, self.kernel_y = self._pair(kernel_size, "kernel_size")
        self.stride_x, self.stride_y = self._pair(stride, "stride")
        self.padding_x, self.padding_y = self._pair(padding, "padding")

        # Weights shape: [out_channels, in_channels, kernel_x, kernel_y]
        self.w = torch.nn.Parameter(
            torch.randn(size=(out_channels, in_channels, self.kernel_x, self.kernel_y)))
        # Bias shape: [out_channels]
        self.b = torch.nn.Parameter(torch.zeros(size=(out_channels,)))
        self.F = out_channels
        self.C = in_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.bias = bias

    @staticmethod
    def _pair(value, name):
        """Return ``value`` as a 2-tuple, duplicating a scalar int."""
        if isinstance(value, int):
            return value, value
        if isinstance(value, (tuple, list)) and len(value) > 0:
            return value[0], value[-1]
        raise TypeError(
            f"{name} must be an int or a non-empty tuple/list of ints, got {value!r}")

    def forward(self, x):
        """Apply the convolution.

        Input shape: [N, C, H, W]
        Output shape: [N, F, H', W'] with
            H' = (H + 2 * padding_x - kernel_x) // stride_x + 1
            W' = (W + 2 * padding_y - kernel_y) // stride_y + 1
        """
        batch_size, _, input_height, input_width = x.shape

        # Height pairs with the ``*_x`` settings and width with ``*_y`` so the
        # sizes agree with the windows produced by F.unfold below.
        output_height = (input_height + 2 * self.padding_x - self.kernel_x) // self.stride_x + 1
        output_width = (input_width + 2 * self.padding_y - self.kernel_y) // self.stride_y + 1

        # Unfold the input into columns: [N, C * kernel_x * kernel_y, H' * W']
        x_unfolded = F.unfold(x,
                              kernel_size=(self.kernel_x, self.kernel_y),
                              padding=(self.padding_x, self.padding_y),
                              stride=(self.stride_x, self.stride_y))
        # Matrix product with the flattened filters: [N, H' * W', F]
        output_unfolded = x_unfolded.transpose(1, 2).matmul(
            self.w.view(self.w.size(0), -1).t())
        if self.bias:
            # b has shape [F] and broadcasts over the last dimension.
            output_unfolded = output_unfolded + self.b

        # The window positions are enumerated row by row, so the flat
        # dimension reshapes directly into (H', W').
        return output_unfolded.transpose(1, 2).reshape(
            batch_size, self.F, output_height, output_width)

# 2D max pooling

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MaxPool2d(nn.Module):
    """Max-pooling layer over non-overlapping windows.

    The stride always equals the kernel size, so every window is visited
    exactly once; rows/columns that do not fill a complete window are dropped.

    Args:
        kernel_size: Int, or a ``(height, width)`` tuple/list giving the size
            of the window to take the max over.

    Raises:
        TypeError: If ``kernel_size`` is neither an int nor a non-empty
            tuple/list.
    """

    def __init__(self, kernel_size):
        super(MaxPool2d, self).__init__()
        self.stride = kernel_size
        self.kernel_size = kernel_size

        # ``*_x`` is applied to dim 2 (height) and ``*_y`` to dim 3 (width)
        # in forward().
        if isinstance(kernel_size, int):
            self.kernel_x = self.kernel_y = kernel_size
        elif isinstance(kernel_size, (tuple, list)) and len(kernel_size) > 0:
            self.kernel_x = kernel_size[0]
            self.kernel_y = kernel_size[-1]
        else:
            # Fail at construction time instead of leaving the attributes
            # unset and crashing later inside forward().
            raise TypeError(
                "kernel_size must be an int or a non-empty tuple/list of ints, "
                f"got {kernel_size!r}")
        self.stride_x = self.kernel_x
        self.stride_y = self.kernel_y

    def forward(self, x):
        """Apply max pooling.

        Input shape: [N, C, H, W]
        Output shape: [N, C, H', W'] with H' = H // kernel_x, W' = W // kernel_y
        """
        # Slice out the windows: [N, C, H', W', kernel_x, kernel_y]
        windows = x.unfold(2, self.kernel_x, self.stride_x).unfold(3, self.kernel_y, self.stride_y)
        # Flatten each window so a single max over dim 4 covers it.
        windows = windows.contiguous().view(*windows.size()[:-2], -1)
        out, _ = windows.max(4)
        return out

# Linear layer

In [9]:
class Linear(nn.Module):
    """Fully connected layer computing ``x @ w`` with an optional bias term.

    ``w`` has shape [in_channels, out_channels] and ``b`` has shape
    [out_channels]; both are drawn from a standard normal distribution.
    ``b`` is always created, but only added when ``bias`` is ``True``.
    """

    def __init__(self, in_channels, out_channels, bias=True):
        super().__init__()
        # Draw the weight before the bias so seeded initialisation is stable.
        initial_weight = torch.randn(in_channels, out_channels)
        initial_bias = torch.randn(out_channels)
        self.w = torch.nn.Parameter(initial_weight)
        self.b = torch.nn.Parameter(initial_bias)
        self.bias = bias

    def forward(self, x):
        """Project ``x`` onto the weights, adding the bias if enabled."""
        projected = x.matmul(self.w)
        if self.bias is not True:
            return projected
        return projected + self.b

# 2D batch normalization

In [11]:
class BatchNorm2d(nn.Module):
    """Batch normalization for a mini-batch of 2D images.

    In training mode each channel is normalised with the mean and biased
    variance of the current mini-batch, and exponential moving averages of
    both statistics are updated.  In evaluation mode the moving averages are
    used instead.

    Args:
        num_features: Number of channels C of the input.
        eps: Value added to the variance for numerical stability.
        momentum: Weight of the current batch statistics in the moving
            averages.

    Attributes:
        gamma: Learnable scale of shape ``[1, C, 1, 1]``, initialised to one.
        beta: Learnable shift of shape ``[1, C, 1, 1]``, initialised to zero.
        moving_mean: Buffer of shape ``[1, C, 1, 1]``, initialised to zero.
        moving_var: Buffer of shape ``[1, C, 1, 1]``, initialised to one.
    """

    def __init__(self, num_features, eps=1e-05, momentum=0.1):
        super(BatchNorm2d, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum

        self.gamma = nn.Parameter(torch.ones(size=(1, num_features, 1, 1)))
        self.beta = nn.Parameter(torch.zeros(size=(1, num_features, 1, 1)))

        # Registered as buffers (not plain attributes) so the running
        # statistics follow the module across ``.to(device)`` calls and are
        # stored in ``state_dict()`` together with gamma and beta.
        self.register_buffer("moving_mean", torch.zeros(size=(1, num_features, 1, 1)))
        self.register_buffer("moving_var", torch.ones(size=(1, num_features, 1, 1)))

    def forward(self, x):
        """Normalise ``x`` per channel, then scale by gamma and shift by beta.

        Input shape: [N, C, H, W]
        Output shape: [N, C, H, W] (same shape as input)
        """
        if self.training:
            # Statistics of the current mini-batch, one value per channel.
            mean = x.mean(dim=(0, 2, 3), keepdim=True)
            # Biased variance (divides by the number of elements).
            var = ((x - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)

            # Update the moving averages without recording the operation in
            # the autograd graph; otherwise the buffers would keep every past
            # iteration's graph alive.
            with torch.no_grad():
                self.moving_mean.mul_(1.0 - self.momentum).add_(self.momentum * mean)
                self.moving_var.mul_(1.0 - self.momentum).add_(self.momentum * var)
        else:
            mean = self.moving_mean
            var = self.moving_var

        x_norm = (x - mean) / torch.sqrt(var + self.eps)

        # Scaling via gamma / shifting via beta
        return self.gamma * x_norm + self.beta