Convolutional Layer

In [None]:
"""Cross-correlation operation"""
import torch
from torch import nn
from d2l import torch as d2l

def corr2d(X, K):
    """Compute the 2D cross-correlation of input matrix X with kernel K.

    The kernel is placed at every position where it fits entirely inside X.
    At each position the covered window of X is multiplied element-wise with
    K and the products are summed into a single output element.

    Returns a tensor of shape (X.shape[0] - h + 1, X.shape[1] - w + 1),
    where (h, w) is the shape of K.
    """
    kernel_h, kernel_w = K.shape
    # Output size along each axis: input size - kernel size + 1
    out_h = X.shape[0] - kernel_h + 1
    out_w = X.shape[1] - kernel_w + 1
    Y = torch.zeros((out_h, out_w))
    for row in range(out_h):
        for col in range(out_w):
            # Window of X currently covered by the kernel
            window = X[row:row + kernel_h, col:col + kernel_w]
            Y[row, col] = torch.sum(window * K)
    return Y

In [2]:
# 3x3 input holding the values 0..8 in row-major order
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
# 2x2 kernel holding the values 0..3 in row-major order
K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])
# Top-left output element, for example: 0*0 + 1*1 + 3*2 + 4*3 = 19
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

In [None]:
"""2D convolutional operation"""
class Conv2D(nn.Module):
    """Minimal 2D convolutional layer: cross-correlation with a learnable
    kernel followed by the addition of a learnable scalar bias."""

    def __init__(self, kernel_size):
        """Create the layer's parameters.

        kernel_size is handed to torch.rand as the shape of the kernel, so
        the kernel starts out with random values; the bias starts at zero.
        """
        super().__init__()
        initial_kernel = torch.rand(kernel_size)
        initial_bias = torch.zeros(1)
        self.weight = nn.Parameter(initial_kernel)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Cross-correlate x with the kernel, then add the bias."""
        correlated = corr2d(x, self.weight)
        return correlated + self.bias

In [4]:
"""Simple application of convolutional layer: detect edges of different color in images"""
# 6x8 "image": columns 0-1 and 6-7 are white (1), columns 2-5 are black (0)
X = torch.cat((torch.ones((6, 2)), torch.zeros((6, 4)), torch.ones((6, 2))), dim=1)
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [None]:
# 1x2 kernel computing (left element - right element): if there is no color
# change between horizontally adjacent elements the result is 0, otherwise
# it is 1 or -1
K = torch.tensor([[1.0, -1.0]])

In [6]:
"""1 represents edge from white color (1) to black color (0)"""
"""-1 represents edge from black color (0) to white color (1)"""
# Cross-correlate the image X with the kernel K and keep the result as Y,
# the edge map displayed below
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [7]:
# Apply the same kernel to the transposed image: the result is all zeros,
# i.e. kernel K only detects vertical edges and misses horizontal ones
corr2d(X.t(), K)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [None]:
"""Learn the convolution kernel from X to Y"""
# One input channel, one output channel, a 1x2 kernel and no bias, so the
# layer's only learnable values are the two kernel weights
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# nn.Conv2d takes 4-D input laid out as (batch size, channels, height, width);
# here both the batch size and the channel count are 1
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))
lr = 3e-2  # Learning rate

for i in range(10):
    Y_hat = conv2d(X)  # Get prediction
    l = (Y_hat - Y) ** 2  # Element-wise squared error
    conv2d.zero_grad()  # Clear the gradient left over from the previous iteration
    l.sum().backward()  # Back-propagate the summed loss
    # Gradient-descent step on the kernel weights. It runs under no_grad so
    # the in-place parameter update is not recorded by autograd (preferred
    # over mutating `.data`, which bypasses autograd's safety checks).
    with torch.no_grad():
        conv2d.weight.sub_(lr * conv2d.weight.grad)
    # Print the summed loss every second iteration
    if (i + 1) % 2 == 0:
        print(f'batch {i+1}, loss {l.sum():.3f}')

batch 2, loss 11.863
batch 4, loss 2.806
batch 6, loss 0.805
batch 8, loss 0.272
batch 10, loss 0.102


In [None]:
# Show the learned weights as a 1x2 matrix; they should be close to the
# (1, -1) kernel that was used to generate Y
conv2d.weight.data.reshape((1, 2))

tensor([[ 0.9530, -1.0167]])