In [102]:
import torchvision
import torch
import torch.nn as nn

import matplotlib.pyplot as plt
import numpy as np

In [135]:
# Fix the RNG seed so the random layer initialisation, weights and bias below
# are identical on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Convolution hyper-parameters.
C_out = 5      # number of output channels
C_in = 4       # number of input channels
K = (3, 2)     # kernel size handed to nn.Conv2d
Kx, Ky = K     # Kx is paired with H and Ky with W in the output-size formula
stride = 2     # step of the kernel, same along both spatial axes

# Spatial size of the input.
W = 35
H = 32

conv_layer = nn.Conv2d(in_channels = C_in, out_channels = C_out, kernel_size = K, stride = stride)

print(conv_layer.weight.data.shape)
print(conv_layer.bias.data.shape)

# Explicit random parameters, shaped like the layer's own weight and bias, so
# the hand-written convolution and nn.Conv2d can share the same values.
weight = torch.randn(C_out, C_in, Kx, Ky)

bias = torch.randn(C_out)
print(weight.dtype)

torch.Size([5, 4, 3, 2])
torch.Size([5])
torch.float32


In [157]:
N = 14  # batch size

# Random input batch of shape (N, C_in, H, W). It is created directly in its
# final shape and stays a leaf tensor, so autograd stores its gradient in
# input_tensor.grad without an extra retain_grad() call.
input_tensor = torch.randn(N, C_in, H, W, requires_grad = True)

# Load the explicit weights and bias into the layer created earlier.
# copy_ under no_grad overwrites the parameter values in place without
# autograd recording the assignment (instead of rebinding .data).
with torch.no_grad():
    conv_layer.weight.copy_(weight)
    conv_layer.bias.copy_(bias)

# Apply the convolutional layer to the input
output = conv_layer(input_tensor)
# output is a non-leaf tensor; keep its gradient so output.grad can be read
# after backward().
output.retain_grad()

print("Input shape:", input_tensor.shape)
print("\nConvolution weights:\n", conv_layer.weight.data.shape)
print("Convolution bias:", conv_layer.bias.data.shape)
print("\nOutput shape:", output.shape)


Input shape: torch.Size([14, 4, 32, 35])

Convolution weights:
 torch.Size([5, 4, 3, 2])
Convolution bias: torch.Size([5])

Output shape: torch.Size([14, 5, 15, 17])


In [158]:
import math

# Expected spatial size of the convolution output (the layer uses no padding
# and no dilation): floor((size - kernel) / stride + 1) along each axis.
outputHeight, outputWidth = (
    math.floor((size - kernel) / stride + 1)
    for size, kernel in ((H, Kx), (W, Ky))
)
print(outputHeight, outputWidth)


15 17


### Defining the convolution forward pass

In [159]:
def cross_correlate(A, B, stride=None):
    """Strided 2-D "valid" cross-correlation of a matrix with a kernel.

    The kernel ``B`` is placed on ``A`` at offsets ``(stride * i, stride * j)``
    and, at each placement, the element-wise product of the kernel with the
    window of ``A`` it covers is summed (no padding, no kernel flip).

    Args:
        A: 2-D tensor of shape ``(m, n)``.
        B: 2-D kernel tensor of shape ``(p, q)``.
        stride: Positive integer step between placements along both axes.
            When omitted, the notebook-level ``stride`` variable is used, which
            keeps existing ``cross_correlate(A, B)`` calls working.

    Returns:
        Tensor of shape ``((m - p) // stride + 1, (n - q) // stride + 1)`` in
        torch's default dtype.

    Raises:
        ValueError: If ``stride`` is not positive, or the kernel is larger than
            the input along either axis.
    """
    if stride is None:
        # Backward compatibility: fall back to the notebook-level setting.
        stride = globals()["stride"]
    if stride < 1:
        raise ValueError(f"stride must be a positive integer, got {stride}")

    m, n = A.shape
    p, q = B.shape
    if p > m or q > n:
        raise ValueError(
            f"kernel of shape {(p, q)} does not fit inside input of shape {(m, n)}"
        )

    # Integer arithmetic avoids the float round trip of int((m - p) / stride).
    outputH = (m - p) // stride + 1
    outputW = (n - q) // stride + 1
    output = torch.zeros((outputH, outputW))
    for i in range(outputH):
        top = stride * i
        for j in range(outputW):
            left = stride * j
            # Sum of the element-wise product over the window under the kernel.
            output[i, j] = (A[top:top + p, left:left + q] * B).sum()
    return output

In [160]:
# Sanity-check cross_correlate on a small example that can be verified by hand:
# a 7x7 grid holding 1..49 and a 2x2 kernel holding 1..4, with stride 2.
stride = 2
A = torch.arange(1, 50, dtype=torch.float32).reshape(7, 7)
B = torch.arange(1, 5, dtype=torch.float32).reshape(2, 2)

cross_correlate(A, B)

tensor([[ 65.,  85., 105.],
        [205., 225., 245.],
        [345., 365., 385.]])

In [161]:
def convolve():
    """Run the hand-written convolution forward pass on the notebook tensors.

    Reads the notebook-level ``input_tensor``, ``weight`` and ``bias`` together
    with the sizes ``N``, ``C_out``, ``C_in``, ``outputHeight`` and
    ``outputWidth``. Each output channel of each sample is the sum, over the
    input channels, of ``cross_correlate(input channel, matching kernel)``,
    plus that output channel's bias.

    Returns:
        Tensor of shape ``(N, C_out, outputHeight, outputWidth)``.
    """
    result = torch.zeros((N, C_out, outputHeight, outputWidth))
    for sample in range(N):
        image = input_tensor[sample]
        for out_ch in range(C_out):
            kernels = weight[out_ch]
            # Accumulate the contribution of every input channel ...
            for in_ch in range(C_in):
                result[sample, out_ch] += cross_correlate(image[in_ch], kernels[in_ch])
            # ... then add the bias of this output channel once.
            result[sample, out_ch] += bias[out_ch]
    return result

In [162]:
# Run the hand-written forward pass; the result is compared with the
# nn.Conv2d output in a later cell.
output_tensor = convolve()

In [163]:
# Scalar loss: the sum of every output element, so d(l)/d(output) is all ones.
l = output.sum()
# Backpropagate through nn.Conv2d to obtain autograd's reference gradients.
l.backward()
# Available because retain_grad() was called on output when it was computed.
output_grad = output.grad

In [164]:
# The hand-written forward pass must match nn.Conv2d: the norm of the
# difference over the whole batch has to stay below 1e-4.
assert torch.norm(output_tensor - output) < 1e-4

In [165]:
def cross_correlation_grad(A, B, C_grad, stride=None):
    """Gradient of a strided cross-correlation with respect to its kernel.

    Entry ``(a, b)`` of the result is
    ``sum over (i, j) of C_grad[i, j] * A[i * stride + a, j * stride + b]``,
    where ``(i, j)`` ranges over the output positions of the cross-correlation
    of ``A`` with a kernel shaped like ``B``.

    Args:
        A: 2-D input tensor of shape ``(m, n)``.
        B: 2-D kernel of shape ``(p, q)``; only its shape is used.
        C_grad: 2-D upstream gradient with at least
            ``(m - p) // stride + 1`` rows and ``(n - q) // stride + 1``
            columns; rows/columns beyond that are ignored.
        stride: Positive integer step of the cross-correlation along both
            axes. When omitted, the notebook-level ``stride`` variable is used,
            which keeps existing three-argument calls working.

    Returns:
        Tensor of shape ``(p, q)`` in torch's default dtype, not attached to
        the autograd graph. All zeros when the kernel does not fit in ``A``.

    Raises:
        ValueError: If ``stride`` is not positive.
        IndexError: If ``C_grad`` has fewer rows or columns than there are
            output positions.
    """
    if stride is None:
        # Backward compatibility: fall back to the notebook-level setting.
        stride = globals()["stride"]
    if stride < 1:
        raise ValueError(f"stride must be a positive integer, got {stride}")

    p, q = B.shape
    m, n = A.shape
    weight_grad = torch.zeros(p, q)

    # Number of kernel placements along each axis (integer floor division).
    xLim = (m - p) // stride + 1
    yLim = (n - q) // stride + 1
    if xLim <= 0 or yLim <= 0:
        # No valid placement: nothing contributes to the gradient.
        return weight_grad
    if C_grad.shape[0] < xLim or C_grad.shape[1] < yLim:
        raise IndexError(
            f"C_grad of shape {tuple(C_grad.shape)} is smaller than the "
            f"{(xLim, yLim)} output positions"
        )

    with torch.no_grad():
        # Accumulate in float64, then store into the default-dtype result.
        grads = C_grad[:xLim, :yLim].double()
        for a in range(p):
            # Rows a, a + stride, ...: the rows touched by kernel row a.
            rows = slice(a, a + (xLim - 1) * stride + 1, stride)
            for b in range(q):
                cols = slice(b, b + (yLim - 1) * stride + 1, stride)
                weight_grad[a, b] = (grads * A[rows, cols].double()).sum()
    return weight_grad

In [166]:
def convolution_grad():
    """Hand-written gradient of the loss with respect to the convolution weights.

    Reads the notebook-level ``input_tensor``, ``weight`` and ``output_grad``
    together with the sizes ``N``, ``C_out``, ``C_in``, ``Kx`` and ``Ky``. For
    every (output channel, input channel) pair, the per-sample kernel
    gradients from ``cross_correlation_grad`` are summed over the batch.

    Returns:
        Tensor of shape ``(C_out, C_in, Kx, Ky)``.
    """
    grads = torch.zeros(C_out, C_in, Kx, Ky)
    for out_ch in range(C_out):
        for in_ch in range(C_in):
            kernel = weight[out_ch, in_ch]
            # Sum the kernel gradient contributed by each sample in the batch.
            batch_total = torch.zeros(Kx, Ky)
            for sample in range(N):
                batch_total += cross_correlation_grad(
                    input_tensor[sample, in_ch],
                    kernel,
                    output_grad[sample, out_ch],
                )
            grads[out_ch, in_ch] += batch_total
    return grads

In [167]:
# Hand-computed weight gradient for the whole batch, to be compared with
# autograd's conv_layer.weight.grad.
w_grad = convolution_grad()

In [168]:
# Input batch shape: (N, C_in, H, W).
input_tensor.shape

torch.Size([14, 4, 32, 35])

In [169]:
# Kernel bank shape: (C_out, C_in, Kx, Ky).
weight.shape

torch.Size([5, 4, 3, 2])

In [175]:
# The hand-computed gradient holds one entry per weight, so its shape is
# (C_out, C_in, Kx, Ky), the same as weight.shape.
w_grad.shape

torch.Size([5, 4, 3, 2])

In [173]:
# Autograd's gradient of l with respect to the layer weights (filled in by
# l.backward()); this is the reference value for w_grad.
conv_layer.weight.grad

tensor([[[[  18.4215,   83.8345],
          [ -51.6044,   32.2838],
          [  14.3864,   16.1129]],

         [[  14.4165,  -92.5791],
          [-168.0305,  102.9635],
          [  31.7107, -159.5172]],

         [[ 201.1115,  -69.9786],
          [  47.7925,  136.3253],
          [ 186.3972,  -46.2984]],

         [[ -50.9993,   16.0457],
          [-224.6265,  -94.3197],
          [ -11.9449,  -16.3227]]],


        [[[  18.4215,   83.8345],
          [ -51.6044,   32.2838],
          [  14.3864,   16.1129]],

         [[  14.4165,  -92.5791],
          [-168.0305,  102.9635],
          [  31.7107, -159.5172]],

         [[ 201.1115,  -69.9786],
          [  47.7925,  136.3253],
          [ 186.3972,  -46.2984]],

         [[ -50.9993,   16.0457],
          [-224.6265,  -94.3197],
          [ -11.9449,  -16.3227]]],


        [[[  18.4215,   83.8345],
          [ -51.6044,   32.2838],
          [  14.3864,   16.1129]],

         [[  14.4165,  -92.5791],
          [-168.0305,  102

In [171]:
# `weight` is the plain tensor from torch.randn (created without
# requires_grad), not the layer's Parameter, so its .grad is None and this
# cell displays nothing. Autograd's gradient is on conv_layer.weight.grad.
weight.grad

In [122]:
# Input matrix A of shape (5, 5), created as a leaf tensor that tracks gradients
A = torch.tensor(
    [
        [5, 27, 9, -15, 12],
        [2, 4, -2, -7, 8],
        [-8, 16, 5, 23, -6],
        [-17, 7, 9, -14, 3],
        [10, -5, 13, 6, -11],
    ],
    dtype=torch.float32,
    requires_grad=True,
)

# Kernel matrix B of shape (2, 2), also a gradient-tracking leaf tensor
B = torch.tensor(
    [
        [8, 4],
        [2, 6],
    ],
    dtype=torch.float32,
    requires_grad=True,
)

In [123]:
# Bring input A into the conv2d input layout (N, C, H, W):
# N = batch size (1)
# C = input channels (1)
# H = height (5)
# W = width (5)
A = A[None, None]  # Prepend batch and channel dimensions
print("Input shape:", A.shape)  # Should be (1, 1, 5, 5)
# The reshaped A is a new, non-leaf tensor: ask autograd to keep its gradient.
A.retain_grad()
# Bring kernel B into the conv2d weight layout
# (out_channels, in_channels, kernel_height, kernel_width)
B = B[None, None]  # Prepend out_channels and in_channels dimensions
B.retain_grad()
print("Kernel shape:", B.shape)  # Should be (1, 1, 2, 2)

Input shape: torch.Size([1, 1, 5, 5])
Kernel shape: torch.Size([1, 1, 2, 2])


In [124]:
# Create the Conv2d layer that will hold the kernel B
# in_channels=1: we have a single input channel
# out_channels=1: we want a single output channel
# kernel_size=2: matches the 2x2 kernel B that is installed as the weight next,
#                so the layer's metadata/repr agree with its actual weight
# stride=stride: reuse the notebook-level stride (set to 2 earlier)
# padding=0: no padding
# bias=False: no additive term, the output is the plain cross-correlation
conv_layer = nn.Conv2d(in_channels=1,
                      out_channels=1,
                      kernel_size=2,
                      stride=stride,
                      padding=0,
                      bias=False)

In [125]:
# Install kernel B as the layer's weight; wrapping it in nn.Parameter makes the
# layer own it, and its gradient is later read from conv_layer.weight.grad
conv_layer.weight = nn.Parameter(B)

# Forward pass; keep the gradient of the (non-leaf) result for inspection
output = conv_layer(A)
output.retain_grad()

print("\nOutput shape:", output.shape)  # Should be (1, 1, 2, 2)
print("\nConvolution output:")
# Strip the batch and channel dimensions so the result prints as a 2-D array
print(output.detach().squeeze().numpy())


Output shape: torch.Size([1, 1, 2, 2])

Convolution output:
[[176. -34.]
 [  8.  66.]]


In [126]:
# Scalar loss: the sum of all output entries, so d(l)/d(output) is all ones.
l = output.sum()
# Populate A.grad, conv_layer.weight.grad and output.grad via autograd.
l.backward()

In [127]:
# Gradient of l with respect to the input. Each covered pixel receives the
# kernel entry that multiplied it; the last row and column are zero because,
# with stride 2, no 2x2 window reaches index 4.
print(A.grad)

tensor([[[[8., 4., 8., 4., 0.],
          [2., 6., 2., 6., 0.],
          [8., 4., 8., 4., 0.],
          [2., 6., 2., 6., 0.],
          [0., 0., 0., 0., 0.]]]])


In [128]:
# Autograd's gradient of l with respect to the 2x2 kernel: the reference value
# for the hand-written cross_correlation_grad.
conv_layer.weight.grad

tensor([[[[ 11.,  51.],
          [ -8., -10.]]]])

In [129]:
# Upstream gradient d(l)/d(output): all ones because l = output.sum().
output.grad

tensor([[[[1., 1.],
          [1., 1.]]]])

In [130]:
# Drop the singleton batch/channel dimensions: cross_correlation_grad unpacks
# A.shape and B.shape into two values each, so it needs 2-D tensors.
A = A.squeeze()
# Take the kernel values from the layer's weight parameter.
B = conv_layer.weight.data.squeeze()

In [131]:
# Raw layer output, still 4-D and attached to the autograd graph
# (the repr shows its grad_fn).
output

tensor([[[[176., -34.],
          [  8.,  66.]]]], grad_fn=<ConvolutionBackward0>)

In [132]:
# 2-D views of the forward result and of its gradient; output.grad is
# available because retain_grad() was called before backward().
C = output.squeeze()
C_grad = output.grad.squeeze()

In [133]:
# The forward result without its batch and channel dimensions.
C

tensor([[176., -34.],
        [  8.,  66.]], grad_fn=<SqueezeBackward0>)

In [79]:
import math

In [134]:
# Hand-written kernel gradient for the small example; it should reproduce the
# conv_layer.weight.grad values displayed earlier ([[11, 51], [-8, -10]]).
cross_correlation_grad(A, B, C_grad)

tensor([[ 11.,  51.],
        [ -8., -10.]])