In [1]:
import torchvision
import torch
import torch.nn as nn

import matplotlib.pyplot as plt
import numpy as np

In [2]:
C_out = 5
C_in = 4
K = (3, 2)
Kx, Ky = K
stride = 2

W = 35
H = 32

conv_layer = nn.Conv2d(in_channels = C_in, out_channels = C_out, kernel_size = K, stride = stride)

print(conv_layer.weight.data.shape)
print(conv_layer.bias.data.shape)

weight = torch.randn(C_out, C_in, Kx, Ky)

bias = torch.randn(C_out)
print(weight.dtype)

torch.Size([5, 4, 3, 2])
torch.Size([5])
torch.float32


In [3]:
N = 14


# Create a dummy input tensor (1 channel, 4x4 image)
input_tensor = torch.randn(N, C_in, H, W, requires_grad = True)

# Reshape the input tensor to (batch_size, channels, height, width)
input_tensor = input_tensor.reshape(N, C_in, H, W)
input_tensor.retain_grad()

# Define a convolutional layer


# Set custom weights and bias for demonstration
conv_layer.weight.data = weight
conv_layer.bias.data = bias

# Apply the convolutional layer to the input
output = conv_layer(input_tensor)
output.retain_grad()

print("Input shape:", input_tensor.shape)
# print("Input:\n", input_tensor.squeeze())
print("\nConvolution weights:\n", conv_layer.weight.data.shape)
print("Convolution bias:", conv_layer.bias.data.shape)
print("\nOutput shape:", output.shape)
# print("Output:\n", output.squeeze())


Input shape: torch.Size([14, 4, 32, 35])

Convolution weights:
 torch.Size([5, 4, 3, 2])
Convolution bias: torch.Size([5])

Output shape: torch.Size([14, 5, 15, 17])


In [4]:
import math
outputHeight = math.floor((H - (Kx - 1) - 1) / stride + 1)
outputWidth = math.floor((W - (Ky - 1) - 1) / stride + 1)
print(outputHeight, outputWidth)


15 17


### Defining the convolution forward pass

In [5]:
def cross_correlate(A, B):
    m, n = A.shape
    p, q = B.shape
    outputH = int((m - p) / stride) + 1
    outputW = int((n - q) / stride) + 1
    output = torch.zeros((outputH, outputW))
    for i in range(outputH):
        for j in range(outputW):
            value = 0
            for u in range(p):
                for v in range(q):
                    value += A[stride * i + u, stride * j + v] * B[u, v]
            output[i, j] = value
    return output

In [6]:
stride = 2
A = torch.Tensor(np.arange(1, 50).reshape(7,7))
B = torch.Tensor(np.arange(1, 5).reshape(2, 2))

cross_correlate(A, B)

tensor([[ 65.,  85., 105.],
        [205., 225., 245.],
        [345., 365., 385.]])

In [7]:
def convolve():
    output_tensor = torch.zeros((N, C_out, outputHeight, outputWidth))
    for i in range(N):
        for j in range(C_out):
            for k in range(C_in):
                output_tensor[i, j] += cross_correlate(input_tensor[i, k], weight[j, k])
            output_tensor[i, j] += bias[j]
    return output_tensor

In [8]:
%%time
output_tensor = convolve()

CPU times: user 13.6 s, sys: 1.24 s, total: 14.9 s
Wall time: 15.1 s


In [9]:
# random_gradient = torch.randn(output.shape)
random_gradient = torch.ones(output.shape)
l = (output * random_gradient).sum()
l.backward()
output_grad = output.grad

In [10]:
assert(torch.isclose(random_gradient, output_grad).all().item())

In [11]:
assert(torch.norm(output_tensor - output) < 1e-4)

In [12]:
def within_limits(A, x, y):
  p, q = A.shape
  return (x >= 0 and x < p) and (y >= 0 and y < q)

def cross_correlation_grad(C_grad, A, s, p, q):
    m, n = A.shape
    weight_grad = torch.zeros(p, q)

    xLim = math.floor((m - p) / abs(s) + 1)
    yLim = math.floor((n - q) / abs(s) + 1)
    for a in range(p):
        for b in range(q):
            value = 0
            for i in range(xLim):
                for j in range(yLim):
                  xVal = i * s + a
                  yVal = j * s + b
                  if within_limits(A, xVal, yVal):
                    value += C_grad[i, j].item() * A[xVal, yVal].item()
            weight_grad[a, b] = value
    return weight_grad

In [13]:
def convolution_grad():
    weight_grad = torch.zeros(C_out, C_in, Kx, Ky)
    for j in range(C_out):
        for k in range(C_in):
            w_grad = torch.zeros(Kx, Ky)
            for i in range(N):
                w_grad += cross_correlation_grad(output_grad[i, j], input_tensor[i, k],  stride, Kx, Ky)
            weight_grad[j, k] += w_grad


    bias_grad = torch.zeros(C_out)

    for j in range(C_out):
      value = 0
      for i in range(N):
        for k in range(outputHeight):
          for l in range(outputWidth):
            value += output_grad[i, j, k, l]
      bias_grad[j] += value

    input_grad = torch.zeros(N, C_in, H, W)

    for i in range(N):
      for k in range(C_in):
        i_grad = torch.zeros(H, W)
        for j in range(C_out):
          i_grad += cross_correlation_grad(output_grad[i, j], weight[j, k], -stride, H, W)
        input_grad[i, k] += i_grad

    return weight_grad, bias_grad, input_grad

In [14]:
conv_layer.bias.grad

tensor([3570., 3570., 3570., 3570., 3570.])

In [15]:
%%time
w_grad, b_grad, i_grad = convolution_grad()

CPU times: user 9.1 s, sys: 22 ms, total: 9.12 s
Wall time: 9.35 s


In [16]:
torch.norm(i_grad - input_tensor.grad).item()

544.2529907226562

In [17]:
torch.norm(conv_layer.weight.grad - w_grad).item()

0.00042751748696900904

In [18]:
torch.norm(conv_layer.bias.grad - b_grad).item()

0.0

In [19]:
assert(torch.norm(i_grad - input_tensor.grad).item() < 1e-4)

AssertionError: 

In [20]:
assert(torch.isclose(b_grad, conv_layer.bias.grad).all().item())

In [21]:
assert(torch.norm(conv_layer.weight.grad - w_grad).item() < 1e-2)

In [None]:
conv_layer.weight.grad

In [34]:
A = torch.tensor(np.array([[5, 27, 9, -15, 12], [2,4,-2,-7,8], [-8, 16, 5, 23, -6], [-8, 16, 5, 23, -6], [-17, 7, 9, -14, 3], [10, -5, 13, 6, -11.0]]), requires_grad = True)
A.retain_grad()
B = torch.tensor(np.array([[8, 4], [2, 6.0]]), requires_grad = True)
B.retain_grad()