In [1]:
import torch

# Define input data and filters
x = torch.tensor([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]], requires_grad=True)
f = torch.tensor([[1., 0., -1.], [2., 0., -2.], [1., 0., -1.]], requires_grad=True)

# Define the forward pass function
def conv2d(x, f):
    return torch.nn.functional.conv2d(x.view(1, 1, 3, 3), f.view(1, 1, 3, 3), padding=0)

# Define the loss function (sum of the output values)
def loss(y):
    return y.sum()

# Compute the derivative of the loss with respect to x and f using autograd
y = conv2d(x, f)
L = loss(y)
grads = torch.autograd.grad(L, [x, f])

# Print the derivative values
print("Derivative with respect to x: ")
print(grads[0])
print("Derivative with respect to f: ")
print(grads[1])


Derivative with respect to x: 
tensor([[ 1.,  0., -1.],
        [ 2.,  0., -2.],
        [ 1.,  0., -1.]])
Derivative with respect to f: 
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])


In [8]:
import torch

# Define input data and filters
x = torch.tensor([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]], requires_grad=True)
f = torch.tensor([[1., 0., -1.], [2., 0., -2.], [1., 0., -1.]], requires_grad=True)

# Define the forward pass function
def conv2d(x, f):
    return torch.nn.functional.conv2d(x.view(1, 1, 3, 3), f.view(1, 1, 3, 3), padding=0)

# Define the loss function (sum of the output values)
def loss(y):
    return y.sum()

# Compute the derivative of the loss with respect to x and f using PyTorch's low-level functions
y = conv2d(x, f)
L = loss(y)

# Compute the derivative of L with respect to y
grad_y = torch.ones_like(y)
#grad_y *= L

# Compute the derivative of L with respect to x and f using conv2d_transpose
grad_x = torch.nn.functional.conv_transpose2d(grad_y, f.view(1, 1, 3, 3), padding=0)
grad_f = torch.nn.functional.conv2d(x.view(1, 1, 3, 3), grad_y, padding=0)

# Print the derivative values
print("Derivative with respect to x: ")
print(grad_x)
print("Derivative with respect to f: ")
print(grad_f)


Derivative with respect to x: 
tensor([[[[ 1.,  0., -1.],
          [ 2.,  0., -2.],
          [ 1.,  0., -1.]]]], grad_fn=<ConvolutionBackward0>)
Derivative with respect to f: 
tensor([[[[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]], grad_fn=<ConvolutionBackward0>)


In [10]:
import tensorflow as tf

# Define input data and filters
x = tf.constant([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]], dtype=tf.float32)
f = tf.constant([[1., 0., -1.], [2., 0., -2.], [1., 0., -1.]], dtype=tf.float32)

# Define the forward pass function
def conv2d(x, f):
    return tf.nn.conv2d(tf.reshape(x, [1, 3, 3, 1]), tf.reshape(f, [3, 3, 1, 1]), strides=[1, 1, 1, 1], padding='VALID')

# Define the loss function (sum of the output values)
def loss(y):
    return tf.reduce_sum(y)

# Compute the derivative of the loss with respect to x and f using tape gradient
with tf.GradientTape() as tape:
    tape.watch([x, f])
    y = conv2d(x, f)
    #L = loss(y)
    print(y)
grads = tape.gradient(y, [x, f])

# Print the derivative values
print("Derivative with respect to x: ")
print(grads[0])
print("Derivative with respect to f: ")
print(grads[1])


tf.Tensor([[[[-8.]]]], shape=(1, 1, 1, 1), dtype=float32)
Derivative with respect to x: 
tf.Tensor(
[[ 1.  0. -1.]
 [ 2.  0. -2.]
 [ 1.  0. -1.]], shape=(3, 3), dtype=float32)
Derivative with respect to f: 
tf.Tensor(
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]], shape=(3, 3), dtype=float32)


In [9]:
import tensorflow as tf

# Define input data and filters
x = tf.constant([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]], dtype=tf.float32)
f = tf.constant([[1., 0., -1.], [2., 0., -2.], [1., 0., -1.]], dtype=tf.float32)

# Perform the 2D convolution
y = tf.nn.conv2d(tf.reshape(x, [1, 3, 3, 1]), tf.reshape(f, [3, 3, 1, 1]), strides=[1, 1, 1, 1], padding='VALID')
print("Output of convolution: ")
print(y)

# Define the derivative of the convolution operation
x_grad = tf.nn.conv2d_transpose(y, tf.reshape(f, [3, 3, 1, 1]), tf.shape(x), strides=[1, 1, 1, 1], padding='VALID')
f_grad = tf.nn.conv2d(tf.reshape(x, [1, 3, 3, 1]), tf.transpose(y, perm=[1, 2, 0, 3]), strides=[1, 1, 1, 1], padding='VALID')


Output of convolution: 
tf.Tensor([[[[-8.]]]], shape=(1, 1, 1, 1), dtype=float32)


In [2]:
x_grad

<tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
array([[[[ -8.],
         [  0.],
         [  8.]],

        [[-16.],
         [  0.],
         [ 16.]],

        [[ -8.],
         [  0.],
         [  8.]]]], dtype=float32)>

In [38]:
import numpy as np
from scipy.signal import convolve2d

def convolve2d_gradient(x, w, d):
    """
    Computes the gradient of 2D convolution with respect to x (dx) and the filter (dw).

    Args:
    x: 2D input array of shape (H, W)
    w: 2D filter array of shape (FH, FW)
    d: 2D output gradient array of shape (OH, OW)

    Returns:
    dx: 2D gradient of x array of shape (H, W)
    dw: 2D gradient of w array of shape (FH, FW)
    """

    # Flip the filter in both directions for cross-correlation
    w_flipped = np.flip(np.flip(w, axis=0), axis=1)

    # Compute the gradient of x using the transposed filter
    dx = convolve2d(d, w_flipped, mode='full')[:x.shape[0], :x.shape[1]]

    # Compute the gradient of w using the input and output gradients
    dw = convolve2d(np.flip(np.flip(x, axis=0), axis=1), d, mode='valid')

    return dx, dw


In [39]:
# Example usage
x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
w = np.array([[1, 0], [0, 1]])
d = np.array([[1, 2], [3, 4]])

dx, dw = convolve2d_gradient(x, w, d)

print("Gradient of x:")
print(dx)

print("Gradient of w:")
print(dw)


Gradient of x:
[[1 2 0]
 [3 5 2]
 [0 3 4]]
Gradient of w:
[[77 67]
 [47 37]]


In [63]:
import torch
import torch.nn.functional as F

def rot180(w):
    """
    Roate by 180 degrees
    """
    return torch.flip(w, dims=[2, 3])

def pad_to_full_conv2d(x, w_size):
    """
    Pad x, such that using a 'VALID' convolution in PyTorch is the same
    as using a 'FULL' convolution.
    """
    padding = w_size - 1
    return F.pad(x, (padding, padding, padding, padding), mode='constant', value=0)

def NHWC_to_HWIO(out):
    """
    Converts [batch, in_channels, in_height, in_width]
    to       [out_channels, in_channels, filter_height, filter_width]
    """
    return out.permute(3, 2, 1, 0)

# sizes, fixed strides, in_channel, out_channel be 1 for now
x_size = 4
w_size = 3  # use an odd number here
x_shape = (1, 1, x_size, x_size)
w_shape = (1, 1, w_size, w_size)
out_shape = (1, 1, x_size - w_size + 1, x_size - w_size + 1)
strides = (1, 1)

# numpy value
x_np = torch.randint(10, size=x_shape, dtype=torch.float32)
w_np = torch.randint(10, size=w_shape, dtype=torch.float32)
out_scale_np = torch.randint(10, size=out_shape, dtype=torch.float32)

# pytorch forward
x = torch.tensor(x_np, requires_grad=True)
w = torch.tensor(w_np, requires_grad=True)
out = F.conv2d(x, w, stride=strides, padding=0)
out_scale = torch.tensor(out_scale_np, requires_grad=True)
f = torch.sum(out * out_scale)

# pytorch backward
f.backward(retain_graph=True)
d_out = x.grad.detach()

# 4 different ways to compute d_x
d_x = x.grad
d_x_manual = F.conv2d(d_out, w.flip([2, 3]), stride=strides, padding=0)




In [64]:
d_x_manual

tensor([[[[1788., 1604.],
          [2218., 1998.]]]], grad_fn=<ConvolutionBackward0>)

In [60]:
d_x

tensor([[[[ 27.,  78.,  31.,  40.],
          [ 63.,  81., 105.,  71.],
          [ 18.,  54.,  93.,  45.],
          [  0.,  18.,  27.,   9.]]]])

In [85]:
import torch
import torch.nn.functional as F
from torch.autograd import Variable

# Define input tensor and filters
x = Variable(torch.randn(3, 1, 3, 3), requires_grad=True)
w = Variable(torch.randn(3, 1, 3, 3), requires_grad=True)

# Perform convolution operation
y = F.conv2d(x, w, stride=1, padding=1)

# Define loss function
loss = y.sum()

# Compute gradients
loss.backward()

# Retrieve gradients of input and weights
grad_x = x.grad
grad_w = w.grad


In [86]:
grad_x

tensor([[[[ -8.9880, -10.5122,  -4.5570],
          [ -5.1470,  -7.5202,  -4.7913],
          [  2.8497,   0.1662,  -2.4810]]],


        [[[ -8.9880, -10.5122,  -4.5570],
          [ -5.1470,  -7.5202,  -4.7913],
          [  2.8497,   0.1662,  -2.4810]]],


        [[[ -8.9880, -10.5122,  -4.5570],
          [ -5.1470,  -7.5202,  -4.7913],
          [  2.8497,   0.1662,  -2.4810]]]])

In [87]:
grad_w

tensor([[[[ -5.2912,  -9.7557,  -5.3568],
          [ -6.1425, -11.4750,  -5.6802],
          [ -3.0966,  -5.6679,  -2.7107]]],


        [[[ -5.2912,  -9.7557,  -5.3568],
          [ -6.1425, -11.4750,  -5.6802],
          [ -3.0966,  -5.6679,  -2.7107]]],


        [[[ -5.2912,  -9.7557,  -5.3568],
          [ -6.1425, -11.4750,  -5.6802],
          [ -3.0966,  -5.6679,  -2.7107]]]])

In [177]:
import torch
import torch.nn.functional as F

# Define input tensor and filters
x = torch.randn(1, 1, 4, 4, requires_grad=True)
w = torch.randn(1, 1, 3, 3, requires_grad=True)

# Perform convolution operation
y = F.conv2d(x, w, stride=1, padding=1)

# Define loss function
loss = y.sum()

# Compute gradients of output tensor
grad_y = torch.autograd.grad(loss, y, retain_graph=True)[0]

# Compute gradients of input tensor and filters
grad_x = F.conv_transpose2d(grad_y,w, stride=1, padding=1)
grad_w = F.conv2d(x,grad_y,stride=1,padding=1)



In [178]:
grad_y.shape

torch.Size([1, 1, 4, 4])

In [179]:
grad_x

tensor([[[[-2.5836, -4.1881, -4.1881, -3.6721],
          [-1.2203, -2.8034, -2.8034, -2.5446],
          [-1.2203, -2.8034, -2.8034, -2.5446],
          [-0.6830, -1.5558, -1.5558, -1.9757]]]],
       grad_fn=<ConvolutionBackward0>)

In [180]:
grad_w

tensor([[[[-2.7657, -4.4332, -4.4177],
          [-3.8632, -6.1062, -5.6357],
          [-3.9322, -4.1167, -3.1665]]]], grad_fn=<ConvolutionBackward0>)

In [181]:
loss.backward()

# Retrieve gradients of input and weights
grad_x = x.grad
grad_w = w.grad


In [182]:
grad_x

tensor([[[[-2.5836, -4.1881, -4.1881, -3.6721],
          [-1.2203, -2.8034, -2.8034, -2.5446],
          [-1.2203, -2.8034, -2.8034, -2.5446],
          [-0.6830, -1.5558, -1.5558, -1.9757]]]])

In [183]:
grad_w

tensor([[[[-2.7657, -4.4332, -4.4177],
          [-3.8632, -6.1062, -5.6357],
          [-3.9322, -4.1167, -3.1665]]]])

In [184]:
grad_x = F.conv2d(grad_y,w.flip([2,3]),padding=1)

In [185]:
grad_x

tensor([[[[-2.5836, -4.1881, -4.1881, -3.6721],
          [-1.2203, -2.8034, -2.8034, -2.5446],
          [-1.2203, -2.8034, -2.8034, -2.5446],
          [-0.6830, -1.5558, -1.5558, -1.9757]]]],
       grad_fn=<ConvolutionBackward0>)