In [1]:
import torch

# Define input data and filters
x = torch.tensor([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]], requires_grad=True)
f = torch.tensor([[1., 0., -1.], [2., 0., -2.], [1., 0., -1.]], requires_grad=True)

# Define the forward pass function
def conv2d(x, f):
    return torch.nn.functional.conv2d(x.view(1, 1, 3, 3), f.view(1, 1, 3, 3), padding=0)

# Define the loss function (sum of the output values)
def loss(y):
    return y.sum()

# Compute the derivative of the loss with respect to x and f using autograd
y = conv2d(x, f)
L = loss(y)
grads = torch.autograd.grad(L, [x, f])

# Print the derivative values
print("Derivative with respect to x: ")
print(grads[0])
print("Derivative with respect to f: ")
print(grads[1])


Derivative with respect to x: 
tensor([[ 1.,  0., -1.],
        [ 2.,  0., -2.],
        [ 1.,  0., -1.]])
Derivative with respect to f: 
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])


In [8]:
import torch

# Define input data and filters
x = torch.tensor([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]], requires_grad=True)
f = torch.tensor([[1., 0., -1.], [2., 0., -2.], [1., 0., -1.]], requires_grad=True)

# Define the forward pass function
def conv2d(x, f):
    return torch.nn.functional.conv2d(x.view(1, 1, 3, 3), f.view(1, 1, 3, 3), padding=0)

# Define the loss function (sum of the output values)
def loss(y):
    return y.sum()

# Compute the derivative of the loss with respect to x and f using PyTorch's low-level functions
y = conv2d(x, f)
L = loss(y)

# Compute the derivative of L with respect to y
grad_y = torch.ones_like(y)
#grad_y *= L

# Compute the derivative of L with respect to x and f using conv2d_transpose
grad_x = torch.nn.functional.conv_transpose2d(grad_y, f.view(1, 1, 3, 3), padding=0)
grad_f = torch.nn.functional.conv2d(x.view(1, 1, 3, 3), grad_y, padding=0)

# Print the derivative values
print("Derivative with respect to x: ")
print(grad_x)
print("Derivative with respect to f: ")
print(grad_f)


Derivative with respect to x: 
tensor([[[[ 1.,  0., -1.],
          [ 2.,  0., -2.],
          [ 1.,  0., -1.]]]], grad_fn=<ConvolutionBackward0>)
Derivative with respect to f: 
tensor([[[[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]], grad_fn=<ConvolutionBackward0>)


In [10]:
import tensorflow as tf

# Define input data and filters
x = tf.constant([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]], dtype=tf.float32)
f = tf.constant([[1., 0., -1.], [2., 0., -2.], [1., 0., -1.]], dtype=tf.float32)

# Define the forward pass function
def conv2d(x, f):
    return tf.nn.conv2d(tf.reshape(x, [1, 3, 3, 1]), tf.reshape(f, [3, 3, 1, 1]), strides=[1, 1, 1, 1], padding='VALID')

# Define the loss function (sum of the output values)
def loss(y):
    return tf.reduce_sum(y)

# Compute the derivative of the loss with respect to x and f using tape gradient
with tf.GradientTape() as tape:
    tape.watch([x, f])
    y = conv2d(x, f)
    #L = loss(y)
    print(y)
grads = tape.gradient(y, [x, f])

# Print the derivative values
print("Derivative with respect to x: ")
print(grads[0])
print("Derivative with respect to f: ")
print(grads[1])


tf.Tensor([[[[-8.]]]], shape=(1, 1, 1, 1), dtype=float32)
Derivative with respect to x: 
tf.Tensor(
[[ 1.  0. -1.]
 [ 2.  0. -2.]
 [ 1.  0. -1.]], shape=(3, 3), dtype=float32)
Derivative with respect to f: 
tf.Tensor(
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]], shape=(3, 3), dtype=float32)


In [9]:
import tensorflow as tf

# Define input data and filters
x = tf.constant([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]], dtype=tf.float32)
f = tf.constant([[1., 0., -1.], [2., 0., -2.], [1., 0., -1.]], dtype=tf.float32)

# Perform the 2D convolution
y = tf.nn.conv2d(tf.reshape(x, [1, 3, 3, 1]), tf.reshape(f, [3, 3, 1, 1]), strides=[1, 1, 1, 1], padding='VALID')
print("Output of convolution: ")
print(y)

# Define the derivative of the convolution operation
x_grad = tf.nn.conv2d_transpose(y, tf.reshape(f, [3, 3, 1, 1]), tf.shape(x), strides=[1, 1, 1, 1], padding='VALID')
f_grad = tf.nn.conv2d(tf.reshape(x, [1, 3, 3, 1]), tf.transpose(y, perm=[1, 2, 0, 3]), strides=[1, 1, 1, 1], padding='VALID')


Output of convolution: 
tf.Tensor([[[[-8.]]]], shape=(1, 1, 1, 1), dtype=float32)


In [2]:
x_grad

<tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
array([[[[ -8.],
         [  0.],
         [  8.]],

        [[-16.],
         [  0.],
         [ 16.]],

        [[ -8.],
         [  0.],
         [  8.]]]], dtype=float32)>

In [38]:
import numpy as np
from scipy.signal import convolve2d

def convolve2d_gradient(x, w, d):
    """
    Computes the gradient of 2D convolution with respect to x (dx) and the filter (dw).

    Args:
    x: 2D input array of shape (H, W)
    w: 2D filter array of shape (FH, FW)
    d: 2D output gradient array of shape (OH, OW)

    Returns:
    dx: 2D gradient of x array of shape (H, W)
    dw: 2D gradient of w array of shape (FH, FW)
    """

    # Flip the filter in both directions for cross-correlation
    w_flipped = np.flip(np.flip(w, axis=0), axis=1)

    # Compute the gradient of x using the transposed filter
    dx = convolve2d(d, w_flipped, mode='full')[:x.shape[0], :x.shape[1]]

    # Compute the gradient of w using the input and output gradients
    dw = convolve2d(np.flip(np.flip(x, axis=0), axis=1), d, mode='valid')

    return dx, dw


In [39]:
# Example usage
x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
w = np.array([[1, 0], [0, 1]])
d = np.array([[1, 2], [3, 4]])

dx, dw = convolve2d_gradient(x, w, d)

print("Gradient of x:")
print(dx)

print("Gradient of w:")
print(dw)


Gradient of x:
[[1 2 0]
 [3 5 2]
 [0 3 4]]
Gradient of w:
[[77 67]
 [47 37]]


In [328]:
import torch
import torch.nn.functional as F

def rot180(w):
    """
    Roate by 180 degrees
    """
    return torch.flip(w, dims=[2, 3])

def pad_to_full_conv2d(x, w_size):
    """
    Pad x, such that using a 'VALID' convolution in PyTorch is the same
    as using a 'FULL' convolution.
    """
    padding = w_size - 1
    return F.pad(x, (padding, padding, padding, padding), mode='constant', value=0)

def NHWC_to_HWIO(out):
    """
    Converts [batch, in_channels, in_height, in_width]
    to       [out_channels, in_channels, filter_height, filter_width]
    """
    return out.permute(3, 2, 1, 0)

# sizes, fixed strides, in_channel, out_channel be 1 for now
x_size = 4
w_size = 3  # use an odd number here
x_shape = (1, 1, x_size, x_size)
w_shape = (1, 1, w_size, w_size)
out_shape = (1, 1, x_size - w_size + 1, x_size - w_size + 1)
strides = (1, 1)

# numpy value
x_np = torch.randint(10, size=x_shape, dtype=torch.float32)
w_np = torch.randint(10, size=w_shape, dtype=torch.float32)
out_scale_np = torch.randint(10, size=out_shape, dtype=torch.float32)

# pytorch forward
x = torch.tensor(x_np, requires_grad=True)
w = torch.tensor(w_np, requires_grad=True)
out = F.conv2d(x, w, stride=strides, padding=0)
out_scale = torch.tensor(out_scale_np, requires_grad=True)
f = torch.sum(out * out_scale)

# pytorch backward
f.backward(retain_graph=True)
d_out = x.grad.detach()

# 4 different ways to compute d_x
d_x = x.grad
d_x_manual = F.conv2d(d_out, w.flip([2, 3]), stride=strides, padding=0)




In [329]:
d_x_manual

tensor([[[[1509., 1680.],
          [ 930., 1288.]]]], grad_fn=<ConvolutionBackward0>)

In [330]:
d_x

tensor([[[[25., 30., 25.,  0.],
          [30., 47., 79., 20.],
          [13., 27., 65., 36.],
          [ 2.,  6., 18., 28.]]]])

In [85]:
import torch
import torch.nn.functional as F
from torch.autograd import Variable

# Define input tensor and filters
x = Variable(torch.randn(3, 1, 3, 3), requires_grad=True)
w = Variable(torch.randn(3, 1, 3, 3), requires_grad=True)

# Perform convolution operation
y = F.conv2d(x, w, stride=1, padding=1)

# Define loss function
loss = y.sum()

# Compute gradients
loss.backward()

# Retrieve gradients of input and weights
grad_x = x.grad
grad_w = w.grad


In [86]:
grad_x

tensor([[[[ -8.9880, -10.5122,  -4.5570],
          [ -5.1470,  -7.5202,  -4.7913],
          [  2.8497,   0.1662,  -2.4810]]],


        [[[ -8.9880, -10.5122,  -4.5570],
          [ -5.1470,  -7.5202,  -4.7913],
          [  2.8497,   0.1662,  -2.4810]]],


        [[[ -8.9880, -10.5122,  -4.5570],
          [ -5.1470,  -7.5202,  -4.7913],
          [  2.8497,   0.1662,  -2.4810]]]])

In [87]:
grad_w

tensor([[[[ -5.2912,  -9.7557,  -5.3568],
          [ -6.1425, -11.4750,  -5.6802],
          [ -3.0966,  -5.6679,  -2.7107]]],


        [[[ -5.2912,  -9.7557,  -5.3568],
          [ -6.1425, -11.4750,  -5.6802],
          [ -3.0966,  -5.6679,  -2.7107]]],


        [[[ -5.2912,  -9.7557,  -5.3568],
          [ -6.1425, -11.4750,  -5.6802],
          [ -3.0966,  -5.6679,  -2.7107]]]])

In [321]:
import torch
import torch.nn.functional as F

# Define input tensor and filters
x = torch.randn(3, 3, 5, 5, requires_grad=True)
w = torch.randn(3, 3, 3, 3, requires_grad=True)

# Perform convolution operation
y = F.conv2d(x, w, stride=1, padding=1)

# Define loss function
loss = y.sum()

# Compute gradients of output tensor
grad_y = torch.autograd.grad(loss, y, retain_graph=True)[0]

# Compute gradients of input tensor and filters
grad_x = F.conv_transpose2d(grad_y,w, stride=1, padding=1)
grad_w = F.conv2d(x,grad_y,stride=1,padding=1)



In [322]:
grad_y.shape

torch.Size([3, 3, 5, 5])

In [323]:
grad_x

tensor([[[[-3.0718, -3.6533, -3.6533, -3.6533, -1.4389],
          [-3.5878, -5.2947, -5.2947, -5.2947, -1.9294],
          [-3.5878, -5.2947, -5.2947, -5.2947, -1.9294],
          [-3.5878, -5.2947, -5.2947, -5.2947, -1.9294],
          [-1.9459, -3.7799, -3.7799, -3.7799, -1.8813]],

         [[-1.5236,  0.8639,  0.8639,  0.8639,  3.1336],
          [-1.0251,  2.7720,  2.7720,  2.7720,  3.0399],
          [-1.0251,  2.7720,  2.7720,  2.7720,  3.0399],
          [-1.0251,  2.7720,  2.7720,  2.7720,  3.0399],
          [ 0.9425,  3.4974,  3.4974,  3.4974,  0.3375]],

         [[-5.8785, -5.5987, -5.5987, -5.5987, -1.8357],
          [-6.8675, -8.8960, -8.8960, -8.8960, -7.4468],
          [-6.8675, -8.8960, -8.8960, -8.8960, -7.4468],
          [-6.8675, -8.8960, -8.8960, -8.8960, -7.4468],
          [-4.2066, -4.6123, -4.6123, -4.6123, -5.2918]]],


        [[[-3.0718, -3.6533, -3.6533, -3.6533, -1.4389],
          [-3.5878, -5.2947, -5.2947, -5.2947, -1.9294],
          [-3.5878, -5.

In [324]:
grad_w

tensor([[[[ -9.8554, -10.0831,  -8.2008],
          [ -9.1956,  -9.3712,  -5.0751],
          [ -8.3023,  -8.3388,  -3.4559]],

         [[ -9.8554, -10.0831,  -8.2008],
          [ -9.1956,  -9.3712,  -5.0751],
          [ -8.3023,  -8.3388,  -3.4559]],

         [[ -9.8554, -10.0831,  -8.2008],
          [ -9.1956,  -9.3712,  -5.0751],
          [ -8.3023,  -8.3388,  -3.4559]]],


        [[[ 16.6867,  17.7942,  10.9246],
          [  9.7287,  10.0513,   4.0828],
          [  7.8516,   6.7181,   3.6140]],

         [[ 16.6867,  17.7942,  10.9246],
          [  9.7287,  10.0513,   4.0828],
          [  7.8516,   6.7181,   3.6140]],

         [[ 16.6867,  17.7942,  10.9246],
          [  9.7287,  10.0513,   4.0828],
          [  7.8516,   6.7181,   3.6140]]],


        [[[  5.7695,   1.1255,   1.1586],
          [  5.2874,   0.5180,   2.1058],
          [  8.9798,   6.4206,   7.8446]],

         [[  5.7695,   1.1255,   1.1586],
          [  5.2874,   0.5180,   2.1058],
          [  8.9

In [325]:
loss.backward()

# Retrieve gradients of input and weights
grad_x = x.grad
grad_w = w.grad


In [326]:
grad_x

tensor([[[[-3.0718, -3.6533, -3.6533, -3.6533, -1.4389],
          [-3.5878, -5.2947, -5.2947, -5.2947, -1.9294],
          [-3.5878, -5.2947, -5.2947, -5.2947, -1.9294],
          [-3.5878, -5.2947, -5.2947, -5.2947, -1.9294],
          [-1.9459, -3.7799, -3.7799, -3.7799, -1.8813]],

         [[-1.5236,  0.8639,  0.8639,  0.8639,  3.1336],
          [-1.0251,  2.7720,  2.7720,  2.7720,  3.0399],
          [-1.0251,  2.7720,  2.7720,  2.7720,  3.0399],
          [-1.0251,  2.7720,  2.7720,  2.7720,  3.0399],
          [ 0.9425,  3.4974,  3.4974,  3.4974,  0.3375]],

         [[-5.8785, -5.5987, -5.5987, -5.5987, -1.8357],
          [-6.8675, -8.8960, -8.8960, -8.8960, -7.4468],
          [-6.8675, -8.8960, -8.8960, -8.8960, -7.4468],
          [-6.8675, -8.8960, -8.8960, -8.8960, -7.4468],
          [-4.2066, -4.6123, -4.6123, -4.6123, -5.2918]]],


        [[[-3.0718, -3.6533, -3.6533, -3.6533, -1.4389],
          [-3.5878, -5.2947, -5.2947, -5.2947, -1.9294],
          [-3.5878, -5.

In [327]:
grad_w

tensor([[[[ 4.7395,  2.8424,  0.5553],
          [-2.2336, -3.6704, -3.1803],
          [-3.5062, -4.7763, -2.5760]],

         [[-4.0514, -6.9727, -6.8359],
          [-1.2134, -3.3513, -1.9261],
          [ 5.6523,  5.6591,  8.4727]],

         [[11.9128, 12.9668, 10.1629],
          [ 9.2675,  8.2198,  6.2199],
          [ 6.3831,  3.9171,  2.1060]]],


        [[[ 4.7395,  2.8424,  0.5553],
          [-2.2336, -3.6704, -3.1803],
          [-3.5062, -4.7763, -2.5760]],

         [[-4.0514, -6.9727, -6.8359],
          [-1.2134, -3.3513, -1.9261],
          [ 5.6523,  5.6591,  8.4727]],

         [[11.9128, 12.9668, 10.1629],
          [ 9.2675,  8.2198,  6.2199],
          [ 6.3831,  3.9171,  2.1060]]],


        [[[ 4.7395,  2.8424,  0.5553],
          [-2.2336, -3.6704, -3.1803],
          [-3.5062, -4.7763, -2.5760]],

         [[-4.0514, -6.9727, -6.8359],
          [-1.2134, -3.3513, -1.9261],
          [ 5.6523,  5.6591,  8.4727]],

         [[11.9128, 12.9668, 10.1629],
     

In [302]:
grad_x = F.conv2d(grad_y,w.flip([2,3]),padding=1)

In [303]:
grad_x

tensor([[[[ 4.0973,  6.3338,  6.3338,  6.3338,  5.3199],
          [ 7.9552, 11.1193, 11.1193, 11.1193,  8.0118],
          [ 7.9552, 11.1193, 11.1193, 11.1193,  8.0118],
          [ 7.9552, 11.1193, 11.1193, 11.1193,  8.0118],
          [ 6.5303,  7.7477,  7.7477,  7.7477,  4.1833]]]],
       grad_fn=<ConvolutionBackward0>)

In [319]:
import torch
import torch.nn.functional as F

# Create some input data and a convolution kernel
x = torch.randn(1, 1, 5, 5, requires_grad=True)
w = torch.randn(1, 1, 3, 3, requires_grad=True)

# Perform the convolution
y = F.conv2d(x, w,stride=1,padding=1)
loss = y.sum()
# Compute the gradient of the convolution output with respect to the input
#grad_y = torch.ones_like(y)
grad_y = torch.autograd.grad(loss, y, retain_graph=True)[0]

grad_x = F.conv_transpose2d(grad_y, w, stride=1, padding=1)
grad_w = F.conv2d(x,grad_y,stride=1,padding=1)
# Print the gradient of y with respect to x
print("Gradient of y with respect to x:")
print(grad_x)
print(grad_w)

Gradient of y with respect to x:
tensor([[[[-3.9087, -2.5821, -2.5821, -2.5821, -0.2595],
          [-3.3397, -3.2095, -3.2095, -3.2095, -0.7135],
          [-3.3397, -3.2095, -3.2095, -3.2095, -0.7135],
          [-3.3397, -3.2095, -3.2095, -3.2095, -0.7135],
          [-0.5627, -2.0320, -2.0320, -2.0320, -0.0084]]]],
       grad_fn=<ConvolutionBackward0>)
tensor([[[[ -5.2275,  -2.9276,  -2.5082],
          [ -9.0215,  -4.8723,  -3.1846],
          [-10.2993,  -6.0862,  -4.9238]]]], grad_fn=<ConvolutionBackward0>)


In [320]:
loss = y.sum()

loss.backward()

# Retrieve gradients of input and weights
grad_x = x.grad
grad_w = w.grad
print(grad_x)
print(grad_w)

tensor([[[[-3.9087, -2.5821, -2.5821, -2.5821, -0.2595],
          [-3.3397, -3.2095, -3.2095, -3.2095, -0.7135],
          [-3.3397, -3.2095, -3.2095, -3.2095, -0.7135],
          [-3.3397, -3.2095, -3.2095, -3.2095, -0.7135],
          [-0.5627, -2.0320, -2.0320, -2.0320, -0.0084]]]])
tensor([[[[ -5.2275,  -2.9276,  -2.5082],
          [ -9.0215,  -4.8723,  -3.1846],
          [-10.2993,  -6.0862,  -4.9238]]]])


In [249]:
grad_x = F.conv2d(grad_y, w.flip([2,3]), stride=1, padding=1)
grad_x

tensor([[[[ 1.9456, -1.0765, -1.0765, -1.0765, -0.9311],
          [ 1.7023, -3.1691, -3.1691, -3.1691, -3.4197],
          [ 1.7023, -3.1691, -3.1691, -3.1691, -3.4197],
          [ 1.7023, -3.1691, -3.1691, -3.1691, -3.4197],
          [ 1.1934, -1.7279, -1.7279, -1.7279, -2.1491]]]])

In [310]:
import torch
import torch.nn.functional as F

# Create some input data and a convolution kernel
x = torch.randn(1, 1, 5, 5, requires_grad=True)
w = torch.randn(1, 1, 3, 3, requires_grad=True)

# Perform the convolution
y = F.conv2d(x, w, stride=1, padding=1)
loss = y.sum()

# Backpropagate gradients using auto-differentiation
loss.backward()

# Retrieve gradients of input and weights
grad_x = x.grad
grad_w = w.grad

# Compute gradients of output tensor
grad_y = torch.autograd.grad(loss, y, retain_graph=True)[0]

# Compute gradients of input tensor and filters
grad_x_manual = F.conv_transpose2d(grad_y,w, stride=1, padding=1)
grad_w_manual = F.conv2d(x,grad_y,stride=1,padding=1)

# Print the gradients computed using both methods to compare
print("Gradient of y with respect to x using auto-differentiation:")
print(grad_x)
print("Gradient of y with respect to x using manual computation:")
print(grad_x_manual)

print("Gradient of y with respect to w using auto-differentiation:")
print(grad_w)
print("Gradient of y with respect to w using manual computation:")
print(grad_w_manual)


Gradient of y with respect to x using auto-differentiation:
tensor([[[[-2.1584, -2.1899, -2.1899, -2.1899, -1.1411],
          [-4.0398, -2.6966, -2.6966, -2.6966,  0.4368],
          [-4.0398, -2.6966, -2.6966, -2.6966,  0.4368],
          [-4.0398, -2.6966, -2.6966, -2.6966,  0.4368],
          [-4.0967, -1.7770, -1.7770, -1.7770,  0.6559]]]])
Gradient of y with respect to x using manual computation:
tensor([[[[-2.1584, -2.1899, -2.1899, -2.1899, -1.1411],
          [-4.0398, -2.6966, -2.6966, -2.6966,  0.4368],
          [-4.0398, -2.6966, -2.6966, -2.6966,  0.4368],
          [-4.0398, -2.6966, -2.6966, -2.6966,  0.4368],
          [-4.0967, -1.7770, -1.7770, -1.7770,  0.6559]]]],
       grad_fn=<ConvolutionBackward0>)
Gradient of y with respect to w using auto-differentiation:
tensor([[[[ 4.6517,  4.9953,  1.5210],
          [ 5.8352,  4.6035,  0.5456],
          [ 4.3783,  4.0135, -0.5564]]]])
Gradient of y with respect to w using manual computation:
tensor([[[[ 4.6517,  4.9953, 

In [314]:
import torch
import torch.nn.functional as F

# Define input tensor and filters
x = torch.randn(1, 1, 5, 5, requires_grad=True)
w = torch.randn(1, 1, 3, 3, requires_grad=True)

# Perform convolution operation
y = F.conv2d(x, w, stride=1, padding=1)

# Define loss function
loss = y.sum()
# Backpropagate gradients using auto-differentiation
loss.backward()

# Retrieve gradients of input and weights
grad_x = x.grad
grad_w = w.grad

# Compute gradients of output tensor
grad_y = torch.autograd.grad(loss, y, retain_graph=True)[0]

# Compute gradients of input tensor and filters
grad_x_manual = F.conv_transpose2d(grad_y,w, stride=1, padding=1)
grad_w_manual = F.conv2d(x,grad_y,stride=1,padding=1)

print(grad_x)
print(grad_w)
print(grad_x_manual)
print(grad_w_manual)

tensor([[[[-0.0801,  2.6186,  2.6186,  2.6186,  0.8259],
          [-2.4839, -0.7851, -0.7851, -0.7851, -1.2002],
          [-2.4839, -0.7851, -0.7851, -0.7851, -1.2002],
          [-2.4839, -0.7851, -0.7851, -0.7851, -1.2002],
          [-1.7288, -1.3754, -1.3754, -1.3754, -0.9006]]]])
tensor([[[[-3.6176, -2.4300, -3.9069],
          [-7.5963, -5.9993, -7.7322],
          [-4.9380, -3.5094, -5.2451]]]])
tensor([[[[-0.0801,  2.6186,  2.6186,  2.6186,  0.8259],
          [-2.4839, -0.7851, -0.7851, -0.7851, -1.2002],
          [-2.4839, -0.7851, -0.7851, -0.7851, -1.2002],
          [-2.4839, -0.7851, -0.7851, -0.7851, -1.2002],
          [-1.7288, -1.3754, -1.3754, -1.3754, -0.9006]]]],
       grad_fn=<ConvolutionBackward0>)
tensor([[[[-3.6176, -2.4300, -3.9069],
          [-7.5963, -5.9993, -7.7322],
          [-4.9380, -3.5094, -5.2451]]]], grad_fn=<ConvolutionBackward0>)


In [338]:
import torch
import torch.nn.functional as F

# Create some input data and a convolution kernel
x = torch.randn(1, 3, 5, 5, requires_grad=True)
w = torch.randn(1, 3, 3, 3, requires_grad=True)

# Perform the convolution
y = F.conv2d(x, w, stride=1, padding=1)
loss = y.sum()

# Backpropagate gradients using auto-differentiation
loss.backward()

# Retrieve gradients of input and weights
grad_x = x.grad
grad_w = w.grad

# Compute the gradients of y with respect to x and w using manual computation
grad_y = torch.autograd.grad(loss, y, retain_graph=True)[0]
grad_x_manual = F.conv_transpose2d(grad_y, w, stride=1, padding=1)
grad_w_manual = F.conv2d( x.transpose(0,1),grad_y, stride=1, padding=1)

# Print the gradients computed using both methods to compare
print("Gradient of y with respect to x using auto-differentiation:")
print(grad_x)
print("Gradient of y with respect to x using manual computation:")
print(grad_x_manual)

print("Gradient of y with respect to w using auto-differentiation:")
print(grad_w)
print("Gradient of y with respect to w using manual computation:")
print(grad_w_manual)


Gradient of y with respect to x using auto-differentiation:
tensor([[[[0.9809, 0.2741, 0.2741, 0.2741, 0.6787],
          [1.7208, 2.1069, 2.1069, 2.1069, 2.0114],
          [1.7208, 2.1069, 2.1069, 2.1069, 2.0114],
          [1.7208, 2.1069, 2.1069, 2.1069, 2.0114],
          [1.6300, 2.4149, 2.4149, 2.4149, 1.2166]],

         [[0.9261, 3.3335, 3.3335, 3.3335, 3.1076],
          [1.4645, 3.2860, 3.2860, 3.2860, 2.1651],
          [1.4645, 3.2860, 3.2860, 3.2860, 2.1651],
          [1.4645, 3.2860, 3.2860, 3.2860, 2.1651],
          [0.4526, 0.7800, 0.7800, 0.7800, 0.2822]],

         [[1.3954, 0.5946, 0.5946, 0.5946, 0.3930],
          [2.7697, 2.7162, 2.7162, 2.7162, 1.9890],
          [2.7697, 2.7162, 2.7162, 2.7162, 1.9890],
          [2.7697, 2.7162, 2.7162, 2.7162, 1.9890],
          [2.6803, 3.7876, 3.7876, 3.7876, 2.6149]]]])
Gradient of y with respect to x using manual computation:
tensor([[[[0.9809, 0.2741, 0.2741, 0.2741, 0.6787],
          [1.7208, 2.1069, 2.1069, 2.1069, 

In [359]:
import torch
import torch.nn.functional as F

# Create some input data and a convolution kernel
x = torch.randn(3, 3, 5, 5, requires_grad=True)
w = torch.randn(3, 3, 3, 3, requires_grad=True)

# Perform the convolution
y = F.conv2d(x, w, stride=1, padding=1)
loss = y.sum()

# Backpropagate gradients using auto-differentiation
loss.backward()

# Retrieve gradients of input and weights
grad_x = x.grad
grad_w = w.grad

# Compute the gradients of y with respect to x and w using manual computation
grad_y = torch.autograd.grad(loss, y, retain_graph=True)[0]
grad_x_manual = F.conv_transpose2d(grad_y, w, stride=1, padding=1)
grad_w_manual = F.conv2d(x.transpose(0, 1), grad_y, stride=1, padding=1)

# Print the gradients computed using both methods to compare
print("Gradient of y with respect to x using auto-differentiation:")
print(grad_x)
print("Gradient of y with respect to x using manual computation:")
print(grad_x_manual)

print("Gradient of y with respect to w using auto-differentiation:")
print(grad_w)
print("Gradient of y with respect to w using manual computation:")
print(grad_w_manual)


Gradient of y with respect to x using auto-differentiation:
tensor([[[[  3.4949,  -0.1608,  -0.1608,  -0.1608,   1.2496],
          [  1.3516,  -1.3661,  -1.3661,  -1.3661,   1.0601],
          [  1.3516,  -1.3661,  -1.3661,  -1.3661,   1.0601],
          [  1.3516,  -1.3661,  -1.3661,  -1.3661,   1.0601],
          [ -0.6415,   0.8868,   0.8868,   0.8868,   3.6159]],

         [[ -2.7936,  -7.6039,  -7.6039,  -7.6039,  -4.5853],
          [ -2.1214, -10.2306, -10.2306, -10.2306,  -7.3572],
          [ -2.1214, -10.2306, -10.2306, -10.2306,  -7.3572],
          [ -2.1214, -10.2306, -10.2306, -10.2306,  -7.3572],
          [  1.1513,  -5.0100,  -5.0100,  -5.0100,  -2.3244]],

         [[ -0.2077,  -1.8835,  -1.8835,  -1.8835,  -2.2063],
          [  2.9592,   1.4115,   1.4115,   1.4115,  -0.1305],
          [  2.9592,   1.4115,   1.4115,   1.4115,  -0.1305],
          [  2.9592,   1.4115,   1.4115,   1.4115,  -0.1305],
          [  3.8845,   1.0679,   1.0679,   1.0679,  -0.2776]]],


  

          [  3.3270,   7.1439,   7.0406]]]], grad_fn=<ConvolutionBackward0>)


In [366]:
grad_x_manual = F.conv2d(grad_y,w.flip([2,3]),stride=1, padding=1)
grad_x_manual

tensor([[[[ 2.7638, -0.0145, -0.0145, -0.0145,  1.3298],
          [ 3.2370,  0.2800,  0.2800,  0.2800,  1.2824],
          [ 3.2370,  0.2800,  0.2800,  0.2800,  1.2824],
          [ 3.2370,  0.2800,  0.2800,  0.2800,  1.2824],
          [ 4.1476,  0.9790,  0.9790,  0.9790,  1.5422]],

         [[-2.4175, -3.8301, -3.8301, -3.8301, -1.9125],
          [-1.0575, -3.4531, -3.4531, -3.4531, -2.4171],
          [-1.0575, -3.4531, -3.4531, -3.4531, -2.4171],
          [-1.0575, -3.4531, -3.4531, -3.4531, -2.4171],
          [ 1.0559,  1.5893,  1.5893,  1.5893,  2.1449]],

         [[ 0.1473, -5.8035, -5.8035, -5.8035, -4.9593],
          [ 0.0100, -7.0121, -7.0121, -7.0121, -5.2930],
          [ 0.0100, -7.0121, -7.0121, -7.0121, -5.2930],
          [ 0.0100, -7.0121, -7.0121, -7.0121, -5.2930],
          [-0.8092, -5.6235, -5.6235, -5.6235, -2.6732]]],


        [[[ 2.7638, -0.0145, -0.0145, -0.0145,  1.3298],
          [ 3.2370,  0.2800,  0.2800,  0.2800,  1.2824],
          [ 3.2370,  0.