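This notebook shows that the gradient of a transposed convolution (`ConvTranspose2d`) with respect to its input is an ordinary convolution (`conv2d`) of the output gradient with the same weights, at least in simple cases (stride=1, no padding).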

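Here we use no bias. Adding one would not change the result: the bias is added to the output independently of the input, so it drops out of the derivative of the output with respect to the input (and likewise with respect to the weights), as a simple derivation shows.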

In [1]:
import numpy as np
from torch import FloatTensor
from torch.autograd import Variable
from torch.nn.functional import mse_loss, conv2d

In [2]:
from torch.nn import ConvTranspose2d, Conv2d

In [3]:
import torch

In [4]:
# Seed PyTorch's global RNG so the random draws below are reproducible.
# The trailing semicolon suppresses the returned generator's repr in the notebook.
torch.manual_seed(0);

In [5]:
# Problem sizes: a multi-channel "code" is decoded into a single-channel image.
num_channel_code = 4
kernel_size = 9
input_size = 17
# With stride=1 and no padding, a transposed convolution grows each spatial
# dimension by kernel_size - 1, so the reference output must be 25x25 here.
output_size = input_size + kernel_size - 1

# Random input code and random reference image, both drawn from N(0, 1).
# normal_ fills in place and returns the tensor, so the calls can be chained.
input_this = FloatTensor(1, num_channel_code, input_size, input_size).normal_(0, 1)
output_ref = FloatTensor(1, 1, output_size, output_size).normal_(0, 1)

In [6]:
def forward():
    """Run one forward/backward pass through a freshly constructed ConvTranspose2d.

    Reads the notebook-level names ``num_channel_code``, ``kernel_size``,
    ``input_this`` and ``output_ref``.

    Returns
    -------
    tuple
        ``(cost, input_grad, layer, output)``:

        * ``cost`` -- the loss value as a numpy scalar,
        * ``input_grad`` -- autograd's gradient of the loss with respect to
          the input, as a numpy array,
        * ``layer`` -- the ``ConvTranspose2d`` module that produced the output,
        * ``output`` -- the layer output as a numpy array.
    """
    code_to_image_layer = ConvTranspose2d(num_channel_code, 1, kernel_size, bias=False)
    input_var = Variable(input_this, requires_grad=True)
    output_now = code_to_image_layer(input_var)
    # size_average=False: the squared errors are summed rather than averaged.
    cost = mse_loss(output_now, Variable(output_ref), size_average=False)

    # Defensive guard: clear any previously accumulated gradient before the
    # backward pass. The in-place method is zero_(); there is no zero().
    if input_var.grad is not None:
        input_var.grad.data.zero_()
    cost.backward()

    # Depending on the PyTorch version the loss converts to a 1-element array
    # or to a 0-dim array; ravel() makes indexing the single value safe for both.
    cost_value = cost.data.numpy().ravel()[0]
    return cost_value, input_var.grad.data.numpy(), code_to_image_layer, output_now.data.numpy()

In [7]:
# Run the reference pass once; later cells reuse the returned cost, autograd's
# input gradient, the layer (for its weights) and the layer output.
cost_this, grad_this, layer_forward, output_this = forward()

In [8]:
# Loss value returned by forward() for the layer output against output_ref.
cost_this

656.59412

In [9]:
# Summary statistics (mean, standard deviation) of autograd's input gradient.
grad_this.mean(), grad_this.std()

(-0.0020146435, 0.62366945)

In [10]:
# Next, recompute the input gradient by hand. First inspect the shape of the
# layer's weight, which the manual computation reuses.
layer_forward.weight.size()

torch.Size([4, 1, 9, 9])

In [11]:
def backward():
    """Recompute the gradient of the loss with respect to the input by hand.

    Reads the notebook-level names ``output_this`` (numpy array),
    ``output_ref`` (torch tensor) and ``layer_forward`` (the layer used in
    the forward pass). Prints the size of the result and returns it as a
    numpy array.
    """
    # Derivative of the summed squared error with respect to the layer output.
    # output_ref is converted explicitly so the arithmetic is pure numpy and
    # does not depend on how numpy and torch dispatch mixed operands.
    grad_output = 2 * (output_this - output_ref.numpy())

    # Back-propagating through the transposed convolution is an ordinary
    # convolution of the output gradient with the same weights. Both operands
    # are wrapped as plain (non-grad) Variables, so no autograd graph is built
    # and the removed `volatile` flag is not needed.
    grad_input = conv2d(Variable(FloatTensor(grad_output)),
                        Variable(layer_forward.weight.data), bias=None)
    print(grad_input.size())
    return grad_input.data.numpy()

In [12]:
# Hand-computed input gradient; backward() also prints its size.
grad_this_hand = backward()

torch.Size([1, 4, 17, 17])


In [13]:
# Check the hand-computed gradient against autograd's: same shape first,
# then the largest element-wise deviation must stay below the tolerance.
expected_shape = (1, num_channel_code, 17, 17)
assert grad_this.shape == grad_this_hand.shape == expected_shape
max_abs_diff = np.abs(grad_this - grad_this_hand).max()
print(max_abs_diff)
assert max_abs_diff < 1e-4

0.0
