Trying to make pytorch autograd work for convolution using im2col.

In [0]:
import torch
loss_fn = torch.nn.MSELoss(reduction='sum')

In [0]:
a = torch.rand((4,3), requires_grad=True)
c = a.view(3,4)
b = torch.rand((3,5), requires_grad=True)
d = b.view(5,3)

In [15]:
c.requires_grad

True

In [16]:
e = torch.mm(d, c)
print(e.shape)

torch.Size([5, 4])


In [17]:
f = torch.sum(e)
print(f)
print(f.shape)

tensor(14.8477, grad_fn=<SumBackward0>)
torch.Size([])


In [18]:
loss2 = loss_fn(f, torch.tensor([20.]))
print(loss2)

tensor(26.5464, grad_fn=<MseLossBackward>)


  return F.mse_loss(input, target, reduction=self.reduction)


In [0]:
loss.backward()

In [0]:
loss2.backward()

In [12]:
# let's print some grads
print(a.grad)
print(b.grad)

tensor([[-14.4121, -21.3487, -27.2102],
        [-14.4121, -21.3487, -27.2102],
        [-14.4121, -21.3487, -27.2102],
        [-14.4121, -21.3487, -27.2102]])
tensor([[-12.7357, -12.7357, -12.7357, -12.7357, -12.7357],
        [-18.9845, -18.9845, -18.9845, -18.9845, -18.9845],
        [-12.6186, -12.6186, -12.6186, -12.6186, -12.6186]])


In [20]:
# let's print some grads
print(c.grad)
print(d.grad)

None
None


In [21]:
print(c.is_leaf)

False


In [22]:
print(a.grad)
print(b.grad)

tensor([[-24.2393, -24.2393, -24.2393],
        [-24.2393, -15.6413, -15.6413],
        [-15.6413, -15.6413, -35.2512],
        [-35.2512, -35.2512, -35.2512]])
tensor([[-22.9580, -28.3688, -16.3512, -22.9580, -28.3688],
        [-16.3512, -22.9580, -28.3688, -16.3512, -22.9580],
        [-28.3688, -16.3512, -22.9580, -28.3688, -16.3512]])


## let's do some magic

In [0]:
def im2col( x, hf, wf ,stride):

    """
    Parameters:
        x: image tensor to be translated into columns, (C,H,W)
        hf: filter height
        wf: filter width
        stride: stride
    Returns:
        col: tensor of dimension (h_out*w_out,hf*wf*C), where each column is a cube that will convolve with a filter
            h_out = (H-hf) // stride + 1, w_out = (W-wf) // stride + 1
    """

    c,h,w = x.shape
    h_out = (h-hf) // stride + 1
    w_out = (w-wf) // stride + 1
    x_col = torch.zeros(h_out*w_out,c*hf*wf)

    for i in range(h_out):
        for j in range(w_out):
            patch = x[...,i*stride:i*stride+hf,j*stride:j*stride+wf]
            x_col[i*w_out+j,:] = patch.reshape(-1)  #patch.reshape(-1)
    return x_col, h_out, w_out
    

In [24]:
C_out, C_in, hf, wf = 4, 3, 2, 2
h_in, w_in = 28, 28
x = torch.rand((C_in, h_in, w_in), requires_grad=True)
kernels = torch.rand((C_out, C_in, hf, wf), requires_grad=True)

print(x.is_leaf)
print(kernels.is_leaf)
print(x.requires_grad)
print(kernels.requires_grad)

True
True
True
True


In [0]:
# x = self.pad_tensor(x)                               # pad input
x_col, h_out, w_out = im2col(x, hf, wf,stride=1)

In [37]:
# moment of truth
print(x_col.requires_grad)
print(x_col.grad_fn)
print(x_col.is_leaf)

True
<CopySlices object at 0x7f07cd0f05c0>
False


In [0]:
x_col_t = x_col.t()
x_col_t_cuda = x_col_t.to('cuda', dtype=torch.float)

In [40]:
# moment of truth
print(x_col_t.grad_fn)
print(x_col_t_cuda.requires_grad)
print(x_col_t_cuda.grad_fn)
print(x_col_t.is_leaf)

<TBackward object at 0x7f07d5ce5940>
True
<CopyBackwards object at 0x7f07d5ce5940>
False


In [41]:
k_col = kernels.view(C_out, -1)                       # converted to 2d tensor 
print(k_col.requires_grad)
print(k_col.grad_fn)
print(k_col.is_leaf)

True
<ViewBackward object at 0x7f07d5d13b70>
False


In [42]:
print(k_col.shape)

torch.Size([4, 12])


In [44]:
k_col_gpu = k_col.to('cuda', dtype=torch.float)    # to gpu
print(k_col_gpu.requires_grad)
print(k_col_gpu.grad_fn)
print(k_col_gpu.is_leaf)

True
<CopyBackwards object at 0x7f07cd0f07f0>
False


In [46]:
print(k_col_gpu.shape, x_col_t_cuda.shape)
x_out = torch.mm(k_col_gpu, x_col_t_cuda)

torch.Size([4, 12]) torch.Size([12, 729])


In [47]:
print(x_out.shape)

torch.Size([4, 729])


In [48]:
print(x_out.requires_grad)
print(x_out.grad_fn)

True
<MmBackward object at 0x7f07d5e2bef0>


In [49]:
x_out_final = x_out.view(C_out, h_out, w_out)
x_out_final.shape

torch.Size([4, 27, 27])

In [51]:
# moment of truth
print(x_out_final.requires_grad)
print(x_out_final.grad_fn)
print(x_out_final.is_leaf)

True
<ViewBackward object at 0x7f07d5de5c88>
False


### Let's backprop now

In [0]:
loss = torch.nn.MSELoss(reduction='sum')

In [0]:
y = torch.rand_like(x_out_final, dtype=torch.float, device='cuda')*2

In [59]:
print(y)

tensor([[[1.0779, 0.7363, 1.4410,  ..., 1.0774, 1.1681, 1.4276],
         [0.5752, 0.3452, 1.2488,  ..., 1.0219, 0.4327, 0.8197],
         [0.3465, 0.5466, 1.3813,  ..., 0.5199, 1.0800, 0.7481],
         ...,
         [0.0503, 1.5900, 1.7840,  ..., 0.8094, 1.8392, 1.1054],
         [1.9059, 0.8647, 0.4012,  ..., 0.5458, 1.7754, 1.0504],
         [0.7548, 0.6548, 0.8848,  ..., 1.3808, 0.8822, 1.6523]],

        [[1.7525, 1.3746, 0.6401,  ..., 0.5344, 0.0471, 0.2882],
         [1.4012, 1.9239, 1.4569,  ..., 1.6859, 1.0718, 1.8612],
         [1.0573, 1.2915, 1.7502,  ..., 0.7491, 0.5394, 1.1549],
         ...,
         [0.5510, 1.7967, 1.8409,  ..., 0.3260, 0.3209, 0.1287],
         [1.4140, 1.9443, 1.1496,  ..., 0.1182, 1.7204, 1.6857],
         [0.0446, 0.6196, 1.3128,  ..., 1.8048, 1.9222, 0.3820]],

        [[0.0610, 0.7955, 1.5580,  ..., 1.0814, 0.3084, 0.4804],
         [0.5563, 0.5694, 0.9076,  ..., 0.6768, 0.2003, 1.1878],
         [0.5625, 1.7562, 1.8248,  ..., 1.1715, 1.4536, 0.

In [60]:
print(y.requires_grad)
print(y.is_leaf)

False
True


In [61]:
print(x_out_final)  # checking the magnitude

tensor([[[2.4588, 1.9888, 2.6494,  ..., 2.8088, 2.1124, 2.1825],
         [2.7072, 2.1903, 2.3385,  ..., 2.6334, 3.5819, 2.5503],
         [2.2628, 3.4325, 3.0799,  ..., 2.4191, 2.9195, 2.8200],
         ...,
         [2.6939, 3.5774, 3.2642,  ..., 1.7953, 2.7338, 2.9416],
         [2.9370, 3.6831, 3.2801,  ..., 2.9853, 2.9768, 2.4105],
         [3.2281, 3.4560, 3.4859,  ..., 3.3212, 2.6112, 2.4556]],

        [[2.9001, 2.4742, 3.0539,  ..., 3.0904, 2.2951, 1.9649],
         [3.4373, 2.9022, 2.0193,  ..., 3.1191, 3.5433, 2.2567],
         [3.4956, 3.0646, 3.2512,  ..., 3.3227, 3.1910, 2.8646],
         ...,
         [3.7307, 3.3619, 3.3360,  ..., 2.1349, 2.7135, 2.6997],
         [3.0728, 4.0720, 3.5770,  ..., 2.8562, 3.4816, 2.4407],
         [3.0541, 4.0797, 3.9784,  ..., 3.1574, 3.4356, 2.7536]],

        [[1.8838, 1.6032, 1.5547,  ..., 1.7364, 1.5973, 1.3380],
         [1.9631, 1.5811, 1.5240,  ..., 2.1798, 2.3255, 1.7932],
         [1.9677, 2.1491, 1.7950,  ..., 1.8336, 1.7051, 1.

In [0]:
loss_ = loss(x_out_final, y)

In [63]:
print(loss_)

tensor(8253.8721, device='cuda:0', grad_fn=<MseLossBackward>)


In [0]:
loss_.backward()

### Let's print the grads

In [65]:
print(x.grad)

tensor([[[ 5.3825,  6.2556,  7.2395,  ...,  6.8728,  5.0658,  1.2409],
         [12.1214, 16.6468, 12.3507,  ..., 21.9282, 17.8585,  6.9995],
         [14.2796, 24.1571, 16.5720,  ..., 26.2706, 24.8124,  6.8416],
         ...,
         [11.3688, 25.0172, 25.3967,  ..., 17.8251, 16.7213,  8.4912],
         [14.1096, 26.7388, 30.4649,  ..., 20.7332, 15.5110,  5.2640],
         [ 5.5542, 15.1542, 15.7519,  ...,  9.6888,  7.3231,  4.8062]],

        [[ 3.8229,  6.7660,  5.8613,  ...,  7.3426,  6.1416,  2.5127],
         [ 7.7673, 14.6324, 11.8095,  ..., 17.9468, 15.3693,  4.7572],
         [10.1069, 16.1810, 11.7481,  ..., 18.1754, 14.8501,  8.0058],
         ...,
         [10.9453, 18.9015, 18.1346,  ..., 15.1796, 15.5703,  8.8132],
         [ 9.0653, 19.8548, 23.2319,  ..., 18.0649, 12.4256,  5.5578],
         [ 6.7814, 13.8343, 13.3690,  ...,  8.7083,  7.6194,  2.3633]],

        [[ 3.1171,  5.6844,  5.6374,  ...,  7.0368,  5.6943,  2.8490],
         [ 8.9639, 15.3855, 13.1500,  ..., 19

In [69]:
print(x.shape)
print(x.grad.shape)

torch.Size([3, 28, 28])
torch.Size([3, 28, 28])


In [66]:
print(kernels.grad)

tensor([[[[1463.5704, 1416.5044],
          [1462.6399, 1452.6211]],

         [[1394.0062, 1378.3148],
          [1371.7632, 1376.6411]],

         [[1345.8103, 1386.7366],
          [1348.7325, 1426.2594]]],


        [[[1576.6775, 1505.4690],
          [1511.4930, 1592.6027]],

         [[1529.7990, 1488.8860],
          [1572.4814, 1537.0872]],

         [[1536.2754, 1540.5842],
          [1566.6355, 1564.5747]]],


        [[[ 662.2352,  660.4199],
          [ 730.4019,  714.9020]],

         [[ 697.0024,  702.4091],
          [ 646.6875,  634.8527]],

         [[ 650.3807,  651.5308],
          [ 736.1923,  707.1312]]],


        [[[ 704.5916,  687.4864],
          [ 644.9044,  648.7449]],

         [[ 634.8807,  620.9167],
          [ 639.5372,  722.4066]],

         [[ 714.1688,  687.8112],
          [ 645.5270,  640.5413]]]])


In [68]:
print(kernels.shape)
print(kernels.grad.shape)

torch.Size([4, 3, 2, 2])
torch.Size([4, 3, 2, 2])


Don't mind the variables names. I just wanted to keep everything seperate and easy to debug. But ***All worked well***, and we could sucessfully use ***PyTorch Autograd***.