In [5]:
# Enable IPython's autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
import torch
from torch.nn.functional import unfold, fold
from others.implementations import *
from torch import nn


## Convolution

In [87]:
# Use float64 as the default dtype for every tensor and layer created below.
# torch.set_default_tensor_type is deprecated; torch.set_default_dtype is the
# supported way to select the default floating-point dtype.
torch.set_default_dtype(torch.float64)

# Hyper-parameters shared by the custom layer and the torch reference layer.
padding = 0
stride = 2
kernel_size = 7
bias = True
custom_conv = Conv2d(3, 5, kernel_size=kernel_size, padding=padding, stride=stride, bias=bias)

# Deterministic input: channel 0 is filled with 3, the remaining channels with 1.
inp = torch.ones((2, 3, 32, 32))
inp[:, 0, :, :] = 3
# Leaf tensor that tracks gradients, so inp.grad is populated by backward().
inp.requires_grad = True
out = custom_conv.forward(inp)
# All-zero regression target with the same shape as the custom layer's output.
target = torch.zeros(out.size())

# torch reference layer built with the same hyper-parameters.
target_conv = nn.Conv2d(3, 5, kernel_size=kernel_size, padding=padding, stride=stride, bias=bias)
def init_weights(m):
    """Give an ``nn.Conv2d`` module all-one weights and, if it has one, a zero bias.

    Intended to be passed to ``nn.Module.apply``; modules that are not
    ``nn.Conv2d`` are left untouched.

    Args:
        m: Module visited by ``apply``.
    """
    if isinstance(m, nn.Conv2d):
        nn.init.ones_(m.weight)
        # Inspect the module itself instead of the notebook-level `bias` name:
        # that name is rebound to a tensor later in the notebook, which would
        # make `if bias:` raise when this function is re-run afterwards.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Run init_weights on the reference layer: ones for the conv weights and,
# when `bias` is set, zeros for the bias.
target_conv = target_conv.apply(init_weights)
target_out = target_conv(inp)
# target_out is not a leaf tensor; retain_grad() keeps its .grad after backward().
target_out.retain_grad()
# Half sum-of-squares loss between the zero target and the reference output.
loss = 0.5*((target-target_out)**2).sum()
loss.backward()
w = target_conv.weight

# Reference gradients computed by autograd; the check cells below compare the
# custom layer against them.
dl_dout = target_out.grad.detach()
dl_dw = w.grad.detach()
dl_dx = inp.grad

# The bias gradient (and therefore dl_db) only exists when the layers have a bias.
if bias:
    b = target_conv.bias
    dl_db = b.grad.detach()

In [88]:
# Check the forward pass: summed absolute difference between the torch
# reference output and the custom layer's output.
(target_out - out).abs().sum()

tensor(0., grad_fn=<SumBackward0>)

In [89]:
# Check dl_dx: pass the reference output gradient to the custom layer's
# backward() and compare the returned value with autograd's input gradient.
dl_dx_custom = custom_conv.backward(dl_dout)
(dl_dx_custom - dl_dx).abs().sum()

tensor(0.)

In [90]:
# Check dl_dw: weight gradient stored on the custom layer vs. autograd's.
# Presumably custom_conv.dl_dw is filled by the backward() call in the
# previous cell; verify against others.implementations.
(custom_conv.dl_dw - dl_dw).abs().sum()

tensor(0., grad_fn=<SumBackward0>)

In [91]:
# Check dl_db: bias gradient stored on the custom layer vs. autograd's.
# dl_db is only defined by the setup cell when `bias` is truthy.
(custom_conv.dl_db - dl_db).abs().sum()

tensor(0.)

## Up-sampling

In [399]:
from torch import nn
import others.implementations

In [400]:
# Nearest-neighbour up-sampling: torch reference vs. the custom implementation.
scale_factor = 5
target_upsample = nn.Upsample(scale_factor=scale_factor, mode='nearest')
custom_upsample = NearestUpsampling(scale_factor=scale_factor)
inp = torch.ones((1, 3, 14, 8))
inp.requires_grad = True
# Call the torch module itself instead of .forward() so that module hooks run.
target_out = target_upsample(inp)
# target_out is not a leaf tensor; retain_grad() keeps its .grad after backward().
target_out.retain_grad()
custom_out = custom_upsample.forward(inp)
# Zero target with exactly the reference output's shape and dtype, instead of
# re-deriving the shape with hard-coded batch and channel sizes.
target = torch.zeros_like(target_out)
# Half sum-of-squares loss between the zero target and the reference output.
loss = 0.5*((target-target_out)**2).sum()
loss.backward()


In [401]:
# Check the forward pass: summed absolute difference between the torch
# reference output and the custom up-sampling output.
(target_out - custom_out).abs().sum()

tensor(0., grad_fn=<SumBackward0>)

In [402]:
# Check dl_dx: back-propagate the gradient of the loss w.r.t. the up-sampled
# output (kept on target_out by retain_grad) through the custom layer and
# compare with autograd's input gradient. Passing target_out itself, as was
# done before, only equals that gradient because the target is all zeros.
(custom_upsample.backward(target_out.grad)-inp.grad).abs().sum()

tensor(0., grad_fn=<SumBackward0>)

## Model

In [9]:
from others.implementations import *
import torch.nn as nn
import torch
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# Random input for the custom model, plus an independent copy with the same
# values that tracks gradients for the torch reference model.
inp_custom = torch.rand(1, 3, 32, 32)
inp_target = inp_custom.clone().requires_grad_(True)

In [11]:
# Model assembled from the project's own layers (others.implementations).
# The layer sequence and hyper-parameters mirror the torch reference model
# defined in the next cell.
custom_model = Sequential(
    # Two stride-2 convolutions, each followed by a ReLU.
    Conv2d(3, 5, kernel_size=2, stride=2, padding=0),
    ReLU(),
    Conv2d(5, 5, kernel_size=2, stride=2, padding=0),
    ReLU(),
    # Two (x2 up-sampling + 3x3 convolution) stages.
    NearestUpsampling(scale_factor=2),
    Conv2d(5, 5, kernel_size=3, stride=1, padding=1),
    ReLU(),
    NearestUpsampling(scale_factor=2),
    Conv2d(5, 3, kernel_size=3, stride=1, padding=1),
    Sigmoid()
    )


In [12]:
# torch reference model with the same layer sequence as custom_model.
# nn.Upsample is used with its default mode here.
# NOTE(review): no visible cell copies weights between custom_model and
# target_model, so the two presumably start from independent random
# initialisations; the nonzero differences printed by the checks below would
# follow from that. Confirm, and synchronise the weights if a match is intended.
target_model = nn.Sequential(
    nn.Conv2d(3,5,kernel_size=2, stride=2, padding=0),
    nn.ReLU(),
    nn.Conv2d(5,5,kernel_size=2, stride=2, padding=0),
    nn.ReLU(),
    nn.Upsample(scale_factor=2),
    nn.Conv2d(5,5,kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.Upsample(scale_factor=2),
    nn.Conv2d(5,3,kernel_size=3, stride=1, padding=1),
    nn.Sigmoid()
    )
# Cast the reference model's parameters to float64.
target_model = target_model.double()

In [13]:
# Check the forward pass: run both models on the same input values and sum the
# absolute difference of their outputs.
custom_out = custom_model.forward(inp_custom)
target_out = target_model.forward(inp_target)
(custom_out - target_out).abs().sum()

tensor(16.2179, grad_fn=<SumBackward0>)

In [21]:
# Check dl_dx: compare the value returned by the custom loss's backward() with
# autograd's gradient w.r.t. the reference model's input.
# NOTE(review): no backward pass through custom_model is visible here, so
# custom_mse.backward() presumably returns the gradient w.r.t. custom_out, not
# w.r.t. inp_custom; confirm what MSE.backward() returns and whether it still
# has to be propagated through custom_model before this comparison.
target = torch.zeros(custom_out.shape)
custom_mse = MSE()
target_mse = nn.MSELoss()
custom_loss = custom_mse.forward(custom_out, target)
target_loss =  target_mse(target_out, target)
inp_custom_grad = custom_mse.backward()
target_loss.backward()
(inp_custom_grad - inp_target.grad).abs().sum()

tensor(3082.9513)

In [22]:
from torch import optim
# Check the optimizer: take one SGD step with each implementation.
# NOTE(review): the second positional argument of the custom SGD is presumably
# the learning rate (0.01, as for optim.SGD); confirm. No backward pass through
# custom_model is visible before this step, so verify that its parameter
# gradients have actually been computed.

custom_optimizer = SGD(custom_model.param(), 0.01)
target_optimizer = optim.SGD(target_model.parameters(), lr=0.01)
custom_optimizer.step()
target_optimizer.step()

In [23]:
# Check the forward pass again after the optimizer step: both models are run on
# the same input values and the absolute output difference is summed.
custom_out = custom_model.forward(inp_custom)
target_out = target_model.forward(inp_target)
(custom_out - target_out).abs().sum()

tensor(510.1748, grad_fn=<SumBackward0>)

In [24]:
from torch import empty, cat, arange
from torch.nn.functional import fold, unfold
import torch

# Stand-alone demo: a 2-D convolution expressed as unfold + matrix product + fold.
out_channels, in_channels = 2, 3
kernel_size_1 = kernel_size_2 = 2
kernel_size = (kernel_size_1, kernel_size_2)
stride = 1

# All-ones input, kernel and bias.
x = torch.ones(1, 3, 5, 5)
kernel = torch.ones(out_channels, in_channels, *kernel_size)
bias = torch.ones(out_channels)

# Extract the sliding blocks, multiply them with the flattened kernel and add
# the bias to every output channel.
x_unfolded = unfold(x, kernel_size=kernel.shape[-2:], stride=stride)
conv_output = (
    x_unfolded.transpose(1, 2)
    .matmul(kernel.reshape(out_channels, -1).t())
    .transpose(1, 2)
    + bias.view(1, -1, 1)
)

# Fold with a 1x1 kernel to arrange the columns on the output grid.
out = fold(
    conv_output,
    (
        (x.shape[2] - kernel_size[0]) // stride + 1,
        (x.shape[3] - kernel_size[1]) // stride + 1,
    ),
    (1, 1),
)

#new_dim = self.output_dim(torch.tensor(x.shape[-2:]), torch.tensor(kernel.shape[-2:]), stride)
#out = fold(conv_output, new_dim, (1, 1))

In [7]:
# Create all tensors and layers below in float64 by default.
# torch.set_default_tensor_type is deprecated; torch.set_default_dtype is the
# supported way to select the default floating-point dtype.
torch.set_default_dtype(torch.float64)

def test_backward_conv2d():
    """Check the custom ``Conv2d`` forward and backward passes against torch.

    A one-layer torch network provides the reference output and, through
    autograd, the reference gradients w.r.t. the input, the weight and the bias
    for a random upstream gradient. The custom layer is given the same weight
    and bias tensors; its output and gradients must match the references.

    Raises:
        AssertionError: If a shape, the output or any gradient differs.
    """
    class torch_Net(torch.nn.Module):
        """Reference network made of a single ``torch.nn.Conv2d``."""

        def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias):
            super().__init__()
            self.conv1 = torch.nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                bias=bias,
            )

        def forward(self, x):
            return self.conv1(x)

    # Non-square kernel, stride and padding exercise both spatial axes.
    in_channels = 24
    out_channels = 3
    kernel_size = (4, 6)
    stride = (2, 3)
    padding = (2, 3)
    bias = True

    # Create the input directly as a float64 leaf tensor. Converting with
    # `.double()` after `requires_grad=True` only keeps the tensor a leaf (and
    # therefore only populates `.grad`) when the global default dtype already
    # is float64.
    inputs = torch.randn(15, in_channels, 12, 12, dtype=torch.float64, requires_grad=True)
    # Cast the reference net to float64 as well so it matches the input dtype
    # regardless of the global default.
    my_nn = torch_Net(in_channels, out_channels, kernel_size, stride, padding, bias).double()

    out_torch = my_nn(inputs)

    my_nn.zero_grad()
    # Random upstream gradient fed to both implementations.
    initial_gradient = torch.rand_like(out_torch)
    out_torch.backward(initial_gradient)
    torch_gradient = inputs.grad
    torch_gradient_weight = my_nn.conv1.weight.grad
    if bias:
        torch_gradient_bias = my_nn.conv1.bias.grad

    own_conv2d = Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias)
    # Share the reference parameters so both layers compute the same function.
    own_conv2d.kernel = my_nn.conv1.weight
    own_conv2d.bias = my_nn.conv1.bias

    out_own = own_conv2d.forward(inputs)
    own_gradient = own_conv2d.backward(initial_gradient)
    # Indexed below as [0][1] for the weight gradient and [1][1] for the bias
    # gradient; presumably param() returns (parameter, gradient) pairs.
    own_gradients = own_conv2d.param()

    assert own_gradient.shape == torch_gradient.shape
    torch.testing.assert_allclose(out_torch, out_own)
    torch.testing.assert_allclose(torch_gradient, own_gradient)
    torch.testing.assert_allclose(torch_gradient_weight, own_gradients[0][1])
    if bias:
        torch.testing.assert_allclose(torch_gradient_bias, own_gradients[1][1])

In [8]:
# Run the Conv2d forward/backward comparison; a failed check raises an AssertionError.
test_backward_conv2d()

torch.Size([15, 24, 12, 12])
torch.Size([15, 24, 12, 12])
