In [10]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
import torch
from torch.nn.functional import unfold, fold
from others.implementations import *
from torch import nn


## Convolution

In [20]:
torch.set_default_tensor_type(torch.DoubleTensor)

padding = 1
stride = 2
kernel_size = 7
bias = True
ch_in = 3
ch_out = 5
batch = 1
size = 32

custom_conv = Conv2d(ch_in, ch_out, kernel_size=kernel_size, padding=padding, stride = stride, bias = bias)
target_conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, padding=padding, stride=stride, bias = bias)

custom_conv.kernel = target_conv.weight
if bias:
    custom_conv.bias = target_conv.bias

inp_custom = torch.rand((batch, ch_in, size, size))
inp_target = inp_custom.clone()
inp_target.requires_grad = True

custom_out = custom_conv.forward(inp_custom)
target_out = target_conv.forward(inp_target)

target_out.retain_grad()

target = torch.rand(custom_out.shape)

custom_mse = MSE()
target_mse = nn.MSELoss(reduction='mean')

custom_loss = custom_mse.forward(custom_out, target)
target_loss =  target_mse(target_out, target)

inp_custom_grad = custom_conv.backward(custom_mse.backward())
target_loss.backward()

In [21]:
#check weights
(custom_conv.kernel - target_conv.weight).abs().sum(),  (custom_conv.bias - target_conv.bias).abs().sum() if bias else 0, 

(tensor(0., grad_fn=<SumBackward0>), tensor(0., grad_fn=<SumBackward0>))

In [22]:
#check out
(target_out - custom_out).abs().sum()

tensor(8.2635e-14, grad_fn=<SumBackward0>)

In [23]:
#check loss

(custom_loss-target_loss)

tensor(-5.5511e-17, grad_fn=<SubBackward0>)

In [24]:
#check dl_dx
(inp_custom_grad - inp_target.grad).abs().sum(), inp_custom_grad.shape == inp_target.grad.shape

(tensor(2.9402e-16, grad_fn=<SumBackward0>), True)

In [25]:
#check dl_dw
(custom_conv.dl_dw - target_conv.weight.grad).abs().sum(), custom_conv.dl_dw.shape == target_conv.weight.grad.shape

(tensor(1.2566e-14, grad_fn=<SumBackward0>), True)

In [26]:
#check dl_db
if bias:
    print((custom_conv.dl_db - target_conv.bias.grad).abs().sum(), custom_conv.dl_db.shape == target_conv.bias.grad.shape)

tensor(1.9429e-16, grad_fn=<SumBackward0>) True


## Up-sampling

In [12]:
from torch import nn
import others.implementations

In [17]:
scale_factor = 5
target_upsample = nn.Upsample(scale_factor = scale_factor, mode = 'nearest')
custom_upsample = NearestUpsampling(scale_factor = scale_factor)
inp = torch.ones((1,3,14,8))
inp.requires_grad = True
target_out = target_upsample.forward(inp)
target_out.retain_grad()
custom_out = custom_upsample.forward(inp)
target = torch.zeros((1,3,inp.shape[-2]*scale_factor, inp.shape[-1]*scale_factor))
loss = 0.5*((target-target_out)**2).sum()
loss.backward()


In [18]:
#check out
(target_out - custom_out).abs().sum()

tensor(0., grad_fn=<SumBackward0>)

In [19]:
#check dl_dx
(custom_upsample.backward(target_out)-inp.grad).abs().sum()

tensor(0., grad_fn=<SumBackward0>)

## Model

In [1]:
from others.implementations import *
import torch.nn as nn
import torch
%load_ext autoreload
%autoreload 2


In [10]:
inp_custom = torch.rand((1, 3, 32, 32))
inp_target = inp_custom.clone()
inp_target.requires_grad = True

In [11]:
# in_channels, out_channels, kernel_size, padding, scale_factor, dilation=None

In [12]:
custom_model = Sequential(
    Conv2d(3, 5, kernel_size=2, stride=2, padding=0),
    ReLU(),
    Conv2d(5, 5, kernel_size=2, stride=2, padding=0),
    ReLU(),
    Upsampling(5, 5, kernel_size=3, padding=1, scale_factor=2),
    ReLU(),
    Upsampling(5, 3, kernel_size=3, padding=1, scale_factor=2),
    Sigmoid()
    )


In [13]:
target_model = nn.Sequential(
    nn.Conv2d(3,5,kernel_size=2, stride=2, padding=0),
    nn.ReLU(),
    nn.Conv2d(5,5,kernel_size=2, stride=2, padding=0),
    nn.ReLU(),
    nn.Upsample(scale_factor=2),
    nn.Conv2d(5,5,kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.Upsample(scale_factor=2),
    nn.Conv2d(5,3,kernel_size=3, stride=1, padding=1),
    nn.Sigmoid()
    )
target_model = target_model.float()

In [14]:
custom_model.modules[0].kernel = target_model[0].weight
custom_model.modules[0].bias = target_model[0].bias

custom_model.modules[2].kernel = target_model[2].weight
custom_model.modules[2].bias = target_model[2].bias

custom_model.modules[4].conv2d.kernel = target_model[5].weight
custom_model.modules[4].conv2d.bias = target_model[5].bias

custom_model.modules[6].conv2d.kernel = target_model[8].weight
custom_model.modules[6].conv2d.bias = target_model[8].bias

In [15]:
#check out
custom_out = custom_model.forward(inp_custom)
target_out = target_model.forward(inp_target)
target_out.retain_grad()
(custom_out - target_out).abs().sum()

tensor(6.0499e-06, grad_fn=<SumBackward0>)

In [20]:
#check dl_dx
target = torch.zeros(custom_out.shape)

custom_mse = MSE()
target_mse = nn.MSELoss()

custom_loss = custom_mse.forward(custom_out, target)
target_loss =  target_mse(target_out, target)

target_loss.backward()

inp_custom_grad = custom_model.backward(custom_mse.backward())
(inp_custom_grad - inp_target.grad).abs().sum()

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


tensor(1.4442e-09, grad_fn=<SumBackward0>)

In [21]:
from torch import optim
#check optimizer

custom_optimizer = SGD(custom_model.param(), 0.01)
target_optimizer = optim.SGD(target_model.parameters(), lr=0.01)
custom_optimizer.step()
target_optimizer.step()

RuntimeError: a leaf Variable that requires grad is being used in an in-place operation.

In [62]:
#check out
custom_out = custom_model.forward(inp_custom)
target_out = target_model.forward(inp_target)
(custom_out - target_out).abs().sum()

tensor(47.8825, grad_fn=<SumBackward0>)

In [24]:
from torch import empty, cat, arange
from torch.nn.functional import fold, unfold
import torch

out_channels, in_channels, kernel_size_1, kernel_size_2 = 2, 3, 2, 2
kernel_size = (kernel_size_1, kernel_size_2)
stride = 1

x = empty((1,3,5,5)).fill_(1)
kernel = empty((out_channels, in_channels, kernel_size[0], kernel_size[1])).fill_(1)
bias = empty(out_channels).fill_(1)
x_unfolded = unfold(x, kernel.shape[-2:], stride=stride)
conv_output = x_unfolded.transpose(1, 2).matmul(kernel.reshape(out_channels, -1).t()).transpose(1, 2) + bias.view(1, -1, 1)
out = fold(conv_output, ((x.shape[2] - kernel_size[0]) // stride + 1, (x.shape[3] - kernel_size[1]) // stride + 1), (1, 1))

#new_dim = self.output_dim(torch.tensor(x.shape[-2:]), torch.tensor(kernel.shape[-2:]), stride)
#out = fold(conv_output, new_dim, (1, 1))

In [7]:
torch.set_default_tensor_type(torch.DoubleTensor)

def test_backward_conv2d():
    class torch_Net(torch.nn.Module):
        def __init__(self, in_channels, out_channels, kernel_size, stride, padding,bias):
            super(torch_Net,self).__init__()


            self.conv1 = torch.nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=kernel_size,stride=stride,padding=padding,bias=bias)

        def forward(self, input):
            input = self.conv1(input)
            #input = torch.nn.functional.relu(input)
            return input

    in_channels=24
    out_channels=3
    kernel_size=(4,6)
    stride=(2,3)
    padding=(2,3)
    bias=True

    input = torch.randn(15,in_channels,12,12, requires_grad=True).float()
    my_nn = torch_Net(in_channels, out_channels, kernel_size, stride, padding, bias)

    out_torch = my_nn(input)

    my_nn.zero_grad()
    initial_gradient = torch.rand_like(out_torch)
    out_torch.backward(initial_gradient)
    torch_gradient = input.grad
    torch_gradient_weight = my_nn.conv1.weight.grad
    if bias:
        torch_gradient_bias = my_nn.conv1.bias.grad

    own_conv2d = Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=kernel_size,stride=stride,padding=padding,bias=bias)
    own_conv2d.kernel = my_nn.conv1.weight
    own_conv2d.bias = my_nn.conv1.bias

    out_own = own_conv2d.forward(input)
    own_gradient=own_conv2d.backward(initial_gradient)
    own_gradients = own_conv2d.param()

    #print("Own gradient: {}".format(own_gradient))
    assert(own_gradient.shape==torch_gradient.shape)
    torch.testing.assert_allclose(out_torch,out_own)
    torch.testing.assert_allclose(torch_gradient , own_gradient)
    torch.testing.assert_allclose(torch_gradient_weight , own_gradients[0][1])
    if bias:
        torch.testing.assert_allclose(torch_gradient_bias, own_gradients[1][1])

In [8]:
test_backward_conv2d()

torch.Size([15, 24, 12, 12])
torch.Size([15, 24, 12, 12])


In [1]:
import pickle
with open("../top20.pickle", "rb") as f:
    data = pickle.load(f)

In [9]:
sorted(data.items(), key=lambda x: -x[1])

[('shallow_channels=32,deep_channels=64,lr=0.01,momentum=0.9,nesterov=True,batch_size=4,downsampling_kernel_size=2,upsampling_kernel_size=5',
  tensor(23.0711)),
 ('shallow_channels=32,deep_channels=64,lr=0.01,momentum=0.9,nesterov=True,batch_size=4,downsampling_kernel_size=4,upsampling_kernel_size=3',
  tensor(22.9439)),
 ('shallow_channels=32,deep_channels=64,lr=0.01,momentum=0.9,nesterov=True,batch_size=4,downsampling_kernel_size=4,upsampling_kernel_size=5',
  tensor(22.8452)),
 ('shallow_channels=16,deep_channels=64,lr=0.01,momentum=0.9,nesterov=False,batch_size=4,downsampling_kernel_size=2,upsampling_kernel_size=5',
  tensor(22.7992)),
 ('shallow_channels=16,deep_channels=64,lr=0.01,momentum=0.9,nesterov=True,batch_size=4,downsampling_kernel_size=2,upsampling_kernel_size=5',
  tensor(22.7090)),
 ('shallow_channels=16,deep_channels=64,lr=0.01,momentum=0.9,nesterov=True,batch_size=4,downsampling_kernel_size=4,upsampling_kernel_size=5',
  tensor(22.6540)),
 ('shallow_channels=16,deep