In [8]:
%run backprop_modules.ipynb

In [9]:
import torch
from torch.autograd import Variable
import numpy as np
import unittest

import scipy as sp
import scipy.signal
import skimage

In [10]:
class TestLayers(unittest.TestCase):
    
    def test_BatchNormalization(self):
        np.random.seed(42)
        torch.manual_seed(42)

        batch_size, n_in = 32, 16
        for _ in range(100):
            # layers initialization
            slope = np.random.uniform(0.01, 0.05)
            alpha = 0.9
            custom_layer = BatchNormalization(alpha)
            custom_layer.train()
            torch_layer = torch.nn.BatchNorm1d(n_in, eps=custom_layer.EPS, momentum=1.-alpha, affine=False)
            custom_layer.moving_mean = torch_layer.running_mean.numpy().copy()
            custom_layer.moving_variance = torch_layer.running_var.numpy().copy()

            layer_input = np.random.uniform(-5, 5, (batch_size, n_in)).astype(np.float32)
            next_layer_grad = np.random.uniform(-5, 5, (batch_size, n_in)).astype(np.float32)

            # 1. check layer output
            custom_layer_output = custom_layer.forward(layer_input)
            layer_input_var = Variable(torch.from_numpy(layer_input), requires_grad=True)
            torch_layer_output_var = torch_layer(layer_input_var)
            self.assertTrue(np.allclose(torch_layer_output_var.data.numpy(), custom_layer_output, atol=1e-6))

            # 2. check layer input grad
            custom_layer_grad = custom_layer.backward(layer_input, next_layer_grad)
            torch_layer_output_var.backward(torch.from_numpy(next_layer_grad))
            torch_layer_grad_var = layer_input_var.grad
            # please, don't increase `atol` parameter, it's garanteed that you can implement batch norm layer
            # with tolerance 1e-5
            self.assertTrue(np.allclose(torch_layer_grad_var.data.numpy(), custom_layer_grad, atol=1e-5))

            # 3. check moving mean
            self.assertTrue(np.allclose(custom_layer.moving_mean, torch_layer.running_mean.numpy()))
            # we don't check moving_variance because pytorch uses slightly different formula for it:
            # it computes moving average for unbiased variance (i.e var*N/(N-1))
            #self.assertTrue(np.allclose(custom_layer.moving_variance, torch_layer.running_var.numpy()))

            # 4. check evaluation mode
            custom_layer.moving_variance = torch_layer.running_var.numpy().copy()
            custom_layer.evaluate()
            custom_layer_output = custom_layer.forward(layer_input)
            torch_layer.eval()
            torch_layer_output_var = torch_layer(layer_input_var)
            self.assertTrue(np.allclose(torch_layer_output_var.data.numpy(), custom_layer_output, atol=1e-6))

    def test_Dropout(self):
        np.random.seed(42)

        batch_size, n_in = 2, 4
        for _ in range(100):
            # layers initialization
            p = np.random.uniform(0.3, 0.7)
            layer = Dropout(p)
            layer.train()

            layer_input = np.random.uniform(-5, 5, (batch_size, n_in)).astype(np.float32)
            next_layer_grad = np.random.uniform(-5, 5, (batch_size, n_in)).astype(np.float32)

            # 1. check layer output
            layer_output = layer.forward(layer_input)
            self.assertTrue(np.all(np.logical_or(np.isclose(layer_output, 0), 
                                        np.isclose(layer_output*(1.-p), layer_input))))

            # 2. check layer input grad
            layer_grad = layer.backward(layer_input, next_layer_grad)
            self.assertTrue(np.all(np.logical_or(np.isclose(layer_grad, 0), 
                                        np.isclose(layer_grad*(1.-p), next_layer_grad))))

            # 3. check evaluation mode
            layer.evaluate()
            layer_output = layer.forward(layer_input)
            self.assertTrue(np.allclose(layer_output, layer_input))

            # 4. check mask
            p = 0.0
            layer = Dropout(p)
            layer.train()
            layer_output = layer.forward(layer_input)
            self.assertTrue(np.allclose(layer_output, layer_input))

            p = 0.5
            layer = Dropout(p)
            layer.train()
            layer_input = np.random.uniform(5, 10, (batch_size, n_in)).astype(np.float32)
            next_layer_grad = np.random.uniform(5, 10, (batch_size, n_in)).astype(np.float32)
            layer_output = layer.forward(layer_input)
            zeroed_elem_mask = np.isclose(layer_output, 0)
            layer_grad = layer.backward(layer_input, next_layer_grad)        
            self.assertTrue(np.all(zeroed_elem_mask == np.isclose(layer_grad, 0)))

            # 5. dropout mask should be generated independently for every input matrix element, not for row/column
            batch_size, n_in = 1000, 1
            p = 0.8
            layer = Dropout(p)
            layer.train()

            layer_input = np.random.uniform(5, 10, (batch_size, n_in)).astype(np.float32)
            layer_output = layer.forward(layer_input)
            self.assertTrue(np.sum(np.isclose(layer_output, 0)) != layer_input.size)

            layer_input = layer_input.T
            layer_output = layer.forward(layer_input)
            self.assertTrue(np.sum(np.isclose(layer_output, 0)) != layer_input.size)
            
    def test_Conv2d(self):
        np.random.seed(42)
        torch.manual_seed(42)

        batch_size, n_in, n_out = 2, 3, 4
        h,w = 5,6
        kern_size = 3
        for _ in range(100):
            # layers initialization
            torch_layer = torch.nn.Conv2d(n_in, n_out, kern_size, padding=1)
            custom_layer = Conv2d(n_in, n_out, kern_size)
            custom_layer.W = torch_layer.weight.data.numpy() # [n_out, n_in, kern, kern]
            custom_layer.b = torch_layer.bias.data.numpy()

            layer_input = np.random.uniform(-1, 1, (batch_size, n_in, h,w)).astype(np.float32)
            next_layer_grad = np.random.uniform(-1, 1, (batch_size, n_out, h, w)).astype(np.float32)

            # 1. check layer output
            custom_layer_output = custom_layer._compute_output(layer_input)
            layer_input_var = Variable(torch.from_numpy(layer_input), requires_grad=True)
            torch_layer_output_var = torch_layer(layer_input_var)
            self.assertTrue(np.allclose(torch_layer_output_var.data.numpy(), custom_layer_output, atol=1e-6, equal_nan=True))

        
            # 2. check layer input grad
            custom_layer_grad = custom_layer._compute_input_grad(layer_input, next_layer_grad)
            torch_layer_output_var.backward(torch.from_numpy(next_layer_grad))
            torch_layer_grad_var = layer_input_var.grad
            self.assertTrue(np.allclose(torch_layer_grad_var.data.numpy(), custom_layer_grad, atol=1e-6))
            
            # 3. check layer parameters grad
            custom_layer.accGradParameters(layer_input, next_layer_grad)
            weight_grad = custom_layer.gradW
            bias_grad = custom_layer.gradb
            torch_weight_grad = torch_layer.weight.grad.data.numpy()
            torch_bias_grad = torch_layer.bias.grad.data.numpy()
            #m = ~np.isclose(torch_weight_grad, weight_grad, atol=1e-5)
            self.assertTrue(np.allclose(torch_weight_grad, weight_grad, atol=1e-6, ))
            self.assertTrue(np.allclose(torch_bias_grad, bias_grad, atol=1e-6))
    
# Gather every test_* method of TestLayers with the shared default loader and
# run them verbosely; the TestResult returned by run() is the cell's output.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestLayers)
unittest.TextTestRunner(verbosity=2).run(suite)

test_BatchNormalization (__main__.TestLayers.test_BatchNormalization) ... ok
test_Conv2d (__main__.TestLayers.test_Conv2d) ... ok
test_Dropout (__main__.TestLayers.test_Dropout) ... ok

----------------------------------------------------------------------
Ran 3 tests in 2.585s

OK


<unittest.runner.TextTestResult run=3 errors=0 failures=0>

In [7]:

# Scratch cell: replays the setup and the forward pass of TestLayers.test_Conv2d
# and displays the custom layer's output from the final loop iteration.
# NOTE(review): no comparison against torch is made here, and this cell's execution
# count is lower than that of the cells above it -- re-run the notebook top to bottom
# before relying on the displayed values.
np.random.seed(42)
torch.manual_seed(42)

batch_size, n_in, n_out = 2, 3, 4
h,w = 5,6
kern_size = 3
# Every iteration rebinds the same names, so only the 100th iteration's values
# are still available once the loop has finished.
for _ in range(100):
    # layers initialization: the custom layer is given the torch layer's weights and bias
    torch_layer = torch.nn.Conv2d(n_in, n_out, kern_size, padding=1)
    custom_layer = Conv2d(n_in, n_out, kern_size)
    custom_layer.W = torch_layer.weight.data.numpy() # [n_out, n_in, kern, kern]
    custom_layer.b = torch_layer.bias.data.numpy()

    layer_input = np.random.uniform(-1, 1, (batch_size, n_in, h,w)).astype(np.float32)
    # generated as in the unit test but not used anywhere else in this cell
    next_layer_grad = np.random.uniform(-1, 1, (batch_size, n_out, h, w)).astype(np.float32)

    # 1. compute the custom layer's output (nothing is asserted in this cell)
    custom_layer_output = custom_layer._compute_output(layer_input)

# bare last expression: shows the whole array as the cell output
custom_layer_output

array([[[[ 0.04843396, -0.21861504,  0.12515776, -0.17066272,
          -0.3562255 ,  0.00411915],
         [-0.11512321,  0.74192786, -0.10935627, -0.33640385,
          -0.09818171, -0.03279735],
         [-0.02695464, -0.2590999 ,  0.0528103 ,  0.49233657,
          -0.4718731 , -0.02858615],
         [ 0.3012539 ,  0.5557125 , -0.04545643, -0.36686188,
          -0.17769161,  0.78966016],
         [ 0.02954957, -0.03502005, -0.36738777,  0.34471208,
           0.1276055 , -0.37517685]],

        [[-0.14400652, -0.17719232, -0.11702606, -0.33820397,
           0.06207688, -0.5518278 ],
         [-0.364331  ,  0.01046771,  0.15027677, -0.18802173,
          -0.01891376, -0.68515223],
         [-0.59648645, -0.14795962,  0.04761004, -0.32040688,
           0.12295546, -0.36304075],
         [-0.10211983, -0.51526964,  0.04450738,  0.07590434,
          -0.15449232, -0.34822953],
         [-0.28588665, -0.15407708, -0.15894523, -0.26590088,
           0.06817274, -0.23077342]],

      