In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class MNISTConvNet(nn.Module):
    
    def __init__(self):
        super(MNISTConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(10, 20, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        
    def forward(self, input):
        x = self.pool1(F.relu(self.conv1(input)))
        x = self.pool2(F.relu(self.conv2(x)))
        
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x
    

In [3]:
net = MNISTConvNet()
net

MNISTConvNet(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)

In [4]:
input = torch.randn(1, 1, 28, 28)
out = net(input)

In [5]:
out

tensor([[ 0.0971,  0.0000,  0.0027,  0.0000,  0.0606,  0.0245,  0.0138,
          0.1563,  0.0344,  0.0944]])

In [6]:
target = torch.tensor([3], dtype=torch.long)
loss_fn = nn.CrossEntropyLoss()
err = loss_fn(out, target)
err.backward()

In [7]:
err

tensor(2.3522)

In [8]:
net.conv1.weight.grad

tensor(1.00000e-02 *
       [[[[-0.0924,  0.6788, -0.3091, -1.1697, -0.6006],
          [ 0.0327, -1.8250,  1.1705,  0.4314, -0.1080],
          [-0.3117,  1.2235,  0.2890, -0.8914,  0.3482],
          [-1.4294,  1.0099,  1.1901, -1.1482,  0.9309],
          [-0.9081, -0.3514, -1.5364,  0.2472, -0.8223]]],


        [[[-0.3293,  1.4851, -0.3353, -0.0441, -0.9048],
          [ 0.1503, -1.1915, -1.1917,  0.4583, -1.3083],
          [ 0.1321,  0.2493,  0.4980, -0.2893, -1.8903],
          [ 0.1883,  1.2472,  0.0794, -0.9436,  0.2583],
          [-0.0893,  0.4557, -0.6890,  1.1562, -0.4931]]],


        [[[ 0.9244,  1.1569,  0.2159, -0.1867, -0.3201],
          [-1.9751,  0.3393,  1.1108,  0.9333,  0.8667],
          [ 0.8876,  0.4905,  0.5255,  0.5335, -0.9599],
          [ 0.2371, -1.0396,  0.4625, -1.8430, -0.7134],
          [-1.1961,  1.2138,  2.0265,  0.8317,  0.1822]]],


        [[[ 0.7619,  0.8316, -0.5600, -1.0744,  1.3814],
          [ 0.4620,  2.1488, -0.2455, -1.8019, -0.4913]

In [9]:
net.conv1.weight.grad.size()

torch.Size([10, 1, 5, 5])

In [10]:
net.conv1.weight.data.norm()

tensor(1.9484)

In [11]:
net.conv1.weight.grad.data.norm()

tensor(0.1325)

In [12]:
def printnorm(self, input, output):
    # input is a tuple of packed inputs
    # output is a Tensor. output.data is the Tensor we are interested
    print('Inside ' + self.__class__.__name__ + ' forward')
    print('')
    print('input: ', type(input))
    print('input[0]: ', type(input[0]))
    print('output: ', type(output))
    print('')
    print('input size:', input[0].size())
    print('output size:', output.data.size())
    print('output norm:', output.data.norm())

In [13]:
net.conv2.register_forward_hook(printnorm)

<torch.utils.hooks.RemovableHandle at 0x7f4dfa798c88>

In [14]:
out = net(input)

Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([1, 10, 12, 12])
output size: torch.Size([1, 20, 8, 8])
output norm: tensor(13.3902)


In [18]:
def printgradnorm(self, grad_input, grad_output):
    print('Inside ' + self.__class__.__name__ + ' backward')
    print('Inside class:' + self.__class__.__name__)
    print('')
    print('grad_input: ', type(grad_input))
    print('grad_input[0]: ', type(grad_input[0]))
    print('grad_output: ', type(grad_output))
    print('grad_output[0]: ', type(grad_output[0]))
    print('')
    print('grad_input size:', grad_input[0].size())
    print('grad_output size:', grad_output[0].size())
    print('grad_input norm:', grad_input[0].norm())


net.conv2.register_backward_hook(printgradnorm)

out = net(input)
err = loss_fn(out, target)
err.backward()

Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([1, 10, 12, 12])
output size: torch.Size([1, 20, 8, 8])
output norm: tensor(13.3902)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([1, 10, 12, 12])
grad_output size: torch.Size([1, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       2.8586)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([1, 10, 12, 12])
grad_output size: torch.Size([1, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       2.8586)


In [22]:
class RNN(nn.Module):
    def __init__(self, data_size, hidden_size, output_size):
        super(RNN, self).__init__()
        
        self.hidden_size = hidden_size
        input_size = data_size + hidden_size
        
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        
    def forward(self, data, last_hidden):
        input = torch.cat((data, last_hidden), 1)
        hidden = self.i2h(input)
        output = self.h2o(hidden)
        return hidden, output

rnn = RNN(50, 20, 10)

In [23]:
loss_fn = nn.MSELoss()

batch_size = 10
TIMESTEPS = 5

batch = torch.randn(batch_size, 50)
hidden = torch.zeros(batch_size, 20)
target = torch.zeros(batch_size, 10)

loss = 0

for t in range(TIMESTEPS):
    hidden, output = rnn(batch, hidden)
    loss += loss_fn(output, target)
    
loss.backward()