---
#### 1) ConvNet
---

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class MNISTConvNet(nn.Module):
    """Small convolutional network producing 10 scores per input image.

    Layout: conv(5x5) -> ReLU -> max-pool(2) -> conv(5x5) -> ReLU -> max-pool(2)
    -> flatten -> fc(320 -> 50) -> ReLU -> fc(50 -> 10) -> ReLU.

    fc1 expects 320 = 20 * 4 * 4 features, which is what a 1x28x28 input
    yields after the two conv/pool stages.
    """

    def __init__(self):
        # All sub-modules are instantiated here; they stay reachable later
        # under the attribute names assigned below.
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=320, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=10)

    def forward(self, x):
        """Run the network on a 4-d mini-batch and return a (batch, 10) tensor.

        The forward function is what defines the network structure. Only one
        input is accepted here, but a forward may take more.
        """
        stage1 = self.pool1(F.relu(self.conv1(x)))
        stage2 = self.pool2(F.relu(self.conv2(stage1)))

        # Arbitrary Python control flow is legal inside forward and is handled
        # by autograd, e.g. branching on the data:
        #     if x.gt(0) > x.numel() / 2:
        #         ...
        # Modules hold no ephemeral state, so the same module may also be
        # applied several times in one forward pass, e.g. in a loop:
        #     while x.norm(2) < 10:
        #         x = self.conv1(x)

        # Keep the batch dimension and flatten everything else into one axis.
        flat = stage2.view(stage2.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        # NOTE(review): the final ReLU clamps the class scores to >= 0 before
        # they reach the loss; kept as-is to preserve the recorded outputs.
        return F.relu(self.fc2(hidden))
    
# torch.nn only supports mini-batch inputs, not single samples
## e.g. for nn.Conv2d the input tensor should be 4-d (nSamples x nChannels x Height x Width)

In [3]:
# Build the network and print it to list its registered sub-modules.
net = MNISTConvNet()
print(net)

MNISTConvNet(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)


In [4]:
# One random single-channel 28x28 sample, shaped as a mini-batch of size 1.
input = torch.randn(1, 1, 28, 28)
out = net(input)
# torch.max along dim 1 returns a (value, index) pair; unpack it directly
# into print so the scores, the max value and its index are shown in order.
print(out, *torch.max(out, 1))

tensor([[0.0000, 0.0019, 0.0000, 0.0000, 0.0000, 0.0000, 0.0954, 0.0000, 0.1129,
         0.1805]], grad_fn=<ReluBackward>) tensor([0.1805], grad_fn=<MaxBackward0>) tensor([9])


In [5]:
# dummy target label: one class index (3) for the single sample in the batch

target = torch.tensor([3], dtype = torch.long)
loss_fn = nn.CrossEntropyLoss() # LogSoftmax + ClassNLL Loss
err = loss_fn(out, target) # argument order matters: predicted scores first, then the target class index
print(err)

# back-propagate the loss so the parameters' .grad fields are filled in
err.backward()

tensor(2.3437, grad_fn=<NllLossBackward>)


In [6]:
# Compare the shapes handed to the loss: the network output vs. the target.
print(out.size(), target.size())

torch.Size([1, 10]) torch.Size([1])


In [7]:
# Inspect conv1 after the backward pass: the gradient has the same size as the weight.
print(net.conv1.weight.size()) # output channels / input channels / kernel height / kernel width
print(net.conv1.weight.data.norm())
print(net.conv1.weight.grad.size())
print(net.conv1.weight.grad.data.norm())

torch.Size([10, 1, 5, 5])
tensor(1.8495)
torch.Size([10, 1, 5, 5])
tensor(0.1492)


---
### forward and backward hook
- network layer에 forward & backward process에서의 intermediate 과정을 볼 수 있는 hook function
---

In [8]:
def printnorm(self, input, output):
    """Forward hook that prints what passes through a module.

    Args:
        self: the module the hook is attached to.
        input: tuple of packed inputs; only its first element is inspected.
        output: the module's output Tensor; ``output.data`` is what gets
            measured.
    """
    owner = self.__class__.__name__
    print('Inside ' + owner + ' forward')
    print('')
    print('input: ', type(input))
    first = input[0]
    print('input[0]: ', type(first))
    print('output: ', type(output))
    print('')
    print('input size:', first.size())
    raw_output = output.data
    print('output size:', raw_output.size())
    print('output norm:', raw_output.norm())


# Attach printnorm to conv2. The hook stays registered for later cells, and
# re-running this cell registers it again.
net.conv2.register_forward_hook(printnorm)

# A forward pass now triggers the hook when conv2 runs.
out = net(input)

Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([1, 10, 12, 12])
output size: torch.Size([1, 20, 8, 8])
output norm: tensor(12.4095)


In [9]:
def printgradnorm(self, grad_input, grad_output):
    """Backward hook that prints the gradients flowing through a module.

    Args:
        self: the module the hook is attached to.
        grad_input: indexable collection of gradients; only element 0 is
            inspected.
        grad_output: indexable collection of gradients; only element 0 is
            inspected.
    """
    owner = self.__class__.__name__
    print('Inside ' + owner + ' backward')
    print('Inside class:' + owner)
    print('')
    print('grad_input: ', type(grad_input))
    first_grad_in = grad_input[0]
    print('grad_input[0]: ', type(first_grad_in))
    print('grad_output: ', type(grad_output))
    first_grad_out = grad_output[0]
    print('grad_output[0]: ', type(first_grad_out))
    print('')
    print('grad_input size:', first_grad_in.size())
    print('grad_output size:', first_grad_out.size())
    print('grad_input norm:', first_grad_in.norm())
    
# Attach printgradnorm to conv2 so it runs during the backward pass.
# NOTE(review): register_backward_hook is deprecated in recent PyTorch releases
# in favour of register_full_backward_hook — confirm the targeted version.
net.conv2.register_backward_hook(printgradnorm)

# The forward hook registered earlier on conv2 is still active, so it prints
# again here before the backward hook does.
out = net(input)
err = loss_fn(out, target)
# No gradients were zeroed since the earlier err.backward() call, so this
# call adds to the .grad values already stored on the parameters.
err.backward()
# grad_input  -> has the dimensions of conv2's input
# grad_output -> has the dimensions of conv2's output

Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([1, 10, 12, 12])
output size: torch.Size([1, 20, 8, 8])
output norm: tensor(12.4095)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([1, 10, 12, 12])
grad_output size: torch.Size([1, 20, 8, 8])
grad_input norm: tensor(0.0308)


---
### Recurrent Net
---

In [12]:
class RNN(nn.Module):
    """One recurrent step built from two linear layers.

    The step concatenates the data with the previous hidden state, maps that
    to a new hidden state (i2h), and maps the new hidden state to an output
    (h2o). No non-linearity is applied.
    """

    def __init__(self, data_size, hidden_size, output_size):
        """Model constructors may take arguments.

        Args:
            data_size: width of the data fed in at every step.
            hidden_size: width of the hidden state.
            output_size: width of the per-step output.
        """
        super().__init__()
        self.hidden_size = hidden_size

        # i2h consumes data and hidden state side by side, hence the sum.
        self.i2h = nn.Linear(data_size + hidden_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)

    def forward(self, data, last_hidden):
        """Advance one step and return ``(hidden, output)``, in that order."""
        combined = torch.cat([data, last_hidden], dim=1)
        new_hidden = self.i2h(combined)
        return new_hidden, self.h2o(new_hidden)
    
# data_size=50, hidden_size=20, output_size=10 -> i2h takes 50 + 20 = 70 features
rnn = RNN(50,20,10)
print(rnn)

RNN(
  (i2h): Linear(in_features=70, out_features=20, bias=True)
  (h2o): Linear(in_features=20, out_features=10, bias=True)
)


In [13]:
# NOTE: this rebinds loss_fn (and target below), which the ConvNet cells
# earlier in the notebook also use; re-running those cells after this one
# would pick up these values instead.
loss_fn = nn.MSELoss()

batch_size = 10
TIMESTEPS = 5

# create some fake data
# the widths 50 / 20 / 10 match the sizes passed to RNN(50, 20, 10)
batch = torch.randn(batch_size, 50)
hidden = torch.zeros(batch_size, 20)
target = torch.zeros(batch_size, 10)


# accumulate the loss over every step; the same batch is fed at each step
# while the hidden state is carried forward
loss = 0
for t in range(TIMESTEPS):
    # yes! you can reuse the same network several times,
    # sum up the losses, and call backward!
    hidden, output = rnn(batch, hidden)
    loss += loss_fn(output, target)
# a single backward call on the summed loss
loss.backward()