In [42]:
# 상속관련
class Parent:
    def __init__(self, num):
        print(num + 1)
        
class Child(Parent):
    def __init__(self, value):
        super().__init__(value)

class Child2(Parent):
    def __init__(self, value):
        super(Child2, self).__init__(value)

Child(3)
Child2(3)

4
4


<__main__.Child2 at 0x7f90e8589588>

In [43]:
import torch
import torch.nn as nn # 자동으로 initialize (no training args.)
import torch.nn.functional as F # weight를 선언해줘야 함 (takes training args.)
# 참조 : https://discuss.pytorch.org/t/understanding-net-class/2557/6

In [44]:
class Net(nn.Module):
    # 주로 parameter(특히 weight)가 있는 module을 initialize (아닌 것들을 미리 선언해도 무방)
    def __init__(self):
        # input = 32x32
        super(Net, self).__init__() #nn.Module을 상속
        # input = 1, output 6 channels, 5x5 square conv.
        self.conv1 = nn.Conv2d(1,6,5) # output size = (input size + 2xpatting - filter size)/stride + 1
        self.conv2 = nn.Conv2d(6,16,5)
        # affine operation of y = Wx + b
        self.fc1 = nn.Linear(16*5*5, 120) # input channel = 16, 5x5
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        # max pooling over 2x2 window
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, (2,2)) # max pooling을 거치며 24x24 -> 12x12로 변경
        # maxpooling
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        # flattenzzz
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [45]:
net = Net()
net

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [46]:
params = list(net.parameters())
print(len(params))
for _, par in enumerate(params):
    print(par.size())

10
torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [47]:
# generate random image
input = torch.randn(1,1,32,32) # only support a mini-batch of samples, and not a sing sample, batch / channel / width / heights
input.data

tensor([[[[ 0.1772, -0.9534, -2.2107,  ...,  1.0373, -0.9370, -2.0802],
          [ 0.0600,  0.2533,  0.2950,  ..., -1.9627, -0.0115,  0.9909],
          [-0.3977,  0.5232,  1.3472,  ...,  1.5629, -0.1605,  0.3471],
          ...,
          [-0.2409,  1.1261,  0.0733,  ..., -2.2515, -0.3116,  1.0288],
          [-0.4356,  0.6182,  1.0567,  ...,  0.0300,  1.4172, -0.5777],
          [-1.1561,  1.2751,  1.4673,  ...,  0.0427,  0.1550,  0.5052]]]])

In [48]:
out = net(input)

In [49]:
print(out.grad_fn)
print(out.grad)

<ThAddmmBackward object at 0x7f90e85896d8>
None


In [52]:
print(params[0].grad)
# zero the gradient buffers of all parameters and backprops with random gradients:
net.zero_grad() # every time a variable is back propagated through, the gradient will be accumulated instead of being replaced. (for mini-batch)
out.backward(torch.randn(1,10))

print(params[0].grad)

None
tensor([[[[ 0.0336, -0.0280, -0.0068, -0.0358, -0.0249],
          [-0.0342,  0.0217, -0.0893, -0.0524,  0.0223],
          [-0.0085, -0.0315,  0.0529,  0.0040, -0.0022],
          [ 0.0096,  0.0047, -0.0603, -0.0068, -0.0458],
          [ 0.0370,  0.0139,  0.0376,  0.0448,  0.1305]]],


        [[[-0.0318,  0.0149, -0.0507,  0.0081, -0.0209],
          [-0.0004, -0.0459,  0.0673,  0.0170, -0.0505],
          [-0.0007,  0.0223, -0.0038,  0.0406, -0.0104],
          [-0.0569, -0.0404,  0.0177, -0.0037, -0.0736],
          [ 0.0248, -0.0022,  0.0004, -0.0096, -0.0370]]],


        [[[-0.0162, -0.1240,  0.0363, -0.0235,  0.0847],
          [-0.0088,  0.0378, -0.0065, -0.1118,  0.0619],
          [-0.0241,  0.0213,  0.0282, -0.0877,  0.0172],
          [ 0.0125,  0.0325,  0.0589,  0.0544, -0.1419],
          [-0.0288,  0.0440,  0.0574,  0.0041,  0.0286]]],


        [[[ 0.0425,  0.0454, -0.0588,  0.0284,  0.0222],
          [ 0.1208, -0.0083,  0.0227, -0.0380, -0.0270],
          [ 0.

---
- .backward() : holds the gradient w.r.t the tensor
- nn.Module : neural network module. convenient for encapsulating params., move to GPU, exporting, loading, ETC
- nn.Parameter : attribute of nn.Module
- autograd.Function : forward & backward definition of autograd operation.

---

In [53]:
output = net(input)
print(output)
target = torch.randn(10)
target = target.view(-1, 10)
print(target)
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor([[ 0.1012,  0.0894,  0.0157,  0.0564,  0.0811, -0.0447, -0.0746,  0.0814,
          0.0580, -0.0431]], grad_fn=<ThAddmmBackward>)
tensor([[ 0.2760, -1.3648,  0.3509,  0.5655,  0.7815,  1.3215,  0.5029,  0.6733,
          0.1455,  0.7818]])
tensor(0.6246, grad_fn=<MseLossBackward>)


In [54]:
loss.grad_fn
# input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
#       -> view -> linear -> relu -> linear -> relu -> lienar
#       -> MSELoss
#       -> loss

<MseLossBackward at 0x7f90e8589630>

In [55]:
print(loss.grad_fn)
print(loss.grad_fn.next_functions[0][0])
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<MseLossBackward object at 0x7f90e8589630>
<ThAddmmBackward object at 0x7f90e858d710>
<ExpandBackward object at 0x7f90e858d828>


In [56]:
# To backpropagate the error all we have to do is to loss.backward()
## 1) clear all existing grads else grads will be accumulated

In [58]:
net.zero_grad()
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad) # 앞에서 out.backward를 한번 했기 때문

loss.backward()
print('conv1.bias.grad after backward')
print(net.conv2.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0024, -0.0021,  0.0015, -0.0031,  0.0212,  0.0011,  0.0027,  0.0086,
        -0.0268, -0.0139, -0.0173, -0.0061,  0.0112,  0.0141,  0.0036, -0.0067])


In [59]:
## update weights
## -weight = weight - learning_rate*grad

In [60]:
lr = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data*lr)


In [63]:
# optimizer 이용

import torch.optim as optim
opt = optim.SGD(net.parameters(), lr = lr)

opt.zero_grad() # optimizer에 zero_grad 써도 됨
output = net(input)
loss = criterion(output, target)
loss.backward()
opt.step()