### 순전파 네트워크 신경망 정의하기

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        # 1 input image channel, 6 output channels, 3X3 square confolution
        # kernel
        self.conv1 = nn.Conv2d(1,6,3)
        self.conv2 = nn.Conv2d(6,16,3)
        # an affine operation : y = Wx +b
        self.fc1 = nn.Linear(16*6*6, 120)
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
    
    def forward(self,x):
        # Max pooling over a (2,2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)),2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:] # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *=s
        return num_features
net=Net()
print(net)
        

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


### 모델의 학습 가능한 매개변수들

In [4]:
params = list(net.parameters())
print(len(params))
print(params[0].size())

10
torch.Size([6, 1, 3, 3])


In [8]:
params

[Parameter containing:
 tensor([[[[ 0.3194,  0.1491,  0.1957],
           [-0.0941, -0.1004, -0.0291],
           [ 0.2390,  0.2083,  0.0274]]],
 
 
         [[[-0.1466,  0.0542, -0.0455],
           [-0.2678, -0.2044,  0.1502],
           [-0.3258,  0.0716,  0.3157]]],
 
 
         [[[-0.0737,  0.1086,  0.0371],
           [-0.2197,  0.0652, -0.0947],
           [-0.1196, -0.0692,  0.1874]]],
 
 
         [[[ 0.0700, -0.0759, -0.2550],
           [ 0.2558,  0.2764, -0.3056],
           [-0.0379, -0.0759, -0.0735]]],
 
 
         [[[-0.2544, -0.0853,  0.2888],
           [ 0.3016, -0.2607, -0.1468],
           [-0.3318, -0.2604, -0.1139]]],
 
 
         [[[-0.2927, -0.0121,  0.1649],
           [-0.1257, -0.0822, -0.1423],
           [ 0.1282, -0.3023,  0.3255]]]], requires_grad=True),
 Parameter containing:
 tensor([ 0.3157, -0.0802,  0.0940, -0.2767, -0.2267, -0.3317],
        requires_grad=True),
 Parameter containing:
 tensor([[[[-8.1966e-02, -1.0604e-01,  9.4410e-02],
           [

### 입력값 넣기

In [6]:
input = torch.randn(1,1,32,32)
out = net(input)
print(out)

tensor([[-0.1385, -0.1042, -0.1001, -0.0115, -0.0935, -0.0230,  0.0423, -0.0780,
         -0.1106,  0.0341]], grad_fn=<AddmmBackward>)


### 모든 매개변수의 변화도 버퍼를 0으로 설정하고 무작위 값으로 역전파

In [7]:
net.zero_grad()
out.backward(torch.randn(1,10))

### 손실 함수 (Loss Function)

In [9]:
output = net(input)
target = torch.randn(10)
target = target.view(1,-1)
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(0.5513, grad_fn=<MseLossBackward>)


In [10]:
target

tensor([[ 0.4861, -1.2041,  0.0353,  0.7220, -0.3998, -0.4954,  0.1747,  1.6112,
          0.2948,  0.1011]])

In [11]:
print(loss.grad_fn) # MSELoss

<MseLossBackward object at 0x0000015EAABD4248>


In [12]:
print(loss.grad_fn.next_functions[0][0]) # Linear

<AddmmBackward object at 0x0000015EAABAC948>


In [13]:
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU

<AccumulateGrad object at 0x0000015EAAB40988>


### 역전파

In [14]:
net.zero_grad()

In [15]:
print(net.conv1.bias.grad)

tensor([0., 0., 0., 0., 0., 0.])


In [16]:
loss.backward()

In [17]:
print(net.conv1.bias.grad)

tensor([ 1.2341e-03, -3.2454e-05,  2.5736e-03,  5.7237e-04, -6.6848e-03,
         1.1176e-02])


### 가중치 갱신

In [18]:
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data*learning_rate)

In [19]:
params

[Parameter containing:
 tensor([[[[ 0.3194,  0.1490,  0.1957],
           [-0.0941, -0.1004, -0.0291],
           [ 0.2389,  0.2083,  0.0274]]],
 
 
         [[[-0.1467,  0.0542, -0.0455],
           [-0.2678, -0.2044,  0.1502],
           [-0.3260,  0.0716,  0.3158]]],
 
 
         [[[-0.0737,  0.1085,  0.0370],
           [-0.2198,  0.0653, -0.0947],
           [-0.1195, -0.0692,  0.1874]]],
 
 
         [[[ 0.0699, -0.0759, -0.2550],
           [ 0.2557,  0.2764, -0.3056],
           [-0.0379, -0.0758, -0.0735]]],
 
 
         [[[-0.2544, -0.0854,  0.2887],
           [ 0.3016, -0.2608, -0.1468],
           [-0.3319, -0.2604, -0.1138]]],
 
 
         [[[-0.2926, -0.0121,  0.1647],
           [-0.1256, -0.0823, -0.1423],
           [ 0.1281, -0.3022,  0.3254]]]], requires_grad=True),
 Parameter containing:
 tensor([ 0.3157, -0.0802,  0.0939, -0.2767, -0.2267, -0.3318],
        requires_grad=True),
 Parameter containing:
 tensor([[[[-8.1892e-02, -1.0592e-01,  9.4558e-02],
           [

In [20]:
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr = 0.01)

In [21]:
optimizer.zero_grad() # zero the gradient buffers
ouput = net(input)
loss = criterion(output, target)
loss.backward()
optimezer.step() # Does the update

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.