### 순전파 네트워크 신경망 정의하기

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        # 1 input image channel, 6 output channels, 3X3 square confolution
        # kernel
        self.conv1 = nn.Conv2d(1,6,3)
        self.conv2 = nn.Conv2d(6,16,3)
        # an affine operation : y = Wx +b
        self.fc1 = nn.Linear(16*6*6, 120)
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
    
    def forward(self,x):
        # Max pooling over a (2,2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)),2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:] # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *=s
        return num_features
net=Net()
print(net)
        

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


### 모델의 학습 가능한 매개변수들

In [3]:
params = list(net.parameters())
print(len(params))
print(params[0].size())

10
torch.Size([6, 1, 3, 3])


In [4]:
params

[Parameter containing:
 tensor([[[[-0.0868, -0.2959,  0.0953],
           [ 0.1201,  0.0730, -0.2166],
           [ 0.2952,  0.1039,  0.1219]]],
 
 
         [[[ 0.2519,  0.2399, -0.2708],
           [ 0.2508,  0.2293,  0.1310],
           [ 0.0949,  0.3242,  0.2241]]],
 
 
         [[[-0.0740,  0.2693,  0.0848],
           [ 0.2143,  0.3234,  0.0352],
           [ 0.2981,  0.0711, -0.1905]]],
 
 
         [[[-0.0016,  0.1309,  0.2531],
           [-0.0077, -0.0025, -0.3046],
           [ 0.2468, -0.0400,  0.0962]]],
 
 
         [[[-0.1364,  0.2217,  0.1301],
           [ 0.0696,  0.2726,  0.0480],
           [ 0.1466,  0.0925, -0.2750]]],
 
 
         [[[-0.0915,  0.2704,  0.3001],
           [ 0.2916, -0.1201, -0.2325],
           [-0.1499, -0.2593, -0.1234]]]], requires_grad=True),
 Parameter containing:
 tensor([ 0.2872, -0.0387,  0.0141, -0.1389, -0.0395,  0.1885],
        requires_grad=True),
 Parameter containing:
 tensor([[[[-0.1043,  0.1211, -0.1093],
           [-0.0163, -0.

### 입력값 넣기

In [5]:
input = torch.randn(1,1,32,32)
out = net(input)
print(out)

tensor([[ 0.0039,  0.0120,  0.1090, -0.0351,  0.0487,  0.0468, -0.0857, -0.0507,
          0.0011,  0.1185]], grad_fn=<AddmmBackward>)


### 모든 매개변수의 변화도 버퍼를 0으로 설정하고 무작위 값으로 역전파

In [6]:
net.zero_grad()
out.backward(torch.randn(1,10))

### 손실 함수 (Loss Function)

In [7]:
output = net(input)
target = torch.randn(10)
target = target.view(1,-1)
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(1.2339, grad_fn=<MseLossBackward>)


In [8]:
target

tensor([[-0.3047, -2.1296, -0.5625, -0.0447,  0.0752, -0.7782,  2.1782,  0.5857,
          0.3609,  1.0485]])

In [9]:
print(loss.grad_fn) # MSELoss

<MseLossBackward object at 0x00000165A3A07208>


In [10]:
print(loss.grad_fn.next_functions[0][0]) # Linear

<AddmmBackward object at 0x00000165A3A07B08>


In [11]:
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU

<AccumulateGrad object at 0x00000165A49FD2C8>


### 역전파

In [12]:
net.zero_grad()

In [13]:
print(net.conv1.bias.grad)

tensor([0., 0., 0., 0., 0., 0.])


In [14]:
loss.backward()

In [15]:
print(net.conv1.bias.grad)

tensor([ 0.0077,  0.0031,  0.0057, -0.0033,  0.0019,  0.0017])


### 가중치 갱신

In [16]:
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data*learning_rate)

In [17]:
params

[Parameter containing:
 tensor([[[[-0.0868, -0.2956,  0.0954],
           [ 0.1202,  0.0731, -0.2167],
           [ 0.2953,  0.1039,  0.1219]]],
 
 
         [[[ 0.2519,  0.2399, -0.2710],
           [ 0.2507,  0.2293,  0.1311],
           [ 0.0949,  0.3242,  0.2241]]],
 
 
         [[[-0.0738,  0.2693,  0.0848],
           [ 0.2143,  0.3233,  0.0351],
           [ 0.2982,  0.0711, -0.1905]]],
 
 
         [[[-0.0015,  0.1310,  0.2531],
           [-0.0079, -0.0026, -0.3046],
           [ 0.2468, -0.0399,  0.0962]]],
 
 
         [[[-0.1363,  0.2216,  0.1302],
           [ 0.0696,  0.2726,  0.0481],
           [ 0.1466,  0.0925, -0.2750]]],
 
 
         [[[-0.0915,  0.2704,  0.3001],
           [ 0.2915, -0.1202, -0.2324],
           [-0.1499, -0.2593, -0.1235]]]], requires_grad=True),
 Parameter containing:
 tensor([ 0.2871, -0.0387,  0.0141, -0.1389, -0.0395,  0.1885],
        requires_grad=True),
 Parameter containing:
 tensor([[[[-0.1043,  0.1210, -0.1093],
           [-0.0164, -0.

In [19]:
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr = 0.01)

In [20]:
optimizer.zero_grad() # zero the gradient buffers
ouput = net(input)
loss = criterion(output, target)
loss.backward()
optimezer.step() # Does the update

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.