# 神经网络

## 构建神经网络

In [1]:
import torch

class Net(torch.nn.Module):
    """Small LeNet-style CNN: one input channel in, ten raw scores out.

    Two 3x3 convolutions (1 -> 6 -> 16 channels), each followed by ReLU and
    2x2 max pooling, feed three fully connected layers (576 -> 120 -> 84 -> 10).
    ``fc1`` is sized for 16 * 6 * 6 features, which is what a 32x32 input
    produces after the two conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = torch.nn.Conv2d(1, 6, 3)
        self.conv2 = torch.nn.Conv2d(6, 16, 3)
        # Fully connected head; no activation is applied after fc3.
        self.fc1 = torch.nn.Linear(16*6*6, 120)
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, input):
        """Run the network on ``input`` and return the output of ``fc3``."""
        functional = torch.nn.functional
        features = input
        # conv -> ReLU -> 2x2 max pool, once per convolution.
        for conv in (self.conv1, self.conv2):
            features = functional.max_pool2d(functional.relu(conv(features)), 2)
        # Collapse every non-batch dimension into one feature axis.
        features = features.view(-1, self.num_flat_features(features))
        for hidden in (self.fc1, self.fc2):
            features = functional.relu(hidden(features))
        return self.fc3(features)

    def num_flat_features(self, input):
        """Return the product of all dimensions of ``input`` except the first."""
        return input.size()[1:].numel()

## 实例化网络

In [2]:
# Build the model; printing a Module lists its registered sub-layers.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


## 查看参数

In [4]:
# Materialise the parameter iterator so individual tensors can be indexed.
params = [p for p in net.parameters()]
# The first entry has the shape of conv1's weight: (out_channels, in_channels, kH, kW).
print(params[0].shape)

torch.Size([6, 1, 3, 3])


## 初始化随机输入

In [9]:
# A batch of one random single-channel 32x32 image.
# NOTE: `input` shadows the Python builtin; the name is kept because later
# cells read it.
input = torch.randn((1, 1, 32, 32))
print(input)

# Calling the module runs forward(); the result has shape (1, 10).
out = net(input)
print(out)

tensor([[[[ 1.4556,  0.7649,  0.0063,  ...,  2.0683,  1.2691, -0.4167],
          [-1.9184,  0.6124, -1.4503,  ..., -0.5787, -0.9259, -0.7016],
          [ 0.8113, -0.1982,  0.6460,  ...,  0.5894,  0.0507,  0.6753],
          ...,
          [-0.7751, -0.1513,  1.1919,  ...,  1.2714, -1.3903,  0.0237],
          [ 0.7113,  2.2669, -2.0401,  ...,  0.2541, -0.1589,  0.5107],
          [ 1.6336, -1.2237,  0.2752,  ...,  1.6409,  0.1877, -0.5024]]]])
tensor([[ 0.0890, -0.0096,  0.0514,  0.0966,  0.0061,  0.0740,  0.0617,  0.1087,
         -0.0801,  0.0313]], grad_fn=<AddmmBackward>)


### 将所有参数的梯度缓冲区归零，并用随机梯度进行反向传播

In [10]:
# Clear any gradients left over from earlier backward passes.
net.zero_grad()
# `out` is not a scalar, so backward() needs an explicit upstream gradient
# of the same shape; a random one is enough for this demonstration.
out.backward(gradient=torch.randn((1, 10)))

## 损失函数

In [14]:
# Fresh forward pass so the loss owns a new autograd graph.
output = net(input)

# Dummy target: ten random values, shown flat and then with a leading
# batch axis so its shape matches `output`.
target = torch.randn((10,))
print(target)
target = target.unsqueeze(0)
print(target)

# Mean squared error between the prediction and the dummy target.
criterion = torch.nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor([ 1.7268, -1.0941, -0.5264,  0.0278, -0.5645,  1.2786,  0.2433, -0.3703,
         0.6218,  0.4939])
tensor([[ 1.7268, -1.0941, -0.5264,  0.0278, -0.5645,  1.2786,  0.2433, -0.3703,
          0.6218,  0.4939]])
tensor(0.6943, grad_fn=<MseLossBackward>)


## 沿计算图向后追溯几步（grad_fn）

In [19]:
# Start at the node that produced `loss`, then follow the first input edge
# twice, printing each autograd node on the way.
node = loss.grad_fn
print(node)
for _hop in range(2):
    node = node.next_functions[0][0]
    print(node)

<MseLossBackward object at 0x12ac1dad0>
<AddmmBackward object at 0x129f1f390>
<AccumulateGrad object at 0x12aeb7fd0>


## 反向传播

In [20]:
# Reset the gradient buffers of every parameter (the weights are untouched).
net.zero_grad()
print('第一个卷积层反向传播前', net.conv1.bias.grad, sep='\n')

# Backpropagate the loss; gradients accumulate into each parameter's .grad.
loss.backward()
print('第一个卷积层反向传播后', net.conv1.bias.grad, sep='\n')

第一个卷积层反向传播前
tensor([0., 0., 0., 0., 0., 0.])
第一个卷积层反向传播后
tensor([-0.0047,  0.0041,  0.0003,  0.0020,  0.0240, -0.0145])


## 更新权重-[update weight]

> weight = weight - learning_rate * gradient

### 原理

    # Plain SGD by hand: move each parameter against its gradient, in place.
    learning_rate = 0.01
    for param in net.parameters():
        param.data.sub_(learning_rate * param.grad.data)


In [21]:
# SGD over every parameter of the network, with a fixed step size.
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.01)

# One training iteration: clear old gradients, forward, loss, backward, update.
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()