In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class Net(nn.Module):
    """Convolutional network: two conv + max-pool stages, then three linear layers.

    nn.Conv2d expects a 4-D tensor laid out as
    nSamples x nChannels x Height x Width.
    """

    def __init__(self):
        super().__init__()

        # 1 input image channel -> 6 channels -> 16 channels, 3x3 square kernels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        # 16 channels * 6 * 6 spatial positions (6x6 comes from the image dimension)
        self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Apply both conv stages, flatten, and return the raw fc3 output."""
        # Stage 1: convolution -> ReLU -> max pooling over a (2, 2) window
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
        # Stage 2: same pattern; a square window may be given as a single number
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        # Collapse everything except the batch dimension into one axis
        flat = x.view(-1, self.num_flat_features(x))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the batch dimension."""
        count = 1
        for extent in x.size()[1:]:  # skip the leading batch dimension
            count *= extent
        return count
# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [13]:
# Only forward() has to be defined; backward() is derived automatically by autograd.
# The learnable parameters are returned by net.parameters().
params = list(net.parameters())
print(params[8].size())  # fc3's .weight (params[0] would be conv1's .weight)
# recorded size is [10, 84]: 84 input features feeding 10 outputs

torch.Size([10, 84])


In [14]:
# First row of params[8] (fc3's weight), printed before any weight update.
print(params[8][0])

tensor([-6.4684e-02,  3.4561e-02,  7.4980e-02,  8.5394e-02,  1.7106e-03,
         4.7966e-02,  4.7496e-02, -2.7433e-03,  1.7400e-02,  5.5115e-02,
         6.7925e-02,  3.5371e-02,  6.6226e-02,  1.1103e-02, -2.4573e-02,
         5.6788e-04, -8.8122e-02,  7.8587e-02, -5.6167e-02, -4.6688e-02,
         2.3285e-02, -4.4688e-02, -5.5120e-03, -9.0511e-02, -1.0067e-01,
         3.4955e-02,  2.0192e-02, -5.1164e-02, -1.5932e-02, -8.5873e-02,
         6.2057e-02, -2.0761e-03, -4.0981e-02,  7.2101e-02, -7.0125e-02,
         5.1413e-02,  1.7200e-02, -7.5259e-02,  8.8434e-02, -4.6851e-02,
         7.4234e-02, -4.1856e-02,  9.4501e-03,  3.6735e-02,  8.0596e-02,
         9.0384e-02,  1.1378e-02,  3.8251e-03, -2.3081e-02,  1.0827e-01,
         8.7125e-02, -4.9087e-02, -8.7020e-03, -7.2149e-02, -8.7786e-02,
        -2.5918e-02,  1.3950e-02,  9.7916e-02, -9.8683e-02, -7.8135e-02,
         2.7821e-02,  6.7591e-02,  5.0045e-02, -4.2832e-02,  6.3457e-05,
         7.4131e-02, -7.9049e-02, -1.0833e-01, -5.7

In [15]:
print(params[1].size())  # conv1's .bias (recorded size [6]: one value per output channel)

torch.Size([6])


In [16]:
print(params[2].size())  # conv2's .weight
# 3x3 kernels, 6 input channels, 16 output channels

torch.Size([16, 6, 3, 3])


In [17]:
print(params[5].size())  # fc1's .bias (recorded size [120])

torch.Size([120])


In [18]:
# nn.Conv2d expects a 4-D tensor: nSamples x nChannels x Height x Width.
# For a single sample, use input.unsqueeze(0) to add a "fake" batch dimension.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[ 0.0550, -0.0035, -0.0233, -0.0223, -0.0566, -0.1032, -0.1182, -0.0413,
          0.0774,  0.0759]], grad_fn=<AddmmBackward>)


In [19]:
# Zero the gradient buffers of all parameters, then backpropagate with random gradients.
net.zero_grad()
out.backward(torch.randn(1, 10))

In [20]:
# Forward pass, then a mean-squared-error loss against a dummy target.
output = net(input)
target = torch.randn(10).view(1, -1)  # dummy target, reshaped to match the output's size
criterion = nn.MSELoss()  # loss function

loss = criterion(output, target)
loss

tensor(1.1955, grad_fn=<MseLossBackward>)

In [39]:
#input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
#      -> view -> linear -> relu -> linear -> relu -> linear
#      -> MSELoss
#      -> loss
# Backpropagate through the graph sketched above. retain_graph=True keeps the
# graph's saved buffers alive, because loss.backward() is called on this same
# loss again in a later cell; without it that second call raises a RuntimeError
# when the notebook is run top to bottom.
loss.backward(retain_graph=True)
# Tensors with requires_grad=True now hold the gradients obtained via the chain rule.

In [40]:
# Follow a few steps backward through the autograd graph:
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # recorded output is AccumulateGrad, not ReLU — presumably the last layer's bias leaf; TODO confirm

<MseLossBackward object at 0x7ff1a218af50>
<AddmmBackward object at 0x7ff21071fd10>
<AccumulateGrad object at 0x7ff1a1ad5110>


In [21]:
# Existing gradients must be zeroed first; otherwise new gradients are accumulated onto them.
net.zero_grad()
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])


In [22]:
# Backpropagate the loss to fill the freshly zeroed gradient buffers.
# NOTE(review): an earlier cell also calls loss.backward() on this loss; a repeated
# backward over the same graph needs retain_graph=True on the earlier call (or a
# recomputed loss) — confirm when running the notebook top to bottom.
loss.backward()

In [23]:
# conv1's bias gradient after the backward pass.
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad after backward
tensor([-0.0169, -0.0205,  0.0029, -0.0282,  0.0163, -0.0046])


In [24]:
# The gradients of every layer are now available; only the weight update remains:
#     weight = weight - learning_rate * gradient
# Plain SGD step written out by hand.
learning_rate = 0.01
# Update under no_grad() instead of going through .data: the step itself is not
# recorded by autograd, but the in-place change stays visible to autograd's
# version tracking, which .data would bypass.
with torch.no_grad():
    for f in net.parameters():
        if f.grad is None:  # no gradient was computed for this parameter; nothing to apply
            continue
        f.sub_(f.grad * learning_rate)

In [25]:
# Same row of params[8] as printed earlier, shown again after the manual update for comparison.
print(params[8][0])

tensor([-6.4384e-02,  3.4786e-02,  7.5336e-02,  8.5394e-02,  1.9380e-03,
         4.8232e-02,  4.7496e-02, -2.7433e-03,  1.7400e-02,  5.5858e-02,
         6.8484e-02,  3.5920e-02,  6.6226e-02,  1.1103e-02, -2.4573e-02,
         7.8164e-04, -8.8000e-02,  7.9080e-02, -5.6167e-02, -4.6352e-02,
         2.3294e-02, -4.4688e-02, -5.5120e-03, -9.0511e-02, -1.0067e-01,
         3.4955e-02,  2.0192e-02, -5.1127e-02, -1.5932e-02, -8.5873e-02,
         6.2057e-02, -2.0351e-03, -4.0710e-02,  7.2788e-02, -7.0125e-02,
         5.1413e-02,  1.7200e-02, -7.5106e-02,  8.9275e-02, -4.6848e-02,
         7.4234e-02, -4.1856e-02,  9.4501e-03,  3.7328e-02,  8.1254e-02,
         9.0384e-02,  1.1378e-02,  3.8732e-03, -2.2990e-02,  1.0827e-01,
         8.7125e-02, -4.9087e-02, -8.7020e-03, -7.2149e-02, -8.7786e-02,
        -2.5918e-02,  1.4983e-02,  9.8040e-02, -9.8683e-02, -7.8135e-02,
         2.7821e-02,  6.7591e-02,  5.0045e-02, -4.2832e-02,  6.3457e-05,
         7.4417e-02, -7.8386e-02, -1.0833e-01, -5.7

In [26]:
# When training a neural network you may want to use other update rules,
# such as SGD, Nesterov-SGD, Adam, RMSProp, etc.
import torch.optim as optim
# Create the optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

In [27]:
# Inside the training loop:
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()    # apply the parameter update
# Gradients accumulate, so they must be cleared manually with optimizer.zero_grad().