In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

## 定义网络

In [2]:
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages, then three linear layers.

    The first convolution takes a single input channel and the last linear
    layer emits 10 values per sample. ``fc1`` expects 16 * 5 * 5 flattened
    features, which is what the two conv/pool stages produce for a 32x32 input.
    """

    def __init__(self):
        super().__init__()
        # Convolutions: 1 -> 6 channels, then 6 -> 16 channels, both with 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Affine layers (y = Wx + b) narrowing 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run one forward pass and return the raw (un-normalised) outputs of ``fc3``."""
        # Stage 1: convolution, ReLU, 2x2 max pooling.
        stage1 = F.relu(self.conv1(x))
        stage1 = F.max_pool2d(stage1, 2)
        # Stage 2: same pattern with the second convolution.
        stage2 = F.relu(self.conv2(stage1))
        stage2 = F.max_pool2d(stage2, 2)
        # Collapse every non-batch dimension so the linear layers see one row per sample.
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        count = 1
        for dim in x.shape[1:]:
            count = count * dim
        return count

In [3]:
# Instantiate the network, then print its layer summary followed by the type
# of the Net class object itself (not of the instance).
net = Net()
net_class_type = type(Net)
print(net, net_class_type)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
) <class 'type'>


In [4]:
# Gather every learnable tensor registered on the network into a list.
params = [p for p in net.parameters()]
# How many parameter tensors there are.
print(len(params))
# Shape of the first parameter tensor.
print(params[0].shape)

10
torch.Size([6, 1, 5, 5])


## 定义输入，前向传播

In [5]:
# Import retained so that any other cell referencing Variable keeps working.
# The wrapper itself is deprecated since PyTorch 0.4 (tensors carry autograd
# state directly), so the input below is created as a plain tensor.
from torch.autograd import Variable

# One random sample: batch of 1, 1 channel, 32x32 pixels.
# NOTE: the name `input` shadows the Python builtin; it is kept because
# later cells refer to it.
input = torch.randn(1, 1, 32, 32)

# Forward pass through the network.
output = net(input)
print(output)
# Tensor type string and Python class of the result.
print(output.type(), type(output))
print(output.size())

tensor([[-0.0851, -0.0634, -0.0953, -0.0334, -0.0953, -0.1915, -0.0252,
          0.1188, -0.0320,  0.1387]])
torch.FloatTensor <class 'torch.Tensor'>
torch.Size([1, 10])


In [6]:
"""
# 对所有的参数的梯度缓冲区进行归零
net.zero_grad()

# 使用随机的梯度进行反向传播
out.backward(torch.randn(1, 10))
"""
# clone() yields an independent copy, so the in-place transpose below does not touch `output`.
output_temp = output.clone()
# transpose_ works in place: output_temp itself now has its two dimensions swapped.
transposed = output_temp.transpose_(0, 1)
print(transposed)
# Rebind `output` to a full slice of itself (same values, same shape).
output = output[:,]
print(output_temp)
# First row of the transposed copy and its size.
first_row = output_temp[0]
print(first_row)
print(first_row.size())

tensor([[-0.0851],
        [-0.0634],
        [-0.0953],
        [-0.0334],
        [-0.0953],
        [-0.1915],
        [-0.0252],
        [ 0.1188],
        [-0.0320],
        [ 0.1387]])
tensor([[-0.0851],
        [-0.0634],
        [-0.0953],
        [-0.0334],
        [-0.0953],
        [-0.1915],
        [-0.0252],
        [ 0.1188],
        [-0.0320],
        [ 0.1387]])
tensor(1.00000e-02 *
       [-8.5070])
torch.Size([1])


## 设置目标值

In [7]:
# Build the regression target: the values 1..10 as a float tensor.
# The dtype is pinned explicitly because torch.arange infers an integer
# (int64) tensor from integer arguments on recent PyTorch releases, and
# nn.MSELoss cannot pair an integer target with the float network output.
# The deprecated Variable wrapper is not needed: plain tensors work with autograd.
target = torch.arange(1, 11, dtype=torch.float)
print(target)
print(target.size())
# Add a leading dimension so the target has the same (1, 10) shape as the network output.
target = target.view(1, -1)
print(target)
print(target.size())

tensor([  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.])
torch.Size([10])
tensor([[  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.]])
torch.Size([1, 10])


## 设置损失函数

In [8]:
# Choose the loss function: mean squared error.
criterion = nn.MSELoss()
# Show which class the criterion object belongs to.
print(criterion.__class__)

<class 'torch.nn.modules.loss.MSELoss'>


## 计算损失函数

In [9]:
# Evaluate the criterion on the network output against the target.
loss = criterion(output, target)
# Print, in order: the Python class, the tensor type string, and the loss value itself.
for shown in (type(loss), loss.type(), loss):
    print(shown)

<class 'torch.Tensor'>
torch.FloatTensor
tensor(38.5867)


In [10]:
# List every attribute name available on the loss tensor.
loss_attribute_names = dir(loss)
print(loss_attribute_names)

['__abs__', '__add__', '__and__', '__array__', '__array_wrap__', '__bool__', '__class__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__div__', '__doc__', '__eq__', '__float__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__', '__idiv__', '__ilshift__', '__imul__', '__index__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pow__', '__radd__', '__rdiv__', '__reduce__', '__reduce_ex__', '__repr__', '__rmul__', '__rpow__', '__rshift__', '__rsub__', '__rtruediv__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv__', '__weakref__', '__xor__', '_abs', '_addmv', '_addmv_', '_addr', '_

In [11]:
# Walk two steps back through the autograd graph, starting from the loss.
loss_node = loss.grad_fn
print(loss_node)
print(type(loss_node))

# next_functions holds (function, index) pairs; follow the first input each time.
parent_node = loss_node.next_functions[0][0]
print(parent_node)
grandparent_node = parent_node.next_functions[0][0]
print(grandparent_node)

<MseLossBackward object at 0x000002DB8F1DC518>
<class 'MseLossBackward'>
<AliasBackward object at 0x000002DB8F1DC518>
<AddmmBackward object at 0x000002DB8F1DC6D8>


## 反向传播

In [12]:
def _show_conv1_bias(value_title, grad_title):
    """Print conv1's bias and its gradient, each preceded by the given title line."""
    bias = net.conv1.bias
    print(value_title)
    print(bias)
    print(grad_title)
    print(bias.grad)


# Reset the gradient buffers of every parameter before backpropagating.
net.zero_grad()
_show_conv1_bias('conv1.bias before backward', 'conv1.bias.grad before backward')

# Backpropagate the loss; this fills in .grad on the parameters but leaves their values alone.
loss.backward()
_show_conv1_bias('conv1.bias after backward', 'conv1.bias.grad after backward')

conv1.bias before backward
Parameter containing:
tensor([-0.1806,  0.0430,  0.1913, -0.1282, -0.1779, -0.0264])
conv1.bias.grad before backward
None
conv1.bias after backward
Parameter containing:
tensor([-0.1806,  0.0430,  0.1913, -0.1282, -0.1779, -0.0264])
conv1.bias.grad after backward
tensor([ 0.0923,  0.0654, -0.0013, -0.1094, -0.1548,  0.0237])


## 权重更新（根据梯度来更新权重）

In [13]:
# Manual gradient-descent step: weight <- weight - learning_rate * gradient.
learning_rate = 0.01
# torch.no_grad() lets the parameters be modified in place without recording
# the update in the autograd graph; this replaces writing through `.data`,
# which bypasses autograd's bookkeeping entirely.
with torch.no_grad():
    for f in net.parameters():
        # A parameter that took no part in the last backward pass has no gradient; skip it.
        if f.grad is None:
            continue
        f.sub_(f.grad * learning_rate)

In [14]:
# The manual update changes the weights only; the stored gradient is printed again for comparison.
print('conv1.bias.grad after update', net.conv1.bias.grad, sep='\n')

conv1.bias.grad after update
tensor([ 0.0923,  0.0654, -0.0013, -0.1094, -0.1548,  0.0237])


In [15]:
# Print conv1's bias values as they stand after the manual update step.
print('conv1.bias after update', net.conv1.bias, sep='\n')

conv1.bias after update
Parameter containing:
tensor([-0.1816,  0.0423,  0.1913, -0.1271, -0.1764, -0.0266])


## 使用内置梯度更新策略来更新权重

In [18]:
# Hand the network's parameters to the built-in SGD optimizer.
optimizer = torch.optim.SGD(net.parameters(), lr=0.001 * 6)

# Ten optimisation steps on the same (input, target) pair.
for i in range(10):
    # Clear gradients once per iteration, before backward(): backward accumulates
    # into .grad, so stale values from the previous step (or from earlier cells)
    # must be removed. One call here is sufficient.
    optimizer.zero_grad()

    # Forward pass.
    output = net(input)

    # Evaluate the loss and report it.
    loss = criterion(output, target)
    print('\nloss is %0.7f' % loss.item())

    # Backward pass: compute gradients of the loss w.r.t. the parameters.
    loss.backward()

    # Apply the SGD update to the parameters.
    optimizer.step()


loss is 37.5668449

loss is 37.2013550

loss is 36.7162094

loss is 35.9450073

loss is 34.7345657

loss is 32.5138130

loss is 27.9811897

loss is 18.1138763

loss is 2.5353034

loss is 2.2518749
