## 神经网络包nn和优化器optim

 - torch.nn: PyTorch专门为神经网络设计的模块化接口
 - `import torch.nn as nn`
 - `import torch.nn.functional as F`

In [2]:
import torch
torch.__version__
import torch.nn as nn
import torch.nn.functional as F

### 定义一个网络

- 继承nn.Module，实现它的forward方法。PyTorch会根据autograd自动实现backward函数。
- forward函数中可使用任何的Tensor支持的函数，还可以使用if、for、print、log等python语法

In [3]:
class Net(nn.Module):
    """Minimal CNN: one 3x3 convolution followed by one linear layer.

    ``forward`` prints the tensor size after every stage so the shape
    changes can be followed while the network runs.
    """

    def __init__(self):
        super().__init__()
        # Convolution layer: 1 input channel, 6 output channels, 3x3 kernel
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        # Linear layer: 1350 input features (6 * 15 * 15 for a 32x32 input),
        # 10 output features
        self.fc1 = nn.Linear(in_features=1350, out_features=10)

    def forward(self, x):
        """Forward pass: conv -> ReLU -> 2x2 max-pool -> ReLU -> flatten -> linear.

        Returns the output of ``fc1`` (10 features per sample).
        """
        print(x.size())
        # Convolution followed by activation
        activated = F.relu(self.conv1(x))
        print(activated.size())
        # Pooling; the ReLU applied afterwards does not change the values,
        # because max-pooling non-negative inputs yields non-negative outputs.
        pooled = F.relu(F.max_pool2d(activated, kernel_size=(2, 2)))
        print(pooled.size())
        # Flatten everything except the leading (batch) dimension
        batch_size = pooled.size(0)
        flattened = pooled.view(batch_size, -1)
        print(flattened.size())
        return self.fc1(flattened)

In [4]:
# Instantiate the network and print its layer summary.
net=Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=1350, out_features=10, bias=True)
)


In [5]:
# Learnable parameters of the network: print every parameter tensor
for parameters in net.parameters():
    print(parameters)

Parameter containing:
tensor([[[[-0.0079,  0.1297, -0.2451],
          [ 0.1030,  0.1361,  0.1590],
          [ 0.2589,  0.3319,  0.0626]]],


        [[[ 0.2521, -0.2298,  0.1167],
          [-0.1748,  0.1688, -0.1334],
          [-0.2969, -0.0790, -0.0943]]],


        [[[-0.2975, -0.0407, -0.0770],
          [ 0.3035,  0.0179,  0.2448],
          [ 0.1437,  0.2363, -0.0439]]],


        [[[ 0.0701, -0.0349, -0.1852],
          [-0.2790, -0.0468,  0.2768],
          [-0.1921,  0.0059,  0.1197]]],


        [[[-0.3148,  0.0139,  0.1341],
          [ 0.2395, -0.2048, -0.1377],
          [ 0.0726, -0.0313, -0.3294]]],


        [[[-0.0025,  0.3097,  0.1006],
          [-0.2884,  0.2416, -0.2444],
          [-0.1085, -0.0013,  0.1929]]]], requires_grad=True)
Parameter containing:
tensor([-0.3189, -0.2133, -0.0736,  0.2419,  0.1609, -0.2002],
       requires_grad=True)
Parameter containing:
tensor([[-0.0041,  0.0105, -0.0018,  ...,  0.0131,  0.0230,  0.0180],
        [-0.0057,  0.0131,  0

In [24]:
# Iterate over the learnable parameters together with their names
# and print each name with the size of its tensor
for name,parameters in net.named_parameters():
    print(name,parameters.size())

conv1.weight torch.Size([6, 1, 3, 3])
conv1.bias torch.Size([6])
fc1.weight torch.Size([10, 1350])
fc1.bias torch.Size([10])


- forward()的输入输出都是Tensor

In [27]:
# Random input of size (1, 1, 32, 32); conv1 expects a single input channel.
# NOTE(review): the name `input` shadows the Python builtin; it is kept
# because a later cell reads it again.
input=torch.randn(1,1,32,32)
# Calling the network runs forward(), which also prints the intermediate sizes
out=net(input)
# Last expression of the cell: the notebook displays the output size
out.size()

torch.Size([1, 1, 32, 32])
torch.Size([1, 6, 30, 30])
torch.Size([1, 6, 15, 15])
torch.Size([1, 1350])


torch.Size([1, 10])

- 反向传播前，先要将所有参数的梯度清零

In [28]:
# Reset the gradients of all parameters before back-propagating
net.zero_grad()
# `out` is not a scalar, so an explicit upstream gradient (all ones,
# same shape as `out`) is passed to backward()
out.backward(torch.ones_like(out))
# NOTE(review): these print the parameter values themselves; the gradients
# computed by backward() would be found under `.grad` -- confirm intent
print(net.conv1.weight)
print(net.conv1.bias)
print(net.fc1.weight)
print(net.fc1.bias)

Parameter containing:
tensor([[[[-0.0079,  0.1297, -0.2451],
          [ 0.1030,  0.1361,  0.1590],
          [ 0.2589,  0.3319,  0.0626]]],


        [[[ 0.2521, -0.2298,  0.1167],
          [-0.1748,  0.1688, -0.1334],
          [-0.2969, -0.0790, -0.0943]]],


        [[[-0.2975, -0.0407, -0.0770],
          [ 0.3035,  0.0179,  0.2448],
          [ 0.1437,  0.2363, -0.0439]]],


        [[[ 0.0701, -0.0349, -0.1852],
          [-0.2790, -0.0468,  0.2768],
          [-0.1921,  0.0059,  0.1197]]],


        [[[-0.3148,  0.0139,  0.1341],
          [ 0.2395, -0.2048, -0.1377],
          [ 0.0726, -0.0313, -0.3294]]],


        [[[-0.0025,  0.3097,  0.1006],
          [-0.2884,  0.2416, -0.2444],
          [-0.1085, -0.0013,  0.1929]]]], requires_grad=True)
Parameter containing:
tensor([-0.3189, -0.2133, -0.0736,  0.2419,  0.1609, -0.2002],
       requires_grad=True)
Parameter containing:
tensor([[-0.0041,  0.0105, -0.0018,  ...,  0.0131,  0.0230,  0.0180],
        [-0.0057,  0.0131,  0

- torch.nn只支持mini-batches，不支持一次只输入一个样本，每次需要一个batch。所以上述输入是一个4维张量（1,1,32,32），其中第一维是batch的大小（这里batch中只有1个样本）。

## 损失函数

In [22]:
# Target tensor: the values 0..9 as floats, reshaped to (1, 10)
y=torch.arange(0,10).view(1,10).float()
print(y)
criterion=nn.MSELoss() # computes the mean squared error
# Loss between the network output `out` (from an earlier cell) and the target
loss=criterion(out,y)
print(loss)

tensor([[0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]])
tensor(29.7409, grad_fn=<MseLossBackward>)


## 优化器

- 反向传播计算所有参数的梯度后，需要优化方法更新权重和参数
- torch.optim提供大多数的优化方法，例如RMSProp、Adam、SGD等

In [31]:
import torch.optim
# Fresh forward pass (forward() also prints the intermediate sizes)
out=net(input)
# Print the parameters before the update
print(net.conv1.weight)
print(net.conv1.bias)
print(net.fc1.weight)
print(net.fc1.bias)
# Define the loss function
criterion=nn.MSELoss()
loss=criterion(out,y)
# Define the optimizer
optimizer=torch.optim.SGD(net.parameters(),lr=0.01)
optimizer.zero_grad() # clear the gradients first; same effect as net.zero_grad()
loss.backward()
# Update the parameters
optimizer.step() 
# Inspect the parameters after the update
print(net.conv1.weight)
print(net.conv1.bias)
print(net.fc1.weight)
print(net.fc1.bias)

torch.Size([1, 1, 32, 32])
torch.Size([1, 6, 30, 30])
torch.Size([1, 6, 15, 15])
torch.Size([1, 1350])
Parameter containing:
tensor([[[[-0.0079,  0.1297, -0.2451],
          [ 0.1030,  0.1361,  0.1590],
          [ 0.2589,  0.3319,  0.0626]]],


        [[[ 0.2521, -0.2298,  0.1167],
          [-0.1748,  0.1688, -0.1334],
          [-0.2969, -0.0790, -0.0943]]],


        [[[-0.2975, -0.0407, -0.0770],
          [ 0.3035,  0.0179,  0.2448],
          [ 0.1437,  0.2363, -0.0439]]],


        [[[ 0.0701, -0.0349, -0.1852],
          [-0.2790, -0.0468,  0.2768],
          [-0.1921,  0.0059,  0.1197]]],


        [[[-0.3148,  0.0139,  0.1341],
          [ 0.2395, -0.2048, -0.1377],
          [ 0.0726, -0.0313, -0.3294]]],


        [[[-0.0025,  0.3097,  0.1006],
          [-0.2884,  0.2416, -0.2444],
          [-0.1085, -0.0013,  0.1929]]]], requires_grad=True)
Parameter containing:
tensor([-0.3189, -0.2133, -0.0736,  0.2419,  0.1609, -0.2002],
       requires_grad=True)
Parameter containi