In [2]:
import torch as t
from torch import nn

class Net(nn.Module):
    """LeNet-style CNN: a convolutional feature extractor followed by an MLP classifier.

    The first classifier layer takes 16 * 5 * 5 features, which is what
    ``features`` produces for 3-channel 32x32 images. The network emits
    10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Two conv -> ReLU -> 2x2 max-pool stages.
        self.features = nn.Sequential(
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        # Three fully connected layers (indices 0, 2 and 4) separated by ReLUs.
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: image tensor of shape (N, 3, H, W), or a single unbatched
                (3, H, W) image. H and W must make ``features`` produce
                16 * 5 * 5 values per sample (e.g. 32x32 inputs).

        Returns:
            Tensor of shape (N, 10); an unbatched input yields (1, 10).

        Raises:
            RuntimeError: if the flattened feature size does not match the
                first fully connected layer.
        """
        x = self.features(x)
        if x.dim() == 3:
            # Unbatched (C, H, W) feature map: add the batch axis back.
            x = x.unsqueeze(0)
        # Flatten per sample instead of `view(-1, 16 * 5 * 5)`: with -1 on the
        # batch axis a wrong input size could be silently folded into a
        # different batch size; now it fails loudly in the first Linear layer.
        x = t.flatten(x, start_dim=1)
        return self.classifier(x)


# Build the network
net = Net()

In [7]:
from torch import optim
# Smoke-test a single optimisation step: forward pass, fake backward pass, update.
optimizer = optim.SGD(net.parameters(), lr=1)
input = t.randn((1, 3, 32, 32), requires_grad=True)

optimizer.zero_grad()  # reset gradients before the backward pass
output = net(input)
# fake backward, only for test: feed an all-ones upstream gradient
output.backward(gradient=t.ones((1, 10)))
optimizer.step()

#### PyTorch 的优化器还允许我们为不同的参数设置不同的学习速率
1. 为不同子网络（层）设置不同的学习速率

In [8]:
from torch import optim
# A parameter group that does not specify its own learning rate
# falls back to the optimizer-wide default.
optimizer = optim.SGD(
    params=[
        dict(params=net.features.parameters()),  # learning rate is the default 1e-5
        dict(params=net.classifier.parameters(), lr=1e-2),
    ],
    lr=1e-5,
)

2. 只为两个全连接层设置较大的学习速率，其余层的学习率较小

In [23]:
# Give two fully connected layers a larger learning rate; every other
# parameter uses the smaller default.
# net.classifier is [Linear, ReLU, Linear, ReLU, Linear]: index 3 is a ReLU with
# no parameters, so selecting it would leave only one layer in the special group.
# NOTE(review): indices 0 and 2 are chosen here; switch 2 -> 4 if the output
# layer was the intended second layer.
special_layers = nn.ModuleList([net.classifier[0], net.classifier[2]])
assert all(isinstance(layer, nn.Linear) for layer in special_layers), \
    "special_layers must contain only fully connected layers"
# id is a Python builtin that returns an object's identity
special_layers_params = [id(p) for p in special_layers.parameters()]

# Materialise as a list: a lazy filter object can be consumed only once.
base_params = [p for p in net.parameters() if id(p) not in special_layers_params]

optimizer = optim.SGD([
    {'params': base_params},
    {'params': special_layers.parameters(), 'lr': 0.01}
], lr=1e-5)