In [2]:
import torch.nn as nn
import torch
import torchvision.transforms as transforms

In [3]:
class Model(nn.Module):
    def __init__(self):
        super(Model,self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=1),nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=1),nn.ReLU(),
        )
        self.block2 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=1),nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=1),nn.ReLU(),)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))

    def forward(self, x):
        x = self.block1(x)
        x = self.pool1(x)
        x = self.block2(x)
        x = self.pool2(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1).contiguous()
        return x

model = Model()
for name,layer in model.named_children():
    print(f"name: {name}, layer: {layer}")

for name, param in model.named_parameters():
    print(f"name: {name}")

name: block1, layer: Sequential(
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
)
name: block2, layer: Sequential(
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
)
name: pool1, layer: MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
name: pool2, layer: MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
name: avg_pool, layer: AdaptiveAvgPool2d(output_size=(1, 1))
name: block1.0.weight
name: block1.0.bias
name: block1.2.weight
name: block1.2.bias
name: block1.4.weight
name: block1.4.bias
name: block2.0.weight
name: block2.0.bias
name: block2.2.weight
name: block2.2.bias
name: block2.

冻结参数

In [4]:
#这里面要用layer_name
for param in model.avg_pool.parameters():
    param.requires_grad = False

"""
通常可以固定一些靠近数据端的一些层
"""

'\n通常可以固定一些靠近数据端的一些层\n'

修改某些特定层

In [7]:
model.avg_pool = nn.AdaptiveMaxPool2d((1,1))
print(model.avg_pool._get_name())

AdaptiveMaxPool2d


分层调整学习率

In [None]:
from torch.optim import Adam

# 分组参数：卷积层参数 vs 全连接层参数
conv_params = []
fc_params = []
for name, param in model.named_parameters():
    if "fc" in name:
        fc_params.append(param)
    else:
        conv_params.append(param)

# 不同层设置不同学习率
optimizer = Adam([
    {"params": conv_params, "lr": 1e-5},  # 底层学习率低
    {"params": fc_params, "lr": 1e-3}     # 顶层学习率高
])