In [1]:
import torch
from torch import nn
import pytorch_lightning as pl
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Helper that collects every layer of a module
def get_all_layers(module, layers=None):
    """Return all descendant sub-modules of ``module`` in pre-order.

    Each child is listed before its own children. ``module`` itself is
    not included.

    Args:
        module: Object exposing ``children()`` (e.g. an ``nn.Module``).
        layers: Optional list to append to. When ``None`` a new list is
            created; otherwise the given list is extended in place.

    Returns:
        The list holding the collected sub-modules (the same object as
        ``layers`` when one was supplied).
    """
    def _descend(node):
        # Emit a child first, then everything below it (depth-first).
        for sub in node.children():
            yield sub
            yield from _descend(sub)

    collected = [] if layers is None else layers
    collected.extend(_descend(module))
    return collected

# A simple network architecture
class SimpleNet(nn.Module):
    """Two stacked linear layers (10 -> 5 -> 2), each followed by ReLU."""

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(10, 5)
        self.layer2 = nn.Linear(5, 2)

    def forward(self, x):
        """Apply ``layer1`` and ``layer2`` in turn, with ReLU after each."""
        hidden = self.layer1(x).relu()
        return self.layer2(hidden).relu()

# Integrate config_different_lr_optimizer into a PyTorch Lightning model
class LitModel(pl.LightningModule):
    """Lightning wrapper around ``SimpleNet`` with per-group learning rates.

    ``args`` must provide ``learning_rate`` and ``weight_decay``; it may
    optionally provide ``learnable_parameter_learning_rate``.
    """

    def __init__(self, args):
        super().__init__()
        self.network = SimpleNet()
        self.args = args

    def forward(self, x):
        """Forward ``x`` through the wrapped network."""
        return self.network(x)

    def config_different_lr_optimizer(self):
        """Build an Adam optimizer with separate parameter groups.

        The weight of every ``nn.Linear`` found in ``self.network`` gets
        its own group using ``args.learning_rate``. All remaining
        parameters share one group (listed first) that uses
        ``args.learnable_parameter_learning_rate``. Every group uses
        ``args.weight_decay``.

        Side effect: when ``args`` has no
        ``learnable_parameter_learning_rate`` attribute, it is set on
        ``args`` to ``args.learning_rate``.

        Returns:
            The configured ``Adam`` optimizer.
        """
        # Check for the optional learning rate and fall back to the default.
        if not hasattr(self.args, 'learnable_parameter_learning_rate'):
            setattr(self.args, 'learnable_parameter_learning_rate', self.args.learning_rate)

        weight_groups = []
        # ids of weights that already have a dedicated group; built once so
        # the exclusion below is a constant-time lookup per parameter.
        grouped_ids = set()
        for layer in get_all_layers(self.network):
            if not isinstance(layer, nn.Linear):
                continue
            weight = layer.weight
            if id(weight) in grouped_ids:
                # A layer reachable through several parents must be grouped
                # only once: the optimizer rejects a parameter that appears
                # in more than one parameter group.
                continue
            grouped_ids.add(id(weight))
            # Give the linear layer's weight its own learning rate.
            weight_groups.append({
                'params': weight,
                'lr': self.args.learning_rate,
                'weight_decay': self.args.weight_decay,
            })

        # Everything that did not receive a dedicated group.
        base_params = [
            p for p in self.network.parameters() if id(p) not in grouped_ids
        ]

        optimizer = Adam([
            {
                'params': base_params,
                'lr': self.args.learnable_parameter_learning_rate,
                'weight_decay': self.args.weight_decay,
            },
            *weight_groups,
        ])

        return optimizer

    def configure_optimizers(self):
        """Return the optimizer plus a ``ReduceLROnPlateau`` scheduler config.

        The scheduler entry monitors ``"val_loss"`` with frequency 1.
        """
        optimizer = self.config_different_lr_optimizer()
        scheduler = {
            "scheduler": ReduceLROnPlateau(optimizer),
            "monitor": "val_loss",
            "frequency": 1
        }
        return {"optimizer": optimizer, "lr_scheduler": scheduler}

# Mock args object and test function
class Args:
    """Stand-in for a parsed-arguments object holding optimizer settings."""

    # Learning rate read as ``args.learning_rate``.
    learning_rate = 1e-3
    # Weight decay read as ``args.weight_decay``.
    weight_decay = 1e-4
    # Hypothetical extra attribute: a separate learning rate.
    learnable_parameter_learning_rate = 5e-4

def test_config_different_lr_optimizer():
    """Build a ``LitModel`` from ``Args`` and print each optimizer param group."""
    lit_model = LitModel(Args())
    configured = lit_model.configure_optimizers()
    # One printed dict per parameter group of the configured optimizer.
    for param_group in configured["optimizer"].param_groups:
        print(param_group)

# Run the test
test_config_different_lr_optimizer()


  from .autonotebook import tqdm as notebook_tqdm


{'params': [Parameter containing:
tensor([-0.1677,  0.1214,  0.2673,  0.0457, -0.0326], requires_grad=True), Parameter containing:
tensor([-0.4060, -0.1955], requires_grad=True)], 'lr': 0.0005, 'weight_decay': 0.0001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None}
{'params': [Parameter containing:
tensor([[ 0.2264, -0.1615, -0.2389,  0.2064, -0.0162, -0.3075, -0.1194, -0.2321,
          0.0734, -0.0276],
        [ 0.2333,  0.1604,  0.0593,  0.1787, -0.0939,  0.2699, -0.2416, -0.0875,
         -0.2475,  0.0998],
        [-0.1323, -0.2750, -0.1204, -0.1857, -0.0255,  0.1255,  0.0262,  0.2441,
         -0.1546,  0.0575],
        [-0.2895, -0.2489, -0.2097, -0.2053, -0.1518, -0.0767,  0.2292, -0.1299,
         -0.0530,  0.0015],
        [ 0.1378,  0.1026, -0.0169,  0.1740, -0.1494,  0.0857,  0.1532,  0.2018,
          0.0559,  0.0380]], requires_grad=True)], 'lr': 0.001, 'weight_decay':