In [4]:
import torch
import torch.nn as nn

In [10]:
class SimpleFNN(nn.Module):
    """Small feed-forward network: ``fc1`` -> ``fc2`` -> ``out_fc``.

    ``fc1`` and ``fc2`` are each Linear -> SELU -> BatchNorm1d -> Linear.
    ``fc1`` maps ``in_features`` back to ``in_features`` (through a hidden
    width of ``out_features``); ``fc2`` and ``out_fc`` produce
    ``out_features``.

    Args:
        in_features: Size of the last dimension of the input tensor.
        out_features: Size of the last dimension of the output tensor.
    """

    def __init__(self, in_features: int, out_features: int):
        super().__init__()
        self.fc1 = nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.SELU(),
            # The normalisation and the following Linear must be sized to the
            # width produced by the first Linear (out_features). BatchNorm1d is
            # used because BatchNorm2d only accepts 4-D input.
            nn.BatchNorm1d(out_features),
            nn.Linear(out_features, in_features),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.SELU(),
            nn.BatchNorm1d(out_features),
            nn.Linear(out_features, out_features),
        )
        self.out_fc = nn.Linear(out_features, out_features)

    def forward(self, x):
        """Run ``x`` through ``fc1``, ``fc2`` and ``out_fc`` in that order."""
        x = self.fc2(self.fc1(x))
        return self.out_fc(x)

model = SimpleFNN(10, 5)

# Two parameter groups, each with its own hyper-parameters. Only fc1 and fc2
# are handed to the optimizer here; out_fc's parameters are not included.
# The trailing lr=1e-5 ends up in optimizer.defaults; both groups override it.
optimizer = torch.optim.AdamW(
    [
        dict(params=model.fc1.parameters(), lr=1e-3),
        dict(params=model.fc2.parameters(), lr=1e-4, weight_decay=0.01),
    ],
    lr=1e-5,
)

# Inspect what the optimizer stores: its defaults, the number of groups, and
# the keys carried by a single group.
print("optimizer.defaults：", optimizer.defaults)
print("optimizer.param_groups长度：", len(optimizer.param_groups))
print("optimizer.param_groups一个元素包含的键：", optimizer.param_groups[0].keys())

optimizer.defaults： {'lr': 1e-05, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0.01, 'amsgrad': False, 'foreach': None, 'maximize': False, 'capturable': False, 'differentiable': False, 'fused': None}
optimizer.param_groups长度： 2
optimizer.param_groups一个元素包含的键： dict_keys(['params', 'lr', 'betas', 'eps', 'weight_decay', 'amsgrad', 'foreach', 'maximize', 'capturable', 'differentiable', 'fused'])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import *
import matplotlib.pyplot as plt
import numpy as np

class SimpleFNN(nn.Module):
    """Feed-forward network made of two four-layer blocks and an output layer.

    Each block is Linear -> SELU -> BatchNorm1d -> Linear. ``fc1`` returns a
    tensor of width ``in_features``; ``fc2`` and ``out_fc`` return width
    ``out_features``.

    Args:
        in_features: Width of the input (last dimension).
        out_features: Width of the output (last dimension).
    """

    def __init__(self, in_features: int, out_features: int):
        super().__init__()
        self.fc1 = self._make_block(in_features, out_features, in_features)
        self.fc2 = self._make_block(in_features, out_features, out_features)
        self.out_fc = nn.Linear(out_features, out_features)

    @staticmethod
    def _make_block(n_in: int, n_hidden: int, n_out: int) -> nn.Sequential:
        """Assemble one Linear -> SELU -> BatchNorm1d -> Linear block."""
        layers = [
            nn.Linear(n_in, n_hidden),
            nn.SELU(),
            # BatchNorm1d (rather than BatchNorm2d) so the block suits 2-D input.
            nn.BatchNorm1d(n_hidden),
            nn.Linear(n_hidden, n_out),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Apply ``fc1``, then ``fc2``, then ``out_fc``."""
        hidden = self.fc1(x)
        hidden = self.fc2(hidden)
        return self.out_fc(hidden)

# Generate random data
def generate_data(batch_size=32, in_features=10, out_features=None):
    """Return one random ``(inputs, targets)`` batch drawn from N(0, 1).

    Args:
        batch_size: Number of rows in both tensors.
        in_features: Width of the input tensor.
        out_features: Width of the target tensor. ``None`` (the default) keeps
            the historical behaviour of using ``in_features``, which is only
            right for a model whose output width equals its input width.

    Returns:
        Tuple ``(x, y)`` with shapes ``(batch_size, in_features)`` and
        ``(batch_size, out_features)``.
    """
    if out_features is None:
        out_features = in_features
    x = torch.randn(batch_size, in_features)
    y = torch.randn(batch_size, out_features)
    return x, y

# Train and record the learning rate
def train_and_record_lr(scheduler, scheduler_name, epochs=1000, steps_per_epoch=10):
    """Train a throwaway model under one scheduler and log its learning rate.

    A fresh ``SimpleFNN`` (10 -> 10) is optimised with SGD (lr=0.1) against an
    MSE loss on random batches from ``generate_data``. After every scheduler
    step the learning rate of the first parameter group is recorded.

    Args:
        scheduler: Ignored. The scheduler is always built here from
            ``scheduler_name``; the parameter is kept so existing calls work.
        scheduler_name: Name understood by ``init_scheduler``.
        epochs: Number of epochs to simulate.
        steps_per_epoch: Batches per epoch. Only used for the step-based
            schedulers (``CyclicLR`` and ``OneCycleLR``); every other
            scheduler runs one batch and one scheduler step per epoch.

    Returns:
        List of learning rates: ``epochs * steps_per_epoch`` entries for the
        step-based schedulers, ``epochs`` entries otherwise.

    Raises:
        ValueError: If ``init_scheduler`` yields no scheduler for the name.
    """
    step_based = scheduler_name in ("CyclicLR", "OneCycleLR")
    metric_based = scheduler_name == "ReduceLROnPlateau"

    model = SimpleFNN(in_features=10, out_features=10)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    criterion = nn.MSELoss()

    if step_based:
        # Step-based schedulers advance once per batch and need the total count.
        total_iters = steps_per_epoch * epochs
        scheduler = init_scheduler(scheduler_name, optimizer, total_iters)
    else:
        # Epoch-based and metric-based schedulers advance once per epoch.
        total_iters = epochs
        scheduler = init_scheduler(scheduler_name, optimizer)

    if scheduler is None:
        # Fail before training instead of with an AttributeError on None later.
        raise ValueError(f"Unknown scheduler name: {scheduler_name!r}")

    lrs = []
    for _ in range(total_iters):
        x, y = generate_data()
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()

        if metric_based:
            # Simulated validation loss (a random value).
            scheduler.step(np.random.uniform(0.1, 1.0))
        else:
            scheduler.step()

        # Read the rate from the optimizer itself: this is the value every
        # scheduler writes to, and it also works on older PyTorch releases
        # where ReduceLROnPlateau does not provide get_last_lr().
        lrs.append(optimizer.param_groups[0]["lr"])

    return lrs

def init_scheduler(name, optimizer, total_steps=None):
    """Build the learning-rate scheduler called ``name`` for ``optimizer``.

    Args:
        name: One of "LambdaLR", "MultiplicativeLR", "StepLR", "MultiStepLR",
            "ConstantLR", "LinearLR", "ExponentialLR", "CosineAnnealingLR",
            "ReduceLROnPlateau", "CyclicLR", "OneCycleLR" or
            "CosineAnnealingWarmRestarts".
        optimizer: Optimizer the scheduler will drive.
        total_steps: Total number of scheduler steps. Required for "CyclicLR"
            (its ramp-up length is a quarter of it) and "OneCycleLR"; ignored
            by every other scheduler.

    Returns:
        The freshly constructed scheduler.

    Raises:
        ValueError: If ``name`` is not a supported scheduler, or if
            ``total_steps`` is missing or not positive for a scheduler that
            needs it.
    """
    if name in ("CyclicLR", "OneCycleLR") and (total_steps is None or total_steps <= 0):
        raise ValueError(f"{name} requires a positive total_steps, got {total_steps!r}")

    # Zero-argument factories so that only the requested scheduler is built.
    factories = {
        "LambdaLR": lambda: LambdaLR(optimizer, lr_lambda=lambda epoch: 0.95 ** epoch),
        "MultiplicativeLR": lambda: MultiplicativeLR(optimizer, lr_lambda=lambda epoch: 0.9),
        "StepLR": lambda: StepLR(optimizer, step_size=30, gamma=0.1),
        "MultiStepLR": lambda: MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1),
        "ConstantLR": lambda: ConstantLR(optimizer, factor=0.1, total_iters=5),
        "LinearLR": lambda: LinearLR(optimizer, start_factor=0.3, end_factor=1.0, total_iters=5),
        "ExponentialLR": lambda: ExponentialLR(optimizer, gamma=0.9),
        "CosineAnnealingLR": lambda: CosineAnnealingLR(optimizer, T_max=10, eta_min=0),
        "ReduceLROnPlateau": lambda: ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10),
        # Clamp to 1 so a very short run cannot yield a zero-length ramp.
        "CyclicLR": lambda: CyclicLR(
            optimizer, base_lr=0.001, max_lr=0.1, step_size_up=max(1, total_steps // 4)
        ),
        "OneCycleLR": lambda: OneCycleLR(
            optimizer, max_lr=0.1, total_steps=total_steps, pct_start=0.3
        ),
        "CosineAnnealingWarmRestarts": lambda: CosineAnnealingWarmRestarts(
            optimizer, T_0=10, T_mult=2, eta_min=0
        ),
    }

    if name not in factories:
        # Previously this fell through and returned None, which only failed
        # later when the caller tried to step the scheduler.
        raise ValueError(
            f"Unknown scheduler name: {name!r}. Supported: {', '.join(factories)}"
        )
    return factories[name]()

def plot_lr_curves(schedulers, lr_histories, epochs=100):
    """Draw one learning-rate curve per scheduler and save the figure.

    The curves are laid out three per row and written to
    ``lr_schedulers.png`` in the current working directory.

    Args:
        schedulers: Scheduler names, used as subplot titles and legend labels.
        lr_histories: One sequence of learning rates per scheduler, in the
            same order as ``schedulers``. Extra entries on either side are
            dropped by ``zip``.
        epochs: Unused; kept so existing calls remain valid.
    """
    n_cols = 3
    curves = list(zip(schedulers, lr_histories))
    # Keep the original 4x3 grid for up to 12 curves and add rows beyond that,
    # where a fixed 4x3 grid would make subplot creation fail.
    n_rows = max(4, -(-len(curves) // n_cols))

    # 2.5 inches per row reproduces the original 15x10 figure for four rows.
    fig = plt.figure(figsize=(15, 2.5 * n_rows))
    for position, (scheduler_name, lrs) in enumerate(curves, start=1):
        ax = fig.add_subplot(n_rows, n_cols, position)
        ax.plot(lrs, label=scheduler_name)
        ax.set_title(scheduler_name)
        # CyclicLR and OneCycleLR are recorded per step, the rest per epoch.
        ax.set_xlabel("Step" if scheduler_name in ["CyclicLR", "OneCycleLR"] else "Epoch")
        ax.set_ylabel("Learning Rate")
        ax.grid(True)
        ax.legend()

    fig.tight_layout()
    fig.savefig("lr_schedulers.png")
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

if __name__ == "__main__":
    # Every scheduler to compare, in the order the subplots are drawn.
    schedulers = [
        "LambdaLR",
        "MultiplicativeLR",
        "StepLR",
        "MultiStepLR",
        "ConstantLR",
        "LinearLR",
        "ExponentialLR",
        "CosineAnnealingLR",
        "ReduceLROnPlateau",
        "CyclicLR",
        "OneCycleLR",
        "CosineAnnealingWarmRestarts",
    ]

    # Collect one learning-rate history per scheduler.
    lr_histories = []
    for scheduler_name in schedulers:
        print(f"Testing {scheduler_name}...")
        # train_and_record_lr builds the scheduler from its name, so the name
        # is passed for both the `scheduler` and `scheduler_name` arguments.
        lrs = train_and_record_lr(
            scheduler_name,
            scheduler_name,
            epochs=100,
            steps_per_epoch=10,
        )
        lr_histories.append(lrs)

    plot_lr_curves(schedulers, lr_histories)
    print("Learning rate curves saved as 'lr_schedulers.png'")

Testing LambdaLR...
Testing MultiplicativeLR...
Testing StepLR...
Testing MultiStepLR...
Testing ConstantLR...
Testing LinearLR...
Testing ExponentialLR...
Testing CosineAnnealingLR...
Testing ReduceLROnPlateau...
Testing CyclicLR...
Testing OneCycleLR...
Testing CosineAnnealingWarmRestarts...
Learning rate curves saved as 'lr_schedulers.png'


In [None]:
# Bare expression with no assignment: as the last line of a notebook cell it
# only makes the notebook echo the CosineAnnealingLR class object.
torch.optim.lr_scheduler.CosineAnnealingLR