In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define a simple 2-layer fully connected network
class SimpleNet(nn.Module):
    """Tiny regression MLP: Linear(1 -> 10), ReLU, optional BatchNorm1d, Linear(10 -> 1).

    Args:
        bn: when truthy, a ``BatchNorm1d(10)`` layer sits between the ReLU and
            the output layer; otherwise that slot holds ``nn.Identity()``.
    """

    def __init__(self, bn=False):
        super().__init__()
        self.fc1 = nn.Linear(1, 10)
        # The attribute is always called `bn`, so forward() is the same
        # regardless of whether normalization is enabled.
        if bn:
            self.bn = nn.BatchNorm1d(10)
        else:
            self.bn = nn.Identity()
        self.fc2 = nn.Linear(10, 1)

    def forward(self, x):
        """Map ``x`` through fc1 -> ReLU -> bn -> fc2 and return the result."""
        hidden = torch.relu(self.fc1(x))
        normalized = self.bn(hidden)
        return self.fc2(normalized)

# Synthetic regression data: noisy samples of y = x^2 (NumPy seed fixed for repeatability)
np.random.seed(0)
x = np.random.rand(100, 1).astype(np.float32)  # uniform draws in [0, 1), cast to float32 first
x = x * 4 - 2  # rescale to x values between -2 and 2
y = np.random.randn(100, 1).astype(np.float32) * 0.1  # Gaussian noise term, scaled by 0.1
y = x ** 2 + y  # quadratic target plus the noise

# Tensor views of the arrays for the training loop
x_tensor, y_tensor = (torch.from_numpy(arr) for arr in (x, y))

# Redefining the training function to allow weight updates in eval mode
def train_with_eval_mode_update(model, x_tensor, y_tensor, optimizer, criterion, epochs=1000, eval_mode=False):
    """Run full-batch gradient updates while the model is held in a chosen mode.

    On every epoch the model is switched to eval mode (``eval_mode`` truthy) or
    train mode (otherwise), one forward/backward pass is done on the whole
    ``(x_tensor, y_tensor)`` set, and ``optimizer.step()`` is applied in either
    mode. The loss is printed on every 100th epoch, starting with epoch 0.

    Args:
        model: module to optimize; invoked as ``model(x_tensor)``.
        x_tensor: inputs fed to the model.
        y_tensor: targets handed to ``criterion`` together with the outputs.
        optimizer: object whose ``zero_grad()`` and ``step()`` are called each epoch.
        criterion: callable ``criterion(outputs, y_tensor)`` producing the loss.
        epochs: number of update steps to perform.
        eval_mode: selects ``model.eval()`` instead of ``model.train()``.

    Returns:
        None; progress is reported via ``print`` only.
    """
    for epoch in range(epochs):
        # Pick the mode-switching method first, then call it.
        switch_mode = model.eval if eval_mode else model.train
        switch_mode()

        # Clear stale gradients, then forward -> loss -> backward -> update.
        optimizer.zero_grad()
        loss = criterion(model(x_tensor), y_tensor)
        loss.backward()
        optimizer.step()  # weights are updated in both modes

        if epoch % 100 != 0:
            continue
        print(f'Epoch [{epoch}/{epochs}], Loss: {loss.item():.4f}, Mode: {"Eval" if eval_mode else "Train"}')

# Experiment 1: no BatchNorm. Train two identically initialized networks, one
# in train mode and one in eval mode, and compare the printed losses.

# Seed PyTorch's global RNG before building the models: the Linear layers draw
# their initial weights from it, so without a fixed seed the losses printed
# below change on every kernel restart (np.random.seed only covers the data).
torch.manual_seed(0)

# Initialize model, criterion, and optimizer to ensure same starting conditions
model_train = SimpleNet(bn=False)
model_eval = SimpleNet(bn=False)

# Ensuring the initial weights are the same for both models
model_eval.load_state_dict(model_train.state_dict())

# Same loss and the same plain-SGD settings for both runs; each model gets its
# own optimizer because an optimizer is bound to one set of parameters.
criterion = nn.MSELoss()
optimizer_train = optim.SGD(model_train.parameters(), lr=0.01)
optimizer_eval = optim.SGD(model_eval.parameters(), lr=0.01)

# Train the model in training mode
print("Training in normal (train) mode...")
train_with_eval_mode_update(model_train, x_tensor, y_tensor, optimizer_train, criterion, epochs=300)

# Train the model in evaluation mode with weight updates
print("\nTraining in eval mode with weight updates...")
train_with_eval_mode_update(model_eval, x_tensor, y_tensor, optimizer_eval, criterion, epochs=300, eval_mode=True)

  from .autonotebook import tqdm as notebook_tqdm


Training in normal (train) mode...
Epoch [0/300], Loss: 1.7658, Mode: Train
Epoch [100/300], Loss: 0.6877, Mode: Train
Epoch [200/300], Loss: 0.3594, Mode: Train

Training in eval mode with weight updates...
Epoch [0/300], Loss: 1.7658, Mode: Eval
Epoch [100/300], Loss: 0.6877, Mode: Eval
Epoch [200/300], Loss: 0.3594, Mode: Eval


In [None]:
# Experiment 2: turn on the model's BN layer and repeat the train-vs-eval comparison.

# Seed PyTorch's global RNG so the initial weights, and therefore the printed
# losses, are the same every time this cell is run.
torch.manual_seed(0)

model_train = SimpleNet(bn=True)
model_eval = SimpleNet(bn=True)
# Ensuring the initial weights are the same for both models
model_eval.load_state_dict(model_train.state_dict())

# Same loss and the same plain-SGD settings for both runs.
criterion = nn.MSELoss()
optimizer_train = optim.SGD(model_train.parameters(), lr=0.01)
optimizer_eval = optim.SGD(model_eval.parameters(), lr=0.01)

# Train the model in training mode
print("Training in normal (train) mode...")
train_with_eval_mode_update(model_train, x_tensor, y_tensor, optimizer_train, criterion, epochs=300)

# Train the model in evaluation mode with weight updates
print("\nTraining in eval mode with weight updates...")
train_with_eval_mode_update(model_eval, x_tensor, y_tensor, optimizer_eval, criterion, epochs=300, eval_mode=True)

Training in normal (train) mode...
Epoch [0/300], Loss: 2.4224, Mode: Train
Epoch [100/300], Loss: 0.0195, Mode: Train
Epoch [200/300], Loss: 0.0170, Mode: Train

Training in eval mode with weight updates...
Epoch [0/300], Loss: 1.8483, Mode: Eval
Epoch [100/300], Loss: 0.4139, Mode: Eval
Epoch [200/300], Loss: 0.1168, Mode: Eval


训练时（model.train()）
使用当前 batch 的均值 $\mu_B$ 和方差 $\sigma_B^2$ 进行标准化：
$$\hat{x} = \frac{x - \mu_B}{\sqrt{\sigma_B^2 + \epsilon}}$$
并使用它们更新内部的 running mean 和 running var：
running_mean = (1 - momentum) * running_mean + momentum * batch_mean
running_var = (1 - momentum) * running_var + momentum * batch_var

推理时（model.eval()）
使用保存的 running mean 和 running var 进行归一化：
$$\hat{x} = \frac{x - \text{running\_mean}}{\sqrt{\text{running\_var} + \epsilon}}$$
不再使用当前 batch 的统计信息，也不再更新 running 状态。