In [None]:
import numpy as np

# 假设已有MLP类定义，含forward、backward、update_weights等方法

def train_minibatch(model, X, y, epochs=10, batch_size=32, learning_rate=0.01):
    """Train ``model`` with shuffled mini-batch gradient descent.

    Every epoch draws a fresh random permutation of the samples and walks
    through it in slices of ``batch_size`` (the final slice may be shorter).
    For each slice the model's ``forward``, ``loss``, ``backward`` and
    ``update_weights`` methods are called, in that order. After each epoch a
    progress line with the mean of that epoch's mini-batch losses is printed.

    Args:
        model: Object exposing ``forward(X_batch)``,
            ``loss(outputs, y_batch)``, ``backward(outputs, y_batch)`` and
            ``update_weights(learning_rate)``. The value returned by ``loss``
            must be convertible with ``float()``.
        X: NumPy array of inputs, one sample per index along the first axis.
        y: NumPy array of targets with exactly one entry per sample in ``X``.
        epochs: Number of passes over the data.
        batch_size: Maximum number of samples per mini-batch (>= 1).
        learning_rate: Forwarded unchanged to ``model.update_weights``.

    Raises:
        ValueError: If ``X`` is empty, if ``X`` and ``y`` disagree on the
            number of samples, or if ``batch_size`` is smaller than 1.
    """
    n_samples = X.shape[0]

    # Fail early with a clear message: without these checks an empty data set
    # or a negative batch size would skip the batch loop entirely and crash
    # with a NameError when the epoch summary is printed.
    if n_samples == 0:
        raise ValueError("X must contain at least one sample")
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {n_samples} and {len(y)}"
        )
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size}"
        )

    for epoch in range(epochs):
        # Reshuffle inputs and targets with the same permutation so that the
        # pairs stay aligned.
        permutation = np.random.permutation(n_samples)
        X_shuffled = X[permutation]
        y_shuffled = y[permutation]

        loss_total = 0.0
        n_batches = 0
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            X_batch = X_shuffled[start:stop]
            y_batch = y_shuffled[start:stop]

            # Forward pass
            outputs = model.forward(X_batch)

            # Loss of this mini-batch, accumulated for the epoch summary
            loss_total += float(model.loss(outputs, y_batch))
            n_batches += 1

            # Backward pass
            model.backward(outputs, y_batch)

            # Weight update
            model.update_weights(learning_rate)

        # Report the mean over all mini-batches rather than only the last
        # (possibly smaller and therefore noisier) one.
        mean_loss = loss_total / n_batches
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {mean_loss:.4f}')


In [None]:
import numpy as np

class AdamOptimizer:
    """Adam optimizer for a sequence of parameters.

    The optimizer keeps a reference to the ``parameters`` sequence it is given
    and writes every update back into that sequence via ``parameters[i] -= ...``.
    For each parameter it tracks exponential moving averages of the gradient
    (``m``) and of the squared gradient (``v``), plus a shared step counter
    ``t`` used for bias correction.
    """

    def __init__(self, parameters, lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        """Store the hyper-parameters and create zeroed moment estimates.

        Args:
            parameters: Indexable, mutable sequence of the parameters to
                optimize; one moment buffer of matching shape is created
                per entry with ``np.zeros_like``.
            lr: Step size.
            beta1: Decay rate of the first-moment (gradient) average.
            beta2: Decay rate of the second-moment (squared gradient) average.
            epsilon: Constant added to the denominator of the update.
        """
        self.parameters = parameters  # parameters to optimize, kept by reference
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon

        # First- and second-moment estimates start at zero.
        self.m = [np.zeros_like(p) for p in self.parameters]
        self.v = [np.zeros_like(p) for p in self.parameters]
        self.t = 0  # number of completed steps

    def step(self, grads):
        """Apply one Adam update using ``grads``.

        Args:
            grads: Iterable with one gradient per parameter, in the same
                order as ``self.parameters``.

        Returns:
            A new list holding the updated parameters (the same objects that
            are stored in ``self.parameters``).

        Raises:
            ValueError: If the number of gradients differs from the number of
                parameters. No optimizer state is modified in that case.
        """
        grads = list(grads)
        # Reject mismatched input up front; pairing with zip() would silently
        # leave the trailing parameters without an update.
        if len(grads) != len(self.parameters):
            raise ValueError(
                f"expected {len(self.parameters)} gradients, got {len(grads)}"
            )

        self.t += 1
        # The bias-correction denominators depend only on the step count, so
        # they are computed once per step instead of once per parameter.
        m_correction = 1 - self.beta1 ** self.t
        v_correction = 1 - self.beta2 ** self.t

        for i, grad in enumerate(grads):
            # Update the biased moment estimates.
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grad
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * (grad ** 2)

            # Bias-corrected moment estimates.
            m_hat = self.m[i] / m_correction
            v_hat = self.v[i] / v_correction

            # Parameter update, written back into the caller's sequence.
            self.parameters[i] -= self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)

        return list(self.parameters)
