In [1]:
import numpy as np
import os
import pickle

# 定义加载 CIFAR-10 数据集的函数
def load_cifar10_batch(filename):
    """加载 CIFAR-10 单个批次"""
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding='bytes')
        X = datadict[b'data']
        Y = datadict[b'labels']
        X = X.reshape(10000, 3, 32, 32).astype("float")
        Y = np.array(Y)
        return X, Y

def load_cifar10(ROOT):
    """加载整个 CIFAR-10 数据集"""
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % b)
        X, Y = load_cifar10_batch(f)
        xs.append(X)
        ys.append(Y)
    X_train = np.concatenate(xs)
    y_train = np.concatenate(ys)
    X_test, y_test = load_cifar10_batch(os.path.join(ROOT, 'test_batch'))

    # 数据归一化到 [0, 1]
    X_train /= 255.0
    X_test /= 255.0

    # 划分训练集和验证集
    indices = np.random.permutation(X_train.shape[0])
    training_idx, validation_idx = indices[:49000], indices[49000:]
    X_train, X_val = X_train[training_idx, :], X_train[validation_idx, :]
    y_train, y_val = y_train[training_idx], y_train[validation_idx]

    return {
        'train_images': X_train,
        'train_labels': y_train,
        'validation_images': X_val,
        'validation_labels': y_val,
        'test_images': X_test,
        'test_labels': y_test
    }

# 使用示例
cifar10_dir = 'cifar-10-batches-py'
data = load_cifar10(cifar10_dir)

X_train = data['train_images']
y_train = data['train_labels']
X_val = data['validation_images']
y_val = data['validation_labels']
X_test = data['test_images']
y_test = data['test_labels']

# 打印数据形状
print("训练集图像形状:", X_train.shape)
print("训练集标签形状:", y_train.shape)
print("验证集图像形状:", X_val.shape)
print("验证集标签形状:", y_val.shape)
print("测试集图像形状:", X_test.shape)
print("测试集标签形状:", y_test.shape)

# 对标签进行 One-hot 编码
def one_hot_encode(y, num_classes):
    encoded = np.zeros((len(y), num_classes))
    for idx, val in enumerate(y):
        encoded[idx, val] = 1
    return encoded

num_classes = 10
y_train_encoded = one_hot_encode(y_train, num_classes)
y_val_encoded = one_hot_encode(y_val, num_classes)
y_test_encoded = one_hot_encode(y_test, num_classes)

# 定义一个多层全连接神经网络类
class MultiLayerNeuralNetwork:
    def __init__(self, input_size, hidden_sizes, output_size):
        # 初始化权重和偏置
        self.layers = len(hidden_sizes) + 1
        self.W = []
        self.b = []

        # 输入层到第一个隐藏层
        self.W.append(np.random.randn(input_size, hidden_sizes[0]) * np.sqrt(2. / input_size))
        self.b.append(np.zeros((1, hidden_sizes[0])))

        # 隐藏层之间
        for i in range(1, len(hidden_sizes)):
            self.W.append(np.random.randn(hidden_sizes[i-1], hidden_sizes[i]) * np.sqrt(2. / hidden_sizes[i-1]))
            self.b.append(np.zeros((1, hidden_sizes[i])))

        # 最后一个隐藏层到输出层
        self.W.append(np.random.randn(hidden_sizes[-1], output_size) * np.sqrt(2. / hidden_sizes[-1]))
        self.b.append(np.zeros((1, output_size)))

    def relu(self, x):
        return np.maximum(0, x)

    def relu_derivative(self, x):
        return np.where(x <= 0, 0, 1)

    def softmax(self, x):
        exp_scores = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def compute_loss(self, y_true, y_pred):
        correct_logprobs = -np.log(y_pred[range(len(y_true)), np.argmax(y_true, axis=1)])
        return np.sum(correct_logprobs) / len(y_true)

    def predict_proba(self, x):
        a = x
        for i in range(self.layers - 1):
            z = np.dot(a, self.W[i]) + self.b[i]
            a = self.relu(z)
        z = np.dot(a, self.W[-1]) + self.b[-1]
        a = self.softmax(z)
        return a

    def predict(self, x):
        return np.argmax(self.predict_proba(x), axis=1)

    def train(self, x_train, y_train, x_valid, y_valid, epochs=100, learning_rate=0.01):
        for epoch in range(epochs):
            # Forward pass
            a = x_train
            activations = [a]
            zs = []
            for i in range(self.layers - 1):
                z = np.dot(a, self.W[i]) + self.b[i]
                a = self.relu(z)
                zs.append(z)
                activations.append(a)
            z = np.dot(a, self.W[-1]) + self.b[-1]
            a = self.softmax(z)
            zs.append(z)
            activations.append(a)

            # Compute loss
            train_loss = self.compute_loss(y_train, a)
            
            # Backward pass
            delta = a - y_train
            dW = [np.dot(activations[-2].T, delta)]
            db = [np.sum(delta, axis=0, keepdims=True)]
            
            for i in range(2, self.layers + 1):
                delta = np.dot(delta, self.W[-i + 1].T) * self.relu_derivative(zs[-i])
                dW.append(np.dot(activations[-i - 1].T, delta))
                db.append(np.sum(delta, axis=0, keepdims=True))

            dW.reverse()
            db.reverse()

            # Update weights
            for i in range(self.layers):
                self.W[i] -= learning_rate * dW[i]
                self.b[i] -= learning_rate * db[i]

            # Validate the model
            val_proba = self.predict_proba(x_valid)
            val_loss = self.compute_loss(y_valid, val_proba)
            val_predictions = np.argmax(val_proba, axis=1)
            val_accuracy = np.mean(val_predictions == np.argmax(y_valid, axis=1))
            
            # Print the results for the current epoch
            print(f'Epoch [{epoch}], train_loss: {train_loss:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_accuracy:.4f}')

# 定义和训练神经网络
input_size = 32 * 32 * 3
hidden_sizes = [1024, 512, 256, 128]  # 多个隐藏层的大小
output_size = 10

nn = MultiLayerNeuralNetwork(input_size, hidden_sizes, output_size)
x_train_flat = X_train.reshape(X_train.shape[0], -1)
x_val_flat = X_val.reshape(X_val.shape[0], -1)

nn.train(x_train_flat, y_train_encoded, x_val_flat, y_val_encoded)


训练集图像形状: (49000, 3, 32, 32)
训练集标签形状: (49000,)
验证集图像形状: (1000, 3, 32, 32)
验证集标签形状: (1000,)
测试集图像形状: (10000, 3, 32, 32)
测试集标签形状: (10000,)


  correct_logprobs = -np.log(y_pred[range(len(y_true)), np.argmax(y_true, axis=1)])


Epoch [0], train_loss: 2.4796, val_loss: inf, val_acc: 0.0940
Epoch [1], train_loss: inf, val_loss: inf, val_acc: 0.1050
Epoch [2], train_loss: inf, val_loss: inf, val_acc: 0.1000
Epoch [3], train_loss: inf, val_loss: inf, val_acc: 0.0890
Epoch [4], train_loss: inf, val_loss: inf, val_acc: 0.0890
Epoch [5], train_loss: inf, val_loss: inf, val_acc: 0.0930
Epoch [6], train_loss: inf, val_loss: inf, val_acc: 0.0890
Epoch [7], train_loss: inf, val_loss: inf, val_acc: 0.1070
Epoch [8], train_loss: inf, val_loss: inf, val_acc: 0.1150
Epoch [9], train_loss: inf, val_loss: inf, val_acc: 0.0940
Epoch [10], train_loss: inf, val_loss: inf, val_acc: 0.1070
Epoch [11], train_loss: inf, val_loss: inf, val_acc: 0.0960
Epoch [12], train_loss: inf, val_loss: inf, val_acc: 0.0940
Epoch [13], train_loss: inf, val_loss: inf, val_acc: 0.1070
Epoch [14], train_loss: inf, val_loss: inf, val_acc: 0.0940
Epoch [15], train_loss: inf, val_loss: inf, val_acc: 0.1150
Epoch [16], train_loss: inf, val_loss: inf, val