In [7]:
import numpy as np
from MLP.model import MLP
import pandas as pd
from MLP.layers import SoftmaxCrossEntropy

# Load the .npy dataset splits: features first, then labels.
X_train, X_test = (
    np.load(path)
    for path in (
        'Assignment1-Dataset/train_data.npy',
        'Assignment1-Dataset/test_data.npy',
    )
)
# Labels are flattened to 1-D vectors right after loading.
Y_train, Y_test = (
    np.load(path).flatten()
    for path in (
        'Assignment1-Dataset/train_label.npy',
        'Assignment1-Dataset/test_label.npy',
    )
)


In [8]:
# Display the shape of every split as a sanity check (features and labels, train and test).
tuple(split.shape for split in (X_train, Y_train, X_test, Y_test))

((50000, 128), (50000,), (10000, 128), (10000,))

In [9]:
# Collect the distinct label values that occur in the training labels
# (np.unique returns them sorted), then display them.
classes = np.unique(Y_train)
classes

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

In [10]:


# Hyperparameters
input_dim = 128  # input feature dimension
hidden_dims = [64, 32]  # hidden layer widths
output_dim = 10  # number of output classes (10-way classification)
learning_rate = 0.001
momentum = 0.9
num_epochs = 100
batch_size = 32

# Build the MLP model
model = MLP(input_dim, hidden_dims, output_dim, dropout_rate=0.2, weight_decay=1e-4)

# Velocity state handed to model.update for the momentum optimizer; starts empty
velocity = {}

# Training loop
# NOTE(review): batches are visited in the same order every epoch (no shuffling);
# consider a seeded per-epoch permutation if that is not intentional.
for epoch in range(num_epochs):

    epoch_loss = 0
    # Count batches explicitly: range(0, len(X_train), batch_size) also yields the
    # final partial batch, so len(X_train) // batch_size undercounts whenever the
    # dataset size is not a multiple of batch_size.
    num_batches = 0
    for i in range(0, len(X_train), batch_size):
        X_batch = X_train[i:i+batch_size]
        Y_batch = Y_train[i:i+batch_size]

        # One-hot encode the labels: one row per sample, output_dim columns
        Y_batch_one_hot = np.eye(output_dim)[Y_batch]

        # Forward pass
        logits = model.forward(X_batch, training=True)

        # Compute the loss
        loss = model.compute_loss(logits, Y_batch_one_hot)
        epoch_loss += loss
        num_batches += 1

        # Backward pass
        model.backward()

        # Parameter update
        model.update(learning_rate, momentum, velocity)

    # Report the mean per-batch loss for this epoch; max(..., 1) avoids a
    # ZeroDivisionError when the training set is empty.
    avg_loss = epoch_loss / max(num_batches, 1)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}')



Epoch 1/100, Loss: 2.1379
Epoch 2/100, Loss: 1.9578
Epoch 3/100, Loss: 1.8820
Epoch 4/100, Loss: 1.8338
Epoch 5/100, Loss: 1.8003
Epoch 6/100, Loss: 1.7774
Epoch 7/100, Loss: 1.7564
Epoch 8/100, Loss: 1.7398
Epoch 9/100, Loss: 1.7217
Epoch 10/100, Loss: 1.7081
Epoch 11/100, Loss: 1.6948
Epoch 12/100, Loss: 1.6839
Epoch 13/100, Loss: 1.6747
Epoch 14/100, Loss: 1.6691
Epoch 15/100, Loss: 1.6579
Epoch 16/100, Loss: 1.6537
Epoch 17/100, Loss: 1.6442
Epoch 18/100, Loss: 1.6400
Epoch 19/100, Loss: 1.6335
Epoch 20/100, Loss: 1.6315
Epoch 21/100, Loss: 1.6230
Epoch 22/100, Loss: 1.6184
Epoch 23/100, Loss: 1.6117
Epoch 24/100, Loss: 1.6095
Epoch 25/100, Loss: 1.6030
Epoch 26/100, Loss: 1.5998
Epoch 27/100, Loss: 1.5935
Epoch 28/100, Loss: 1.5878
Epoch 29/100, Loss: 1.5876
Epoch 30/100, Loss: 1.5832
Epoch 31/100, Loss: 1.5792
Epoch 32/100, Loss: 1.5739
Epoch 33/100, Loss: 1.5746
Epoch 34/100, Loss: 1.5639
Epoch 35/100, Loss: 1.5659
Epoch 36/100, Loss: 1.5596
Epoch 37/100, Loss: 1.5626
Epoch 38/1

$$ y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta $$

In [11]:
# Score the trained model on the training split. The captured output for this cell is
# labelled "Test Accuracy" even though X_train/Y_train are passed here; the label
# presumably comes from predict_and_evaluate itself — confirm in MLP.model.
predictions_train = model.predict_and_evaluate(X_train, Y_train)


 Test Accuracy: 52.88%


In [12]:
# Score the trained model on the held-out test split and keep whatever
# predict_and_evaluate returns (presumably the predicted labels — confirm in MLP.model).
predictions_test = model.predict_and_evaluate(X_test, Y_test)

 Test Accuracy: 49.70%
