#### 基于KMNIST数据集实现神经网络分类

In [45]:
import torch
import torch.nn as nn
import numpy as np
from torchvision import datasets
from torchvision.transforms import ToTensor
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader  # 数据加载器

In [46]:
# Hyperparameters and run configuration.
SEED = 42          # fixed seed for PyTorch's default random number generator
LR = 3e-4          # learning rate
epochs = 200       # number of passes over the training set
BATCH_SIZE = 512   # samples per mini-batch
# Seed the generator so that weight initialisation, data shuffling and dropout
# masks start from the same state on every "Restart & Run All".
# (Bit-exact reproducibility is still not guaranteed on every backend.)
torch.manual_seed(SEED)
# Use Apple's Metal backend (MPS) when it is available, otherwise the CPU.
DEVICE = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {DEVICE}")

Using device: mps


In [47]:

# Data preparation: fetch KMNIST into ./data (downloaded on first use) and wrap
# the train and test splits in mini-batch loaders. ToTensor converts every
# image into a float tensor.
train_data = datasets.KMNIST(root="./data", train=True, download=True, transform=ToTensor())
test_data = datasets.KMNIST(root="./data", train=False, download=True, transform=ToTensor())
# NOTE: the misspelled name `trian_dl` is kept on purpose because the training
# loop iterates over the loader under exactly this name.
trian_dl = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# Accuracy does not depend on sample order, so the evaluation loader keeps the
# dataset order fixed instead of reshuffling it on every pass.
test_dl = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [48]:
# Model construction: network layers, loss function and optimizer.
def _build_mlp():
    """Assemble the 784 -> 1024 -> 512 -> 256 -> 128 -> 64 -> 10 classifier.

    Every hidden Linear layer is followed by ReLU and, where a rate is listed,
    by Dropout with that rate (used against overfitting). The last Linear
    layer produces 10 raw class scores.
    """
    widths = [28 * 28, 1024, 512, 256, 128, 64]
    # Dropout rate after each hidden layer; None means "no Dropout here".
    drop_rates = [0.5, 0.5, 0.3, None, 0.1]
    stack = []
    for fan_in, fan_out, rate in zip(widths[:-1], widths[1:], drop_rates):
        stack.append(nn.Linear(fan_in, fan_out))
        stack.append(nn.ReLU())
        if rate is not None:
            stack.append(nn.Dropout(rate))
    stack.append(nn.Linear(widths[-1], 10))
    return nn.Sequential(*stack)


model = _build_mlp().to(DEVICE)
# Cross-entropy loss; it is applied directly to the raw scores of the model.
loss_fn = nn.CrossEntropyLoss()
# Adam: adaptive learning rates, converges quickly and suits most tasks; it is
# robust to hyperparameters (a learning rate of 3e-4 is usually enough), though
# on some tasks it may generalise worse than a well-tuned SGD.
# Plain SGD and a ReduceLROnPlateau scheduler were tried as alternatives.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)

In [49]:
# Best validation accuracy seen so far. It has to be initialised once, before
# the epoch loop: resetting it inside the loop makes `acc > best_acc` true on
# every epoch, so the "best" checkpoint would simply be the latest one.
best_acc = 0.0
for epoch in range(epochs):
    # ---- training phase (Dropout active) ----
    model.train()
    for img, clzz in trian_dl:
        # Flatten each image into a 1-D vector for the fully connected layers.
        # view() is enough for contiguous tensors; a non-contiguous tensor
        # (e.g. after a transpose or slice) needs reshape() or contiguous().
        img = img.view(img.size(0), -1).to(DEVICE)
        clzz = clzz.to(DEVICE)
        # Forward pass. Call the module itself rather than .forward() so that
        # registered hooks are executed as well.
        pre_clzz = model(img)
        # Loss for this mini-batch.
        loss = loss_fn(pre_clzz, clzz)
        # Backward pass: clear stale gradients, then back-propagate.
        optimizer.zero_grad()
        loss.backward()
        # Parameter update.
        optimizer.step()

    # ---- validation phase (Dropout disabled, no gradient tracking) ----
    model.eval()
    total, correct = 0, 0
    with torch.no_grad():
        for img, clzz in test_dl:
            img, clzz = img.view(img.size(0), -1).to(DEVICE), clzz.to(DEVICE)
            outputs = model(img)
            # Index of the largest score per row is the predicted class.
            _, predicted = outputs.max(1)
            total += clzz.size(0)
            correct += predicted.eq(clzz).sum().item()
    acc = 100 * correct / total
    # NOTE: `loss` is the loss of the last training mini-batch of this epoch,
    # not an average over the epoch.
    print(f'Epoch {epoch}: Loss={loss.item():.4f}, Val Acc={acc:.2f}%')

    # Save a checkpoint only when the validation accuracy improves.
    if acc > best_acc:
        best_acc = acc
        torch.save(model.state_dict(), 'KMNIST_best_model.pth')

    print(f'Epoch:{epoch} Loss: {loss.item()}')

Epoch 0: Loss=0.6570, Val Acc=64.53%
Epoch:0 Loss: 0.6569967865943909
Epoch 1: Loss=0.5611, Val Acc=75.88%
Epoch:1 Loss: 0.561084508895874
Epoch 2: Loss=0.2911, Val Acc=81.18%
Epoch:2 Loss: 0.2911463677883148
Epoch 3: Loss=0.3439, Val Acc=84.33%
Epoch:3 Loss: 0.3439289629459381
Epoch 4: Loss=0.2146, Val Acc=86.31%
Epoch:4 Loss: 0.21455204486846924
Epoch 5: Loss=0.2102, Val Acc=87.58%
Epoch:5 Loss: 0.2101944237947464
Epoch 6: Loss=0.2652, Val Acc=88.98%
Epoch:6 Loss: 0.26515865325927734
Epoch 7: Loss=0.1591, Val Acc=89.50%
Epoch:7 Loss: 0.15912383794784546
Epoch 8: Loss=0.1554, Val Acc=89.28%
Epoch:8 Loss: 0.1553899496793747
Epoch 9: Loss=0.1573, Val Acc=90.08%
Epoch:9 Loss: 0.1572674959897995
Epoch 10: Loss=0.0888, Val Acc=90.68%
Epoch:10 Loss: 0.08877026289701462
Epoch 11: Loss=0.0865, Val Acc=90.77%
Epoch:11 Loss: 0.08650527149438858
Epoch 12: Loss=0.0673, Val Acc=91.17%
Epoch:12 Loss: 0.06726304441690445
Epoch 13: Loss=0.0753, Val Acc=91.12%
Epoch:13 Loss: 0.07532978057861328
Epoch 

In [50]:
# Final evaluation on the test split.
test_dl = DataLoader(test_data, batch_size=BATCH_SIZE)

# Put the model into evaluation mode explicitly so that Dropout is disabled
# even when this cell is run on its own, e.g. after an interrupted training
# epoch that left the model in train() mode.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradient tracking during evaluation
    for data, target in test_dl:
        # Flatten each image to one row per sample.
        data = data.view(data.size(0), -1).to(DEVICE)
        target = target.to(DEVICE)  # labels, shape [batch_size]
        output = model(data)  # class scores, shape [batch_size, 10]
        # torch.max along dim 1 returns (max values, their indices); the index
        # is the predicted class.
        _, predicted = torch.max(output, 1)
        total += target.size(0)  # size(0) is equivalent to shape[0]
        correct += (predicted == target).sum().item()

print(f'Accuracy: {correct/total*100}%')

Accuracy: 92.96%


#### 初步结论

- 将全连接层从 3 层增加到 5～6 层后，模型评估准确率从 80%+ 上升至约 90%；尤其在加入 Dropout（训练时随机丢弃部分神经元的输出）后提升明显，此后继续增加层数，准确率无明显提升。
- 训练轮次从 50 增加到 200，准确率从 90% 上升至 92%，提升较小。
- 使用固定学习率时，损失下降到一定程度后容易反复波动，无法继续收敛或收敛不明显。
- 可能由于训练样本分辨率较低，暂时无法进一步提升准确率。
- 批次大小从 256 提升至 512 后，在最初几轮训练中损失下降得明显更快。