### 加载数据集

In [1]:
import sys
sys.path.append('../../')


from datasets.datasets import DatasetManager

# Build the project's dataset helper with a batch size of 128 and fetch the
# MNIST train/test loaders that the training and evaluation cells iterate over.
dataset_manager = DatasetManager(batch_size=128)
train_loader, test_loader = dataset_manager.mnist_dataset()

Using device: cpu
CIFAR-10 path: /home/shiroha/Code/Frontend/KAN/datasets/CIFAR10
MNIST path: /home/shiroha/Code/Frontend/KAN/datasets/mnist


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
# from torchmetrics import Accuracy
import torch.nn.functional as F
from models.models import ModelManager 

# Shared factory object: every model class below builds its layers through it.
model_manager = ModelManager()


class PCAKAN(nn.Module):
    """MNIST classifier: a PCALayer followed by two KANLinear layers.

    All layers are built through the notebook-level ``model_manager``.
    The layer sizes used are 28*28 -> 128 (PCALayer), 128 -> 128 and
    128 -> 10 (KANLinear).
    """

    def __init__(self):
        super().__init__()
        # PCA layer: takes the flattened 28*28 image and outputs 128 features.
        self.pca = model_manager.PCALayer(input_dim=28 * 28, output_dim=128)
        self.fourierkan1 = model_manager.KANLinear(128, 128)
        self.fourierkan2 = model_manager.KANLinear(128, 10)

    def forward(self, x):
        """Flatten each sample, apply the PCA layer, then both KAN layers."""
        # Collapse every non-batch dimension into one vector per sample.
        flat = x.view(x.shape[0], -1)
        reduced = self.pca(flat)
        hidden = self.fourierkan1(reduced)
        return self.fourierkan2(hidden)


class FourierKAN(nn.Module):
    """MNIST classifier made of two stacked ``model_manager.KANLinear`` layers.

    Layer sizes are 28*28 -> 128 -> 10, applied to the flattened image.
    """

    def __init__(self):
        super().__init__()
        self.fourierkan1 = model_manager.KANLinear(28 * 28, 128)
        self.fourierkan2 = model_manager.KANLinear(128, 10)

    def forward(self, x):
        """Flatten each sample and pass it through both KAN layers."""
        # Collapse every non-batch dimension into one vector per sample.
        flat = x.view(x.shape[0], -1)
        hidden = self.fourierkan1(flat)
        return self.fourierkan2(hidden)
    

class MLP(nn.Module):
    """MNIST classifier made of two stacked ``model_manager.MLPLinear`` layers.

    Layer sizes are 28*28 -> 128 -> 10, applied to the flattened image.
    """

    def __init__(self):
        super().__init__()
        self.mlp1 = model_manager.MLPLinear(28 * 28, 128)
        self.mlp2 = model_manager.MLPLinear(128, 10)

    def forward(self, x):
        """Flatten each sample and pass it through both MLP layers."""
        # Collapse every non-batch dimension into one vector per sample.
        flat = x.view(x.shape[0], -1)
        hidden = self.mlp1(flat)
        return self.mlp2(hidden)
    
class CNN(nn.Module):
    """MNIST classifier: two ``ConvLinear`` blocks followed by two ``MLPLinear`` layers."""

    def __init__(self):
        super().__init__()
        # Convolutional blocks; the first takes the single MNIST input channel.
        self.conv1 = model_manager.ConvLinear(
            in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1
        )
        self.conv2 = model_manager.ConvLinear(
            in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1
        )
        # Fully connected head. The 32 * 7 * 7 input size assumes each
        # ConvLinear block halves the 28x28 feature map (i.e. pooling happens
        # inside ConvLinear) -- TODO confirm against models.models.
        self.fc1 = model_manager.MLPLinear(32 * 7 * 7, 128)  # first fully connected layer
        self.fc2 = model_manager.MLPLinear(128, 10)  # second fully connected layer (10 classes)

    def forward(self, x):
        """Apply both conv blocks, flatten, then apply both fully connected layers."""
        features = self.conv2(self.conv1(x))
        # Flatten each sample's feature maps into a single vector.
        flat = features.view(features.shape[0], -1)
        hidden = self.fc1(flat)
        return self.fc2(hidden)
    

class Transformer(nn.Module):
    """MNIST classifier that treats each image row as one sequence token.

    Each 28-pixel row is embedded to 64 dimensions, passed through two
    ``model_manager.TransformerLayer`` blocks, mean-pooled over the sequence
    axis and classified by two ``MLPLinear`` layers.
    """

    def __init__(self):
        super().__init__()
        # Map every 28-pixel row to a 64-dimensional embedding.
        self.embedding = nn.Linear(28, 64)
        # Transformer blocks.
        self.transformer1 = model_manager.TransformerLayer(embed_dim=64, num_heads=4, ff_dim=256)
        self.transformer2 = model_manager.TransformerLayer(embed_dim=64, num_heads=4, ff_dim=256)
        # Fully connected head.
        self.fc1 = model_manager.MLPLinear(64, 128)  # first fully connected layer
        self.fc2 = model_manager.MLPLinear(128, 10)  # second fully connected layer (10 classes)

    def forward(self, x, mask=None):
        """Classify a batch of images; ``mask`` is forwarded to both transformer blocks."""
        # Drop the channel axis: (batch, 1, 28, 28) -> (batch, 28, 28).
        rows = x.squeeze(1)
        # Embed each row, then swap the first two axes:
        # (batch, 28, 64) -> (28, batch, 64).
        tokens = self.embedding(rows).permute(1, 0, 2)
        # Run both transformer blocks with the same mask.
        tokens = self.transformer1(tokens, mask)
        tokens = self.transformer2(tokens, mask)
        # Average over axis 0 (the 28 row tokens) to get one vector per sample.
        pooled = tokens.mean(dim=0)
        # Classification head.
        hidden = self.fc1(pooled)
        return self.fc2(hidden)

### 加载模型

In [None]:
# from models.models import ModelManager
from torchinfo import summary

# Pick the compute device (GPU when available), then build the model and its optimizer.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Alternative architectures -- uncomment exactly one `model = ...` line to switch:
# model = FourierKAN().to(device)  # Use 'cuda' for GPU
# model = MLP().to(device)  # Use 'cuda' for GPU
# model = CNN().to(device)  # Use 'cuda' for GPU
# model = Transformer().to(device)  # Use 'cuda' for GPU
model = PCAKAN().to(device)  # currently selected architecture


optimizer = optim.LBFGS(model.parameters(), lr=0.01)  # L-BFGS; learning rate reduced from 0.1 to 0.01

# Print the model structure with torchinfo's `summary`.
# NOTE(review): the recorded output of this cell shows a Transformer model,
# so it is stale relative to the PCAKAN selection above -- re-run the cell.
summary(model, input_size=(64, 1, 28, 28))  # input_size is (batch, channels, height, width); 64 is the batch size

Layer (type:depth-idx)                   Output Shape              Param #
Transformer                              [64, 10]                  --
├─Linear: 1-1                            [64, 28, 64]              1,856
├─TransformerLayer: 1-2                  [28, 64, 64]              --
│    └─MultiheadAttention: 2-1           [28, 64, 64]              16,640
│    └─Dropout: 2-2                      [28, 64, 64]              --
│    └─LayerNorm: 2-3                    [28, 64, 64]              128
│    └─Sequential: 2-4                   [28, 64, 64]              --
│    │    └─Linear: 3-1                  [28, 64, 256]             16,640
│    │    └─ReLU: 3-2                    [28, 64, 256]             --
│    │    └─Linear: 3-3                  [28, 64, 64]              16,448
│    └─Dropout: 2-5                      [28, 64, 64]              --
│    └─LayerNorm: 2-6                    [28, 64, 64]              128
├─TransformerLayer: 1-3                  [28, 64, 64]              -

### 训练(利用预训练模型可只执行第一步然后跳去评估部分)

In [4]:
from weights.weights import WeightManager
# Helper used below to save and load model checkpoints.
weight_manager = WeightManager()
# Run name: used as the checkpoint directory name and as the file-name prefix
# when saving, and as the directory to list in the folder-evaluation cell.
weight_name = 'test'

In [5]:
# Define the training loop
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    The optimizer is stepped with a closure, which is required by
    closure-based optimizers such as ``torch.optim.LBFGS`` (they may evaluate
    the loss several times per step) and is accepted by the other
    ``torch.optim`` optimizers as well.

    Args:
        model: The ``nn.Module`` to optimize; it is switched to train mode.
        device: Device every batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that also
            exposes ``len()`` and a ``.dataset`` attribute (used for logging).
        optimizer: Optimizer whose ``step`` accepts a closure.
        epoch: Epoch number, used only in the progress message.

    Returns:
        None. Progress is printed every 10 batches.
    """
    model.train()
    # Build the loss module once instead of on every closure evaluation.
    criterion = nn.CrossEntropyLoss()
    num_batches = len(train_loader)
    num_samples = len(train_loader.dataset)

    for batch_idx, (data, target) in enumerate(train_loader):
        # Move the batch first so the closure below plainly sees device tensors.
        data, target = data.to(device), target.to(device)

        def closure():
            # Re-evaluates loss and gradients; the optimizer may call this
            # more than once within a single step.
            optimizer.zero_grad()
            loss = criterion(model(data), target)
            loss.backward()
            return loss

        optimizer.step(closure)

        if batch_idx % 10 == 0:
            # Report the post-step loss without a backward pass, so logging
            # does not touch the gradients.
            with torch.no_grad():
                loss = criterion(model(data), target)
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{num_samples} ({100. * batch_idx / num_batches:.0f}%)]\tLoss: {loss.item():.6f}')

# Train for a single epoch (range(1, 2) yields only epoch 1) and save a
# checkpoint named '<weight_name>_<epoch>_checkpoint.pth' under the
# `weight_name` directory after each epoch.
for epoch in range(1, 2):
    train(model, device, train_loader, optimizer, epoch)
    weight_manager.save_model(model, optimizer, epoch=epoch, dir_name=weight_name, file_name=f'{weight_name}_{epoch}_checkpoint.pth')


KeyboardInterrupt: 

### 评估

#### 指定单个文件测试模式

In [None]:
# Restore model and optimizer from one specific checkpoint file before evaluating.
# NOTE(review): the directory and file name are hard-coded here; they match what
# the training cell writes when weight_name == 'test' and epoch == 1.
model, optimizer, start_epoch = weight_manager.load_model(model, optimizer, dir_name='test', file_name='test_1_checkpoint.pth', device=device)
def evaluate(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    The loss is accumulated as a per-sample *sum* and divided by the dataset
    size, so the printed value is a true per-sample average. (Summing the
    default batch-mean losses and dividing by the dataset size would
    under-report it by roughly the batch size.)

    Args:
        model: The ``nn.Module`` to evaluate; it is switched to eval mode.
        device: Device every batch is moved to before the forward pass.
        test_loader: Iterable of ``(data, target)`` batches that exposes a
            ``.dataset`` attribute supporting ``len()``.

    Returns:
        None. The summary line is printed.
    """
    model.eval()
    # reduction='sum' lets batches of different sizes be averaged correctly.
    criterion = nn.CrossEntropyLoss(reduction='sum')
    num_samples = len(test_loader.dataset)
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            # Predicted class is the index of the largest logit.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= num_samples
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{num_samples} ({100. * correct / num_samples:.0f}%)\n')

# Report test-set loss and accuracy for the checkpoint loaded above.
evaluate(model, device, test_loader)

RuntimeError: PytorchStreamReader failed locating file data/2: file not found

#### 指定文件夹全部权重文件测试

In [None]:
# Evaluate every checkpoint stored under the `weight_name` directory.
# NOTE(review): the recorded output of this cell shows that WeightManager has
# no `list_pth_files` attribute -- confirm the method exists in weights.weights.
pth_files = weight_manager.list_pth_files(dir_name=weight_name)

if pth_files:
    print("Available .pth files:")
    for pth_file in pth_files:
        print(f"- {pth_file}")
    for pth_file in pth_files:
        # Load each checkpoint into the model before evaluating it. The
        # previous loop rebound `model` to the file name and passed that
        # string to evaluate(), so no weights were ever loaded.
        # Assumes each entry is a file name relative to `weight_name`, as
        # load_model is called in the single-file cell -- TODO confirm.
        print(f"Evaluating {pth_file}")
        model, optimizer, _ = weight_manager.load_model(
            model, optimizer, dir_name=weight_name, file_name=pth_file, device=device
        )
        evaluate(model, device, test_loader)
else:
    print(f"No .pth files found for '{weight_name}'")


AttributeError: 'WeightManager' object has no attribute 'list_pth_files'