In [15]:
import pytorch_lightning as pl
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset, random_split
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torchmetrics import Accuracy

# 定义一个自定义的 Dataset
class MNISTDataset(Dataset):
    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        label = self.data.iloc[idx, 0]
        image = self.data.iloc[idx, 1:].values.astype(np.float32)
        
        # 返回一维的像素序列和标签
        return torch.tensor(image), torch.tensor(label)

# 定义一个 LightningDataModule
class MNISTDataModule(pl.LightningDataModule):
    def __init__(self, data_dir='', batch_size=64, num_workers=2):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers

    def setup(self, stage=None):
        # 数据集的定义
        dataset = MNISTDataset(csv_file=self.data_dir + 'train.csv')
        
        # 将数据集划分为训练集和验证集
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        self.train_dataset, self.val_dataset = random_split(dataset, [train_size, val_size])

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=self.num_workers, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers)



In [16]:
# 初始化数据模块
mnist_dm = MNISTDataModule(data_dir='', batch_size=64)

# 准备数据
mnist_dm.setup()

# 获取一个批次的训练数据
train_loader = mnist_dm.train_dataloader()
batch = next(iter(train_loader))

# 检查批次中的数据维度
images, labels = batch
print(f"Batch of images shape: {images.shape}")
print(f"Batch of labels shape: {labels.shape}")


Batch of images shape: torch.Size([64, 784])
Batch of labels shape: torch.Size([64])


In [17]:
# 定义模型
class MNISTModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(28 * 28, 128)
        self.layer_2 = nn.Linear(128, 10)
        self.train_accuracy = Accuracy(task='multiclass', num_classes=10)
        self.val_accuracy = Accuracy(task='multiclass', num_classes=10)
        self.test_accuracy = Accuracy(task='multiclass', num_classes=10)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # 展开图像为一维向量
        x = F.relu(self.layer_1(x))  # 应用 ReLU 激活函数
        x = self.layer_2(x)  # 第二个全连接层
        return F.log_softmax(x, dim=1)  # 使用 log_softmax 作为输出

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        
        # 计算准确率
        preds = torch.argmax(logits, dim=1)
        acc = self.train_accuracy(preds, y)
        
        # 记录 loss 和 acc 到进度条
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        
        # 计算准确率
        preds = torch.argmax(logits, dim=1)
        acc = self.val_accuracy(preds, y)
        
        # 记录 loss 和 acc 到进度条
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        
        return loss


    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)  # 使用 Adam 优化器

# 初始化数据模块
mnist_dm = MNISTDataModule(data_dir='', batch_size=64)

# 使用 Trainer 进行训练、验证和测试
trainer = pl.Trainer(max_epochs=10)




GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [18]:
# 训练模型
trainer.fit(MNISTModel(), mnist_dm)

trainer.validate(datamodule=mnist_dm)


  | Name           | Type               | Params | Mode 
--------------------------------------------------------------
0 | layer_1        | Linear             | 100 K  | train
1 | layer_2        | Linear             | 1.3 K  | train
2 | train_accuracy | MulticlassAccuracy | 0      | train
3 | val_accuracy   | MulticlassAccuracy | 0      | train
4 | test_accuracy  | MulticlassAccuracy | 0      | train
--------------------------------------------------------------
101 K     Trainable params
0         Non-trainable params
101 K     Total params
0.407     Total estimated model params size (MB)
5         Modules in train mode
0         Modules in eval mode


Epoch 9: 100%|██████████| 525/525 [00:12<00:00, 42.19it/s, v_num=12, train_loss_step=0.00901, train_acc_step=1.000, val_loss=0.254, val_acc=0.958, train_loss_epoch=0.120, train_acc_epoch=0.968] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 525/525 [00:12<00:00, 42.15it/s, v_num=12, train_loss_step=0.00901, train_acc_step=1.000, val_loss=0.254, val_acc=0.958, train_loss_epoch=0.120, train_acc_epoch=0.968]


Restoring states from the checkpoint path at /lightning_logs/version_12/checkpoints/epoch=9-step=5250.ckpt
Loaded model weights from the checkpoint at /lightning_logs/version_12/checkpoints/epoch=9-step=5250.ckpt


Validation DataLoader 0: 100%|██████████| 132/132 [00:01<00:00, 72.08it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         val_acc            0.9745237827301025
        val_loss            0.11915817856788635
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 0.11915817856788635, 'val_acc': 0.9745237827301025}]