In [None]:
import torch

In [None]:
print(torch.__version__)

2.6.0+cu124


In [None]:
print(torch.cuda.is_available())

True


In [39]:
import torch
import torchvision
from torch import nn
from torchvision import datasets, transforms
from torch.utils.tensorboard import SummaryWriter
import time

writer = SummaryWriter(log_dir='../logs/network')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class CIFAR10_model(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model(x)
        return x


class vgg16_model:
    def __init__(self):
        super().__init__()
        self.vgg16_false = torchvision.models.vgg16(pretrained=False)
        self.vgg16_true = torchvision.models.vgg16(weights=torchvision.models.VGG16_Weights.DEFAULT)

    def modify(self):
        self.vgg16_false.classifier[6] = nn.Linear(4096, 10)
        self.vgg16_true.classifier.add_module('7', nn.Linear(1000, 10))


class MyDataloader(object):

    def __init__(self):
        dataset_transform = transforms.Compose([
            transforms.ToTensor()
        ])
        train_set = datasets.CIFAR10(root='../dataset', train=True, transform=dataset_transform, download=True)
        test_set = datasets.CIFAR10(root='../dataset', train=False, transform=dataset_transform, download=True)

        self.train_loader = torch.utils.data.DataLoader(
            dataset=train_set,
            batch_size=64,
            shuffle=True,
            num_workers=0,
            drop_last=False
        )
        self.test_loader = torch.utils.data.DataLoader(
            dataset=test_set,
            batch_size=64,
            shuffle=True,
            num_workers=0,
            drop_last=True
        )


def train(model, dataloader, epochs=5):
    model.train()
    # 随机梯度下降优化器
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
    for epoch in range(epochs):
        running_loss = 0.0
        for data in dataloader.train_loader:
            imgs, targets = data
            # 将数据移动到GPU
            imgs = imgs.to(device)
            targets = targets.to(device)

            outputs = model(imgs)
            loss = nn.CrossEntropyLoss()(outputs, targets)
            # 通过反向传播与优化器更新参数
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # 损失函数累加
            running_loss += loss.item()
        print(f"epoch: {epoch + 1}, train_loss: {running_loss / len(dataloader.train_loader)}")
        writer.add_scalar('train_loss', running_loss / len(dataloader.train_loader), epoch + 1)
        # 测试
        test(model, dataloader, epoch)
        save_model(model, f"../models/cifar10_model_{epoch + 1}.pth")


def test(model, dataloader, epoch):
    model.eval()
    total_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in dataloader.test_loader:
            imgs, targets = data
            # 将数据移动到GPU
            imgs = imgs.to(device)
            targets = targets.to(device)

            outputs = model(imgs)
            loss = nn.CrossEntropyLoss()(outputs, targets)
            accuracy = (outputs.argmax(dim=1) == targets).sum()
            total_loss += loss.item()
            total_accuracy += accuracy
    writer.add_scalar('test_loss', total_loss / len(dataloader.test_loader), epoch + 1)
    writer.add_scalar('test_accuracy', 100 * total_accuracy / len(dataloader.test_loader.dataset), epoch + 1)
    print(f"test_loss: {total_loss / len(dataloader.test_loader)}, test_accuracy: {100 * total_accuracy / len(dataloader.test_loader.dataset)}%")

def save_model(model, path):
    # 1.保存模型结构+模型参数
    # torch.save(models, path)
    # 2.保存模型参数
    torch.save(model.state_dict(), path)


# 加载模型函数
def load_model(model, path):
    try:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # 1.加载模型结构+模型参数
        # models = torch.load(path)
        # 2.加载模型参数
        model.load_state_dict(torch.load(path, map_location=device))
        print(f"模型加载成功！device={device}")
    except Exception as e:
        print(f"模型加载失败: {e}")


def cifar10_model_train():
    # 创建模型
    model = CIFAR10_model().to(device)
    # 加载数据
    dataloader = MyDataloader()
    # 训练
    train(model, dataloader, epochs=20)
    print("train complete")

if __name__ == '__main__':

    print(f"current device: {device}")
    if torch.cuda.is_available():
        print(f"GPU name: {torch.cuda.get_device_name(0)}")

    start_time = time.time()
    cifar10_model_train()
    end_time = time.time()
    print(f"time: {end_time - start_time}")

    # input = torch.randn(64, 3, 32, 32)
    # output = models(input)
    # writer.add_graph(models, input)
    # writer.close()
    # print(models)


current device: cuda
GPU name: Tesla T4
epoch: 1, train_loss: 1.4473065484667678
test_loss: 1.2962258943380454, test_accuracy: 54.5%
epoch: 2, train_loss: 1.0996741218792507
test_loss: 1.0981420278549194, test_accuracy: 62.47999954223633%
epoch: 3, train_loss: 0.9668883092110724
test_loss: 0.9887493611910404, test_accuracy: 65.93999481201172%
epoch: 4, train_loss: 0.8737570137319053
test_loss: 0.9768761449899429, test_accuracy: 66.77999877929688%
epoch: 5, train_loss: 0.8049978618426701
test_loss: 0.9956574619580538, test_accuracy: 66.94999694824219%
epoch: 6, train_loss: 0.7327681240599478
test_loss: 1.0015552093585331, test_accuracy: 66.29999542236328%
epoch: 7, train_loss: 0.6704838242753387
test_loss: 0.9937529426354629, test_accuracy: 67.48999786376953%
epoch: 8, train_loss: 0.6266573231162318
test_loss: 1.020987161841148, test_accuracy: 67.20999908447266%
epoch: 9, train_loss: 0.5716447586484272
test_loss: 1.04306511198863, test_accuracy: 67.29999542236328%
epoch: 10, train_loss:

In [None]:
!nvidia-smi

Mon Aug  4 12:53:03 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   69C    P0             32W /   70W |     244MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
!pwd

/content
