实验计划

模型架构mlp+softmax多分类

1.实现pytorch版,并且稍微调参

2.复刻myTorch版

3.基于myTorch做消融实验(优化器，初始化策略)

In [1]:
# 用于记录每个单元格的运行时间

try:
    %load_ext autotime
except:
    !pip install ipython-autotime
    %load_ext autotime

time: 0 ns (started: 2024-12-10 20:21:19 +08:00)


In [2]:
#把项目目录添加至环境变量
import os
import sys
# 获取当前工作目录
current_dir = os.getcwd()

# 获取上级目录
parent_dir = os.path.abspath(os.path.join(current_dir, '..'))
print(parent_dir)
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)



/root/autodl-tmp/myTorch
time: 1.85 ms (started: 2024-11-07 17:27:47 +08:00)


In [None]:
# 基于pytorch导入数据
import torch
from torchvision import datasets, transforms
from torch.utils.data import random_split
import MyTorch.Dataloader
data_dir = "../../dataset"
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

time: 1.97 s (started: 2024-11-07 17:27:49 +08:00)


In [4]:

data_transforms = transforms.Compose([
    transforms.ToTensor(),  # 转换为Tensor
    transforms.Normalize((0.5,), (0.5,)),  # 归一化，均值为0.5，标准差为0.5
])
# MNIST dataset
train_data_full = datasets.MNIST(data_dir, train=True, download=True, transform=data_transforms)
test_data = datasets.MNIST(data_dir, train=False, download=True, transform=data_transforms)

print(f"shape:{train_data_full.data.shape}")
print(f"mean:{train_data_full.data.float().mean()}")#pytorch dataset的transform只会在取数据进入model时调用，故此时print仍然是原始数据

val_size = int(0.1 * len(train_data_full))
train_data, val_data = random_split(train_data_full, [len(train_data_full) - val_size, val_size])
print("train_data:", len(train_data))
print("val_data:", len(val_data))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"device:{device}")


shape:torch.Size([60000, 28, 28])
mean:33.31842041015625
train_data: 54000
val_data: 6000
device:cuda
time: 160 ms (started: 2024-11-07 17:27:51 +08:00)


In [5]:
#定义固定参数
input_size = 28 * 28
num_classes = 10
#定义超参
batch_size = 64
hidden_size = 128
lr=0.1
momentum = 0.9
epochs = 10

time: 1.34 ms (started: 2024-11-07 17:27:54 +08:00)


In [6]:
# 构建model
from torch import nn
import torch.nn.init as init
layer1=nn.Linear(input_size, hidden_size)
init.kaiming_normal_(layer1.weight)
layer2=nn.Linear(hidden_size, num_classes)
init.kaiming_normal_(layer2.weight)
model=torch.nn.Sequential(layer1,nn.ReLU(),layer2,nn.LogSoftmax(dim=1)).to(device)


time: 120 ms (started: 2024-11-07 17:27:56 +08:00)


In [7]:
def val_eval(model,val_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.view(-1, 28 * 28)
            images,labels=images.to(device),labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += len(labels)
            correct += (predicted == labels).sum().item()
    acc=correct/total
    print(f"acc:{acc}")
    return acc

time: 3.23 ms (started: 2024-11-07 17:27:58 +08:00)


In [8]:
try:
    import optuna
except ModuleNotFoundError:
    !pip install optuna
    import optuna

try:
    import tqdm
except ModuleNotFoundError:
    !pip install tqdm
    import tqdm

time: 158 ms (started: 2024-11-07 17:27:59 +08:00)


In [9]:
def obj(trails):
    kwargs = {
        "hidden_size": trails.suggest_int("hidden_size", 32, 128),
        "batch_size": trails.suggest_int("batch_size", 64, 64),
        "lr": trails.suggest_float("lr", 0.001, 0.1),
        "momentum": trails.suggest_float("momentum", 0.5, 0.9),
        epochs: trails.suggest_int("epochs", 2,10 ),
    }
    layer1=nn.Linear(input_size, kwargs["hidden_size"])
    init.kaiming_normal_(layer1.weight)
    layer2=nn.Linear(kwargs["hidden_size"], num_classes)
    init.kaiming_normal_(layer2.weight)
    model=torch.nn.Sequential(layer1,nn.ReLU(),layer2,nn.LogSoftmax(dim=1)).to(device)

    optimizer = torch.optim.SGD(model.parameters(), lr=kwargs["lr"],weight_decay=0.01,momentum=kwargs["momentum"])
    criterion = nn.NLLLoss()
    train_loader =torch.utils.data.DataLoader(train_data, batch_size=kwargs["batch_size"], shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=kwargs["batch_size"], shuffle=False)
    for epoch in range(kwargs[epochs]):
        for i, (images, labels) in enumerate(train_loader):
            images=images.view(-1,28*28)
            images,labels=images.to(device),labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (i + 1) % 256 == 0:
                print('Epcho [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch, kwargs[epochs], i + 1, len(train_loader), loss.item()))
    return val_eval(model,val_loader)

time: 4.84 ms (started: 2024-11-07 17:28:02 +08:00)


In [10]:
from tqdm import tqdm

class TqdmCallback(object):
    def __init__(self, n_trials):
        self.n_trials = n_trials
        self.pbar = tqdm(total=n_trials)

    def __call__(self, study, trial):
        self.pbar.update(1)

n_trials = 10
tqdm_callback = TqdmCallback(n_trials)
study = optuna.create_study(direction='maximize')
study.optimize(obj, n_trials=n_trials, callbacks=[tqdm_callback])

  0%|          | 0/10 [00:00<?, ?it/s][I 2024-11-07 17:28:05,031] A new study created in memory with name: no-name-f841aeb3-fdcc-4d4d-b3ea-4b95729f65e6


Epcho [0/5], Step [256/844], Loss: 0.8550
Epcho [0/5], Step [512/844], Loss: 0.7175
Epcho [0/5], Step [768/844], Loss: 0.6255
Epcho [1/5], Step [256/844], Loss: 0.7711
Epcho [1/5], Step [512/844], Loss: 0.8772
Epcho [1/5], Step [768/844], Loss: 0.2677
Epcho [2/5], Step [256/844], Loss: 0.8278
Epcho [2/5], Step [512/844], Loss: 0.6913
Epcho [2/5], Step [768/844], Loss: 0.7306
Epcho [3/5], Step [256/844], Loss: 0.4388
Epcho [3/5], Step [512/844], Loss: 0.7705
Epcho [3/5], Step [768/844], Loss: 0.3491
Epcho [4/5], Step [256/844], Loss: 0.5291
Epcho [4/5], Step [512/844], Loss: 0.5115
Epcho [4/5], Step [768/844], Loss: 1.2833


[I 2024-11-07 17:28:39,898] Trial 0 finished with value: 0.7641666666666667 and parameters: {'hidden_size': 58, 'batch_size': 64, 'lr': 0.09363608837545294, 'momentum': 0.7751116211930444, 'epochs': 5}. Best is trial 0 with value: 0.7641666666666667.
 10%|█         | 1/10 [00:34<05:13, 34.87s/it]

acc:0.7641666666666667
Epcho [0/4], Step [256/844], Loss: 0.4676
Epcho [0/4], Step [512/844], Loss: 0.2855
Epcho [0/4], Step [768/844], Loss: 0.4662
Epcho [1/4], Step [256/844], Loss: 0.2430
Epcho [1/4], Step [512/844], Loss: 0.3370
Epcho [1/4], Step [768/844], Loss: 0.3057
Epcho [2/4], Step [256/844], Loss: 0.4552
Epcho [2/4], Step [512/844], Loss: 0.1579
Epcho [2/4], Step [768/844], Loss: 0.1917
Epcho [3/4], Step [256/844], Loss: 0.2461
Epcho [3/4], Step [512/844], Loss: 0.1661
Epcho [3/4], Step [768/844], Loss: 0.1831


[I 2024-11-07 17:29:07,231] Trial 1 finished with value: 0.9466666666666667 and parameters: {'hidden_size': 65, 'batch_size': 64, 'lr': 0.017213312890302247, 'momentum': 0.6947199922197161, 'epochs': 4}. Best is trial 1 with value: 0.9466666666666667.
 20%|██        | 2/10 [01:02<04:03, 30.44s/it]

acc:0.9466666666666667
Epcho [0/8], Step [256/844], Loss: 0.3906
Epcho [0/8], Step [512/844], Loss: 0.2535
Epcho [0/8], Step [768/844], Loss: 0.2174
Epcho [1/8], Step [256/844], Loss: 0.1446
Epcho [1/8], Step [512/844], Loss: 0.2367
Epcho [1/8], Step [768/844], Loss: 0.3320
Epcho [2/8], Step [256/844], Loss: 0.2609
Epcho [2/8], Step [512/844], Loss: 0.1452
Epcho [2/8], Step [768/844], Loss: 0.1895
Epcho [3/8], Step [256/844], Loss: 0.1076
Epcho [3/8], Step [512/844], Loss: 0.2536
Epcho [3/8], Step [768/844], Loss: 0.4376
Epcho [4/8], Step [256/844], Loss: 0.3061
Epcho [4/8], Step [512/844], Loss: 0.2792
Epcho [4/8], Step [768/844], Loss: 0.2072
Epcho [5/8], Step [256/844], Loss: 0.1160
Epcho [5/8], Step [512/844], Loss: 0.2877
Epcho [5/8], Step [768/844], Loss: 0.1291
Epcho [6/8], Step [256/844], Loss: 0.1961
Epcho [6/8], Step [512/844], Loss: 0.1996
Epcho [6/8], Step [768/844], Loss: 0.1210
Epcho [7/8], Step [256/844], Loss: 0.4517
Epcho [7/8], Step [512/844], Loss: 0.2635
Epcho [7/8]

[I 2024-11-07 17:30:01,436] Trial 2 finished with value: 0.943 and parameters: {'hidden_size': 118, 'batch_size': 64, 'lr': 0.06783496989477338, 'momentum': 0.5609595281963902, 'epochs': 8}. Best is trial 1 with value: 0.9466666666666667.
 30%|███       | 3/10 [01:56<04:49, 41.29s/it]

acc:0.943
Epcho [0/4], Step [256/844], Loss: 0.4721
Epcho [0/4], Step [512/844], Loss: 0.3789
Epcho [0/4], Step [768/844], Loss: 0.5104
Epcho [1/4], Step [256/844], Loss: 0.4100
Epcho [1/4], Step [512/844], Loss: 0.3160
Epcho [1/4], Step [768/844], Loss: 0.2389
Epcho [2/4], Step [256/844], Loss: 0.4462
Epcho [2/4], Step [512/844], Loss: 0.6645
Epcho [2/4], Step [768/844], Loss: 0.3019
Epcho [3/4], Step [256/844], Loss: 0.4261
Epcho [3/4], Step [512/844], Loss: 0.5145
Epcho [3/4], Step [768/844], Loss: 0.2485


[I 2024-11-07 17:30:29,098] Trial 3 finished with value: 0.8401666666666666 and parameters: {'hidden_size': 68, 'batch_size': 64, 'lr': 0.04602792461265085, 'momentum': 0.8814288980357559, 'epochs': 4}. Best is trial 1 with value: 0.9466666666666667.
 40%|████      | 4/10 [02:24<03:35, 35.91s/it]

acc:0.8401666666666666
Epcho [0/6], Step [256/844], Loss: 0.2775
Epcho [0/6], Step [512/844], Loss: 0.3278
Epcho [0/6], Step [768/844], Loss: 0.2856
Epcho [1/6], Step [256/844], Loss: 0.1650
Epcho [1/6], Step [512/844], Loss: 0.3033
Epcho [1/6], Step [768/844], Loss: 0.4171
Epcho [2/6], Step [256/844], Loss: 0.2250
Epcho [2/6], Step [512/844], Loss: 0.2740
Epcho [2/6], Step [768/844], Loss: 0.2036
Epcho [3/6], Step [256/844], Loss: 0.1410
Epcho [3/6], Step [512/844], Loss: 0.2874
Epcho [3/6], Step [768/844], Loss: 0.4322
Epcho [4/6], Step [256/844], Loss: 0.1674
Epcho [4/6], Step [512/844], Loss: 0.1273
Epcho [4/6], Step [768/844], Loss: 0.2394
Epcho [5/6], Step [256/844], Loss: 0.2204
Epcho [5/6], Step [512/844], Loss: 0.1722
Epcho [5/6], Step [768/844], Loss: 0.1675


[I 2024-11-07 17:31:10,156] Trial 4 finished with value: 0.9446666666666667 and parameters: {'hidden_size': 94, 'batch_size': 64, 'lr': 0.025435377018633028, 'momentum': 0.698413791161292, 'epochs': 6}. Best is trial 1 with value: 0.9466666666666667.
 50%|█████     | 5/10 [03:05<03:08, 37.77s/it]

acc:0.9446666666666667
Epcho [0/5], Step [256/844], Loss: 0.3119
Epcho [0/5], Step [512/844], Loss: 0.4392
Epcho [0/5], Step [768/844], Loss: 0.3356
Epcho [1/5], Step [256/844], Loss: 0.1610
Epcho [1/5], Step [512/844], Loss: 0.2271
Epcho [1/5], Step [768/844], Loss: 0.2455
Epcho [2/5], Step [256/844], Loss: 0.2995
Epcho [2/5], Step [512/844], Loss: 0.1524
Epcho [2/5], Step [768/844], Loss: 0.1936
Epcho [3/5], Step [256/844], Loss: 0.1663
Epcho [3/5], Step [512/844], Loss: 0.1625
Epcho [3/5], Step [768/844], Loss: 0.1508
Epcho [4/5], Step [256/844], Loss: 0.1937
Epcho [4/5], Step [512/844], Loss: 0.1477
Epcho [4/5], Step [768/844], Loss: 0.3790


[I 2024-11-07 17:31:44,548] Trial 5 finished with value: 0.9356666666666666 and parameters: {'hidden_size': 127, 'batch_size': 64, 'lr': 0.02271215096315039, 'momentum': 0.7951040092645951, 'epochs': 5}. Best is trial 1 with value: 0.9466666666666667.
 60%|██████    | 6/10 [03:39<02:26, 36.62s/it]

acc:0.9356666666666666
Epcho [0/8], Step [256/844], Loss: 0.4787
Epcho [0/8], Step [512/844], Loss: 0.3965
Epcho [0/8], Step [768/844], Loss: 0.4197
Epcho [1/8], Step [256/844], Loss: 0.3096
Epcho [1/8], Step [512/844], Loss: 0.3284
Epcho [1/8], Step [768/844], Loss: 0.3242
Epcho [2/8], Step [256/844], Loss: 0.1396
Epcho [2/8], Step [512/844], Loss: 0.4161
Epcho [2/8], Step [768/844], Loss: 0.2931
Epcho [3/8], Step [256/844], Loss: 0.3109
Epcho [3/8], Step [512/844], Loss: 0.4898
Epcho [3/8], Step [768/844], Loss: 0.2310
Epcho [4/8], Step [256/844], Loss: 0.2457
Epcho [4/8], Step [512/844], Loss: 0.3042
Epcho [4/8], Step [768/844], Loss: 0.2673
Epcho [5/8], Step [256/844], Loss: 0.2814
Epcho [5/8], Step [512/844], Loss: 0.3122
Epcho [5/8], Step [768/844], Loss: 0.2271
Epcho [6/8], Step [256/844], Loss: 0.2466
Epcho [6/8], Step [512/844], Loss: 0.2513
Epcho [6/8], Step [768/844], Loss: 0.2046
Epcho [7/8], Step [256/844], Loss: 0.1257
Epcho [7/8], Step [512/844], Loss: 0.3649
Epcho [7/8]

[I 2024-11-07 17:32:38,466] Trial 6 finished with value: 0.9075 and parameters: {'hidden_size': 108, 'batch_size': 64, 'lr': 0.0601126226051278, 'momentum': 0.794311341290086, 'epochs': 8}. Best is trial 1 with value: 0.9466666666666667.
 70%|███████   | 7/10 [04:33<02:06, 42.27s/it]

acc:0.9075
Epcho [0/4], Step [256/844], Loss: 0.5187
Epcho [0/4], Step [512/844], Loss: 0.3345
Epcho [0/4], Step [768/844], Loss: 0.2243
Epcho [1/4], Step [256/844], Loss: 0.2330
Epcho [1/4], Step [512/844], Loss: 0.4510
Epcho [1/4], Step [768/844], Loss: 0.2642
Epcho [2/4], Step [256/844], Loss: 0.1962
Epcho [2/4], Step [512/844], Loss: 0.1550
Epcho [2/4], Step [768/844], Loss: 0.2110
Epcho [3/4], Step [256/844], Loss: 0.3027
Epcho [3/4], Step [512/844], Loss: 0.2255
Epcho [3/4], Step [768/844], Loss: 0.1527


[I 2024-11-07 17:33:05,954] Trial 7 finished with value: 0.9066666666666666 and parameters: {'hidden_size': 51, 'batch_size': 64, 'lr': 0.043069068294051355, 'momentum': 0.5138260989824033, 'epochs': 4}. Best is trial 1 with value: 0.9466666666666667.
 80%|████████  | 8/10 [05:00<01:15, 37.57s/it]

acc:0.9066666666666666
Epcho [0/10], Step [256/844], Loss: 0.7148
Epcho [0/10], Step [512/844], Loss: 0.5283
Epcho [0/10], Step [768/844], Loss: 0.2874
Epcho [1/10], Step [256/844], Loss: 0.2689
Epcho [1/10], Step [512/844], Loss: 0.4881
Epcho [1/10], Step [768/844], Loss: 0.3077
Epcho [2/10], Step [256/844], Loss: 0.1962
Epcho [2/10], Step [512/844], Loss: 0.3512
Epcho [2/10], Step [768/844], Loss: 0.3501
Epcho [3/10], Step [256/844], Loss: 0.3353
Epcho [3/10], Step [512/844], Loss: 0.3423
Epcho [3/10], Step [768/844], Loss: 0.2761
Epcho [4/10], Step [256/844], Loss: 0.2394
Epcho [4/10], Step [512/844], Loss: 0.2994
Epcho [4/10], Step [768/844], Loss: 0.3579
Epcho [5/10], Step [256/844], Loss: 0.2878
Epcho [5/10], Step [512/844], Loss: 0.3021
Epcho [5/10], Step [768/844], Loss: 0.2282
Epcho [6/10], Step [256/844], Loss: 0.4267
Epcho [6/10], Step [512/844], Loss: 0.2937
Epcho [6/10], Step [768/844], Loss: 0.3970
Epcho [7/10], Step [256/844], Loss: 0.3169
Epcho [7/10], Step [512/844], L

[I 2024-11-07 17:34:14,219] Trial 8 finished with value: 0.9376666666666666 and parameters: {'hidden_size': 52, 'batch_size': 64, 'lr': 0.0030028901572230005, 'momentum': 0.7941979293066601, 'epochs': 10}. Best is trial 1 with value: 0.9466666666666667.
 90%|█████████ | 9/10 [06:09<00:47, 47.16s/it]

acc:0.9376666666666666
Epcho [0/4], Step [256/844], Loss: 0.2099
Epcho [0/4], Step [512/844], Loss: 0.3566
Epcho [0/4], Step [768/844], Loss: 0.1543
Epcho [1/4], Step [256/844], Loss: 0.2817
Epcho [1/4], Step [512/844], Loss: 0.3819
Epcho [1/4], Step [768/844], Loss: 0.3301
Epcho [2/4], Step [256/844], Loss: 0.2877
Epcho [2/4], Step [512/844], Loss: 0.4215
Epcho [2/4], Step [768/844], Loss: 0.2570
Epcho [3/4], Step [256/844], Loss: 0.3878
Epcho [3/4], Step [512/844], Loss: 0.2558
Epcho [3/4], Step [768/844], Loss: 0.3839


[I 2024-11-07 17:34:41,483] Trial 9 finished with value: 0.9215 and parameters: {'hidden_size': 85, 'batch_size': 64, 'lr': 0.05818322257556641, 'momentum': 0.7642612519505405, 'epochs': 4}. Best is trial 1 with value: 0.9466666666666667.
100%|██████████| 10/10 [06:36<00:00, 41.02s/it]

acc:0.9215
time: 6min 36s (started: 2024-11-07 17:28:05 +08:00)


In [11]:
#固定参数
input_size = 28 * 28
num_classes = 10
batch_size = 64
#固定掉超参数
lr= 0.05818322257556641
hidden_size=85
momentum=0.7642612519505405
epochs=4

time: 1.8 ms (started: 2024-11-07 17:35:42 +08:00)


In [12]:
#使用完整训练集训练并测试pytorch版
import torch.nn as nn
train_loader_full = torch.utils.data.DataLoader(train_data_full, batch_size=batch_size, shuffle=True)
layer1=nn.Linear(input_size, hidden_size)
init.kaiming_normal_(layer1.weight)
layer2=nn.Linear(hidden_size, num_classes)
init.kaiming_normal_(layer2.weight)
model=torch.nn.Sequential(layer1,nn.ReLU(),layer2,nn.LogSoftmax(dim=1)).to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
criterion = nn.NLLLoss()

for epoch in range(epochs):
    model.train()
    for images, labels in train_loader_full:
        images = images.view(-1, 28 * 28).to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [1/4], Loss: 0.0818
Epoch [2/4], Loss: 0.2056
Epoch [3/4], Loss: 0.1554
Epoch [4/4], Loss: 0.0102
time: 29.3 s (started: 2024-11-07 17:35:44 +08:00)


In [13]:
#使用测试集测试
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)
acc=val_eval(model,test_loader)
print(f"final acc:{acc}")

acc:0.9583
final acc:0.9583
time: 1.11 s (started: 2024-11-07 17:36:16 +08:00)


In [45]:
#基于myTorch进行构建
#基于myTorch的dataset导入数据
import MyTorch.Dataloader
import numpy as np
from torchvision import datasets,transforms
from PIL  import Image
import importlib
importlib.reload(MyTorch.Dataloader)
importlib.reload(MyTorch.my_nn)
importlib.reload(MyTorch.loss_func)
importlib.reload(MyTorch.myTensor)
#把dataset转为numpy的形式
class trans2normalizedNumpy(object):
    def __call__(self, img):
        np_img=np.array(img)
        np_img_flat=np_img.flatten()
        #归一化到
        np_img_flat = np_img_flat.astype(float)
        np_img_flat/=255
        return np_img_flat
myTensor_transforms = trans2normalizedNumpy()
train_data_full = datasets.MNIST(data_dir, train=True, download=True, transform=myTensor_transforms)
test_data = datasets.MNIST(data_dir, train=False, download=True, transform=myTensor_transforms)

train_dataloader = MyTorch.Dataloader.DataLoader(train_data_full, batch_size=batch_size, shuffle=True)
test_data_loader = MyTorch.Dataloader.DataLoader(test_data, batch_size=batch_size, shuffle=False)

time: 57 ms (started: 2024-11-07 18:14:47 +08:00)


In [46]:
from MyTorch import my_nn
import MyTorch.loss_func
import importlib
importlib.reload(MyTorch)
#构建model
model=my_nn.Sequential(
    my_nn.MyLinearLayer(input_size, hidden_size,initial_policy="He"),
    my_nn.ReLU(),
    my_nn.MyLinearLayer(hidden_size, num_classes,initial_policy="He"),
    my_nn.LogSoftmax(dim=1)
)

# optimizer = MyTorch.optim.SGD(model.parameters, lr=lr, momentum=momentum)
optimizer=MyTorch.optim.Adam(model.parameters,lr=lr)
criterion = MyTorch.loss_func.NLLLoss()
epochs=10
for epoch in range(epochs):
    for images, labels in train_dataloader:
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.data.astype(float):.4f}')

Epoch [1/10], Loss: 0.5016
Epoch [2/10], Loss: 0.4083
Epoch [3/10], Loss: 0.6258
Epoch [4/10], Loss: 0.4067
Epoch [5/10], Loss: 0.3322
Epoch [6/10], Loss: 0.5318
Epoch [7/10], Loss: 0.5812
Epoch [8/10], Loss: 0.4305
Epoch [9/10], Loss: 0.6970
Epoch [10/10], Loss: 0.5107
time: 3min 45s (started: 2024-11-07 18:14:49 +08:00)


In [47]:
#使用测试集测试
correct = 0
total = 0
for images, labels in test_data_loader:
    outputs = model(images)
    predicted = np.argmax(outputs.data, axis=1,keepdims=True)
    total += len(labels.data)
    correct += (predicted == labels.data).sum()
acc=correct/total
print(f"final acc:{acc}")

final acc:0.8581730769230769
time: 781 ms (started: 2024-11-07 18:18:37 +08:00)
