In [28]:
import gzip
import pickle
import torch
from matplotlib import pyplot as plt
# 打开文件
with gzip.open('../data/mnist/mnist.pkl.gz', 'rb') as f:
    # 使用pickle加载数据
    ((x_train,y_train),(x_valid,y_valid), test_set) = pickle.load(f, encoding='latin1')

In [29]:
(x_train,y_train,x_valid,y_valid) = map(torch.tensor, (x_train,y_train,x_valid,y_valid))

In [30]:
x_train.shape,y_train.shape,x_valid.shape,y_valid.shape

(torch.Size([50000, 784]),
 torch.Size([50000]),
 torch.Size([10000, 784]),
 torch.Size([10000]))

In [31]:
from torch import nn
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28,256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.out = nn.Linear(64, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """
        前向传播
        :param x:【64 X 28 X 28】
        :return:
        """
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        x = self.out(x)
        return x

In [32]:
model = Net()
model

Net(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (out): Linear(in_features=64, out_features=10, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
  (softmax): Softmax(dim=1)
)

In [33]:
for name, para in model.named_parameters():
    print(name,para.shape)

fc1.weight torch.Size([256, 784])
fc1.bias torch.Size([256])
fc2.weight torch.Size([128, 256])
fc2.bias torch.Size([128])
fc3.weight torch.Size([64, 128])
fc3.bias torch.Size([64])
out.weight torch.Size([10, 64])
out.bias torch.Size([10])


In [34]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
bs = 64
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)

In [35]:
from torch import optim
"定义激活函数"
optim = optim.Adam(model.parameters(),lr=0.001)

In [36]:
import torch.nn.functional as F
loss_func = F.cross_entropy
for epoch in range(20):
    for batch_idx, (x, y) in enumerate(train_dl):
        output = model(x)
        # 计算损失rsdf
        loss = loss_func(output, y)
        # 梯度下降
        loss.backward()
        optim.step()
        optim.zero_grad()
        if batch_idx % 100 == 0:
            print(f'Epoch {epoch}, Batch {batch_idx}, Loss: {loss.item()}')
    total_num,total_cor = 0,0
    for x,y in valid_dl:
        output = model(x)
        _, predicted = torch.max(output.data, 1)  # 获取预测的类别
        total_num += y.size(0)  # 累加样本总数
        total_cor += (predicted == y).sum().item()  # 累加预测正确的样本数
    print(f'Accuracy: {total_cor/total_num*100:.2f}%')
        # 计算准确率


Epoch 0, Batch 0, Loss: 2.303443431854248
Epoch 0, Batch 100, Loss: 0.6949055790901184
Epoch 0, Batch 200, Loss: 0.5451064109802246
Epoch 0, Batch 300, Loss: 0.31943637132644653
Epoch 0, Batch 400, Loss: 0.24207109212875366
Epoch 0, Batch 500, Loss: 0.21811063587665558
Epoch 0, Batch 600, Loss: 0.40940171480178833
Epoch 0, Batch 700, Loss: 0.44048959016799927
Accuracy: 92.52%
Epoch 1, Batch 0, Loss: 0.18826018273830414
Epoch 1, Batch 100, Loss: 0.2606894075870514
Epoch 1, Batch 200, Loss: 0.435675710439682
Epoch 1, Batch 300, Loss: 0.21994651854038239
Epoch 1, Batch 400, Loss: 0.20616388320922852
Epoch 1, Batch 500, Loss: 0.4820975959300995
Epoch 1, Batch 600, Loss: 0.08763349801301956
Epoch 1, Batch 700, Loss: 0.157307967543602
Accuracy: 94.29%
Epoch 2, Batch 0, Loss: 0.2292371541261673
Epoch 2, Batch 100, Loss: 0.24579384922981262
Epoch 2, Batch 200, Loss: 0.30122891068458557
Epoch 2, Batch 300, Loss: 0.13454151153564453
Epoch 2, Batch 400, Loss: 0.23077796399593353
Epoch 2, Batch 50