In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
#导入torch中的dataset模块
from torch.utils.data import Dataset
from tqdm import tqdm
from tqdm import trange
import tensorboardX

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10))


In [4]:
X = torch.rand(size=(2, 1, 28, 28), dtype=torch.float32)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__,'output shape: \t',X.shape)

Conv2d output shape: 	 torch.Size([2, 6, 28, 28])
Sigmoid output shape: 	 torch.Size([2, 6, 28, 28])
AvgPool2d output shape: 	 torch.Size([2, 6, 14, 14])
Conv2d output shape: 	 torch.Size([2, 16, 10, 10])
Sigmoid output shape: 	 torch.Size([2, 16, 10, 10])
AvgPool2d output shape: 	 torch.Size([2, 16, 5, 5])
Flatten output shape: 	 torch.Size([2, 400])
Linear output shape: 	 torch.Size([2, 120])
Sigmoid output shape: 	 torch.Size([2, 120])
Linear output shape: 	 torch.Size([2, 84])
Sigmoid output shape: 	 torch.Size([2, 84])
Linear output shape: 	 torch.Size([2, 10])


In [5]:
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('data/', train=True, download=False,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('data/', train=False, download=False,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=64, shuffle=True)

In [6]:
#训练模型
def train(net, train_loader, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in tqdm(train_loader):
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
        print('epoch %d, loss %.4f, train acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n))

In [7]:
#测试模型
def test(net, test_loader, device):
    net.eval()
    print("testing on ", device)
    acc_sum, n = 0.0, 0
    for X, y in tqdm(test_loader):
        X = X.to(device)
        y = y.to(device)
        acc_sum += (net(X).argmax(dim=1) == y).sum().cpu().item()
        n += y.shape[0]
    print('test acc %.3f' % (acc_sum / n))

In [8]:
train(net, train_loader, torch.optim.SGD(net.parameters(), lr=0.1), 'cuda', 10)
#train(net, train_loader, torch.optim.Adam(net.parameters(), lr=0.01), 'cuda', 10)

training on  cuda


100%|██████████| 938/938 [00:17<00:00, 55.03it/s]


epoch 1, loss 0.0361, train acc 0.102


100%|██████████| 938/938 [00:11<00:00, 80.58it/s]


epoch 2, loss 0.0361, train acc 0.106


100%|██████████| 938/938 [00:10<00:00, 86.44it/s]


epoch 3, loss 0.0360, train acc 0.106


100%|██████████| 938/938 [00:11<00:00, 82.94it/s]


epoch 4, loss 0.0360, train acc 0.110


100%|██████████| 938/938 [00:11<00:00, 82.75it/s]


epoch 5, loss 0.0359, train acc 0.119


100%|██████████| 938/938 [00:12<00:00, 77.73it/s]


epoch 6, loss 0.0321, train acc 0.268


100%|██████████| 938/938 [00:11<00:00, 81.17it/s]


epoch 7, loss 0.0164, train acc 0.644


100%|██████████| 938/938 [00:11<00:00, 81.85it/s]


epoch 8, loss 0.0103, train acc 0.791


100%|██████████| 938/938 [00:11<00:00, 81.91it/s]


epoch 9, loss 0.0069, train acc 0.871


100%|██████████| 938/938 [00:11<00:00, 79.76it/s]

epoch 10, loss 0.0052, train acc 0.901





In [9]:
test(net, test_loader, 'cuda') #测试模型 

testing on  cuda


100%|██████████| 157/157 [00:01<00:00, 100.65it/s]

test acc 0.913



