In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision as tv
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch import optim

%matplotlib inline

In [2]:
train = tv.datasets.MNIST('/Users/lostleaf/dev/pytorch-data/mnist',
                          train=True,
                          download=True,
                          transform=transforms.ToTensor())
train_set = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True)

test = tv.datasets.MNIST('/Users/lostleaf/dev/pytorch-data/mnist',
                          train=False,
                          download=True,
                          transform=transforms.ToTensor())
test_set = torch.utils.data.DataLoader(test, batch_size=32, shuffle=True)

In [3]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
print(net)
params = list()
for n, p in net.named_parameters():
    print(n, p.size())
    

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
conv1.weight torch.Size([6, 1, 3, 3])
conv1.bias torch.Size([6])
conv2.weight torch.Size([16, 6, 3, 3])
conv2.bias torch.Size([16])
fc1.weight torch.Size([120, 400])
fc1.bias torch.Size([120])
fc2.weight torch.Size([84, 120])
fc2.bias torch.Size([84])
fc3.weight torch.Size([10, 84])
fc3.bias torch.Size([10])


In [28]:
criterion = nn.CrossEntropyLoss() # 交叉熵损失函数
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.5)

for epoch in range(20):  
    
    running_loss, num = 0.0, 0
    for i, data in enumerate(train_set, 0):
        
        # 输入数据
        inputs, labels = data
        
        # 梯度清零
        optimizer.zero_grad()
        
        # forward + backward 
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()   
        
        # 更新参数 
        optimizer.step()
        
        # 打印log信息
        running_loss += loss.item()
        num += inputs.size(0)

    print('Epoch %d Loss: %f' % (epoch + 1, running_loss / num))
print('Finished Training')

Epoch 1 Loss: 0.000007
Epoch 2 Loss: 0.000007
Epoch 3 Loss: 0.000007
Epoch 4 Loss: 0.000007
Epoch 5 Loss: 0.000006
Epoch 6 Loss: 0.000006
Epoch 7 Loss: 0.000006
Epoch 8 Loss: 0.000006
Epoch 9 Loss: 0.000006
Epoch 10 Loss: 0.000006
Epoch 11 Loss: 0.000006
Epoch 12 Loss: 0.000005
Epoch 13 Loss: 0.000005
Epoch 14 Loss: 0.000005
Epoch 15 Loss: 0.000005
Epoch 16 Loss: 0.000005
Epoch 17 Loss: 0.000005
Epoch 18 Loss: 0.000005
Epoch 19 Loss: 0.000005
Epoch 20 Loss: 0.000005
Finished Training


In [29]:
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in test_set:
        logits = net(inputs)
        _, pred = torch.max(logits, 1)
        total += len(labels)
        correct += torch.sum(labels == pred).item()
    print(correct, total)
print("Accuracy: ", correct / total)

9911 10000
Accuracy:  0.9911
