In [19]:
%matplotlib inline
from IPython import display
import matplotlib.pyplot as plt

import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
import time

import sys
sys.path.append("../")
import d2lzh1981 as d2l

from tqdm import tqdm

print(torch.__version__)
print(torchvision.__version__)

1.3.1
0.4.2


In [8]:
mnist_train = torchvision.datasets.FashionMNIST(root='/Users/nick/Documents/dataset/FashionMNIST2065', train=True, download=False, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='/Users/nick/Documents/dataset/FashionMNIST2065', train=False, download=False, transform=transforms.ToTensor())

In [23]:
batch_size = 100

if sys.platform.startswith('win'):
    num_workers = 0
else:
    num_workers = 4
    
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [21]:
class FlattenLayer(torch.nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x): # x shape: (batch, *, *, ...)
        return x.view(x.shape[0], -1)

def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # 如果没指定device就使用net的device
        device = list(net.parameters())[0].device 
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval() # 评估模式, 这会关闭dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train() # 改回训练模式
            else: # 自定义的模型, 3.13节之后不会用到, 不考虑GPU
                if('is_training' in net.__code__.co_varnames): # 如果有is_training这个参数
                    # 将is_training设置成False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() 
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() 
            n += y.shape[0]
    return acc_sum / n

In [16]:
num_inputs, num_outputs, num_hiddens = 784, 10, 256
    
net = nn.Sequential(
        FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs), 
        )
    
for params in net.parameters():
    nn.init.normal_(params, mean=0, std=0.01)

In [17]:
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

In [24]:
num_epochs = 5

for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
    for X, y in tqdm(train_iter):
        y_hat = net(X)
        l = loss(y_hat, y).sum()

        # 梯度清零
        if optimizer is not None:
            optimizer.zero_grad()
        elif params is not None and params[0].grad is not None:
            for param in params:
                param.grad.data.zero_()

        l.backward()
        if optimizer is None:
            sgd(params, lr, batch_size)
        else:
            optimizer.step()  # “softmax回归的简洁实现”一节将用到


        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
        n += y.shape[0]
    test_acc = evaluate_accuracy(test_iter, net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
          % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

100%|██████████| 600/600 [00:02<00:00, 281.09it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

epoch 1, loss 0.0039, train acc 0.855, test acc 0.873


100%|██████████| 600/600 [00:02<00:00, 289.78it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

epoch 2, loss 0.0035, train acc 0.869, test acc 0.842


100%|██████████| 600/600 [00:02<00:00, 261.83it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

epoch 3, loss 0.0033, train acc 0.877, test acc 0.857


100%|██████████| 600/600 [00:02<00:00, 270.33it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

epoch 4, loss 0.0031, train acc 0.884, test acc 0.871


100%|██████████| 600/600 [00:02<00:00, 274.71it/s]


epoch 5, loss 0.0030, train acc 0.889, test acc 0.894


In [29]:
for X, y in test_iter:
    s = net(X).argmax(dim=1)
    break
s

tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5, 0, 9, 5, 5, 7, 9, 1, 0, 6, 4, 3, 1, 4, 8,
        2, 3, 0, 2, 4, 4, 5, 3, 4, 6, 0, 8, 5, 2, 1, 6, 6, 7, 9, 5, 9, 2, 7, 3,
        0, 3, 3, 3, 7, 2, 2, 0, 0, 8, 3, 3, 5, 6, 5, 5, 0, 2, 0, 0, 4, 1, 0, 1,
        6, 3, 1, 4, 4, 6, 1, 5, 1, 3, 5, 7, 9, 7, 1, 7, 9, 7, 7, 3, 2, 9, 3, 6,
        6, 1, 1, 8])