In [1]:
%matplotlib inline
from IPython import display
import matplotlib.pyplot as plt

import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
import time

import sys
sys.path.append("../")
import d2lzh1981 as d2l

from tqdm import tqdm

print(torch.__version__)
print(torchvision.__version__)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

1.3.1
0.4.2


In [4]:
mnist_train = torchvision.datasets.FashionMNIST(root='/Users/nick/Documents/dataset/FashionMNIST2065', 
                                                train=True, download=False)
mnist_test = torchvision.datasets.FashionMNIST(root='/Users/nick/Documents/dataset/FashionMNIST2065', 
                                               train=False, download=False)

In [10]:
num_id = 0
for x, y in mnist_train:
    if num_id % 1000 == 0:
        print(num_id)
    x.save("/Users/nick/Documents/dataset/FashionMNIST_img/train/{}_{}.png".format(y, num_id))
    num_id += 1


0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000


In [11]:
num_id = 0
for x, y in mnist_test:
    if num_id % 1000 == 0:
        print(num_id)
    x.save("/Users/nick/Documents/dataset/FashionMNIST_img/test/{}_{}.png".format(y, num_id))
    num_id += 1


0
1000
2000
3000
4000
5000
6000
7000
8000
9000


In [2]:
mnist_train = torchvision.datasets.FashionMNIST(root='/Users/nick/Documents/dataset/FashionMNIST2065', 
                                                train=True, download=False, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='/Users/nick/Documents/dataset/FashionMNIST2065', 
                                               train=False, download=False, transform=transforms.ToTensor())

In [12]:
def vgg_block(num_convs, in_channels, out_channels): #卷积层个数，输入通道数，输出通道数
    blk = []
    for i in range(num_convs):
        if i == 0:
            blk.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        else:
            blk.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))
        blk.append(nn.ReLU())
    blk.append(nn.MaxPool2d(kernel_size=2, stride=2)) # 这里会使宽高减半
    return nn.Sequential(*blk)

def vgg(conv_arch, fc_features, fc_hidden_units=4096):
    net = nn.Sequential()
    # 卷积层部分
    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):
        # 每经过一个vgg_block都会使宽高减半
        net.add_module("vgg_block_" + str(i+1), vgg_block(num_convs, in_channels, out_channels))
    # 全连接层部分
    net.add_module("fc", nn.Sequential(d2l.FlattenLayer(),
                                 nn.Linear(fc_features, fc_hidden_units),
                                 nn.ReLU(),
                                 nn.Dropout(0.5),
                                 nn.Linear(fc_hidden_units, fc_hidden_units),
                                 nn.ReLU(),
                                 nn.Dropout(0.5),
                                 nn.Linear(fc_hidden_units, 10)
                                ))
    return net

def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # 如果没指定device就使用net的device
        device = list(net.parameters())[0].device 
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval() # 评估模式, 这会关闭dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train() # 改回训练模式
            else: # 自定义的模型, 3.13节之后不会用到, 不考虑GPU
                if('is_training' in net.__code__.co_varnames): # 如果有is_training这个参数
                    # 将is_training设置成False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() 
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() 
            n += y.shape[0]
    return acc_sum / n

In [4]:
batch_size = 100

if sys.platform.startswith('win'):
    num_workers = 0
else:
    num_workers = 4
    
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, 
                                         shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, 
                                        shuffle=False, num_workers=num_workers)

In [5]:
conv_arch = ((1, 1, 64), (1, 64, 128))
# 经过5个vgg_block, 宽高会减半5次, 变成 224/32 = 7
fc_features = 128 * 7 * 7 # c * w * h
fc_hidden_units = 4096 # 任意

# ratio = 8
# small_conv_arch = [(1, 1, 64//ratio), (1, 64//ratio, 128//ratio), (2, 128//ratio, 256//ratio), 
#                    (2, 256//ratio, 512//ratio), (2, 512//ratio, 512//ratio)]
# net = vgg(small_conv_arch, fc_features // ratio, fc_hidden_units // ratio)

In [13]:
net = vgg(conv_arch, fc_features, fc_hidden_units)

lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

net = net.to(device)
print("training on ", device)
loss = torch.nn.CrossEntropyLoss()

training on  cpu


In [16]:
for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
    for X, y in tqdm(train_iter):
        X = X.to(device)
        y = y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.cpu().item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
        n += y.shape[0]
        batch_count += 1
    test_acc = evaluate_accuracy(test_iter, net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))



  0%|          | 0/600 [00:00<?, ?it/s][A[A

  0%|          | 1/600 [00:01<15:08,  1.52s/it][A[A

  0%|          | 2/600 [00:02<12:29,  1.25s/it][A[A

  0%|          | 3/600 [00:02<10:43,  1.08s/it][A[A

  1%|          | 4/600 [00:03<09:23,  1.06it/s][A[A

  1%|          | 5/600 [00:04<08:26,  1.17it/s][A[A

  1%|          | 6/600 [00:04<07:46,  1.27it/s][A[A

  1%|          | 7/600 [00:05<07:18,  1.35it/s][A[A

  1%|▏         | 8/600 [00:06<07:01,  1.41it/s][A[A

  2%|▏         | 9/600 [00:06<06:48,  1.45it/s][A[A

  2%|▏         | 10/600 [00:07<06:36,  1.49it/s][A[A

  2%|▏         | 11/600 [00:07<06:31,  1.50it/s][A[A

  2%|▏         | 12/600 [00:08<06:25,  1.53it/s][A[A

  2%|▏         | 13/600 [00:09<06:19,  1.55it/s][A[A

  2%|▏         | 14/600 [00:09<06:17,  1.55it/s][A[A

  2%|▎         | 15/600 [00:10<06:13,  1.57it/s][A[A

  3%|▎         | 16/600 [00:11<06:12,  1.57it/s][A[A

  3%|▎         | 17/600 [00:11<06:12,  1.56it/s][A[A

  3%|▎  

 48%|████▊     | 288/600 [03:18<03:55,  1.32it/s][A[A

 48%|████▊     | 289/600 [03:19<03:59,  1.30it/s][A[A

 48%|████▊     | 290/600 [03:19<04:00,  1.29it/s][A[A

 48%|████▊     | 291/600 [03:20<03:56,  1.30it/s][A[A

 49%|████▊     | 292/600 [03:21<03:48,  1.35it/s][A[A

 49%|████▉     | 293/600 [03:22<03:42,  1.38it/s][A[A

 49%|████▉     | 294/600 [03:22<03:37,  1.41it/s][A[A

 49%|████▉     | 295/600 [03:23<03:35,  1.41it/s][A[A

 49%|████▉     | 296/600 [03:24<03:32,  1.43it/s][A[A

 50%|████▉     | 297/600 [03:24<03:34,  1.41it/s][A[A

 50%|████▉     | 298/600 [03:25<03:32,  1.42it/s][A[A

 50%|████▉     | 299/600 [03:26<03:32,  1.42it/s][A[A

 50%|█████     | 300/600 [03:26<03:30,  1.42it/s][A[A

 50%|█████     | 301/600 [03:27<03:28,  1.44it/s][A[A

 50%|█████     | 302/600 [03:28<03:30,  1.41it/s][A[A

 50%|█████     | 303/600 [03:28<03:25,  1.45it/s][A[A

 51%|█████     | 304/600 [03:29<03:28,  1.42it/s][A[A

 51%|█████     | 305/600 [03:30

 96%|█████████▌| 574/600 [06:28<00:17,  1.48it/s][A[A

 96%|█████████▌| 575/600 [06:28<00:17,  1.45it/s][A[A

 96%|█████████▌| 576/600 [06:29<00:16,  1.46it/s][A[A

 96%|█████████▌| 577/600 [06:30<00:15,  1.48it/s][A[A

 96%|█████████▋| 578/600 [06:30<00:14,  1.50it/s][A[A

 96%|█████████▋| 579/600 [06:31<00:14,  1.47it/s][A[A

 97%|█████████▋| 580/600 [06:32<00:13,  1.45it/s][A[A

 97%|█████████▋| 581/600 [06:32<00:12,  1.47it/s][A[A

 97%|█████████▋| 582/600 [06:33<00:12,  1.44it/s][A[A

 97%|█████████▋| 583/600 [06:34<00:11,  1.45it/s][A[A

 97%|█████████▋| 584/600 [06:34<00:11,  1.44it/s][A[A

 98%|█████████▊| 585/600 [06:35<00:10,  1.40it/s][A[A

 98%|█████████▊| 586/600 [06:36<00:10,  1.37it/s][A[A

 98%|█████████▊| 587/600 [06:37<00:09,  1.36it/s][A[A

 98%|█████████▊| 588/600 [06:37<00:08,  1.37it/s][A[A

 98%|█████████▊| 589/600 [06:38<00:08,  1.36it/s][A[A

 98%|█████████▊| 590/600 [06:39<00:07,  1.35it/s][A[A

 98%|█████████▊| 591/600 [06:40

epoch 1, loss 0.4186, train acc 0.845, test acc 0.901, time 475.2 sec




  0%|          | 1/600 [00:01<15:59,  1.60s/it][A[A

  0%|          | 2/600 [00:02<13:35,  1.36s/it][A[A

  0%|          | 3/600 [00:03<11:59,  1.21s/it][A[A

  1%|          | 4/600 [00:04<10:40,  1.08s/it][A[A

  1%|          | 5/600 [00:04<09:53,  1.00it/s][A[A

  1%|          | 6/600 [00:05<09:22,  1.06it/s][A[A

  1%|          | 7/600 [00:06<08:54,  1.11it/s][A[A

  1%|▏         | 8/600 [00:07<08:37,  1.14it/s][A[A

  2%|▏         | 9/600 [00:08<08:27,  1.16it/s][A[A

  2%|▏         | 10/600 [00:08<08:21,  1.18it/s][A[A

  2%|▏         | 11/600 [00:09<08:26,  1.16it/s][A[A

  2%|▏         | 12/600 [00:10<08:22,  1.17it/s][A[A

  2%|▏         | 13/600 [00:11<08:15,  1.18it/s][A[A

  2%|▏         | 14/600 [00:12<08:10,  1.19it/s][A[A

  2%|▎         | 15/600 [00:13<08:27,  1.15it/s][A[A

  3%|▎         | 16/600 [00:14<08:41,  1.12it/s][A[A

  3%|▎         | 17/600 [00:15<08:53,  1.09it/s][A[A

  3%|▎         | 18/600 [00:16<08:52,  1.09it/s][A[A


 48%|████▊     | 289/600 [05:14<05:12,  1.00s/it][A[A

 48%|████▊     | 290/600 [05:15<05:11,  1.01s/it][A[A

 48%|████▊     | 291/600 [05:16<05:10,  1.01s/it][A[A

 49%|████▊     | 292/600 [05:17<05:13,  1.02s/it][A[A

 49%|████▉     | 293/600 [05:18<05:13,  1.02s/it][A[A

 49%|████▉     | 294/600 [05:19<05:16,  1.03s/it][A[A

 49%|████▉     | 295/600 [05:20<05:21,  1.05s/it][A[A

 49%|████▉     | 296/600 [05:21<05:20,  1.05s/it][A[A

 50%|████▉     | 297/600 [05:22<05:15,  1.04s/it][A[A

 50%|████▉     | 298/600 [05:23<05:14,  1.04s/it][A[A

 50%|████▉     | 299/600 [05:24<05:19,  1.06s/it][A[A

 50%|█████     | 300/600 [05:25<05:38,  1.13s/it][A[A

 50%|█████     | 301/600 [05:27<05:34,  1.12s/it][A[A

 50%|█████     | 302/600 [05:28<05:27,  1.10s/it][A[A

 50%|█████     | 303/600 [05:29<05:22,  1.09s/it][A[A

 51%|█████     | 304/600 [05:30<05:19,  1.08s/it][A[A

 51%|█████     | 305/600 [05:31<05:18,  1.08s/it][A[A

 51%|█████     | 306/600 [05:32

KeyboardInterrupt: 

In [9]:
test_acc = evaluate_accuracy(test_iter, net)
test_acc

0.8979666666666667

In [17]:
for X, y in train_iter:
    X = X.to(device)
    predict_y = net(X)
    print(y)
    print(predict_y.argmax(dim=1))
    break

# predict_y.argmax(dim=1)

tensor([0, 3, 3, 5, 7, 2, 6, 9, 6, 7, 9, 4, 6, 8, 8, 5, 9, 3, 5, 6, 4, 2, 3, 5,
        0, 6, 7, 6, 4, 6, 3, 9, 3, 4, 3, 0, 7, 5, 0, 0, 1, 4, 9, 7, 4, 6, 6, 9,
        6, 7, 3, 5, 3, 1, 2, 8, 0, 3, 6, 8, 7, 4, 6, 9, 4, 1, 0, 8, 6, 0, 2, 3,
        9, 2, 9, 5, 1, 2, 1, 6, 3, 8, 3, 3, 2, 3, 4, 8, 6, 7, 4, 7, 8, 6, 8, 4,
        7, 7, 1, 2])
tensor([6, 4, 3, 5, 7, 2, 4, 9, 6, 7, 9, 4, 4, 8, 8, 5, 9, 3, 5, 6, 4, 4, 3, 5,
        0, 6, 7, 4, 4, 3, 3, 9, 3, 4, 3, 2, 7, 5, 0, 0, 1, 4, 9, 7, 4, 6, 6, 9,
        0, 7, 3, 5, 3, 1, 2, 8, 6, 3, 6, 8, 9, 4, 6, 9, 4, 1, 0, 8, 6, 6, 6, 3,
        9, 2, 9, 5, 1, 2, 1, 4, 3, 8, 3, 3, 2, 3, 4, 8, 6, 7, 4, 7, 8, 6, 8, 4,
        7, 7, 1, 2])
