In [None]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict
import torch
import numpy as np
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from d2l import torch as d2l
from torchvision import  transforms


# data = unpickle('./CIFAR10/cifar-10-batches-py/test_batch')
#
# data[b'data'][0] # array([158, 159, 165, ..., 124, 129, 110], dtype=uint8)

In [10]:
from torch import nn
class Residual(nn.Module):
    # 残差块
    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)


# ResNet-18
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.BatchNorm2d(64), nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))


def resnet_block(input_channels, num_channels, num_residuals,
                 first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(num_channels, num_channels))
    return blk


b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
b3 = nn.Sequential(*resnet_block(64, 128, 2))
b4 = nn.Sequential(*resnet_block(128, 256, 2))
b5 = nn.Sequential(*resnet_block(256, 512, 2))

net = nn.Sequential(b1, b2, b3, b4, b5,                     #Squential是个有序的容器，网络层将按照传入该容器的顺序依次加入，用[]来访问任意一层
                    nn.AdaptiveAvgPool2d((1, 1)),
                    nn.Flatten(), nn.Linear(512, 10))


In [19]:

# 下载并配置数据集
trans = transforms.Compose(
    [transforms.Resize((224,224)), transforms.ToTensor()])#和d2l不同 重塑为224
train_dataset = datasets.FashionMNIST( root=r'./datebase/',
     train=True, transform=trans, download=True)
test_dataset = datasets.FashionMNIST(
    root=r'./datebase/', train=False, transform=trans, download=True)

# 配置数据加载器
# batch_size = 64
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=64, shuffle=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./datebase/FashionMNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ./datebase/FashionMNIST\raw\train-images-idx3-ubyte.gz to ./datebase/FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./datebase/FashionMNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ./datebase/FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./datebase/FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./datebase/FashionMNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ./datebase/FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./datebase/FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./datebase/FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ./datebase/FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./datebase/FashionMNIST\raw



In [21]:
def train(net, train_iter, test_iter, epochs, lr, device):
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            # nn.init.xavier_uniform_(m.weight )#80.87%
            nn.init.kaiming_uniform_(m.weight)#
    net.apply(init_weights)

    # net.load_state_dict(torch.load("./resnet18-f37072fd.pth"),strict=False)

    print(f'Training on:[{device}]')
    net.to(device)#.to(device) 可以指定CPU 或者GPU
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    timer, num_batches = d2l.Timer(), len(train_iter)
    for epoch in range(epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()#在使用 pytorch 构建神经网络的时候，训练过程中会在程序上方添加一句model.train()，作用是 启用 batch normalization 和 dropout 。
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % (num_batches // 30) == 0 or i == num_batches - 1:
                print(f'Epoch: {epoch+1}, Step: {i+1}, Loss: {train_l:.4f}')
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        print(            f'Train Accuracy: {train_acc*100:.2f}%, Test Accuracy: {test_acc*100:.2f}%')
    print(f'{metric[2] * epochs / timer.sum():.1f} examples/sec '
          f'on: [{str(device)}]')
    torch.save(net.state_dict(),
               f".\\model\\\ResNet-18_CIFAR-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    # torch.save(net,f".\\model\\net_CIFAR-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")


In [23]:
# 下载并配置数据集
trans = transforms.Compose(
    [transforms.Resize((224,224)), transforms.ToTensor()])#和d2l不同 重塑为224
train_dataset = datasets.FashionMNIST( root=r'./datebase/',
     train=True, transform=trans, download=True)
test_dataset = datasets.FashionMNIST(
    root=r'./datebase/', train=False, transform=trans, download=True)
# 配置数据加载器
# batch_size = 64
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=64, shuffle=True)
epochs, lr = 20, 0.001#epochs,lr = 20, 0.001
device = torch.device("cuda:0")
train(net, train_loader, test_loader, epochs, lr, device)

Training on:[cuda:0]
Epoch: 1, Step: 31, Loss: 1.6733
Epoch: 1, Step: 62, Loss: 1.2108
Epoch: 1, Step: 93, Loss: 1.0021
Epoch: 1, Step: 124, Loss: 0.8934
Epoch: 1, Step: 155, Loss: 0.8192
Epoch: 1, Step: 186, Loss: 0.7670
Epoch: 1, Step: 217, Loss: 0.7277
Epoch: 1, Step: 248, Loss: 0.6888
Epoch: 1, Step: 279, Loss: 0.6638
Epoch: 1, Step: 310, Loss: 0.6365
Epoch: 1, Step: 341, Loss: 0.6182
Epoch: 1, Step: 372, Loss: 0.6006
Epoch: 1, Step: 403, Loss: 0.5815
Epoch: 1, Step: 434, Loss: 0.5674
Epoch: 1, Step: 465, Loss: 0.5527
Epoch: 1, Step: 496, Loss: 0.5393
Epoch: 1, Step: 527, Loss: 0.5276
Epoch: 1, Step: 558, Loss: 0.5158
Epoch: 1, Step: 589, Loss: 0.5050
Epoch: 1, Step: 620, Loss: 0.4947
Epoch: 1, Step: 651, Loss: 0.4845
Epoch: 1, Step: 682, Loss: 0.4773
Epoch: 1, Step: 713, Loss: 0.4716
Epoch: 1, Step: 744, Loss: 0.4654
Epoch: 1, Step: 775, Loss: 0.4597
Epoch: 1, Step: 806, Loss: 0.4528
Epoch: 1, Step: 837, Loss: 0.4466
Epoch: 1, Step: 868, Loss: 0.4414
Epoch: 1, Step: 899, Loss: 0.4

RuntimeError: Parent directory .\model\ does not exist.

In [14]:
lr, num_epochs, batch_size = 0.05, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())