<a href="https://colab.research.google.com/github/mengcius/pytorch-learn/blob/master/13_ResNet10%E5%AE%9E%E6%88%98CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 13_ResNet10实战CIFAR10

### CIFAR10

In [0]:
import  torch
from    torch.utils.data import DataLoader
from    torchvision import datasets
from    torchvision import transforms

batchsz = 128

# Preprocessing shared by the train and test splits: resize to 32x32,
# convert to a tensor, then normalise each of the three channels.
# NOTE(review): the mean/std values look like the commonly used ImageNet
# statistics rather than values computed on CIFAR-10 -- confirm intended.
cifar_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Training split (second positional argument True), downloaded into ./cifar
# if missing. The dataset name is then rebound to its DataLoader.
cifar_train = datasets.CIFAR10('cifar', True, transform=cifar_transform, download=True)
cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)

# Test split (second positional argument False).
cifar_test = datasets.CIFAR10('cifar', False, transform=cifar_transform, download=True)
cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)

# Pull a single batch to inspect its shape. The built-in next() is used
# instead of the iterator's .next() method, which is a Python 2 idiom and is
# not guaranteed to exist on the loader iterator.
x, label = next(iter(cifar_train))
print('x:', x.shape, 'label:', label.shape)  # x: torch.Size([128, 3, 32, 32]) label: torch.Size([128])

0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to cifar/cifar-10-python.tar.gz


170500096it [00:02, 71928917.16it/s]                               


Files already downloaded and verified
x: torch.Size([128, 3, 32, 32]) label: torch.Size([128])


### MNIST

In [0]:
import  torch
from    torch.utils.data import DataLoader
from    torchvision import datasets
from    torchvision import transforms
from    torch import nn, optim

batch_size = 200

# Training loader with a demonstration chain of augmentations.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('mnist_data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.RandomHorizontalFlip(),  # horizontal flip, applied at random
                       transforms.RandomVerticalFlip(),  # vertical flip, applied at random
                       transforms.RandomRotation(15),  # random angle in [-15, 15] degrees
                       # A two-element sequence is a (min, max) range: the angle is
                       # drawn from [90, 270] degrees, not picked from a list of choices.
                       transforms.RandomRotation([90, 270]),
                       transforms.Resize([32, 32]),  # rescale to 32x32
                       transforms.RandomCrop([28, 28]),  # random 28x28 crop
                       transforms.ToTensor(),  # convert the image to a torch Tensor
                       # transforms.Normalize((0.1307,), (0.3081,))  # normalisation (disabled)
                   ])),
    batch_size=batch_size, shuffle=True)  # batches of batch_size images, shuffled

# Test loader: no augmentation, only tensor conversion.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('mnist_data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=True)

# Pull a single batch to inspect its shape. The built-in next() is used
# instead of the iterator's .next() method, which is a Python 2 idiom and is
# not guaranteed to exist on the loader iterator.
x, label = next(iter(train_loader))
print('x:', x.shape, 'label:', label.shape)  # x: torch.Size([200, 1, 28, 28]) label: torch.Size([200])

  0%|          | 16384/9912422 [00:00<01:10, 141179.65it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:00, 31513003.04it/s]                          


Extracting mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


32768it [00:00, 462537.83it/s]
  2%|▏         | 40960/1648877 [00:00<00:04, 401402.56it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 8370367.83it/s]                          
8192it [00:00, 176438.13it/s]


Extracting mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!
x: torch.Size([200, 1, 28, 28]) label: torch.Size([200])


### lenet5.py
10 loss: 0.799041211605072
10 test acc: 0.6468

30 loss: 0.38023799657821655
30 test acc: 0.6309

80 loss: 0.13082881271839142
80 test acc: 0.601

In [0]:
import  torch
from    torch import nn
from    torch.nn import functional as F


class Lenet5(nn.Module):
    """
    LeNet-5 style classifier for 3-channel 32x32 images (e.g. CIFAR-10).

    Two conv + max-pool stages feed a three-layer fully connected head that
    emits 10 raw class scores (logits). No loss is defined here; callers are
    expected to apply one (e.g. nn.CrossEntropyLoss) outside the module.
    """
    def __init__(self):
        super().__init__()

        # Convolutional feature extractor: [b, 3, 32, 32] => [b, 16, 5, 5]
        conv_layers = [
            nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0),  # 3 -> 6 channels
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),  # 6 -> 16 channels
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
        ]
        self.conv_unit = nn.Sequential(*conv_layers)

        # Fully connected head operating on the flattened conv features.
        fc_layers = [
            nn.Linear(16*5*5, 120),  # 16*5*5 is the flattened conv output size
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),  # 10 output classes
        ]
        self.fc_unit = nn.Sequential(*fc_layers)

        # Push a dummy batch through the conv stack and print its output shape;
        # this is how the 16*5*5 input size of the first Linear was determined.
        probe = self.conv_unit(torch.randn(2, 3, 32, 32))
        print('conv out:', probe.shape)

    def forward(self, x):
        """
        Compute class logits for a batch of images.

        :param x: [b, 3, 32, 32]
        :return: logits of shape [b, 10] (no softmax applied)
        """
        features = self.conv_unit(x)  # [b, 3, 32, 32] => [b, 16, 5, 5]
        flat = features.view(x.size(0), 16*5*5)  # [b, 16, 5, 5] => [b, 400]
        return self.fc_unit(flat)  # [b, 400] => [b, 10]


def main():
    """Smoke test: build a Lenet5 and print the shape of its output for a dummy batch."""
    model = Lenet5()

    # Two random 3x32x32 images; the full network should map them to [2, 10].
    batch = torch.randn(2, 3, 32, 32)
    logits = model(batch)
    print('lenet out:', logits.shape)


if __name__ == '__main__':
    main()

conv out: torch.Size([2, 16, 5, 5])
lenet out: torch.Size([2, 10])


### resnet.py

ResNet10(ResNet18的缩减版)

残差块的通道按照论文上安排时，效果更好。但尺寸好像越大越好，可能cifar尺寸太小。

1、正常残差块训练：

10 loss: 0.1671239137649536
10 test acc: 0.7388

25 loss: 0.0216824971139431
25 test acc: 0.7451

80 loss: 0.0015973985427990556
80 test acc: 0.7414

100 loss: 0.0008557319524697959
100 test acc: 0.7452

2、残差块尺寸修改，self.blk4 = ResBlk(256, 512, stride=1)：

9 loss: 0.16980847716331482
9 test acc: 0.7335

31 loss: 0.03963826224207878
31 test acc: 0.7496

39 loss: 0.006199670024216175
39 test acc: 0.7516

100 loss: 0.00445919344201684
100 test acc: 0.7539

3、正常残差块+数据增强（裁剪和水平翻转）：

11 loss: 0.6354655623435974
11 test acc: 0.8089

30 loss: 0.22230911254882812
30 test acc: 0.8447

50 loss: 0.18418802320957184
50 test acc: 0.8512

4、残差块尺寸全修改为stride=1+数据增强（裁剪和水平翻转）:

11 loss: 0.5003647804260254
11 test acc: 0.8295

16 loss: 0.5709465742111206
16 test acc: 0.8421

22 loss: 0.24603290855884552
22 test acc: 0.8581

31 loss: 0.26374727487564087
31 test acc: 0.8662

50 loss: 0.1871185153722763
50 test acc: 0.8702


In [0]:
import  torch
from    torch import  nn
from    torch.nn import functional as F


# 残差基本单元BasicBlock（适用于浅层网络，中间有两层卷积层）
class ResBlk(nn.Module):
    """
    Basic residual block: two 3x3 conv + batch-norm layers plus a shortcut.

    The first convolution maps ``ch_in`` to ``ch_out`` channels and applies
    ``stride``; the second keeps channels and spatial size unchanged. The
    shortcut is the identity when the input already has the output's shape,
    and a 1x1 conv + batch-norm projection otherwise.
    """

    def __init__(self, ch_in, ch_out, stride=1):
        """
        :param ch_in: number of input channels
        :param ch_out: number of output channels
        :param stride: stride of the first 3x3 convolution (and of the 1x1
            projection shortcut when one is needed)
        """
        super(ResBlk, self).__init__()

        # Main path: the first conv may change channels and spatial size,
        # the second conv preserves both.
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)

        # Shortcut path. It must produce the same shape as the main path so the
        # two can be added element-wise. A projection is required whenever the
        # channel count OR the spatial size changes; checking only the channels
        # would break the addition for ch_in == ch_out with stride != 1.
        self.extra = nn.Sequential()
        if stride != 1 or ch_out != ch_in:
            # [b, ch_in, h, w] => [b, ch_out, h', w']
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out)
            )

    def forward(self, x):
        """
        :param x: [b, ch_in, h, w]
        :return: [b, ch_out, h', w'] where h', w' are reduced by ``stride``
        """
        # Main path: conv-bn-relu followed by conv-bn (ReLU is applied after the add).
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Shortcut connection and element-wise add, then the final activation.
        out = self.extra(x) + out
        out = F.relu(out)

        return out


# ResNet10，ResNet18的缩减版
class ResNet10(nn.Module):
    """
    Reduced ResNet: a conv stem, four residual blocks, global average pooling
    and a linear layer producing 10 class logits.
    """

    def __init__(self):
        super().__init__()

        # Stem: 3 -> 64 channels. With a 3x3 kernel, stride 3 and no padding,
        # a 32x32 input becomes 10x10.
        stem_conv = nn.Conv2d(3, 64, kernel_size=3, stride=3, padding=0)
        self.conv1 = nn.Sequential(stem_conv, nn.BatchNorm2d(64))

        # Four residual blocks, all constructed with stride=1.
        self.blk1 = ResBlk(64, 64, stride=1)    # [b, 64, h, w]  => [b, 64, h, w]
        self.blk2 = ResBlk(64, 128, stride=1)   # [b, 64, h, w]  => [b, 128, h, w]
        self.blk3 = ResBlk(128, 256, stride=1)  # [b, 128, h, w] => [b, 256, h, w]
        self.blk4 = ResBlk(256, 512, stride=1)  # [b, 256, h, w] => [b, 512, h, w]

        # Classifier applied after the features are pooled down to 1x1.
        self.outlayer = nn.Linear(512, 10)

    def forward(self, x):
        """
        :param x: batch of images, [b, 3, h, w]
        :return: class logits, [b, 10]
        """
        # Stem: [b, 3, h, w] => [b, 64, h*, w*]
        x = F.relu(self.conv1(x))

        # Residual stages: [b, 64, h*, w*] => [b, 512, h`, w`]
        for stage in (self.blk1, self.blk2, self.blk3, self.blk4):
            x = stage(x)

        # Global average pooling: [b, 512, h`, w`] => [b, 512, 1, 1]
        pooled = F.adaptive_avg_pool2d(x, [1, 1])

        # Flatten to [b, 512] and classify.
        flat = pooled.view(pooled.size(0), -1)
        return self.outlayer(flat)


def main():
    """Smoke test: print the output shapes of a single ResBlk and of the full ResNet10."""
    # A stride-2 block on a 32x32 input; expected output is [2, 128, 16, 16].
    block = ResBlk(64, 128, stride=2)
    block_in = torch.randn(2, 64, 32, 32)
    print('Resblock:', block(block_in).shape)

    # The full network on two 32x32 RGB images; expected output is [2, 10].
    images = torch.randn(2, 3, 32, 32)
    net = ResNet10()
    logits = net(images)
    print('resnet:', logits.shape)


if __name__ == '__main__':
    main()

Resblock: torch.Size([2, 128, 16, 16])
after pool: torch.Size([2, 512, 1, 1])
size: 2
after view: torch.Size([2, 512])
resnet: torch.Size([2, 10])


### main.py


In [0]:
import  torch
from    torch.utils.data import DataLoader
from    torchvision import datasets
from    torchvision import transforms
from    torch import nn, optim

# from    lenet5 import Lenet5
# from    resnet import ResNet10


def main():
    """
    Train ResNet10 on CIFAR-10 for 200 epochs.

    After every epoch this prints the loss of the last training batch and the
    accuracy over the whole test split. Runs on the GPU when one is available
    and falls back to the CPU otherwise.
    """
    batchsz = 128

    # Training data: random crop (with 4 px padding) and random horizontal
    # flip as augmentation, then tensor conversion and per-channel normalisation.
    # NOTE(review): the mean/std values look like the commonly used ImageNet
    # statistics rather than values computed on CIFAR-10 -- confirm intended.
    cifar_train = datasets.CIFAR10('cifar', True, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],  # per-channel (R, G, B) mean
                             std=[0.229, 0.224, 0.225])  # per-channel (R, G, B) std
    ]), download=True)
    cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)

    # Test data: no augmentation, same normalisation.
    cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]), download=True)
    cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)

    # Inspect one batch. The built-in next() is used instead of the iterator's
    # .next() method, which is a Python 2 idiom and is not guaranteed to exist.
    x, label = next(iter(cifar_train))
    print('x:', x.shape, 'label:', label.shape)
    # x: torch.Size([128, 3, 32, 32]) label: torch.Size([128])

    # Pick the device: use CUDA when present instead of crashing on CPU-only hosts.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # model = Lenet5().to(device)
    model = ResNet10().to(device)
    print(model)  # prints the modules registered in __init__

    # Cross-entropy expects raw logits (it applies log-softmax internally).
    criteon = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    for epoch in range(200):

        # ---- training ----
        model.train()  # layers such as BatchNorm behave differently in train/eval mode
        for batchidx, (x, label) in enumerate(cifar_train):
            x, label = x.to(device), label.to(device)  # x: [b, 3, 32, 32], label: [b]

            logits = model(x)  # [b, 10]
            loss = criteon(logits, label)

            # Backprop: clear stale gradients (they accumulate by default),
            # compute new ones, then take an optimiser step.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Only the loss of the epoch's last batch is reported, so treat it as a
        # rough indicator; the test accuracy below is the metric to watch.
        print(epoch, 'loss:', loss.item())

        # ---- evaluation ----
        model.eval()
        with torch.no_grad():  # no gradients / autograd graph needed for evaluation
            total_correct = 0
            total_num = 0
            for x, label in cifar_test:
                x, label = x.to(device), label.to(device)

                logits = model(x)  # [b, 10]
                pred = logits.argmax(dim=1)  # predicted class per sample, [b]

                # Element-wise comparison [b] vs [b], summed into a Python float.
                total_correct += torch.eq(pred, label).float().sum().item()
                total_num += x.size(0)  # number of samples seen so far

            acc = total_correct / total_num  # accuracy over the whole test split
            print(epoch, 'test acc:', acc)


if __name__ == '__main__':
    main()


Files already downloaded and verified
Files already downloaded and verified
x: torch.Size([128, 3, 32, 32]) label: torch.Size([128])
ResNet10(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (blk1): ResBlk(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential()
  )
  (blk2): ResBlk(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(1