论文中有两种不同的 `residual block` 设计：
- 浅层网络时，采用的是两个 3x3 卷积
- 深层网络（50 层及以上）时，使用的是 `bottleneck` 结构，即三层卷积：(1x1, 64)、(3x3, 64)、(1x1, 256)

In [0]:
import numpy as np

import torch
import torch as t
from torch import  nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch import nn, optim

In [0]:
class ResBlk(nn.Module):
    """Basic residual block built from two 3x3 convolutions.

    Main path: conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm.
    The result is added to a shortcut of the input and the sum is passed
    through a final ReLU.

    Args:
        ch_in: Number of channels of the input feature map.
        ch_out: Number of channels produced by the block.
        stride: Stride of the first 3x3 convolution and of the projection
            shortcut. The default of 1 keeps the spatial size unchanged;
            larger values downsample inside the block.
    """

    def __init__(self, ch_in, ch_out, stride=1):
        super(ResBlk, self).__init__()

        # Main path: only the first convolution carries the stride.
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)

        # Shortcut path: an empty Sequential acts as the identity. A 1x1
        # projection is needed whenever the main path changes the channel
        # count or the spatial size, otherwise the addition in forward()
        # would see mismatched shapes.
        self.extra = nn.Sequential()
        if stride != 1 or ch_out != ch_in:
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out),
            )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.extra(x)

        return F.relu(out)




class ResNet18(nn.Module):
    """Small residual classifier: conv stem, two ``ResBlk`` stages, linear head.

    NOTE(review): despite the name, this network contains only two residual
    blocks; it is not the 18-layer architecture.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        input_size: Height and width of the (square) input images. The stem
            and both residual blocks keep the spatial size, so this value
            determines the number of features fed to the linear head.
            Defaults to 32.
    """

    def __init__(self, num_classes=10, input_size=32):
        super(ResNet18, self).__init__()

        # Stem: 3 input channels -> 16 feature channels, size-preserving.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
        )
        self.blk1 = ResBlk(16, 16)
        self.blk2 = ResBlk(16, 32)
        # No pooling anywhere: the head sees all 32 channels at full resolution.
        self.fc1 = nn.Linear(32 * input_size * input_size, num_classes)

    def forward(self, x):
        """Map a batch of images to a ``(batch, num_classes)`` tensor of logits."""
        x = F.relu(self.conv1(x))
        x = self.blk1(x)
        x = self.blk2(x)
        # flatten(1) keeps the batch dimension and, unlike view(size, -1),
        # also handles empty batches and non-contiguous activations.
        x = x.flatten(1)
        x = self.fc1(x)

        return x

In [3]:
# Number of samples per mini-batch for the train and test DataLoaders.
batch_size = 32

def data_tf(x):
    """Turn a channel-last image into a channel-first float32 tensor.

    The input is converted to a float32 array, divided by 255, shifted and
    scaled by 0.5, and its last axis is moved to the front.
    Assumes 8-bit pixel values, which then land in [-1, 1] - TODO confirm
    against the dataset.
    """
    pixels = np.array(x, dtype='float32')
    scaled = pixels / 255
    # Standardize with mean 0.5 and std 0.5.
    normalized = (scaled - 0.5) / 0.5
    # PyTorch expects the channel axis first: (H, W, C) -> (C, H, W).
    channel_first = np.transpose(normalized, (2, 0, 1))
    return t.from_numpy(channel_first)

# CIFAR-10 training split; data_tf converts every image to a tensor.
cifar_train = datasets.CIFAR10('cifar', train=True, transform=data_tf, download=True)
train_loader = DataLoader(cifar_train, batch_size=batch_size, shuffle=True)

# Held-out test split with the same preprocessing.
cifar_test = datasets.CIFAR10('cifar', train=False, transform=data_tf, download=True)
# Aggregate test metrics do not depend on sample order, so no shuffling.
test_loader = DataLoader(cifar_test, batch_size=batch_size, shuffle=False)

# Sanity-check the shape of one batch. Use the builtin next(): DataLoader
# iterators in current PyTorch releases do not expose a .next() method.
x, label = next(iter(train_loader))
print('x: ', x.shape, 'label:  ', label.shape)

0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to cifar/cifar-10-python.tar.gz


170500096it [00:09, 17147521.39it/s]                               


Files already downloaded and verified
x:  torch.Size([32, 3, 32, 32]) label:   torch.Size([32])


In [7]:
# Prefer the GPU, but fall back to the CPU so the notebook also runs on
# machines without CUDA instead of failing at .to(device).
device = t.device('cuda' if t.cuda.is_available() else 'cpu')
model = ResNet18().to(device)

# Cross-entropy over the logits, optimized with Adam at lr=1e-3.
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
model  # last expression of the cell: displays the module tree

ResNet18(
  (conv1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (blk1): ResBlk(
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential()
  )
  (blk2): ResBlk(
    (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential(
      (0): Conv2d(16, 32, ker

In [5]:
from train_util import train

# Train with the pre-packaged helper from train_util.
# NOTE(review): train() is defined outside this file; the positional 10 is
# presumably the number of epochs - confirm against train_util.
train(model, train_loader, test_loader, 10, optimizer, criterion)

Epoch [0]
Train loss: 1975.950994, Train acc: 0.562
Test loss: 315.332603, Test acc: 0.500
Time 00:00:14
-------------------------------------------------
Epoch [1]
Train loss: 1331.101244, Train acc: 0.500
Test loss: 289.735921, Test acc: 0.750
Time 00:00:28
-------------------------------------------------
Epoch [2]
Train loss: 1011.760998, Train acc: 0.875
Test loss: 296.277999, Test acc: 0.688
Time 00:00:42
-------------------------------------------------
Epoch [3]
Train loss: 737.742431, Train acc: 0.875
Test loss: 326.853501, Test acc: 0.688
Time 00:00:56
-------------------------------------------------
Epoch [4]
Train loss: 513.490917, Train acc: 1.000
Test loss: 376.211352, Test acc: 0.688
Time 00:01:10
-------------------------------------------------
Epoch [5]
Train loss: 357.727265, Train acc: 0.875
Test loss: 442.964479, Test acc: 0.688
Time 00:01:24
-------------------------------------------------
Epoch [6]
Train loss: 270.903974, Train acc: 0.875
Test loss: 487.335838,

In [0]:
# Number of passes over the training set.
num_epochs = 10

for epoch in range(num_epochs):

    # train
    model.train()
    running_loss = 0.0
    num_seen = 0
    for x, label in train_loader:
        x, label = x.to(device), label.to(device)
        out = model(x)
        loss = criterion(out, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns a per-batch mean; weight it by the batch
        # size so a smaller final batch does not skew the epoch average.
        running_loss += loss.item() * x.size(0)
        num_seen += x.size(0)

    # Report the mean loss over the whole epoch rather than the loss of the
    # last mini-batch, which is a noisy estimate (and is undefined when the
    # loader is empty).
    print('epoch: ', epoch, 'loss: ', running_loss / max(num_seen, 1))

    # test
    model.eval()
    total_correct = 0
    total_num = 0
    with t.no_grad():
        for x, label in test_loader:
            x, label = x.to(device), label.to(device)
            out = model(x)

            # Predicted class = index of the largest logit.
            pred = out.argmax(dim=1)
            total_correct += (pred == label).sum().item()
            total_num += x.size(0)

    # max(..., 1) avoids a ZeroDivisionError on an empty test loader.
    acc = total_correct / max(total_num, 1)

    print('epoch: ', epoch, 'acc: ', acc)
        