In [1]:
import torch  
from torch import nn  
from torch.nn import functional as F
import sys
sys.path.append('../') 
import utils as d2l

## 一、Dense Block

1. 定义一个卷积块，“BN-激活-卷积”的架构

In [33]:
def conv_block(input_channels, num_channels):
    """Build one "BN -> ReLU -> 3x3 conv" unit.

    Args:
        input_channels: Channel count of the incoming feature map.
        num_channels: Channel count produced by the convolution.

    Returns:
        An ``nn.Sequential`` holding BatchNorm2d, ReLU and Conv2d, in that order.
    """
    norm = nn.BatchNorm2d(input_channels)
    activation = nn.ReLU()
    # kernel_size=3 with padding=1 (and the default stride of 1) keeps H and W unchanged.
    conv = nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1)
    return nn.Sequential(norm, activation, conv)

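- A `conv_block` changes the number of channels (`input_channels` -> `num_channels`) but keeps the height and width unchanged, because the 3x3 convolution uses `padding=1` with the default stride of 1.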

2. 定义DenseBlock:连接输入和输出

In [95]:
class DenseBlock(nn.Module):
    """A dense block: a chain of ``conv_block`` units with concatenated outputs.

    Every unit receives the channel-wise concatenation of the block input and
    the outputs of all earlier units, and contributes ``growth_rate`` channels.

    Args:
        num_convs: Number of ``conv_block`` units in the block.
        input_channels: Channel count of the tensor fed to the block.
        growth_rate: Channel count each unit outputs.
    """

    def __init__(self, num_convs, input_channels, growth_rate):
        super().__init__()
        # Unit ``idx`` sees the original input plus the outputs of ``idx`` earlier units.
        units = [
            conv_block(input_channels + idx * growth_rate, growth_rate)
            for idx in range(num_convs)
        ]
        self.net = nn.Sequential(*units)

    def forward(self, X):
        """Return ``X`` concatenated with every unit's output along dim 1.

        The result has ``input_channels + num_convs * growth_rate`` channels.
        """
        features = X
        for unit in self.net:
            new_features = unit(features)
            # Join each unit's input and output along the channel dimension.
            features = torch.cat((features, new_features), dim=1)
        return features

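- `num_convs`: the number of `conv_block`s in the dense block; each one only changes the channel count, not the height/width.
- `input_channels`: the number of channels of the block's input.
- `growth_rate`: the number of channels each `conv_block` outputs, i.e. how many channels every `conv_block` adds to the concatenated feature map.
- `out_channels = input_channels + num_convs * growth_rate`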

In [98]:
# Dense block with 2 conv_blocks on a 3-channel input, each adding 10 channels.
blk = DenseBlock(num_convs=2, input_channels=3, growth_rate=10)

The block contains two `conv_block`s:
- first: `conv_block(3, 10)`
- second: `conv_block(13, 10)`

How the channel count changes (here `growth_rate = 10`):
- first: `Y = conv_block(X)` has 10 channels; `X = cat(X, Y)` then has `3 + 1 * 10 = 13` channels.
- second: `Y = conv_block(X)` takes the 13-channel `X` and again has 10 channels; `X = cat(X, Y)` then has `13 + 1 * 10 = 3 + 2 * 10 = 23` channels.

In this block the channel count goes from 3 to 3 + 2 * 10 = 23, while the height and width do not change.

In [102]:
# Push a batch of 4 random 3-channel 8x8 inputs through the dense block.
X = torch.randn(size=(4, 3, 8, 8))
Y = blk(X)
Y.shape

torch.Size([4, 23, 8, 8])

## 二、transition layer

In [134]:
# Equivalent function-style definition, kept for reference only:
# def transition_block(input_channels, num_channels):
#     return nn.Sequential(
#         nn.BatchNorm2d(input_channels), nn.ReLU(),
#         nn.Conv2d(input_channels, num_channels, kernel_size=1),
#         nn.AvgPool2d(kernel_size=2, stride=2))

In [141]:
class transition_block(nn.Module):
    """Transition layer: BN -> ReLU -> 1x1 conv -> 2x2 average pooling.

    Args:
        input_channels: Channel count of the incoming feature map.
        num_channels: Channel count produced by the 1x1 convolution.
    """

    def __init__(self, input_channels, num_channels):
        super().__init__()
        stages = [
            nn.BatchNorm2d(input_channels),
            nn.ReLU(),
            # The 1x1 convolution only remaps the channel count.
            nn.Conv2d(input_channels, num_channels, kernel_size=1),
            # Kernel 2 with stride 2 halves the height and width.
            nn.AvgPool2d(kernel_size=2, stride=2),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, X):
        """Apply the transition layer to ``X``."""
        return self.net(X)

- 控制模型的复杂度，通过1x1的卷积来控制通道数，AvgPool2d使w和h->w/2和h/2;
- The 1x1 convolution changes the number of channels, and the average pooling (kernel 2, stride 2) halves the height and width: (h, w) -> (h/2, w/2).

In [144]:
# Map the 23-channel dense-block output to 10 channels at half the resolution.
blk = transition_block(input_channels=23, num_channels=10)
blk(Y).shape

torch.Size([4, 10, 4, 4])

- channels: 23->10
- height and width are halved (h, w -> h/2, w/2): (8, 8) -> (4, 4)

## 三、DenseNet

In [158]:
# Stem of the network; the shape comments assume a 1x224x224 input.
b1 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3),  # -> 64x112x112
    nn.BatchNorm2d(num_features=64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # -> 64x56x56
)

In [160]:
# num_channels tracks the channel count of the feature map at the current point.
num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]  # number of conv_blocks in each dense block
last_block_index = len(num_convs_in_dense_blocks) - 1
blks = []
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    blks.append(DenseBlock(num_convs, num_channels, growth_rate))
    # A dense block outputs num_channels + num_convs * growth_rate channels.
    num_channels += num_convs * growth_rate
    if i == last_block_index:
        continue
    # Between dense blocks, insert a transition layer that halves the channel count.
    blks.append(transition_block(num_channels, num_channels // 2))  # // is integer division
    num_channels //= 2

`blks` holds four `DenseBlock`s and three `transition_block`s (shapes are channels x height x width):
- DenseBlock1: 64x56x56 -> (64+32*4)x56x56 = 192x56x56
- transition_block1: 192x56x56 -> 96x28x28
- DenseBlock2: 96x28x28 -> (96+32*4)x28x28 = 224x28x28
- transition_block2: 224x28x28 -> 112x14x14
- DenseBlock3: 112x14x14 -> (112+32*4)x14x14 = 240x14x14
- transition_block3: 240x14x14 -> 120x7x7
- DenseBlock4: 120x7x7 -> (120+32*4)x7x7 = 248x7x7

In [153]:
# Full model: stem, dense/transition blocks, then a pooling + linear head.
net = nn.Sequential(
    b1,
    *blks,
    nn.BatchNorm2d(num_features=num_channels),
    nn.ReLU(),
    # Global average pooling reduces every channel to a single 1x1 value.
    nn.AdaptiveAvgPool2d(output_size=(1, 1)),
    nn.Flatten(),
    nn.Linear(in_features=num_channels, out_features=10),
)

In [162]:
# Trace a 1x1x224x224 input through the network, printing each layer's output shape.
X = torch.rand(size=(1, 1, 224, 224))
for layer in net:
    X = layer(X)
    layer_name = type(layer).__name__
    print(layer_name, 'output shape:\t', X.shape)

Sequential output shape:	 torch.Size([1, 64, 56, 56])
DenseBlock output shape:	 torch.Size([1, 192, 56, 56])
transition_block output shape:	 torch.Size([1, 96, 28, 28])
DenseBlock output shape:	 torch.Size([1, 224, 28, 28])
transition_block output shape:	 torch.Size([1, 112, 14, 14])
DenseBlock output shape:	 torch.Size([1, 240, 14, 14])
transition_block output shape:	 torch.Size([1, 120, 7, 7])
DenseBlock output shape:	 torch.Size([1, 248, 7, 7])
BatchNorm2d output shape:	 torch.Size([1, 248, 7, 7])
ReLU output shape:	 torch.Size([1, 248, 7, 7])
AdaptiveAvgPool2d output shape:	 torch.Size([1, 248, 1, 1])
Flatten output shape:	 torch.Size([1, 248])
Linear output shape:	 torch.Size([1, 10])
