In [2]:
import torch.nn as nn
from config import train_config as config
from CRNN_Dataset import SVHC_Dataset, SVHC_collate_fn
from torchsummary import summary
import torch

In [3]:
class CRNN(nn.Module):
    """CNN + bidirectional-LSTM sequence recogniser.

    The network is a convolutional backbone, a linear layer that maps each
    feature-map column to a vector, two stacked bidirectional LSTMs and a
    final linear classifier applied at every time step.

    Args:
        img_channel: number of input channels (1 for grayscale images).
        img_height: input image height; must be a multiple of 16 and at
            least 32.
        img_width: input image width; must be a multiple of 4 and at least 8.
        num_class: size of the output layer (number of classes scored at
            each time step).
        map_to_seq_hidden: output size of the linear layer that bridges the
            CNN and the RNN.
        rnn_hidden: hidden size of each LSTM direction.
        leaky_relu: use ``LeakyReLU(0.2)`` instead of ``ReLU`` in the CNN.

    Raises:
        AssertionError: if ``img_height`` is not a multiple of 16 or
            ``img_width`` is not a multiple of 4.
        ValueError: if the image is so small that the CNN output would have
            no rows or no columns.
    """

    def __init__(self, img_channel, img_height, img_width, num_class,
                 map_to_seq_hidden=64, rnn_hidden=256, leaky_relu=False):
        super(CRNN, self).__init__()

        # The output width is returned by the backbone helper but not needed
        # here: the sequence length is read from the tensor in forward().
        self.cnn, (output_channel, output_height, output_width) = \
            self._cnn_backbone(img_channel, img_height, img_width, leaky_relu)

        # Each feature-map column (channel * height values) becomes one time step.
        self.map_to_seq = nn.Linear(output_channel * output_height, map_to_seq_hidden)

        self.rnn1 = nn.LSTM(map_to_seq_hidden, rnn_hidden, bidirectional=True)

        # A bidirectional LSTM concatenates both directions, so the next
        # layer receives 2 * rnn_hidden features.
        self.rnn2 = nn.LSTM(2 * rnn_hidden, rnn_hidden, bidirectional=True)

        self.dense = nn.Linear(2 * rnn_hidden, num_class)

    # CNN backbone
    def _cnn_backbone(self, img_channel, img_height, img_width, leaky_relu):
        """Build the convolutional backbone.

        Returns:
            A tuple ``(cnn, (channels, height, width))`` where ``cnn`` is the
            ``nn.Sequential`` backbone and the inner tuple is the shape of
            its output feature map for an input of the given size.

        Raises:
            AssertionError: if the size is not divisible as required.
            ValueError: if the resulting feature map would be empty.
        """
        assert img_height % 16 == 0, 'img_height must be a multiple of 16'
        assert img_width % 4 == 0, 'img_width must be a multiple of 4'

        # Height is halved four times and width twice; the final 2x2
        # convolution without padding then removes one row and one column.
        output_height = img_height // 16 - 1
        output_width = img_width // 4 - 1
        if output_height < 1 or output_width < 1:
            # Fail at construction time instead of deep inside forward().
            raise ValueError(
                f'image size ({img_height}, {img_width}) is too small: the CNN '
                f'output would be ({output_height}, {output_width}); '
                'need img_height >= 32 and img_width >= 8'
            )

        # Layer hyper-parameters; entry i describes conv layer i.
        channels = [img_channel, 64, 128, 256, 256, 512, 512, 512]
        kernel_sizes = [3, 3, 3, 3, 3, 3, 2]
        strides = [1, 1, 1, 1, 1, 1, 1]
        paddings = [1, 1, 1, 1, 1, 1, 0]

        cnn = nn.Sequential()

        def conv_relu(i, batch_norm=False):
            """Append conv layer ``i``, optional batch norm, and its activation."""
            # shape of input: (batch, input_channel, height, width)
            input_channel = channels[i]
            output_channel = channels[i+1]

            cnn.add_module(
                f'conv{i}',
                nn.Conv2d(input_channel, output_channel, kernel_sizes[i], strides[i], paddings[i])
            )

            if batch_norm:
                cnn.add_module(f'batchnorm{i}', nn.BatchNorm2d(output_channel))

            relu = nn.LeakyReLU(0.2, inplace=True) if leaky_relu else nn.ReLU(inplace=True)
            cnn.add_module(f'relu{i}', relu)

        # size of image: (channel, height, width) = (img_channel, img_height, img_width)
        conv_relu(0)
        cnn.add_module('pooling0', nn.MaxPool2d(kernel_size=2, stride=2))
        # (64, img_height // 2, img_width // 2)

        conv_relu(1)
        cnn.add_module('pooling1', nn.MaxPool2d(kernel_size=2, stride=2))
        # (128, img_height // 4, img_width // 4)

        conv_relu(2)
        conv_relu(3)
        # A (2, 1) pooling window halves the height only, keeping the width.
        cnn.add_module(
            'pooling2',
            nn.MaxPool2d(kernel_size=(2, 1))
        )  # (256, img_height // 8, img_width // 4)

        conv_relu(4, batch_norm=True)
        conv_relu(5, batch_norm=True)
        cnn.add_module(
            'pooling3',
            nn.MaxPool2d(kernel_size=(2, 1))
        )  # (512, img_height // 16, img_width // 4)

        conv_relu(6)  # (512, img_height // 16 - 1, img_width // 4 - 1)

        output_channel = channels[-1]
        return cnn, (output_channel, output_height, output_width)

    # CNN + LSTM forward pass
    def forward(self, images):
        """Score every class at every horizontal position of the image.

        Args:
            images: tensor of shape (batch, channel, height, width).

        Returns:
            Tensor of shape (seq_len, batch, num_class) holding the raw
            outputs of the final linear layer, where seq_len is the width of
            the CNN feature map.
        """
        # shape of images: (batch, channel, height, width)

        conv = self.cnn(images)
        batch, channel, height, width = conv.size()

        # Fold channel and height together so every column is one feature vector.
        conv = conv.view(batch, channel * height, width)
        conv = conv.permute(2, 0, 1)  # (width, batch, feature)

        # The linear layer maps (width, batch, channel * height) to
        # (width, batch, map_to_seq_hidden): sequence length, batch, features,
        # which is the layout the LSTMs take as input.
        seq = self.map_to_seq(conv)

        recurrent, _ = self.rnn1(seq)
        recurrent, _ = self.rnn2(recurrent)

        output = self.dense(recurrent)
        return output  # shape: (seq_len, batch, num_class)


In [6]:
# Input image size comes from the shared training configuration.
img_width, img_height = config['img_width'], config['img_height']

# One output class per character known to the dataset, plus one extra
# for the blank label (L' = L + {blank}).
num_class = 1 + len(SVHC_Dataset.seq_to_char)

In [11]:
# Display the resolved input size and class count as a quick sanity check.
img_width,img_height,num_class

(32, 32, 11)

In [8]:
# Build the recogniser for single-channel input; the remaining
# hyper-parameters are read from the training configuration.
crnn = CRNN(
    img_channel=1,
    img_height=img_height,
    img_width=img_width,
    num_class=num_class,
    map_to_seq_hidden=config['map_to_seq_hidden'],
    rnn_hidden=config['rnn_hidden'],
    leaky_relu=config['leaky_relu'],
)

In [9]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'device: {device}')

device: cuda


In [10]:
# Move the model onto the selected device. As the cell's last expression,
# the returned module is also displayed, which shows the layer structure.
crnn.to(device)

CRNN(
  (cnn): Sequential(
    (conv0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu0): ReLU(inplace=True)
    (pooling0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (pooling1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (pooling2): MaxPool2d(kernel_size=(2, 1), stride=(2, 1), padding=0, dilation=1, ceil_mode=False)
    (conv4): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batchnorm4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu4): ReLU(inplace=True)
    (conv5): Conv2d(512, 512, 

In [12]:
from tensorboardX import SummaryWriter

In [22]:
# NOTE(review): the log directory is a hard-coded absolute Windows path;
# consider reading it from the config so the notebook runs on other machines.
writer = SummaryWriter('F:/log') # create the writer object used to save logged data

In [23]:
# Trace the model once with a random batch so TensorBoard can render its
# graph. The spatial size comes from the same config values used to build
# `crnn` (instead of a hand-written 32 x 32), so the trace keeps working if
# the configured image size changes. The single channel matches the
# img_channel=1 passed when constructing the model.
dummy_input = torch.rand(20, 1, img_height, img_width)
dummy_input = dummy_input.to(device)

# verbose=True prints the traced graph to the cell output.
writer.add_graph(crnn, dummy_input, verbose=True)
writer.close()

graph(%self.1 : __torch__.___torch_mangle_335.CRNN,
      %input.1 : Float(20, 1, 32, 32)):
  %536 : __torch__.torch.nn.modules.linear.___torch_mangle_334.Linear = prim::GetAttr[name="dense"](%self.1)
  %533 : __torch__.torch.nn.modules.rnn.___torch_mangle_333.LSTM = prim::GetAttr[name="rnn2"](%self.1)
  %524 : __torch__.torch.nn.modules.rnn.___torch_mangle_332.LSTM = prim::GetAttr[name="rnn1"](%self.1)
  %515 : __torch__.torch.nn.modules.linear.___torch_mangle_331.Linear = prim::GetAttr[name="map_to_seq"](%self.1)
  %512 : __torch__.torch.nn.modules.container.___torch_mangle_330.Sequential = prim::GetAttr[name="cnn"](%self.1)
  %567 : __torch__.torch.nn.modules.activation.___torch_mangle_329.ReLU = prim::GetAttr[name="relu6"](%512)
  %568 : __torch__.torch.nn.modules.conv.___torch_mangle_328.Conv2d = prim::GetAttr[name="conv6"](%512)
  %569 : __torch__.torch.nn.modules.pooling.___torch_mangle_327.MaxPool2d = prim::GetAttr[name="pooling3"](%512)
  %570 : __torch__.torch.nn.modules.acti




In [None]:
# Convolution output size with input n, padding p, kernel f and stride s:
#   (n + 2p - f) / s + 1
# conv0: (32 + 2*1 - 3) / 1 + 1 = 32  ->  feature map (32, 32)
# conv1