# PP复现RepVGG

![](https://ai-studio-static-online.cdn.bcebos.com/652da8d77afb4614be737ef7e630af14f48f766682dd46baa11c208c42b995bf)


### 让vgg再一次伟大
Hi guys！欢迎来到这里。这是对RepVGG代码的深入讲解，本notebook将逐行讲解代码。为什么要讲解呢？因为RepVGG的思想和工程意义吸引着大家：大道至简而不失性能，简单而又有效，这是多么令人着迷！

详细论文细节可以去github看我编写的md文件，这里不再阐述：

如果对您有帮助，欢迎给个github star，希望你阅读快乐，每天基本都在，有问题可以及时反馈


![](https://ai-studio-static-online.cdn.bcebos.com/92e22df6fcf44e7d99555daba4ab6bba82cdba4c47024d3fb16376692fe1cc7f)


In [1]:
import paddle
import numpy as np
import paddle.nn as nn

#国际惯例，导入所需要的包
print('当前版本为:',paddle.__version__)

当前版本为: 2.0.1


In [2]:
def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1):
    """Build a bias-free ``Conv2D`` followed by a ``BatchNorm2D``.

    The two layers are registered in the returned ``Sequential`` under the
    names ``conv`` and ``bn``.
    """
    conv_layer = nn.Conv2D(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        groups=groups,
        bias_attr=False,
    )
    bn_layer = nn.BatchNorm2D(num_features=out_channels)
    return paddle.nn.Sequential(('conv', conv_layer), ('bn', bn_layer))

# 构造conv+bn组合

In [3]:
#构建RepVGGBlock模块
#RepVGG除了最后的池化层和分类层之外，都是清一色RepVGGBlock堆叠，十分简单

class RepVGGBlock(nn.Layer):
    """Basic RepVGG building block.

    Training form: the outputs of a 3x3 conv+BN branch, a 1x1 conv+BN branch
    and (when input and output shapes match) a BN-only identity branch are
    summed and passed through ReLU.
    Deploy form: a single 3x3 convolution with bias, followed by ReLU.
    """

    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', deploy=False):
        super().__init__()
        self.deploy = deploy            # True -> build the single-conv inference form
        self.groups = groups            # group count passed to the convolutions
        self.in_channels = in_channels  # kept for building the identity kernel later

        # Only the 3x3 kernel with padding 1 configuration is supported.
        assert kernel_size == 3
        assert padding == 1

        # Padding of the 1x1 branch; evaluates to 0 for the values asserted above.
        padding_11 = padding - kernel_size // 2

        self.nonlinearity = nn.ReLU()

        if deploy:
            # Inference form: the whole block is one convolution with its own bias.
            self.rbr_reparam = nn.Conv2D(
                in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
                stride=stride, padding=padding, dilation=dilation, groups=groups,
                bias_attr=True, padding_mode=padding_mode)
            return

        # Training form: identity (BN only), 3x3 conv+BN and 1x1 conv+BN branches.
        # The identity branch exists only when channels and resolution are preserved.
        if out_channels == in_channels and stride == 1:
            self.rbr_identity = nn.BatchNorm2D(num_features=in_channels)
        else:
            self.rbr_identity = None

        self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels,
                                 kernel_size=kernel_size, stride=stride,
                                 padding=padding, groups=groups)
        self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels,
                               kernel_size=1, stride=stride,
                               padding=padding_11, groups=groups)
        # Shows whether this block has an identity branch (prints None when it has not).
        print('RepVGG Block, identity = ', self.rbr_identity)

    def forward(self, inputs):
        """Run the fused conv when it exists, otherwise sum the training branches."""
        if hasattr(self, 'rbr_reparam'):
            # Deploy form: conv then ReLU.
            return self.nonlinearity(self.rbr_reparam(inputs))

        # Training form: 3x3 + 1x1 + identity (0 when absent), then ReLU.
        shortcut = 0 if self.rbr_identity is None else self.rbr_identity(inputs)
        branch_sum = self.rbr_dense(inputs) + self.rbr_1x1(inputs) + shortcut
        return self.nonlinearity(branch_sum)

    def get_equivalent_kernel_bias(self):
        """Return the (kernel, bias) of one 3x3 conv equal to the sum of all branches."""
        dense_kernel, dense_bias = self._fuse_bn_tensor(self.rbr_dense)
        pointwise_kernel, pointwise_bias = self._fuse_bn_tensor(self.rbr_1x1)
        identity_kernel, identity_bias = self._fuse_bn_tensor(self.rbr_identity)

        merged_kernel = dense_kernel + self._pad_1x1_to_3x3_tensor(pointwise_kernel) + identity_kernel
        merged_bias = dense_bias + pointwise_bias + identity_bias
        return merged_kernel, merged_bias

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        """Pad a 1x1 kernel by one element on each side of the last two axes.

                              [0  0  0]
          [1]  >>> pad >>>    [0  1  0]
                              [0  0  0]
        """
        if kernel1x1 is None:
            return 0
        return nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        """Fold a branch's BatchNorm into an equivalent conv kernel and bias.

        Returns ``(0, 0)`` when the branch is absent.  Otherwise returns
        ``kernel * gamma / std`` and ``beta - running_mean * gamma / std``
        with ``std = sqrt(running_var + eps)``.  The convolutions built by
        ``conv_bn`` carry no bias, so no conv bias enters the formula.
        """
        if branch is None:
            return 0, 0

        if isinstance(branch, nn.Sequential):
            # conv+BN branch (3x3 or 1x1): use the conv weight and its BN layer.
            kernel = branch.conv.weight
            bn_layer = branch.bn
        else:
            # Identity branch: a bare BN, expressed through an identity 3x3 kernel.
            assert isinstance(branch, nn.BatchNorm2D)
            if not hasattr(self, 'id_tensor'):
                input_dim = self.in_channels // self.groups  # input channels seen by each group
                kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32)
                # Output channel i gets a single 1 at the kernel centre of
                # input channel i % input_dim; every other entry stays 0.
                channel_idx = np.arange(self.in_channels)
                kernel_value[channel_idx, channel_idx % input_dim, 1, 1] = 1
                self.id_tensor = paddle.to_tensor(kernel_value)
            kernel = self.id_tensor
            bn_layer = branch

        running_mean = bn_layer._mean      # BN running mean
        running_var = bn_layer._variance   # BN running variance
        gamma = bn_layer.weight            # BN scale
        beta = bn_layer.bias               # BN shift
        eps = bn_layer._epsilon            # keeps the denominator away from zero

        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape((-1, 1, 1, 1))  # one scale per output channel
        return kernel * t, beta - running_mean * gamma / std

    def repvgg_convert(self):
        """Return the fused kernel and bias as NumPy arrays."""
        kernel, bias = self.get_equivalent_kernel_bias()
        return kernel.numpy(), bias.numpy()

### 疑惑部分讲解
* `return kernel * t, beta - running_mean * gamma / std`这段代码是什么意思？
   
  ![](https://ai-studio-static-online.cdn.bcebos.com/75c1178f0b7644ee8415b5006618c06b7b95d2a899a64585b309c6d5b1b04144)
   
  训练模型中的conv后面紧跟BN，构建时关闭了conv的bias（`bias_attr=False`），所以上面的公式里去掉了b
  
* `kernel_value[i, i % input_dim, 1, 1] = 1`这段代码什么意思？
  
  首先我们看分组卷积
  
  ![](https://ai-studio-static-online.cdn.bcebos.com/72247e542a2b438fbaf5d6620ec3822309ddc1df91384685bf32a46048d7f9e8)
  
  对应这段代码`kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32) `，假设不使用分组卷积，即groups=1（分组卷积与GPU数量无关），
  我们看上图in_channels=input_dim=12，输出一个feature map需要的filter大小是（input_dim=12,3,3）
  
  但是in_channels怎么来的?我们思考一下上图左边的input如何经过3x3卷积输出identity效果，输出feature map数要和输入feature map数相同，所以filter数量必须是12，
  所以总filter（12,12,3,3），那么新的问题来了，当input的12个feature map经过一个（12,3,3）生成一个feature map，如何跟原feature map其中一个相同。答案是将filter一个
  channel设置1，其余全部为0
  
  ![](https://ai-studio-static-online.cdn.bcebos.com/7737623e74144fa5a12e0479c7c9d09d99d1d5e3f35948279d3205ae603e7133)
  
  ![](https://ai-studio-static-online.cdn.bcebos.com/01c621d136574cf6956bc43b690602ebd96e8ea54a284acea790cea980a9c9c5)
  
  如上图所示，当不使用分组卷积（groups=1）时，in_channels=input_dim=3，需要的filter大小是（in_channels=3,input_dim=3,H=3,W=3），我们发现卷积核中参数1分别在（0,0,1,1）（1,1,1,1）（2,2,1,1），刚好对应代码`kernel_value[i, i % input_dim, 1, 1] = 1`，如下图
  
  ![](https://ai-studio-static-online.cdn.bcebos.com/290c4795e7704cf8b7e812cbc3423b6db90e7f831906402aaa56a41247321e8e)
  
  要是还没看懂，评论区问，尽力解答
  
  论文原图如下，详细说明请看原论文或者上面的论文精读
  
  ![](https://ai-studio-static-online.cdn.bcebos.com/25ee65a3a1ab4f599a6ec73303bf12f34c0751251eea48d9a1f154e8ba5f662b)
  
* `self.rbr_identity = nn.BatchNorm2D(num_features=in_channels) if out_channels == in_channels and stride == 1 else None`

  这是in_channel与out_channel不同时候的block，可以看出没有恒等连接

  ![](https://ai-studio-static-online.cdn.bcebos.com/e896c3883ef64b6788bd1106fa9a5249234c03ee29834959b54f64dd05523d81)

  这是in_channel与out_channel相同时候的block，可以看出有恒等连接

  ![](https://ai-studio-static-online.cdn.bcebos.com/1148d68a9b114cd9885c813cd74dc638dc9e132b0aa647bf9fac9bc73a239af9)

  当block负责对图像进行下采样，即stride=2时候，空间维度缩小，特征层增加，以此提取高层语义特征，这时候的block没有identity连接


In [4]:
class RepVGG(nn.Layer):
    """RepVGG classifier: a stem block, four stages of ``RepVGGBlock``s,
    global average pooling and a linear head."""

    def __init__(self, num_blocks, num_classes=1000, width_multiplier=None, override_groups_map=None, deploy=False):
        super().__init__()

        # One width scaling factor per stage is required.
        assert len(width_multiplier) == 4

        self.deploy = deploy
        # Maps a layer index to the group count of that block; blocks whose
        # index is missing fall back to groups=1.
        self.override_groups_map = override_groups_map or dict()

        assert 0 not in self.override_groups_map

        self.in_planes = min(64, int(64 * width_multiplier[0]))

        self.stage0 = RepVGGBlock(in_channels=3, out_channels=self.in_planes, kernel_size=3,
                                  stride=2, padding=1, deploy=self.deploy)
        self.cur_layer_idx = 1  # running block index used to look up group overrides

        base_widths = (64, 128, 256, 512)
        stage_widths = [int(base * scale) for base, scale in zip(base_widths, width_multiplier)]
        self.stage1 = self._make_stage(stage_widths[0], num_blocks[0], stride=2)
        self.stage2 = self._make_stage(stage_widths[1], num_blocks[1], stride=2)
        self.stage3 = self._make_stage(stage_widths[2], num_blocks[2], stride=2)
        self.stage4 = self._make_stage(stage_widths[3], num_blocks[3], stride=2)
        # Adaptive average pooling to a 1x1 spatial output.
        self.gap = nn.AdaptiveAvgPool2D(output_size=1)
        self.linear = nn.Linear(stage_widths[3], num_classes)

    def _make_stage(self, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``, the rest use 1."""
        layers = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            block_groups = self.override_groups_map.get(self.cur_layer_idx, 1)
            layers.append(RepVGGBlock(in_channels=self.in_planes, out_channels=planes, kernel_size=3,
                                      stride=block_stride, padding=1, groups=block_groups,
                                      deploy=self.deploy))
            self.in_planes = planes
            self.cur_layer_idx += 1
        return nn.Sequential(*layers)

    def forward(self, x):
        """Apply stem and stages, pool, flatten from axis 1 and classify."""
        out = x
        for stage in (self.stage0, self.stage1, self.stage2, self.stage3, self.stage4):
            out = stage(out)
        out = self.gap(out)
        out = paddle.flatten(out, start_axis=1)
        return self.linear(out)

![](https://ai-studio-static-online.cdn.bcebos.com/a9431b0ef4d347e2b888a96213d6aea1b6797c768e544e0690aa9ffa2e06d602)


In [5]:
# Layer indices that receive a group override in the "g2" / "g4" variants.
optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
# Per-layer group counts, passed to RepVGG as override_groups_map.
g2_map = {l: 2 for l in optional_groupwise_layers}
g4_map = {l: 4 for l in optional_groupwise_layers}

def create_RepVGG_A0(deploy=False,num_classes=10):
    """Build the A0 configuration; ``deploy`` and ``num_classes`` are forwarded to RepVGG."""
    return RepVGG(num_blocks=[2, 4, 14, 1], num_classes=num_classes,
                  width_multiplier=[0.75, 0.75, 0.75, 2.5], override_groups_map=None, deploy=deploy)

def create_RepVGG_A1(deploy=False,num_classes=10):
    """Build the A1 configuration; ``deploy`` and ``num_classes`` are forwarded to RepVGG."""
    return RepVGG(num_blocks=[2, 4, 14, 1], num_classes=num_classes,
                  width_multiplier=[1, 1, 1, 2.5], override_groups_map=None, deploy=deploy)

def create_RepVGG_A2(deploy=False,num_classes=10):
    """Build the A2 configuration; ``deploy`` and ``num_classes`` are forwarded to RepVGG."""
    return RepVGG(num_blocks=[2, 4, 14, 1], num_classes=num_classes,
                  width_multiplier=[1.5, 1.5, 1.5, 2.75], override_groups_map=None, deploy=deploy)


def create_RepVGG_B0(deploy=False,num_classes=10):
    """Build the B0 configuration; ``deploy`` and ``num_classes`` are forwarded to RepVGG."""
    return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=num_classes,
                  width_multiplier=[1, 1, 1, 2.5], override_groups_map=None, deploy=deploy)

def create_RepVGG_B1(deploy=False,num_classes=10):
    """Build the B1 configuration; ``deploy`` and ``num_classes`` are forwarded to RepVGG."""
    return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=num_classes,
                  width_multiplier=[2, 2, 2, 4], override_groups_map=None, deploy=deploy)

def create_RepVGG_B1g2(deploy=False,num_classes=10):
    """Build the B1 configuration with the ``g2_map`` group overrides."""
    return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=num_classes,
                  width_multiplier=[2, 2, 2, 4], override_groups_map=g2_map, deploy=deploy)

def create_RepVGG_B1g4(deploy=False,num_classes=10):
    """Build the B1 configuration with the ``g4_map`` group overrides."""
    return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=num_classes,
                  width_multiplier=[2, 2, 2, 4], override_groups_map=g4_map, deploy=deploy)

def create_RepVGG_B2(deploy=False,num_classes=10):
    """Build the B2 configuration; ``deploy`` and ``num_classes`` are forwarded to RepVGG."""
    return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=num_classes,
                  width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=None, deploy=deploy)

def create_RepVGG_B2g2(deploy=False,num_classes=10):
    """Build the B2 configuration with the ``g2_map`` group overrides."""
    return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=num_classes,
                  width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=g2_map, deploy=deploy)

def create_RepVGG_B2g4(deploy=False,num_classes=10):
    """Build the B2 configuration with the ``g4_map`` group overrides."""
    return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=num_classes,
                  width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=g4_map, deploy=deploy)


def create_RepVGG_B3(deploy=False,num_classes=10):
    """Build the B3 configuration; ``deploy`` and ``num_classes`` are forwarded to RepVGG."""
    return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=num_classes,
                  width_multiplier=[3, 3, 3, 5], override_groups_map=None, deploy=deploy)

def create_RepVGG_B3g2(deploy=False,num_classes=10):
    """Build the B3 configuration with the ``g2_map`` group overrides.

    ``num_classes`` is forwarded to the classifier head; it was previously
    ignored in favour of a hard-coded 1000.
    """
    return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=num_classes,
                  width_multiplier=[3, 3, 3, 5], override_groups_map=g2_map, deploy=deploy)

def create_RepVGG_B3g4(deploy=False,num_classes=10):
    """Build the B3 configuration with the ``g4_map`` group overrides."""
    return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=num_classes,
                  width_multiplier=[3, 3, 3, 5], override_groups_map=g4_map, deploy=deploy)


![](https://ai-studio-static-online.cdn.bcebos.com/230ad30833984dbdbd518e0b95d4c89f0084152a08d04f84809c6bc7cfaca69e)


In [6]:
repvgg_a0=create_RepVGG_A0(num_classes=10)

RepVGG Block, identity =  None
RepVGG Block, identity =  None
RepVGG Block, identity =  BatchNorm2D(num_features=48, momentum=0.9, epsilon=1e-05)
RepVGG Block, identity =  None
RepVGG Block, identity =  BatchNorm2D(num_features=96, momentum=0.9, epsilon=1e-05)
RepVGG Block, identity =  BatchNorm2D(num_features=96, momentum=0.9, epsilon=1e-05)
RepVGG Block, identity =  BatchNorm2D(num_features=96, momentum=0.9, epsilon=1e-05)
RepVGG Block, identity =  None
RepVGG Block, identity =  BatchNorm2D(num_features=192, momentum=0.9, epsilon=1e-05)
RepVGG Block, identity =  BatchNorm2D(num_features=192, momentum=0.9, epsilon=1e-05)
RepVGG Block, identity =  BatchNorm2D(num_features=192, momentum=0.9, epsilon=1e-05)
RepVGG Block, identity =  BatchNorm2D(num_features=192, momentum=0.9, epsilon=1e-05)
RepVGG Block, identity =  BatchNorm2D(num_features=192, momentum=0.9, epsilon=1e-05)
RepVGG Block, identity =  BatchNorm2D(num_features=192, momentum=0.9, epsilon=1e-05)
RepVGG Block, identity =  Batc

### repvgg_a0 可视化
![](https://ai-studio-static-online.cdn.bcebos.com/c7603f5214b4449e9f97a62db1f7d0c952e6f2124a7c44bda84e896e0c24c339)


In [7]:
import paddle.vision.transforms as T
from paddle.vision.datasets import Cifar10

# Data preparation
# Pipeline applied to every sample: resize to 224x224, normalize each channel
# with mean 127.5 / std 127.5 in HWC layout, then convert with ToTensor.
# NOTE(review): presumably maps 0..255 pixel values to roughly [-1, 1] — confirm input range.
transform = T.Compose([
    T.Resize(size=(224,224)),
    T.Normalize(mean=[127.5, 127.5, 127.5],std=[127.5, 127.5, 127.5],data_format='HWC'),
    T.ToTensor()
])

# CIFAR-10 train and test splits share the same transform.
train_dataset = Cifar10(mode='train', transform=transform)
val_dataset = Cifar10(mode='test',  transform=transform)


Cache file /home/aistudio/.cache/paddle/dataset/cifar/cifar-10-python.tar.gz not found, downloading https://dataset.bj.bcebos.com/cifar/cifar-10-python.tar.gz 
Begin to download

Download finished


### 高层API训练
高层API灵活度不够，暂时没有想到办法通过高层API将train model转化成deploy model，不过大家可以训练一下看看acc如何，作为参考

In [8]:
# High-level API: wrap the training-form network and print its layer summary
# for an input of shape (1, 3, 224, 224).
model = paddle.Model(repvgg_a0)
model.summary((1,3,224,224))

-------------------------------------------------------------------------------
   Layer (type)         Input Shape          Output Shape         Param #    
     Conv2D-1        [[1, 3, 224, 224]]   [1, 48, 112, 112]        1,296     
   BatchNorm2D-1    [[1, 48, 112, 112]]   [1, 48, 112, 112]         192      
     Conv2D-2        [[1, 3, 224, 224]]   [1, 48, 112, 112]         144      
   BatchNorm2D-2    [[1, 48, 112, 112]]   [1, 48, 112, 112]         192      
      ReLU-1        [[1, 48, 112, 112]]   [1, 48, 112, 112]          0       
   RepVGGBlock-1     [[1, 3, 224, 224]]   [1, 48, 112, 112]          0       
     Conv2D-3       [[1, 48, 112, 112]]    [1, 48, 56, 56]        20,736     
   BatchNorm2D-3     [[1, 48, 56, 56]]     [1, 48, 56, 56]          192      
     Conv2D-4       [[1, 48, 112, 112]]    [1, 48, 56, 56]         2,304     
   BatchNorm2D-4     [[1, 48, 56, 56]]     [1, 48, 56, 56]          192      
      ReLU-2         [[1, 48, 56, 56]]     [1, 48, 56, 56]    

{'total_params': 7864426, 'trainable_params': 7817130}

In [None]:
# Start training. This step is optional: the later cells do not depend on it,
# and skipping it is recommended.
model.prepare(optimizer=paddle.optimizer.Adam(learning_rate=0.001,parameters=model.parameters()),
              loss=paddle.nn.CrossEntropyLoss(),
              metrics=paddle.metric.Accuracy())

vdl_callback = paddle.callbacks.VisualDL(log_dir='log') # training visualization, logs go to 'log'

model.fit(
    train_data=train_dataset, 
    eval_data=val_dataset, 
    batch_size=64, 
    epochs=10, 
    save_dir='save_models', 
    verbose=1, 
    callbacks=vdl_callback # training visualization
)

### 基础API训练

In [11]:
# Training batches are shuffled and an incomplete final batch is dropped.
train_batch = paddle.io.DataLoader(train_dataset, batch_size=128, shuffle=True, drop_last=True)
# Validation keeps a fixed order and every sample, so that repeated
# evaluations (training model vs. deploy model) see identical batches and
# no test images are silently discarded.
val_batch = paddle.io.DataLoader(val_dataset, batch_size=128, shuffle=False, drop_last=False)

# Peek at the first training batch to show the tensor shapes.
for sample_batch in train_batch:
    print('迭代器第一轮批次：')
    print('图片数据',sample_batch[0].shape)
    print('标签数据',sample_batch[1].shape)
    break

print('')
print('标签数据需要利用paddle.unsqueeze()变成[128,1]')

迭代器第一轮批次：
图片数据 [128, 3, 224, 224]
标签数据 [128]

标签数据需要利用paddle.unsqueeze()变成[128,1]


In [12]:
def fit(model,train_batch,val_batch,epoch):
    """Train ``model`` with Adam and cross-entropy, validating after every epoch.

    Args:
        model: network to optimize in place.
        train_batch: iterable of (images, labels) training batches.
        val_batch: iterable of (images, labels) validation batches.
        epoch: number of passes over ``train_batch``.

    Progress is printed every 100 training batches and every 20 validation
    batches; nothing is returned.
    """
    # Optimizer over all model parameters.
    opt = paddle.optimizer.Adam(learning_rate=0.001,parameters=model.parameters())
    # Loss function.
    loss_fn = paddle.nn.CrossEntropyLoss()

    for epoch_id in range(epoch):

        model.train()

        for batch_id,batch_data in enumerate(train_batch):

            input_batch = batch_data[0]
            # Add a trailing axis to the labels: [N] -> [N, 1].
            label_batch = paddle.unsqueeze(batch_data[1],axis=1)

            predict = model(input_batch)

            loss = loss_fn(predict, label_batch)
            acc = paddle.metric.accuracy(predict, label_batch)

            # Back-propagate, update the parameters, then clear the gradients.
            loss.backward()
            opt.step()
            opt.clear_grad()

            if batch_id % 100 == 0:
                print("epoch: {}, batch_id: {}, loss is: {}, acc is: {}".format(epoch_id, batch_id, loss.numpy(), acc.numpy()))

        model.eval()

        # Validation needs no gradients; disabling tracking avoids building
        # the autograd graph for every validation batch.
        with paddle.no_grad():
            for batch_id,batch_data in enumerate(val_batch):

                img_batch = batch_data[0]
                label_batch = paddle.unsqueeze(batch_data[1],axis=1)

                predict = model(img_batch)

                loss = loss_fn(predict, label_batch)
                acc = paddle.metric.accuracy(predict, label_batch)

                if batch_id % 20 == 0:
                    print("batch_id: {}, loss is: {}, acc is: {}".format(batch_id, loss.numpy(), acc.numpy()))

fit(repvgg_a0, train_batch, val_batch, epoch=10)

  "When training, we now always track global mean and variance.")


epoch: 0, batch_id: 0, loss is: [2.377829], acc is: [0.1015625]
epoch: 0, batch_id: 100, loss is: [1.6956916], acc is: [0.34375]
epoch: 0, batch_id: 200, loss is: [1.2747836], acc is: [0.515625]
epoch: 0, batch_id: 300, loss is: [1.0669378], acc is: [0.609375]
batch_id: 0, loss is: [1.1263782], acc is: [0.609375]
epoch: 1, batch_id: 0, loss is: [1.0189099], acc is: [0.5859375]
epoch: 1, batch_id: 100, loss is: [0.98348755], acc is: [0.6640625]
epoch: 1, batch_id: 200, loss is: [0.8737333], acc is: [0.6640625]
epoch: 1, batch_id: 300, loss is: [0.9537698], acc is: [0.6796875]
batch_id: 0, loss is: [0.6566773], acc is: [0.75]
epoch: 2, batch_id: 0, loss is: [0.65497804], acc is: [0.8046875]
epoch: 2, batch_id: 100, loss is: [0.501753], acc is: [0.8359375]
epoch: 2, batch_id: 200, loss is: [0.6873904], acc is: [0.765625]
epoch: 2, batch_id: 300, loss is: [0.7049354], acc is: [0.765625]
batch_id: 0, loss is: [0.7807969], acc is: [0.734375]
epoch: 3, batch_id: 0, loss is: [0.54755455], acc 

In [14]:
#模型转换
def repvgg_model_convert(model, build_func):
    """Copy the fused weights of a training-form model into a deploy-form model.

    Args:
        model: trained network whose sublayers expose ``repvgg_convert``.
        build_func: despite its name, an already constructed deploy-form
            model instance; its parameters are overwritten in place.

    Returns:
        The same ``build_func`` object, now carrying the converted weights.

    Raises:
        KeyError: if the deploy model has a parameter with no converted value.
    """
    # Collect fused kernel/bias per block and the classifier weights,
    # keyed by the parameter names of the deploy-form model.
    converted_weights = {}
    for name, module in model.named_sublayers():
        if hasattr(module, 'repvgg_convert'):
            kernel, bias = module.repvgg_convert()
            converted_weights[name + '.rbr_reparam.weight'] = kernel
            converted_weights[name + '.rbr_reparam.bias'] = bias
        elif isinstance(module, nn.Linear):
            converted_weights[name + '.weight'] = module.weight.numpy()
            converted_weights[name + '.bias'] = module.bias.numpy()

    deploy_model = build_func
    for name, param in deploy_model.named_parameters():
        print('deploy param: ', name, np.mean(converted_weights[name]))
        # set_value copies the array into the existing parameter. Assigning
        # to `param.data` is not a documented way to overwrite a parameter
        # and may leave the stored values untouched.
        param.set_value(converted_weights[name])

    return deploy_model

In [15]:
deploy_model = repvgg_model_convert(repvgg_a0, create_RepVGG_A0(deploy=True,num_classes=10))
# Prints the mean value of every converted parameter of each block

deploy param:  stage0.rbr_reparam.weight -0.012085067
deploy param:  stage0.rbr_reparam.bias 0.045714345
deploy param:  stage1.0.rbr_reparam.weight -4.6854173e-05
deploy param:  stage1.0.rbr_reparam.bias -0.020605326
deploy param:  stage1.1.rbr_reparam.weight 0.0008855257
deploy param:  stage1.1.rbr_reparam.bias -0.12333813
deploy param:  stage2.0.rbr_reparam.weight -0.00063567486
deploy param:  stage2.0.rbr_reparam.bias 0.23614872
deploy param:  stage2.1.rbr_reparam.weight -0.00056437775
deploy param:  stage2.1.rbr_reparam.bias 0.24321677
deploy param:  stage2.2.rbr_reparam.weight -0.00067081465
deploy param:  stage2.2.rbr_reparam.bias 0.2832947
deploy param:  stage2.3.rbr_reparam.weight -0.0004131663
deploy param:  stage2.3.rbr_reparam.bias 0.04254788
deploy param:  stage3.0.rbr_reparam.weight -0.0011545079
deploy param:  stage3.0.rbr_reparam.bias 0.5689105
deploy param:  stage3.1.rbr_reparam.weight -0.00042346717
deploy param:  stage3.1.rbr_reparam.bias 0.020992994
deploy param:  st

In [18]:
# Model inference with the converted (deploy-form) model
deploy_model.eval()

loss_fn = paddle.nn.CrossEntropyLoss()

# Inference only: no gradients are needed.
with paddle.no_grad():
    for batch_id,batch_data in enumerate(val_batch):

        img_batch = batch_data[0]
        label_batch = paddle.unsqueeze(batch_data[1],axis=1)

        # Run the deploy model itself; calling the training model here would
        # leave the converted network untested.
        predict = deploy_model(img_batch)

        loss = loss_fn(predict, label_batch)
        acc = paddle.metric.accuracy(predict, label_batch)

        if batch_id % 20 == 0:
            print("batch_id: {}, loss is: {}, acc is: {}".format(batch_id, loss.numpy(), acc.numpy()))

# Compare the accuracy with the validation output of the training model above

batch_id: 0, loss is: [0.66172945], acc is: [0.765625]
batch_id: 20, loss is: [1.0397379], acc is: [0.7890625]
batch_id: 40, loss is: [0.89671266], acc is: [0.7890625]
batch_id: 60, loss is: [0.49206328], acc is: [0.84375]


In [19]:
# Inspect the model
#print(deploy_model)

# Inspect through the high-level wrapper: print the layer summary of the
# deploy-form network for an input of shape (1, 3, 224, 224).
deploy_model_hapi=paddle.Model(deploy_model)
deploy_model_hapi.summary((1,3,224,224))


-------------------------------------------------------------------------------
   Layer (type)         Input Shape          Output Shape         Param #    
     Conv2D-45       [[1, 3, 224, 224]]   [1, 48, 112, 112]        1,344     
      ReLU-23       [[1, 48, 112, 112]]   [1, 48, 112, 112]          0       
  RepVGGBlock-23     [[1, 3, 224, 224]]   [1, 48, 112, 112]          0       
     Conv2D-46      [[1, 48, 112, 112]]    [1, 48, 56, 56]        20,784     
      ReLU-24        [[1, 48, 56, 56]]     [1, 48, 56, 56]           0       
  RepVGGBlock-24    [[1, 48, 112, 112]]    [1, 48, 56, 56]           0       
     Conv2D-47       [[1, 48, 56, 56]]     [1, 48, 56, 56]        20,784     
      ReLU-25        [[1, 48, 56, 56]]     [1, 48, 56, 56]           0       
  RepVGGBlock-25     [[1, 48, 56, 56]]     [1, 48, 56, 56]           0       
     Conv2D-48       [[1, 48, 56, 56]]     [1, 96, 28, 28]        41,568     
      ReLU-26        [[1, 96, 28, 28]]     [1, 96, 28, 28]    

{'total_params': 7041194, 'trainable_params': 7041194}

和上面训练模型（高阶封装）对比一下大小，这个是不是很小呀，才68m，一路的3x3卷积是不是特别容易部署

![](https://ai-studio-static-online.cdn.bcebos.com/31ccb2be06434018b146705b1b0b7b46f2f7876d8dac42b78f394088a32b5da9)

现有的计算库比如CUDA对3x3运算支持有很大的优势，上图可以看出其计算密度（FLOPs/推理时间）达到了38.10!

作者在结尾补充，在低端cpu设备，mobilenetv3还是有优势，但是在低端gpu设备下，repvgg优势还是很明显


### deploy_model 可视化
![](https://ai-studio-static-online.cdn.bcebos.com/fbc60174ae374f8c9967b897050a00ca072d870a1bd44c629a9ecaa0b24d058a)
