# 模型介绍

**HarDNet: A Low Memory Traffic Network**

github pytorch代码: [https://github.com/PingoLH/Pytorch-HarDNet](https://github.com/PingoLH/Pytorch-HarDNet)

论文地址: [https://arxiv.org/pdf/1909.00948.pdf](https://arxiv.org/pdf/1909.00948.pdf)
- 作者假设中间特征图之间的memory traffic是推理延迟的主要因素
- 主要是优化计算量 low MACs， 显存交换memory traffic， 推理时间提升30% ~ 40%
- 从动态内存DRAM中加载权重参数比算术操作的功耗更大
- 本文着重贡献通过设计cnn, 不以牺牲精度为代价减少特征图DRAM内存交互
- 设计了一个衡量指标CIO（Convolutional Input/Output，卷积层输入输出），用于大致衡量DRAM交互情况；该指标只适用于计算密度低于某一特定比例的情况。
- shortcuts的弊端是加长了tensor的生命周期，导致DRAM和缓存间频繁的数据交换。
- 第k层与第k-2^n层连接（当2^n整除k时），形成以2^n层为一组的波形重叠；当第2^n层处理完成后，就可以清空第1层到第2^n-1层。
- densenet中每个block层直接反向传播梯度到之前的层，缓解退化问题；HDB的输出是第L层与其之前所有奇数层的拼接，HDB结束后第2到L-2的偶数层输出就可以立即丢弃，内存占用减少2到3倍。
- HDB的每一层都有较宽的输入和较窄的输出，inverting the order会很大增加CIO。DW较大的MAC差异，CIO就不合适。




![](https://ai-studio-static-online.cdn.bcebos.com/f7d634542fdd42f296204a055a9be6a55f25b5b2b5c54fbd88ec5751f55df378)

![](https://ai-studio-static-online.cdn.bcebos.com/90f80bba37a44cdb976370918b0ee50394bd7576e86a4b6ca0bb9a4517945c36)



# 关于数据集ImageNet

ImageNet图像数据集始于2009年，当时李飞飞教授等在CVPR2009上发表了一篇名为《ImageNet: A Large-Scale Hierarchical Image Database》的论文，之后就是基于ImageNet数据集的7届ImageNet挑战赛(2010年开始)，2017年后，ImageNet由Kaggle(Kaggle公司是由联合创始人兼首席执行官Anthony Goldbloom 2010年在墨尔本创立的，主要是为开发商和数据科学家提供举办机器学习竞赛、托管数据库、编写和分享代码的平台)继续维护。

本AIStudio项目在线下进行的训练， 所以只使用了验证集进行验证

![](https://ai-studio-static-online.cdn.bcebos.com/1e8613aebb754b96bc799dd3c0c51278da5ab0599e264467912c9e2782821a24)



In [None]:
#数据集解压
!mkdir ~/data/ILSVRC2012
!tar -xf ~/data/data68594/ILSVRC2012_img_val.tar -C ~/data/ILSVRC2012

In [1]:
#加载数据集
import os
import shutil
import numpy as np
import paddle
from paddle.io import Dataset
from paddle.vision.datasets import DatasetFolder, ImageFolder
# from paddle.vision.transforms import Compose, Resize, Transpose, Normalize
import paddle.vision.transforms as T
# Every split currently reads from the same extracted ImageNet directory.
train_parameters = dict.fromkeys(
    ('train_image_dir', 'eval_image_dir', 'test_image_dir'),
    '/home/aistudio/data/ILSVRC2012',
)

class CatDataset(Dataset):
    """Dataset over the image directories listed in ``train_parameters``.

    Args:
        mode: ``'train'`` or ``'eval'`` wrap the directory in a
            ``DatasetFolder`` and yield ``(image, label)`` pairs;
            ``'test'`` wraps it in an ``ImageFolder`` and yields images only.

    Raises:
        ValueError: If ``mode`` is not one of the three supported values.
    """

    def __init__(self, mode='train'):
        super(CatDataset, self).__init__()

        data_transforms = T.Compose([
            T.Resize(256, interpolation='bicubic'),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        self.mode = mode
        # Only build the folder dataset for the requested split instead of
        # constructing all three and discarding two of them.
        if mode == 'train':
            self.data = DatasetFolder(train_parameters['train_image_dir'], transform=data_transforms)
        elif mode == 'eval':
            self.data = DatasetFolder(train_parameters['eval_image_dir'], transform=data_transforms)
        elif mode == 'test':
            self.data = ImageFolder(train_parameters['test_image_dir'], transform=data_transforms)
        else:
            # Previously an unknown mode left self.data unset and failed
            # with an AttributeError on the print below.
            raise ValueError(
                "mode must be 'train', 'eval' or 'test', got {!r}".format(mode))
        print(mode, len(self.data))

    def __getitem__(self, index):
        """Return the float32 image, plus an int64 label array unless in test mode."""
        # Index the underlying dataset once so the sample is not fetched
        # and transformed a second time just to read the label.
        sample = self.data[index]
        data = sample[0].astype('float32')
        if self.mode == 'test':
            return data
        label = np.array([sample[1]]).astype('int64')
        return data, label

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.data)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def convert_to_list(value, n, name, dtype=np.int):


# 模型结构搭建

In [2]:
#构建HarDNet68网络
import paddle
import paddle.nn as nn
from math import ceil
from paddle.vision.models import resnet50
import pickle
import numpy as np

class ConvBNLayer(nn.Layer):
    """Conv2D followed by BatchNorm2D and an optional activation.

    Args:
        in_channels: Number of input channels of the convolution.
        channels: Number of output channels (also the BatchNorm width).
        kernel: Convolution kernel size.
        stride: Convolution stride. A stride of 2 uses the padding
            ``[1, 0, 1, 0]``; any other stride uses ``kernel // 2``.
        pad: Accepted for interface compatibility; not used.
        num_group: Passed to the convolution as ``groups``.
        bias: Passed to the convolution as ``bias_attr``.
        act: ``'swish'``, ``'relu'`` or ``'relu6'``. Any other value yields
            a block without an activation.
    """

    def __init__(self, in_channels, channels, kernel=3, stride=1, pad=0, num_group=1, bias=False, act="relu6"):
        super(ConvBNLayer, self).__init__()

        padding = [1, 0, 1, 0] if stride == 2 else kernel // 2
        stages = [
            nn.Conv2D(in_channels, channels, kernel, stride, padding, groups=num_group, bias_attr=bias),
            nn.BatchNorm2D(channels),
        ]

        # The activation, when present, is always the third sub-layer.
        if act == 'swish':
            stages.append(nn.Swish())
        elif act == 'relu':
            stages.append(nn.ReLU())
        elif act == 'relu6':
            stages.append(nn.ReLU6())

        self.conv_bn = nn.Sequential(*stages)

    def forward(self, inputs):
        """Apply conv -> batch norm -> (activation) to ``inputs``."""
        return self.conv_bn(inputs)

class HarDBlock(nn.Layer):
    """Harmonic dense block built from ``ConvBNLayer`` units.

    Layer ``k`` (1-based) takes as input the concatenation of layers
    ``k - 2**i`` for every ``i`` in ``range(10)`` where ``2**i`` divides
    ``k``; index 0 denotes the block input. The block output concatenates
    the odd-indexed layers and the last layer, plus the block input when
    ``keepBase`` is true.

    Args:
        in_channels: Channel count of the block input.
        growth_rate: Base output width of each layer.
        grmul: Multiplier applied to a layer's width once per matching
            power of two above ``2**0``.
        n_layers: Number of conv layers in the block.
        keepBase: Whether the block input is part of the output.
        residual_out: Accepted for interface compatibility; not used.
        dwconv: Accepted for interface compatibility; not used.
    """

    def __init__(self, in_channels, growth_rate, grmul, n_layers, keepBase=False, residual_out=False, dwconv=False):
        super(HarDBlock, self).__init__()

        self.keepBase = keepBase
        self.links = []
        self.out_channels = 0
        convs = []
        final_idx = n_layers - 1
        for idx in range(n_layers):
            out_ch, in_ch, sources = self.get_link(idx + 1, in_channels, growth_rate, grmul)
            self.links.append(sources)
            convs.append(ConvBNLayer(in_ch, out_ch))
            # Mirrors the selection in forward(): layers at odd 1-based
            # positions and the final layer contribute to the output width.
            if idx % 2 == 0 or idx == final_idx:
                self.out_channels += out_ch
        self.layers = nn.LayerList(convs)

    def get_link(self, layer, base_ch, growth_rate, grmul):
        """Return ``(out_channels, in_channels, link)`` for layer ``layer``.

        ``link`` lists the indices of the layers feeding this one,
        ``in_channels`` is the sum of their output widths, and
        ``out_channels`` is this layer's own width snapped to an even
        integer. Layer 0 is the block input: ``(base_ch, 0, [])``.
        """
        if layer == 0:
            return base_ch, 0, []

        out_channels = growth_rate
        link = []
        for exponent in range(10):
            step = 2 ** exponent
            if layer % step != 0:
                continue
            link.append(layer - step)
            if exponent > 0:
                out_channels *= grmul
        out_channels = int(int(out_channels + 1) / 2) * 2

        # The input width is the total output width of every linked layer.
        in_channels = sum(
            self.get_link(src, base_ch, growth_rate, grmul)[0] for src in link
        )
        return out_channels, in_channels, link

    def get_out_ch(self):
        """Return the channel count accumulated in ``__init__``."""
        return self.out_channels

    def forward(self, x):
        """Run every layer on its linked inputs and concatenate the kept outputs."""
        feats = [x]
        for conv, sources in zip(self.layers, self.links):
            inputs = [feats[src] for src in sources]
            x = paddle.concat(x=inputs, axis=1) if len(inputs) > 1 else inputs[0]
            feats.append(conv(x))

        last = len(feats) - 1
        kept = [
            feat
            for pos, feat in enumerate(feats)
            if (pos == 0 and self.keepBase) or pos == last or pos % 2 == 1
        ]
        return paddle.concat(x=kept, axis=1)

class HarDNet68(nn.Layer):
    """HarDNet-68 classifier assembled as a single ``nn.Sequential``.

    Args:
        cls_num: Output size of the final linear layer.
    """

    def __init__(self, cls_num=1000):
        super(HarDNet68, self).__init__()

        # One row per stage:
        # (transition channels, growth rate, layers in block, 2x2 max-pool after?)
        stages = [
            (128, 14, 8, True),
            (256, 16, 16, False),
            (320, 20, 16, True),
            (640, 40, 16, True),
            (1024, 160, 4, False),
        ]
        grmul = 1.7
        drop_rate = 0.1

        # Stem: two convolutions followed by a max-pool.
        modules = [
            ConvBNLayer(3, 32, kernel=3, stride=2, bias=False),
            ConvBNLayer(32, 64, kernel=3),
            nn.MaxPool2D(kernel_size=3, stride=2, padding=1),
        ]

        ch = 64
        for trans_ch, growth, depth, pool_after in stages:
            block = HarDBlock(ch, growth, grmul, depth, dwconv=False)
            modules.append(block)
            # 1x1 transition conv maps the block output to the stage width.
            modules.append(ConvBNLayer(block.get_out_ch(), trans_ch, kernel=1))
            ch = trans_ch
            if pool_after:
                modules.append(nn.MaxPool2D(kernel_size=2, stride=2))

        # Classification head.
        modules.extend([
            nn.AdaptiveAvgPool2D(output_size=1),
            nn.Flatten(),
            nn.Dropout(drop_rate),
            nn.Linear(ch, cls_num),
        ])

        self.base = nn.Sequential(*modules)

    def forward(self, x):
        """Pass ``x`` through every sub-layer of ``self.base`` in order."""
        return self.base(x)

#  精度对齐

因为是简单的图像分类模型，这里只做一个相同输入下的输出结果验证

**torch的最终输出:**

![](https://ai-studio-static-online.cdn.bcebos.com/f6bc4c38ba9d4d09a107793f049c3b39bdba95e066114982a29f21c8aad9ea1e)

**paddle的最终输出:**

![](https://ai-studio-static-online.cdn.bcebos.com/5c2edf3f5ced4471bbcb2040738200076eb7d7cd0b134698bf48c069d6b378d9)

# 训练模型

由于训练集特别大, AIStudio暂时还承受不了, 这里只用验证集数据训练了两轮

In [3]:
# 在AIStudio里测试时加载的数据集
import cv2
from PIL import Image
# Preprocessing pipeline: bicubic resize to 256, 224 centre crop,
# tensor conversion, then per-channel normalisation.
transforms = T.Compose([
    T.Resize(size=256, interpolation='bicubic'),
    T.CenterCrop(size=224),
    T.ToTensor(),
    T.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# 构建数据集
class ILSVRC2012(paddle.io.Dataset):
    """Image dataset driven by a plain-text label list.

    Every non-empty line of ``label_list`` has the form
    ``<image path relative to root> <integer label>``.

    Args:
        root: Directory the listed image paths are joined onto.
        label_list: Path of the text file described above.
        transform: Callable applied to each loaded image; its result must
            provide ``astype``.
        backend: ``'cv2'`` reads images with OpenCV; any other value reads
            them with PIL. Both backends hand an RGB image to ``transform``.
    """

    def __init__(self, root, label_list, transform, backend='pil'):
        self.transform = transform
        self.root = root
        self.label_list = label_list
        self.backend = backend
        self.load_datas()

    def load_datas(self):
        """Parse ``label_list`` into ``self.imgs`` (paths) and ``self.labels`` (ints)."""
        self.imgs = []
        self.labels = []
        with open(self.label_list, 'r') as f:
            for line in f:
                # strip() rather than line[:-1]: the latter cuts the last
                # digit of the label when the final line has no newline.
                line = line.strip()
                if not line:
                    continue  # tolerate blank lines
                # Split on the last whitespace run so the label is always
                # the final field.
                img, label = line.rsplit(None, 1)
                self.imgs.append(os.path.join(self.root, img))
                self.labels.append(int(label))

    def __getitem__(self, idx):
        """Return ``(float32 image, int64 label)`` for sample ``idx``.

        Raises:
            OSError: If the cv2 backend cannot read the image file.
        """
        label = self.labels[idx]
        path = self.imgs[idx]
        if self.backend == 'cv2':
            image = cv2.imread(path)
            if image is None:
                # cv2.imread signals failure by returning None, not raising.
                raise OSError('failed to read image: {}'.format(path))
            # OpenCV decodes to BGR; convert so both backends feed RGB into
            # the shared transform.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        else:
            # Close the file handle as soon as the pixel data is converted.
            with Image.open(path) as handle:
                image = handle.convert('RGB')
        image = self.transform(image)
        return image.astype('float32'), np.array(label).astype('int64')

    def __len__(self):
        """Return the number of listed images."""
        return len(self.imgs)

# Validation split: images under data/ILSVRC2012, labels from val_list.txt.
val_dataset = ILSVRC2012(
    root='data/ILSVRC2012',
    label_list='data/data68594/val_list.txt',
    transform=transforms,
)

In [4]:
# Write a checkpoint into ./checkpoints after every epoch.
callback = paddle.callbacks.ModelCheckpoint(save_freq=1, save_dir='./checkpoints')

# Build the network and wrap it in the high-level paddle.Model API.
# No saved weights are loaded in this cell.
model = HarDNet68(cls_num=1000)
run_model = paddle.Model(model)

# SGD with weight decay, cross-entropy loss and an accuracy metric.
optim = paddle.optimizer.SGD(
    learning_rate=0.0001,
    weight_decay=6e-5,
    parameters=run_model.parameters(),
)
run_model.prepare(
    optimizer=optim,
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(),
)

# The validation split is used as both the training and the evaluation data.
run_model.fit(
    train_data=val_dataset,
    eval_data=val_dataset,
    epochs=2,
    batch_size=256,
    callbacks=callback,
    verbose=1,
)

The loss value printed in the log is the current step, and the metric is the average value of previous step.
Epoch 1/2


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if data.dtype == np.object:
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if isinstance(slot[0], (np.ndarray, np.bool, numbers.Number)):
  return (isinstance(seq, collections.Sequence) and
  "When training, we now always track global mean and variance.")


save checkpoint at /home/aistudio/checkpoints/0
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
Eval samples: 50000
Epoch 2/2
save checkpoint at /home/aistudio/checkpoints/1
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
Eval samples: 50000
save checkpoint at /home/aistudio/checkpoints/final


# 验证模型

验证的最终效果能接近论文的精度

In [5]:
# Rebuild the network and restore the saved weights.
model = HarDNet68(cls_num=1000)
model_state_dict = paddle.load("/home/aistudio/work/hardnet_best.pdparams")
model.set_state_dict(model_state_dict)
run_model = paddle.Model(model)

# Configure the wrapper with the same optimizer, loss and metric as the
# training cell, then evaluate on the validation split.
optim = paddle.optimizer.SGD(
    learning_rate=0.0001,
    weight_decay=6e-5,
    parameters=run_model.parameters(),
)
run_model.prepare(
    optimizer=optim,
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(),
)
run_model.evaluate(val_dataset, batch_size=256, verbose=1)

Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
Eval samples: 50000


{'loss': [0.9376896], 'acc': 0.7605}

# 总结

因训练硬件资源有限，本次复现过程还有很多缺失和不足，后续持续改进。

请点击[此处](https://ai.baidu.com/docs#/AIStudio_Project_Notebook/a38e5576)查看本环境基本用法.  <br>
Please click [here ](https://ai.baidu.com/docs#/AIStudio_Project_Notebook/a38e5576) for more detailed instructions. 