# 模型介绍

**Deep Layer Aggregation**

github pytorch代码: [https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/dla.py](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/dla.py)

论文地址: [https://arxiv.org/pdf/1707.06484.pdf](https://arxiv.org/pdf/1707.06484.pdf)
- 作者探讨了深度网络的跨层信息融合， skip connection一定程度上也浅化了自身网络。
- 更多非线性，较大网络容量和更大的感受野一方面提升精度，但是对优化和计算比较麻烦。
- 作者提出的想法是结合FPN和densenet， 从垂直到迭代短连接，深化表示层和分辨率。
- 设计两种层聚合，包括迭代深度聚合(IDA)以及层级聚合(HDA)。IDA主要是融合分辨率和尺度(空间信息where)， HDA主要是融合各个模块和通道的特征(语义信息what)， 最终提升了性能，参数量和内存使用。
- 尝试的不同聚合方式，b. 只是跟最后一层进行融合，通常用于语义分割和目标检测，c. 中间层迭代短连接，循环使用最浅的网络层， d. 树形结构跨不同深度，e和f进一步优化d，将中间聚合结果导回到原来的网络结构，在同一深度连续融合提升效果。本文主要基于c和f进行识别和分解。
- HDA一定程度上解决了梯度消失和爆炸的问题， 残差连接只有在4层以上的网络有用。

![](https://ai-studio-static-online.cdn.bcebos.com/0e135a135ec144feb2aed45a59ee89aa4d9ee334fb8244d79553233e96477bb4)


# 关于数据集ImageNet

ImageNet图像数据集始于2009年，当时李飞飞教授等在CVPR2009上发表了一篇名为《ImageNet: A Large-Scale Hierarchical Image Database》的论文，之后就是基于ImageNet数据集的7届ImageNet挑战赛(2010年开始)，2017年后，ImageNet由Kaggle(Kaggle公司是由联合创始人兼首席执行官Anthony Goldbloom 2010年在墨尔本创立的，主要是为开发商和数据科学家提供举办机器学习竞赛、托管数据库、编写和分享代码的平台)继续维护。

本AIStudio项目在线下进行的训练， 所以只使用了验证集进行验证

![](https://ai-studio-static-online.cdn.bcebos.com/1e8613aebb754b96bc799dd3c0c51278da5ab0599e264467912c9e2782821a24)


In [None]:
#数据集解压
!mkdir ~/data/ILSVRC2012
!tar -xf ~/data/data68594/ILSVRC2012_img_val.tar -C ~/data/ILSVRC2012

In [None]:
#加载数据集
import os
import shutil
import numpy as np
import paddle
from paddle.io import Dataset
from paddle.vision.datasets import DatasetFolder, ImageFolder
# from paddle.vision.transforms import Compose, Resize, Transpose, Normalize
import paddle.vision.transforms as T
# Every split currently resolves to the same directory, so the mapping is
# generated from a single root path instead of repeating the literal.
_ILSVRC2012_ROOT = '/home/aistudio/data/ILSVRC2012'
train_parameters = {
    split_key: _ILSVRC2012_ROOT
    for split_key in ('train_image_dir', 'eval_image_dir', 'test_image_dir')
}

class CatDataset(Dataset):
    """Folder-backed image dataset selected by ``mode``.

    The directory for each mode is read from the module-level
    ``train_parameters`` dict (``'<mode>_image_dir'``).

    Args:
        mode: ``'train'`` or ``'eval'`` wrap the directory in a
            ``DatasetFolder`` and yield ``(image, label)``; ``'test'`` wraps
            it in an ``ImageFolder`` and yields the image only.

    Raises:
        ValueError: if ``mode`` is not one of the three supported values.
    """

    _MODES = ('train', 'eval', 'test')

    def __init__(self, mode='train'):
        super(CatDataset, self).__init__()
        if mode not in self._MODES:
            # Previously an unknown mode left ``self.data`` unset and failed
            # later with an unrelated AttributeError.
            raise ValueError(
                "mode must be one of {}, got {!r}".format(self._MODES, mode))
        self.mode = mode

        data_transforms = T.Compose([
            T.Resize(256, interpolation='bicubic'),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        # Build only the folder dataset this mode needs: each constructor
        # scans its directory, and a layout that is invalid for one folder
        # type should not break an unrelated mode.
        image_dir = train_parameters[mode + '_image_dir']
        if mode == 'test':
            self.data = ImageFolder(image_dir, transform=data_transforms)
        else:
            self.data = DatasetFolder(image_dir, transform=data_transforms)
        print(mode, len(self.data))

    def __getitem__(self, index):
        """Return the float32 image, plus an int64 label array of shape (1,) unless in test mode."""
        # Index the wrapped dataset once; the original indexed it twice per
        # sample, so the image was fetched (and transformed) twice.
        sample = self.data[index]
        data = sample[0].astype('float32')
        if self.mode == 'test':
            return data
        label = np.array([sample[1]]).astype('int64')
        return data, label

    def __len__(self):
        """Number of samples in the wrapped folder dataset."""
        return len(self.data)

# 模型结构搭建

In [None]:
import paddle.vision.transforms as T
from PIL import Image
import paddle.optimizer as opt
import paddle.distributed as dist
import paddle

import os
# Make GPUs 0-3 visible to this process, then select the first one as the
# active Paddle device.
# NOTE(review): `paddle` has already been imported above this assignment;
# confirm that setting CUDA_VISIBLE_DEVICES this late still takes effect.
os.environ['CUDA_VISIBLE_DEVICES'] = "0,1,2,3"
paddle.set_device('gpu:0')


import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from math import ceil
import pickle
import numpy as np
import math

#DLA瓶颈模块
class DlaBottleneck(nn.Layer):
    """DLA/DLA-X bottleneck block.

    Pipeline: 1x1 conv -> BN -> ReLU -> 3x3 conv (strided / grouped / dilated)
    -> BN -> ReLU -> 1x1 conv -> BN -> add residual -> ReLU.

    Args:
        inplanes: input channel count.
        outplanes: output channel count.
        stride: stride of the 3x3 convolution.
        dilation: dilation of the 3x3 convolution.
        cardinality: number of groups of the 3x3 convolution.
        base_width: scales the hidden width as ``outplanes * base_width / 64``.
    """
    expansion = 2

    def __init__(self, inplanes, outplanes, stride=1, dilation=1, cardinality=1, base_width=64):
        super(DlaBottleneck, self).__init__()

        self.stride = stride
        mid_planes = int(math.floor(outplanes * (base_width / 64)) * cardinality)
        mid_planes = mid_planes // self.expansion

        self.conv1 = nn.Conv2D(inplanes, mid_planes, kernel_size=1, bias_attr=False)
        self.bn1 = nn.BatchNorm2D(mid_planes)

        # Pick the padding first and build conv2 exactly once (the layer used
        # to be constructed and then immediately replaced when stride == 2).
        # Stride-2 blocks use the asymmetric 4-element padding [1, 0, 1, 0];
        # every other stride pads symmetrically by `dilation`.
        # NOTE(review): the asymmetric form is hard-coded for dilation == 1.
        conv2_padding = [1, 0, 1, 0] if stride == 2 else dilation
        self.conv2 = nn.Conv2D(
            mid_planes, mid_planes, kernel_size=3, stride=stride, padding=conv2_padding,
            bias_attr=False, dilation=dilation, groups=cardinality)
        self.bn2 = nn.BatchNorm2D(mid_planes)

        self.conv3 = nn.Conv2D(mid_planes, outplanes, kernel_size=1, bias_attr=False)
        self.bn3 = nn.BatchNorm2D(outplanes)
        self.relu = nn.ReLU()
        self.outplanes = outplanes

    def forward(self, x, residual=None):
        """Run the block; ``residual`` defaults to the input ``x`` when omitted."""
        if residual is None:
            residual = x

        out = self.conv1(x)  # verified OK at a breakpoint (original author's note)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        # Original author's note: values matched at a breakpoint here, but a
        # noticeable difference showed up when the whole network was run.
        out = self.relu(out)

        out = self.conv3(out)  # verified OK at a breakpoint (original author's note)
        out = self.bn3(out)
        out += residual
        out = self.relu(out)
        return out


#没找到对应接口，手写一个
class Identity(nn.Layer):
    """Pass-through layer: ``forward`` hands back its input untouched."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x

class DlaRoot(nn.Layer):
    """Aggregation node: concatenate inputs on axis 1, then 1x1 conv -> BN -> ReLU.

    Args:
        in_channels: channel count of the concatenated input.
        out_channels: channel count produced by the convolution.
        kernel_size: only used to derive the padding, ``(kernel_size - 1) // 2``;
            the convolution kernel itself is fixed at 1.
        residual: when truthy, the first input is added before the final ReLU.
    """

    def __init__(self, in_channels, out_channels, kernel_size, residual):
        super().__init__()
        pad = (kernel_size - 1) // 2
        self.conv = nn.Conv2D(
            in_channels, out_channels, 1, stride=1, padding=pad, bias_attr=False)
        self.bn = nn.BatchNorm2D(out_channels)
        self.relu = nn.ReLU()
        self.residual = residual

    def forward(self, *x):
        inputs = x
        fused = paddle.concat(inputs, axis=1)
        fused = self.bn(self.conv(fused))
        if self.residual:
            # Shortcut from the first input passed to the node.
            fused += inputs[0]
        return self.relu(fused)

#树状连接
class DlaTree(nn.Layer):
    """Recursive aggregation tree.

    With ``levels == 1`` the tree holds two ``block`` instances and a
    ``DlaRoot`` that fuses their outputs together with any collected
    ``children``. With ``levels > 1`` both subtrees are ``DlaTree`` instances
    one level shallower, and fusion is done by the root of the second subtree.

    Args:
        levels: depth of the tree (1 = leaf pair plus root).
        block: block class, called as
            ``block(in_ch, out_ch, stride, dilation=, cardinality=, base_width=)``.
        in_channels: input channel count.
        out_channels: output channel count.
        stride: stride of the first subtree; also the max-pool size and stride
            on the shortcut path when greater than 1.
        dilation, cardinality, base_width: forwarded to ``block``.
        level_root: when true, the downsampled input is also fed to the root.
        root_dim: input channel count of the root; 0 means ``2 * out_channels``.
        root_kernel_size, root_residual: forwarded to ``DlaRoot``.
    """

    def __init__(self, levels, block, in_channels, out_channels, stride=1, dilation=1, cardinality=1, base_width=64,
                 level_root=False, root_dim=0, root_kernel_size=1, root_residual=False):
        super(DlaTree, self).__init__()
        if root_dim == 0:
            root_dim = 2 * out_channels
        if level_root:
            # The root additionally receives the downsampled input.
            root_dim += in_channels

        self.downsample = nn.MaxPool2D(stride, stride=stride) if stride > 1 else Identity()
        self.project = Identity()

        cargs = dict(dilation=dilation, cardinality=cardinality, base_width=base_width)
        if levels == 1:
            self.tree1 = block(in_channels, out_channels, stride, **cargs)
            self.tree2 = block(out_channels, out_channels, 1, **cargs)
            if in_channels != out_channels:
                # 1x1 projection so the shortcut matches the block's output channels.
                self.project = nn.Sequential(
                    nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=False),
                    nn.BatchNorm2D(out_channels))
        else:
            cargs.update(dict(root_kernel_size=root_kernel_size, root_residual=root_residual))
            self.tree1 = DlaTree(
                levels - 1, block, in_channels, out_channels, stride, root_dim=0, **cargs)
            # The second subtree's root also absorbs the first subtree's output.
            self.tree2 = DlaTree(
                levels - 1, block, out_channels, out_channels, root_dim=root_dim + out_channels, **cargs)

        if levels == 1:
            self.root = DlaRoot(root_dim, out_channels, root_kernel_size, root_residual)

        self.level_root = level_root
        self.root_dim = root_dim
        self.levels = levels

        self.out_channels = out_channels

    def forward(self, x, residual=None, children=None):
        """Run the tree.

        Args:
            x: input tensor.
            residual: accepted for interface compatibility; it is always
                recomputed from ``x`` below.
            children: list of tensors gathered by enclosing trees. A list
                passed in by the caller is appended to in place.
        """
        children = [] if children is None else children
        bottom = self.downsample(x)
        residual = self.project(bottom)
        if self.level_root:
            children.append(bottom)
        x1 = self.tree1(x, residual)

        if self.levels == 1:
            x2 = self.tree2(x1)
            x = self.root(x2, x1, *children)
        else:
            # The leftover debug print that fired for out_channels == 1024 was removed.
            children.append(x1)
            x = self.tree2(x1, children=children)
        return x

#DLA60模型
class DLA60(nn.Layer):
    """DLA-60 image classifier.

    Layout: 7x7 stem, two plain conv levels, four ``DlaTree`` levels, global
    average pooling and a 1x1-conv classifier.

    Args:
        output_stride: must be 32 (dilated variants are not supported).
        num_classes: number of classifier outputs.
        in_chans: number of input image channels.
        cardinality, base_width: forwarded to every ``block``.
        block: bottleneck class used inside the trees.
        residual_root: forwarded to the trees as ``root_residual``.
        drop_rate: dropout probability applied before the classifier.
        global_pool: accepted for interface compatibility but not read;
            pooling is always ``AdaptiveAvgPool2D(1)``.
    """

    def __init__(self, output_stride=32, num_classes=1000, in_chans=3,
                 cardinality=1, base_width=64, block=DlaBottleneck, residual_root=False,
                 drop_rate=0.0, global_pool='avg'):
        super(DLA60, self).__init__()

        # DLA-60 configuration: tree depth and channel width per level.
        levels = [1, 1, 1, 2, 3, 1]
        channels = [16, 32, 128, 256, 512, 1024]
        self.channels = channels
        self.num_classes = num_classes
        self.cardinality = cardinality
        self.base_width = base_width
        self.drop_rate = drop_rate
        assert output_stride == 32  # FIXME support dilation

        self.base_layer = nn.Sequential(
            nn.Conv2D(in_chans, channels[0], 7, 1, 3, bias_attr=False),
            nn.BatchNorm2D(channels[0]),
            nn.ReLU())

        self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
        self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], stride=2)

        cargs = dict(cardinality=cardinality, base_width=base_width, root_residual=residual_root)
        self.level2 = DlaTree(levels[2], block, channels[1], channels[2], 2, level_root=False, **cargs)
        self.level3 = DlaTree(levels[3], block, channels[2], channels[3], 2, level_root=True, **cargs)
        self.level4 = DlaTree(levels[4], block, channels[3], channels[4], 2, level_root=True, **cargs)
        self.level5 = DlaTree(levels[5], block, channels[4], channels[5], 2, level_root=True, **cargs)

        self.feature_info = [
            dict(num_chs=channels[0], reduction=1, module='level0'),  # rare to have a meaningful stride 1 level
            dict(num_chs=channels[1], reduction=2, module='level1'),
            dict(num_chs=channels[2], reduction=4, module='level2'),
            dict(num_chs=channels[3], reduction=8, module='level3'),
            dict(num_chs=channels[4], reduction=16, module='level4'),
            dict(num_chs=channels[5], reduction=32, module='level5'),
        ]

        self.num_features = channels[-1]
        self.pool = nn.AdaptiveAvgPool2D(1)
        # The classifier is a 1x1 convolution over the pooled 1x1 feature map.
        self.fc = nn.Conv2D(self.num_features, num_classes, 1, bias_attr=True)

    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
        """Stack ``convs`` repetitions of 3x3 conv -> BN -> ReLU.

        Only the first convolution uses ``stride``; the rest use stride 1.
        Padding equals ``dilation``.
        """
        modules = []
        for i in range(convs):
            modules.extend([
                nn.Conv2D(inplanes, planes, kernel_size=3, stride=stride if i == 0 else 1,
                          padding=dilation, bias_attr=False, dilation=dilation),
                nn.BatchNorm2D(planes),
                nn.ReLU()])

            inplanes = planes
        return nn.Sequential(*modules)

    def forward_features(self, x):
        """Run the stem and all six levels; returns the last feature map."""
        x = self.base_layer(x)
        x = self.level0(x)
        x = self.level1(x)
        x = self.level2(x)
        x = self.level3(x)
        x = self.level4(x)
        x = self.level5(x)
        return x

    def forward(self, x):
        """Return class logits flattened from axis 1 onward."""
        x = self.forward_features(x)
        x = self.pool(x)
        if self.drop_rate > 0.:
            # `dropout_prob` is the legacy fluid keyword; the 2.x functional
            # API takes `p`. Passing `training` keeps dropout off in eval mode.
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        x = self.fc(x)
        x = x.flatten(1)  # conv classifier, flatten if pooling isn't pass-through (disabled)
        return x


#  精度对齐

因为是简单的图像分类模型，这里只做一个相同输入下的输出结果验证

**torch的输出**

![](https://ai-studio-static-online.cdn.bcebos.com/97745d1de1414aa9ad49fb6ee47208d8fe460e9cd1fd4968947b331a03ef1a10)

**paddle的输出**

![](https://ai-studio-static-online.cdn.bcebos.com/8595a35a04d94012b60c2f71360a74766445bde9520f46199515aef0ac4f772d)

**验证集上验证**

step 98/98 [==============================] - loss: 0.7075 - acc: 0.7682 - 11s/step 离提交要求还有轻微距离， 要求是0.769 训练时间比较慢近5h一轮


# 训练模型

由于训练集特别大, AIStudio暂时还受不了, 这里只用验证集数据训练了两轮

In [None]:
# 在AIStudio里测试时加载的数据集
import cv2
# Preprocessing pipeline: resize to 256 (bicubic), center-crop to 224,
# convert to a tensor, then normalise each channel with the mean/std below.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_pipeline_steps = [
    T.Resize(256, interpolation='bicubic'),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transforms = T.Compose(_pipeline_steps)

# 构建数据集
class ILSVRC2012(paddle.io.Dataset):
    """Image dataset driven by a text list of ``<relative_path> <label>`` lines.

    Args:
        root: directory that the relative image paths are joined onto.
        label_list: path of the text file listing images and integer labels.
        transform: callable applied to every loaded image.
        backend: ``'cv2'`` reads with ``cv2.imread``; any other value reads
            with PIL and converts to RGB.
    """

    def __init__(self, root, label_list, transform, backend='pil'):
        super().__init__()
        self.transform = transform
        self.root = root
        self.label_list = label_list
        self.backend = backend
        self.load_datas()

    def load_datas(self):
        """Parse ``label_list`` into ``self.imgs`` (full paths) and ``self.labels`` (ints)."""
        self.imgs = []
        self.labels = []
        with open(self.label_list, 'r') as f:
            for line in f:
                # strip() rather than line[:-1]: the final line may lack a
                # trailing newline, in which case slicing cut off the last
                # digit of the label.
                line = line.strip()
                if not line:
                    # Blank lines (e.g. at end of file) used to crash the unpack.
                    continue
                # Split on the last whitespace run so the label is always the
                # final field.
                img, label = line.rsplit(None, 1)
                self.imgs.append(os.path.join(self.root, img))
                self.labels.append(int(label))

    def __getitem__(self, idx):
        """Return ``(image as float32, label as int64 scalar array)``."""
        label = self.labels[idx]
        image = self.imgs[idx]
        if self.backend == 'cv2':
            # NOTE(review): cv2.imread yields BGR channel order while the PIL
            # branch yields RGB; confirm the transform's channel statistics
            # match before relying on this backend.
            image = cv2.imread(image)
        else:
            image = Image.open(image).convert('RGB')
        image = self.transform(image)
        return image.astype('float32'), np.array(label).astype('int64')

    def __len__(self):
        """Number of listed images."""
        return len(self.imgs)

# Validation split: images under data/ILSVRC2012, labels from val_list.txt.
val_dataset = ILSVRC2012(
    root='data/ILSVRC2012',
    label_list='data/data68594/val_list.txt',
    transform=transforms,
)


In [None]:
#保存训练结果
callback = paddle.callbacks.ModelCheckpoint(save_dir='./checkpoints', save_freq=1)

#加载模型及预训练参数
model = DLA60(num_classes=1000)
run_model = paddle.Model(model)

#模型训练
optim = paddle.optimizer.SGD(learning_rate=0.0001, weight_decay=6e-5, parameters=run_model.parameters())
run_model.prepare(optimizer= optim,
              loss=paddle.nn.CrossEntropyLoss(),
              metrics=paddle.metric.Accuracy())
run_model.fit(val_dataset, val_dataset, epochs=2, batch_size=256, callbacks=callback, verbose=1)

The loss value printed in the log is the current step, and the metric is the average value of previous step.
Epoch 1/2


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if data.dtype == np.object:
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if isinstance(slot[0], (np.ndarray, np.bool, numbers.Number)):
  return (isinstance(seq, collections.Sequence) and
  "When training, we now always track global mean and variance.")


save checkpoint at /home/aistudio/checkpoints/0
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
Eval samples: 50000
Epoch 2/2
save checkpoint at /home/aistudio/checkpoints/1
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
Eval samples: 50000
save checkpoint at /home/aistudio/checkpoints/final


# 验证模型

模型验证的结果非常接近论文的精度

In [7]:
# Rebuild the network and restore the saved weights.
model = DLA60(num_classes=1000)
model_state_dict = paddle.load("/home/aistudio/work/dla60_best.pdparams")
model.set_state_dict(model_state_dict)

# Wrap in the high-level API with the same optimizer, loss and metric
# configuration as the training cell.
run_model = paddle.Model(model)
optim = paddle.optimizer.SGD(
    parameters=run_model.parameters(),
    learning_rate=0.0001,
    weight_decay=6e-5,
)
run_model.prepare(
    optimizer=optim,
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(),
)

# Evaluate on the validation set.
run_model.evaluate(val_dataset, batch_size=512, verbose=1)

Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
Eval samples: 50000


{'loss': [1.015959], 'acc': 0.76638}

# 总结

因训练硬件资源和时间有限，本次复现过程还有很多缺失和不足，后续持续改进。

请点击[此处](https://ai.baidu.com/docs#/AIStudio_Project_Notebook/a38e5576)查看本环境基本用法.  <br>
Please click [here ](https://ai.baidu.com/docs#/AIStudio_Project_Notebook/a38e5576) for more detailed instructions. 