In [1]:
import math
import numpy as np
import torch
from kitti.kitti_dataset import get_dataloader
import yaml
from easydict import EasyDict
from pathlib import Path
from basic.utils.vis_utils import VisualWindow
# %matplotlib inline
# Release unused cached GPU memory held by PyTorch's caching allocator.
torch.cuda.empty_cache()
# Device string used by later `.to(device)` calls; falls back to CPU when CUDA is unavailable.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# DataLoader

In [2]:
# Dataset configuration, given relative to the notebook's working directory.
cfg_path = Path("../kitti/cfg/kitti_dataset.yaml")
batch_size = 4
# Only 'Car' is loaded here; 'Pedestrian' and 'Cyclist' are the other candidate classes.
dataloader = get_dataloader(
    data_cfg_path=cfg_path,
    class_name_list=['Car'],
    batch_size=batch_size,
)

  cfg = EasyDict(yaml.load(f))


In [3]:
from basic.utils.common_utils import put_data_to_gpu

# Fetch exactly one batch to drive the per-module tests below.
# next(iter(...)) replaces the manual `for ... break` loop and does not leave a
# stray loop variable behind in the notebook namespace.
test_data = next(iter(dataloader))
# Move the batch to the GPU. Some entries are deliberately left as np.ndarray
# (frame_id, for example, is still an array in the displayed result).
test_data = put_data_to_gpu(test_data)
print("input voxels shape:", test_data['voxels'].shape)
test_data

input voxels shape: torch.Size([63662, 5, 4])


{'frame_id': array(['006631', '006627', '006382', '006995'], dtype='<U6'),
 'gt_boxes': tensor([[[ 2.9326e+01,  8.4339e+00, -7.4018e-01,  3.8006e+00,  1.5304e+00,
            1.5709e+00, -2.7242e+00,  1.0000e+00],
          [ 4.9416e+01, -1.9758e+00, -5.5213e-01,  3.9830e+00,  1.6013e+00,
            1.9459e+00, -2.7342e+00,  1.0000e+00],
          [ 3.0372e+01,  1.2644e+01, -7.9539e-01,  4.5607e+00,  1.6520e+00,
            1.6418e+00,  4.1902e-01,  1.0000e+00],
          [ 4.3164e+01, -3.0087e+00, -6.7769e-01,  4.2161e+00,  1.8344e+00,
            1.4493e+00, -2.7042e+00,  1.0000e+00],
          [ 1.2676e+01,  1.5234e+01, -5.4629e-01,  3.3242e+00,  1.6013e+00,
            1.5405e+00, -1.8442e+00,  1.0000e+00],
          [ 3.2002e+01,  2.2141e+01, -2.8475e-01,  3.7499e+00,  1.6925e+00,
            1.7331e+00, -1.1410e+00,  1.0000e+00],
          [ 1.0148e+01,  8.7813e+00, -7.8756e-01,  3.4762e+00,  1.6925e+00,
            1.6013e+00,  5.6902e-01,  1.0000e+00],
          [ 5.4691e+00, 

# Model cfg
最终的目的是用模型配置文档（yaml）直接生成模型。但是下面只是逐一单独测试每一个模块
- model cfg 中包含各个模块的配置：module cfg
- 在模型全局中使用model_info_dict记录一些必要的模型信息


In [4]:
from pprint import pprint
from basic.utils.config_utils import cfg_from_yaml_file

model_cfg = cfg_from_yaml_file('../basic/model/model_cfg/second.yaml')
data_infos = dataloader.dataset.get_data_infos()
# Shared, mutable record of model-wide information; each module cell below
# appends itself to 'module_list' and refreshes the feature-dimension entries.
model_info_dict = dict(module_list=[], training=True)
model_info_dict.update(data_infos)
pprint(model_cfg)

{'DATASET_CONFIG': {'CONFIG_PATH': '/home/ph/Desktop/PointCloud/utils_my/kitti/cfg/kitti_dataset.yaml',
                    'DATASET': 'KittiDataset',
                    'DATA_AUGMENTOR': {'AUG_CONFIG_LIST': [{'DATABASE_WITH_FAKELIDAR': False,
                                                            'DB_INFO_PATH': ['db_infos_train.pkl'],
                                                            'LIMIT_WHOLE_SCENE': True,
                                                            'NAME': 'gt_sampling',
                                                            'NUM_POINT_FEATURES': 4,
                                                            'PREPARE': {'filter_by_difficulty': [-1],
                                                                        'filter_by_min_points': ['Car:5',
                                                                                                 'Pedestrian:5',
                                                                               

初始模型信息，注意经过每一个模块处理后,更新以下信息。
- 更新module_list记录的模块
- 当前特征图中每个点的特征维度
- 后面模块可能会使用到的当前模块的一些信息

In [5]:
# Dump the shared model metadata, one "key:value" pair per line.
for info_name, info_value in model_info_dict.items():
    print(f"{info_name}:{info_value}")

module_list:[]
training:True
raw_point_feature_dims:4
cur_point_feature_dims:4
point_cloud_range:[  0.  -40.   -3.   70.4  40.    1. ]
voxel_size:[0.05, 0.05, 0.1]
grid_size:[1408 1600   40]
class_names:['Car']


# Feature Extractor/Encoding Module
点云特征提取模块目的是：从无序的原始点云数据中提取出有序的初步特征,或者说找到一种方式编码原始点云，
令其有序。其实就是把原始点云转换为有序的张量矩阵
常见PointNet的方式，就是为了提取有序的初步特征；而体素的方式，是为了用体素这种格式编码原始点云，令其有序
为什么要这样做？我的理解是，现有CNN只能处理有序的张量！！！不管是3d卷积还是2d卷积



Voxel Feature Extractor(VFE)
- 提取体素级别的特征
输入：体素，以及体素相关的信息
输出：提取的体素特征
- Mean VFE：取每个体素内所有点的平均值作为输出特征
- MLP VFE:对每个体素内的点集，做类似PointNet的操作。即用MLP + Max pooling 提取点集的特征

In [6]:
# Mean VFE: the per-voxel feature is the mean of the points inside the voxel.
from basic.module.feature_extractor import MeanVFE

# Place the module via the notebook-wide `device` rather than a hard-coded
# .cuda(), matching the `.to(device)` calls used by later cells.
mean_vfe_module = MeanVFE(model_cfg, model_info_dict).to(device)
output = mean_vfe_module(test_data)
# Record what later modules need: the new per-point feature width and the module itself.
model_info_dict['cur_point_feature_dims'] = mean_vfe_module.output_feature_dims
model_info_dict['module_list'].append(mean_vfe_module)
print("Mean VFE： voxel_features shape:", output['voxel_features'].shape)

Mean VFE： voxel_features shape: torch.Size([63662, 4])


In [7]:
# Snapshot of the shared model metadata after the VFE stage.
print("current model infos:")
for info_name, info_value in model_info_dict.items():
    print(f"{info_name}:{info_value}")

current model infos:
module_list:[MeanVFE()]
training:True
raw_point_feature_dims:4
cur_point_feature_dims:4
point_cloud_range:[  0.  -40.   -3.   70.4  40.    1. ]
voxel_size:[0.05, 0.05, 0.1]
grid_size:[1408 1600   40]
class_names:['Car']


In [8]:
# MLP VFE: standalone smoke test. In the cells shown, its result `t` is not
# passed on to any later module.
from basic.module.feature_extractor import MlpVFE

cfg = {'mlp_dims': [32, 64, 64, 128, 128],
       'input_channels': 4}
# Use the notebook-wide `device` instead of a hard-coded .cuda().
mlp_vfe_module = MlpVFE(cfg).to(device)
t = mlp_vfe_module(test_data)
print("Mlp VFE： voxel_features shape:", t.shape)

Mlp VFE： voxel_features shape: torch.Size([63662, 128])


Point Feature Extractor(PFE)
- 直接提取原始点云的特征
- 代表方法PointNet++的SetAbstract layer

In [9]:
# TODO: add a Point Feature Extractor (PFE) example; none is implemented in this notebook yet.

# Backbone3D
- 经过原始点云的特征提取/编码后，一般会得到B,C,VH,VW,VD的体素张量特征矩阵。或B,C,H,W的点云特征张量矩阵。
根据特征张量维度选择用3D卷积还是2D卷积网络来进一步提取特征。
- 因为体素张量特征矩阵非常稀疏，多使用稀疏卷积。使用spconv库来进行稀疏3D卷积

In [10]:
from basic.module.backbone3d import VoxelBackBone8x

back3d_cfg = model_cfg.MODEL.BACKBONE3D
# Use the notebook-wide `device` instead of a hard-coded .cuda().
backbone3d_module = VoxelBackBone8x(back3d_cfg, model_info_dict).to(device)
# NOTE(review): `output` is rebound to this module's result, so re-running this
# cell alone feeds the backbone its own previous output — re-run the VFE cell first.
output = backbone3d_module(output)
print("spconv_tensor_shape:", output['encoded_spconv_tensor'].dense().shape)

spconv_tensor_shape: torch.Size([4, 128, 2, 200, 176])


In [11]:
# Register the 3D backbone and publish the values later modules read.
model_info_dict['module_list'].append(backbone3d_module)
model_info_dict['cur_point_feature_dims'] = backbone3d_module.output_feature_dims
model_info_dict['feature_map_size'] = backbone3d_module.output_feature_size
# 'backbone_channels' is optional on a backbone; store None when it is absent.
model_info_dict['backbone_channels'] = getattr(backbone3d_module, 'backbone_channels', None)

In [12]:
# Snapshot of the shared model metadata after the 3D backbone stage.
print("current model infos:")
for info_name, info_value in model_info_dict.items():
    print(f"{info_name}:{info_value}")

current model infos:
module_list:[MeanVFE(), VoxelBackBone8x(
  (conv_input): SparseSequential(
    (0): SubMConv3d()
    (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv1): SparseSequential(
    (0): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv2): SparseSequential(
    (0): SparseSequential(
      (0): SparseConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv3): SparseSequential(
    (0)

# NECK
BackBone3D提取的特征向量依然处于3维空间内。目前一般不会在3维空间内提取ROIs。因为3DNMS，3DIOU等都很麻烦....。
因此直接在前视图FOV或在鸟瞰图BEV上提取ROIs。为此需要将3d特征转换为2d特征。
- 常用的Neck：
直接压缩：比如将B,C,D,H,W的特征压缩为B，C*D，H，W（把通道维C与深度维D合并）,此时的特征图可以认为是BEV视角下的二维特征图

In [13]:
from basic.module.neck import DimCompression

# Build the neck from its section of the model config and run it on the
# 3D backbone result to obtain the 2D 'spatial_features' map.
neck_cfg = model_cfg.MODEL.NECK
neck_module = DimCompression(
    module_cfg=neck_cfg,
    model_info_dict=model_info_dict,
)
output = neck_module(output)
print("直接压缩", output['spatial_features'].shape)

直接压缩 torch.Size([4, 256, 200, 176])


In [14]:
# Register the neck and refresh the feature width / feature-map size it produces.
model_info_dict['module_list'].append(neck_module)
model_info_dict.update(
    cur_point_feature_dims=neck_module.output_feature_dims,
    feature_map_size=neck_module.output_feature_size,
)

In [15]:
# Snapshot of the shared model metadata after the neck stage.
print("current model infos:")
for info_name, info_value in model_info_dict.items():
    print(f"{info_name}:{info_value}")

current model infos:
module_list:[MeanVFE(), VoxelBackBone8x(
  (conv_input): SparseSequential(
    (0): SubMConv3d()
    (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv1): SparseSequential(
    (0): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv2): SparseSequential(
    (0): SparseSequential(
      (0): SparseConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv3): SparseSequential(
    (0)

# Backbone2D
与BackBone3D一样，根据输入的张量维度。选择用2D卷积网络提取特征。通常如果使用NECK 模块将3维空间内的特征压缩为2维空间的特征后
也会再次使用2D的卷积网络再次提取特征。

In [16]:
# NOTE(review): disabled experiment with BEVExtractor; the next cell uses
# BaseBEVBackbone instead. Remove this cell if it is no longer needed.
# from basic.module.backbone2d import BEVExtractor
# conv_channels = [32, 64, 128, 256]
# conv_kernel = [2, 2, 3, 3]
# backbone2d = BEVExtractor(128, conv_channels, conv_kernel)
# output = backbone2d(output)

In [17]:
from basic.module.backbone2d.base_bev_backbone import BaseBEVBackbone

backbone2d_cfg = model_cfg.MODEL.BACKBONE2D
# Use the notebook-wide `device` instead of a hard-coded .cuda().
backbone2d = BaseBEVBackbone(backbone2d_cfg, model_info_dict).to(device)
output = backbone2d(output)
# Shape of the 2D feature map that the dense head consumes.
output['spatial_features_2d'].shape

torch.Size([4, 512, 200, 176])

In [18]:
# Register the 2D backbone and publish its output feature width.
model_info_dict['module_list'].append(backbone2d)
model_info_dict.update(cur_point_feature_dims=backbone2d.output_feature_dims)

In [19]:
# Snapshot of the shared model metadata after the 2D backbone stage.
print("current model infos:")
for info_name, info_value in model_info_dict.items():
    print(f"{info_name}:{info_value}")

current model infos:
module_list:[MeanVFE(), VoxelBackBone8x(
  (conv_input): SparseSequential(
    (0): SubMConv3d()
    (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv1): SparseSequential(
    (0): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv2): SparseSequential(
    (0): SparseSequential(
      (0): SparseConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv3): SparseSequential(
    (0)

截止目前为止：输入点云的shape变化为
- 原始点云->体素：63662, 5, 4（以本次运行 batch_size=4 的输出为例）
- VFE：63662, 4
- BackBone3D：4， 128， 2， 200， 176
- neck：4，256，200，176
- BackBone2D：4，512，200，176，空间尺寸（200×176）未变，因为卷积过后，又转置卷积回了原始大小；通道数由256变为512
经过上面的各个模块，从原始点云中获取了能代表该点云的二维特征图。接下来是3D目标识别中最重要的部分：Dense Head 与 ROI head。

# Dense Head
Dense Head以BackBone2D输出的二维特征图为输入，输出用于Bbox回归的（reg_pred），和Bbox分类的（cls_pred）两个likelihood矩阵

anchor generator（一）
- 枚举7种anchor可能用到的特征，即x，y，z，l，w，h，r。然后通过mesh grid产生所有anchors。其中根据anchor中心坐标xyz的取法不同又分为Range和Stride两种方案
    - Range：在点云范围内，给定每个轴的取值范围。每个轴按照特征图中对应的维度平均划分这些轴。比如特征图对应X轴的维度大小为176，就在X轴范围内平均划分176个。
    - Stride：给定xyz坐标下的原点坐标，分别以x stride，y stride，z stride沿着各个轴的正方向按步长获得anchor中心坐标xyz。
    - 代码接口虽然可以自定义Range和Stride。但是为了将特征图上的每个特征点与原图上的每个anchor关联起来，一定要平均划分！！即Range取值为点云的范围，而Stride取值为
  点云采样范围 / 特征图大小。即\[z_stride, x_stride, y_stride\]=\[z_len, x_len, y_len\] / \[H, W, L\]。这样看按Range还是Stride的方案取得的结果应该差距不大。。。
    - 实际上就是把特征图上的每个特征点，映射回了原始数据上对应区域的中心。假如原始点云下采样了8倍得到特征图，则特征图中\[0,0,0\]点对应原点云（点云原点坐标为000）中以\[4,4,4\]为中心（即 stride/2 处），边长为8的立方体区域。

In [20]:
# anchor generator config
anchor_gen_cfg = model_cfg.MODEL.DENSE_HEAD.ANCHOR_GENERATOR_CONFIG
# Display the anchor config together with the current shared model metadata.
anchor_gen_cfg, model_info_dict

({'NAME': 'AnchorGenerator',
  'DEVICE': 'cuda',
  'CLASS_CONFIG': [{'class_name': 'Car',
    'mode': 'Range',
    'anchor_dims': 3,
    'boxes_size': [[3.9, 1.6, 1.56]],
    'rotations': [0, 1.57],
    'ratios': [1],
    'center_aligned': True,
    'road_plane_aligned': True,
    'road_plane_height': -1.2},
   {'class_name': 'Pedestrian',
    'mode': 'Range',
    'anchor_dims': 3,
    'boxes_size': [[0.8, 0.6, 1.73]],
    'rotations': [0, 1.57],
    'ratios': [1],
    'center_aligned': True,
    'road_plane_aligned': True,
    'road_plane_height': -1.2},
   {'class_name': 'Cyclist',
    'mode': 'Range',
    'anchor_dims': 3,
    'boxes_size': [[1.76, 0.6, 1.73]],
    'rotations': [0, 1.57],
    'ratios': [1],
    'center_aligned': True,
    'anchor_bottom_heights': [-0.6],
    'road_plane_aligned': True,
    'road_plane_height': -1.2}]},
 {'module_list': [MeanVFE(),
   VoxelBackBone8x(
     (conv_input): SparseSequential(
       (0): SubMConv3d()
       (1): BatchNorm1d(16, eps=0.001,

In [21]:
from basic.module.dense_head.anchor_generator.anchor_gen_base import AnchorGenerator

# Generate the 'Car' anchors and inspect the grid: its shape, the step between
# diagonal neighbours, and the first and last cells.
anchor_generator = AnchorGenerator(
    anchor_gen_cfg,
    model_info_dict,
    class_type='Car',
    dtype=torch.float32,
)
anchors = anchor_generator.gen_anchors(flatten_output=False)
first_cell, last_cell = anchors[0, 0], anchors[-1, -1]
print("Range anchors shape:", anchors.shape)
print("Range stride:", anchors[1, 1] - first_cell)
print("begin:", first_cell)
print("end:", last_cell)

Range anchors shape: torch.Size([176, 200, 1, 1, 2, 7])
Range stride: tensor([[[[0.4000, 0.4000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
          [0.4000, 0.4000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]],
       device='cuda:0')
begin: tensor([[[[  0.2000, -39.8000,  -1.2000,   3.9000,   1.6000,   1.5600,   0.0000],
          [  0.2000, -39.8000,  -1.2000,   3.9000,   1.6000,   1.5600,   1.5700]]]],
       device='cuda:0')
end: tensor([[[[70.2000, 39.8000, -1.2000,  3.9000,  1.6000,  1.5600,  0.0000],
          [70.2000, 39.8000, -1.2000,  3.9000,  1.6000,  1.5600,  1.5700]]]],
       device='cuda:0')


In [22]:
# Regenerate the same anchors in 'Stride' mode and print the same diagnostics.
anchor_generator.set_mode('Stride')
anchors = anchor_generator.gen_anchors(flatten_output=False)
for label, value in (
    ("stride:", anchor_generator.stride),
    ("Stride anchors shape:", anchors.shape),
    ("begin:", anchors[0, 0]),
    ("end:", anchors[-1, -1]),
):
    print(label, value)
# Flatten every grid dimension so each row is one 7-value anchor.
final_anchors = anchors.view(-1, 7)
print("output anchor shape:", final_anchors.shape)

stride: tensor([0.4000, 0.4000, 4.0000])
Stride anchors shape: torch.Size([176, 200, 1, 1, 2, 7])
begin: tensor([[[[  0.2000, -39.8000,  -1.2000,   3.9000,   1.6000,   1.5600,   0.0000],
          [  0.2000, -39.8000,  -1.2000,   3.9000,   1.6000,   1.5600,   1.5700]]]],
       device='cuda:0')
end: tensor([[[[70.2000, 39.8000, -1.2000,  3.9000,  1.6000,  1.5600,  0.0000],
          [70.2000, 39.8000, -1.2000,  3.9000,  1.6000,  1.5600,  1.5700]]]],
       device='cuda:0')
output anchor shape: torch.Size([70400, 7])


1.注意输出anchors的shape为176, 200, 1, 1, 2, 7。最后一个维度代表anchor的特征向量xyzlwhr，
其他维度分别与x y z size rot的可枚举数量一致.当然最后输出的shape为(176x200x1x1x2, 7)
2.在对齐体素中心的情况下，Range和Stride两种方案的结果都是一样的。假如点云的x轴范围为\[0, 70.4\]，
而x轴对应的维度在特征图上大小为176.则均分后相邻点的距离为70.4 / 176 = 0.4。Range和Stride
枚举X坐标的核心代码如下

In [23]:
# Minimal reproduction of how the X coordinates are enumerated for an X range
# of [0, 70.4] mapped onto 176 feature-map columns.
x_min, x_max, num_cols = 0, 70.4, 176
cell = (x_max - x_min) / num_cols  # 0.4: width covered by one feature-map column

# Range scheme: split the interval evenly, optionally shrunk by half a cell at
# each end so the samples sit on cell centers.
ranges = torch.linspace(x_min, x_max, num_cols)
range_align_center = torch.linspace(x_min + cell / 2, x_max - cell / 2, num_cols)

# Stride scheme: step from the origin by one cell, optionally shifted by half a cell.
stride = torch.arange(0, num_cols) * cell
stride_align_center = stride + cell / 2

# The claim made in the text: once center-aligned, both schemes give the same coordinates.
assert torch.allclose(range_align_center, stride_align_center, atol=1e-4)

In [24]:
# Switch the generator back to 'Range' mode and record its anchor grid shape
# in the shared model metadata.
anchor_generator.set_mode('Range')
model_info_dict['raw_anchor_shape'] = anchor_generator.shape

随机绘制100个anchor box看看
- 明显anchor 产生的全部BBox能覆盖整个点云cube范围

In [25]:
%matplotlib auto
anchors = anchor_generator.gen_anchors(flatten_output=True)
w = VisualWindow(mode='3d')
points = test_data['points']
test_pc = points[points[:, 0] == 0][:, 1:]
w.draw_point_cloud(pc=test_pc.cpu().numpy())
sample_ids = torch.randperm(anchors.size(0))[:100]
w.draw_boxes3d(boxes=anchors[sample_ids].cpu().numpy(), format='corner')

Using matplotlib backend: Qt5Agg


anchor generator（二）
- 在xyz坐标原点生成基本的anchors，然后通过stride。移动这些anchors

MultiClass Generator
在同一feature map上为每种类别生成对应的anchor。输出\[class_dim，xdim，ydim，zdim，size_dim,rot_dim,7\]

In [26]:
from basic.module.dense_head.anchor_generator.anchor_gen_base import MultiClsAnchorGenerator

# One anchor grid per class, generated on the same 2D feature map.
mul_generator = MultiClsAnchorGenerator(
    anchor_gen_cfg,
    model_info_dict,
    feature_map_size=output['spatial_features_2d'].shape[2:],
    cls_list=['Car', 'Pedestrian', 'Cyclist'],
)
all_anchors = mul_generator.gen_anchors(flatten_output=False)
print("class_dim，xdim，ydim，zdim，size_dim,rot_dim,7:", all_anchors.shape)
# Print the first grid cell of each class; index 0 of the result is the class dimension.
for cls_idx, label in enumerate(("Car:", "Pedestrian:", "Cyclist")):
    print(label, all_anchors[cls_idx, 0, 0])

class_dim，xdim，ydim，zdim，size_dim,rot_dim,7: torch.Size([3, 176, 200, 1, 1, 2, 7])
Car: tensor([[[[  0.2000, -39.8000,  -1.2000,   3.9000,   1.6000,   1.5600,   0.0000],
          [  0.2000, -39.8000,  -1.2000,   3.9000,   1.6000,   1.5600,   1.5700]]]],
       device='cuda:0')
Pedestrian: tensor([[[[  0.2000, -39.8000,  -1.2000,   0.8000,   0.6000,   1.7300,   0.0000],
          [  0.2000, -39.8000,  -1.2000,   0.8000,   0.6000,   1.7300,   1.5700]]]],
       device='cuda:0')
Cyclist tensor([[[[  0.2000, -39.8000,  -1.2000,   1.7600,   0.6000,   1.7300,   0.0000],
          [  0.2000, -39.8000,  -1.2000,   1.7600,   0.6000,   1.7300,   1.5700]]]],
       device='cuda:0')


Target assigner
目的：1.训练时，为每个anchor指定类别标签和Boxes偏移量标签；
输入：1.Anchors\[K,7\];2.Ground Truth Boxes\[B,N,8\],其中8=xyzlwhr+class_ind

In [27]:
# Show one ground-truth box (first frame, first box) as a sample of the assigner's input.
print("gt_box_temp:", test_data['gt_boxes'][0, 0])
# Target-assigner section of the dense-head config; displayed as the cell result.
assigner_cfg = model_cfg.MODEL.DENSE_HEAD.TARGET_ASSIGNER_CONFIG
assigner_cfg

gt_box_temp: tensor([29.3264,  8.4339, -0.7402,  3.8006,  1.5304,  1.5709, -2.7242,  1.0000],
       device='cuda:0')


{'NAME': 'MaxIouTargetAssigner',
 'DEVICE': 'cuda',
 'POS_FRACTION': -1.0,
 'NORM_BY_NUM_EXAMPLES': False,
 'MATCH_HEIGHT': False,
 'CLASS_THRESHOLD': [{'class_name': 'Car',
   'pos_threshold': 0.5,
   'neg_threshold': 0.45},
  {'class_name': 'Pedestrian', 'pos_threshold': 0.5, 'neg_threshold': 0.35},
  {'class_name': 'Cyclist', 'pos_threshold': 0.5, 'neg_threshold': 0.35}],
 'IOU_CALCULATOR': {'NAME': 'Iou3DCalculator'},
 'BOX_ENCODER': {'NAME': 'ResidualCoder',
  'code_size': 7,
  'encode_angle_by_sincos': False},
 'SAMPLER': {'NAME': 'MaxSizeSubSampler', 'sample_size': 512}}

In [28]:
# Print the shared model metadata, skipping 'module_list' because its repr is
# very long.
for key, value in model_info_dict.items():
    if key == 'module_list':
        continue
    print(key, value)

training True
raw_point_feature_dims 4
cur_point_feature_dims 512
point_cloud_range [  0.  -40.   -3.   70.4  40.    1. ]
voxel_size [0.05, 0.05, 0.1]
grid_size [1408 1600   40]
class_names ['Car']
feature_map_size [  1 200 176]
backbone_channels {'x_conv1': 16, 'x_conv2': 32, 'x_conv3': 64, 'x_conv4': 64}
raw_anchor_shape [176, 200, 1, 1, 2]


In [29]:
from basic.module.dense_head.target_assigner import MaxIouTargetAssigner
# Build the assigner from its config section and the shared model metadata.
target_assigner = MaxIouTargetAssigner(assigner_cfg, model_info_dict)

In [30]:
import os
# NOTE(review): CUDA_LAUNCH_BLOCKING is normally read when the CUDA context is
# created. Earlier cells have already run modules on the GPU, so setting it here
# presumably has no effect for this kernel session — confirm, and move it before
# the first CUDA call if synchronous kernel launches are needed for debugging.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# The last column of gt_boxes is the class label; the displayed result contains
# trailing 0 entries in frames with fewer boxes (presumably padding — verify).
print("labels:", test_data['gt_boxes'][:, :, -1])

labels: tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]],
       device='cuda:0')


In [31]:
# all_anchors = mul_generator.gen_anchors(flatten_output=True).cuda()
all_anchors = anchor_generator.gen_anchors(flatten_output=True).to(device)
# Split the ground truth into box parameters (all but the last column) and class labels (last column).
gt_boxes = test_data['gt_boxes']
target_dict, batch_bbox_id_dict = target_assigner.assign(
    gts=gt_boxes[..., :-1],
    bboxes=all_anchors,
    gt_labels=gt_boxes[..., -1],
)
target_dict, batch_bbox_id_dict

({'cls_labels': tensor([0., 0., 0.,  ..., 1., 1., 1.], device='cuda:0'),
  'reg_labels': tensor([[0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.]], device='cuda:0')},
 {'pos': tensor([[    1,  3430],
          [    1,  3830],
          [    1, 24294],
          [    2,  8965],
          [    2,  8967],
          [    3, 35437],
          [    3, 35439],
          [    3, 35837]], device='cuda:0'),
  'neg': tensor([[    0,    13],
          [    0,   226],
          [    0,   297],
          ...,
          [    3, 69789],
          [    3, 70059],
          [    3, 70078]], device='cuda:0')})

可视化每个场景中，通过target assign匹配的anchor bbox

In [32]:
%matplotlib auto
batch_bbox = batch_bbox_id_dict['pos']
batch_ids = batch_bbox[:, 0]
bbox_ids = batch_bbox[:, 1]
points = test_data['points']
for i in range(batch_size):
    mask = batch_ids == i
    frame_bbox_ids = bbox_ids[mask]
    if frame_bbox_ids.size(0) > 0:
        print(frame_bbox_ids)
        frame_pc = points[points[:, 0] == i][:, 1:]
        frame_bbox = all_anchors[frame_bbox_ids]
        frame_gt = test_data['gt_boxes'][i]
        w = VisualWindow(mode='3d')
        w.draw_point_cloud(frame_pc.cpu().numpy())
        w.draw_boxes3d(frame_gt[:,:7].cpu().numpy())
        w.draw_boxes3d(frame_bbox.cpu().numpy(), 'corner', c='r')

Using matplotlib backend: Qt5Agg
tensor([ 3430,  3830, 24294], device='cuda:0')
tensor([8965, 8967], device='cuda:0')
tensor([35437, 35439, 35837], device='cuda:0')


上面所有子模块组成基于anchor的Dense head：anchor head

In [33]:
from basic.module.dense_head.anchor_head.anchor_head_base import AnchorHeadBase

# Build the anchor-based dense head from its config section and run it on the
# 2D backbone result; the returned dict is displayed as the cell result.
dense_head_cfg = model_cfg.MODEL.DENSE_HEAD
anchor_head = AnchorHeadBase(dense_head_cfg, model_info_dict).to(device)
output_dict = anchor_head(output)
output_dict

{'cls_pred': tensor([[-0.0898, -0.0097],
         [-0.0898, -0.0097],
         [-0.1206, -0.0532],
         ...,
         [ 0.1119, -0.0256],
         [ 0.1119, -0.0256],
         [ 0.0344, -0.0468]], device='cuda:0', grad_fn=<CatBackward>),
 'reg_pred': tensor([[-0.1580, -0.0250,  0.0616, -0.0190, -0.0783,  0.0128,  0.0185],
         [-0.1580, -0.0250,  0.0616, -0.0190, -0.0783,  0.0128,  0.0185],
         [-0.0980,  0.0468, -0.0155, -0.0506, -0.0701, -0.0157,  0.0124],
         [-0.1458, -0.0513, -0.0208,  0.0058, -0.0292, -0.0125,  0.1173],
         [-0.1400, -0.0471, -0.0176,  0.0269, -0.0263, -0.0142,  0.1187],
         [-0.0158, -0.0739, -0.0384,  0.0837,  0.1014, -0.0379,  0.1538],
         [-0.0158, -0.0739, -0.0384,  0.0837,  0.1014, -0.0379,  0.1538],
         [-0.0004,  0.0752,  0.0042, -0.0040,  0.0619, -0.0660,  0.1203]],
        device='cuda:0', grad_fn=<IndexBackward>),
 'target_dict': {'cls_labels': tensor([0., 0., 0.,  ..., 1., 1., 1.], device='cuda:0'),
  'reg_labels'

最后直接用模型配置文档生成SECOND模型

In [34]:
from basic.model.second import SECOND

# Build the whole SECOND model from the config and fresh dataset metadata.
data_infos = dataloader.dataset.get_data_infos()
# Use the notebook-wide `device` instead of a hard-coded .cuda(), matching the
# `.to(device)` calls used for the anchors and the anchor head.
model = SECOND(model_cfg, data_infos).to(device)

In [35]:
# Run the assembled model on the test batch; the displayed result is a dict holding 'loss'.
loss = model(test_data)
loss

{'loss': tensor(0.0647, device='cuda:0', grad_fn=<AddBackward0>)}