In [1]:
import math
import numpy as np
import torch
from kitti.kitti_dataset import get_dataloader
import yaml
from easydict import EasyDict
from pathlib import Path
from basic.utils.vis_utils import VisualWindow
# %matplotlib inline
torch.cuda.empty_cache()
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# DataLoader

In [2]:
cfg_path = Path("../kitti/cfg/kitti_dataset.yaml")
batch_size = 4
dataloader = get_dataloader(data_cfg_path=cfg_path, class_name_list=['Car'], batch_size=batch_size)  # 'Pedestrian','Cyclist'

  cfg = EasyDict(yaml.load(f))


In [3]:
from basic.utils.common_utils import put_data_to_gpu
# 取一批数据用于模块测试
for data in dataloader:
    test_data = data
    break
# 单独把某些数据放在GPU中，注意frame_id这种还是为np.ndarray
test_data = put_data_to_gpu(test_data)
print(f"input voxels shape:", test_data['voxels'].shape)
test_data

input voxels shape: torch.Size([61791, 5, 4])


{'frame_id': array(['006443', '007446', '005995', '006138'], dtype='<U6'),
 'gt_boxes': tensor([[[26.9494, 11.7531, -0.7768,  3.6334,  1.6990,  1.6376,  0.3899,
            1.0000],
          [32.4706, 39.4742, -0.3389,  3.9609,  1.7706,  1.4636, -2.7501,
            1.0000],
          [35.3516, 19.9395, -0.4223,  3.9200,  1.6683,  1.6990,  1.4899,
            1.0000],
          [20.7630, 17.5684, -0.4604,  3.6027,  1.5045,  1.4841, -2.7301,
            1.0000],
          [11.6903, 15.1524, -1.0361,  4.1861,  1.6376,  1.3408, -2.6501,
            1.0000],
          [21.9628, 10.3106, -0.6254,  4.4829,  1.7092,  1.5557,  0.4199,
            1.0000],
          [32.8690,  8.3094, -0.4120,  4.2782,  1.9037,  1.5148, -1.2501,
            1.0000],
          [17.0427, 26.5958, -1.3451,  4.6057,  1.7092,  1.5148,  1.0499,
            1.0000],
          [16.4236,  3.2296, -0.6718,  4.2168,  1.8013,  1.7297, -1.2701,
            1.0000],
          [39.0840, 20.8829, -0.8285,  4.4215,  1.3612,  1

# Model cfg
最终的目的是想用字符文档生成模型。但是下面只是单一测试每一个模块
- model cfg 中包含各个模块的配置：module cfg
- 在模型全局中使用model_info_dict记录一些必要的模型信息


In [4]:
from pprint import pprint
from basic.utils.config_utils import cfg_from_yaml_file
model_cfg = cfg_from_yaml_file('../basic/model/model_cfg/second.yaml')
model_info_dict = {
    'module_list': [],
    'training': True,
}
data_infos = dataloader.dataset.get_data_infos()
model_info_dict.update(data_infos)
pprint(model_cfg)

{'DATASET_CONFIG': {'CONFIG_PATH': '/home/ph/Desktop/PointCloud/utils_my/kitti/cfg/kitti_dataset.yaml',
                    'DATASET': 'KittiDataset',
                    'DATA_AUGMENTOR': {'AUG_CONFIG_LIST': [{'DATABASE_WITH_FAKELIDAR': False,
                                                            'DB_INFO_PATH': ['db_infos_train.pkl'],
                                                            'LIMIT_WHOLE_SCENE': True,
                                                            'NAME': 'gt_sampling',
                                                            'NUM_POINT_FEATURES': 4,
                                                            'PREPARE': {'filter_by_difficulty': [-1],
                                                                        'filter_by_min_points': ['Car:5',
                                                                                                 'Pedestrian:5',
                                                                               

初始模型信息，注意经过每一个模块处理后,更新以下信息。
- 更新module_list记录的模块
- 当前特征图中每个点的特征维度
- 后面模块可能会使用到的当前模块的一些信息

In [5]:
for key, value in model_info_dict.items():
    print(f"{key}:{value}")

module_list:[]
training:True
raw_point_feature_dims:4
cur_point_feature_dims:4
point_cloud_range:[  0.  -40.   -3.   70.4  40.    1. ]
voxel_size:[0.05, 0.05, 0.1]
grid_size:[1408 1600   40]
class_names:['Car']


# Feature Extractor/Encoding Module
点云特征提取模块目的是：从无序的原始点云数据中提取出有序的初步特征,或者说找到一种方式编码原始点云，
令其有序。其实就是把原始点云转换为有序的张量矩阵
常见PointNet的方式，就是为了提取有序的初步特征；而体素的方式，是为了用体素这种格式编码原始点云，令其有序
为什么要这样做？我的理解是，现有CNN只能处理有序的张量！！！不管是3d卷积还是2d卷积



Voxel Feature Extractor(VFE)
- 提取体素级别的特征
输入：体素，以及体素相关的信息
输出：提取的体素特征
- Mean VFE：取每个体素内所有点的平均值作为输出特征
- MLP VFE:对每个体素内的点集，做类似PointNet的操作。即用MLP + Max pooling 提取点集的特征

In [6]:
#mean vfe
from basic.module.feature_extractor import MeanVFE
mean_vfe_module = MeanVFE(model_cfg, model_info_dict).cuda()
output = mean_vfe_module(test_data)
model_info_dict['cur_point_feature_dims'] = mean_vfe_module.output_feature_dims
model_info_dict['module_list'].append(mean_vfe_module)
print(f"Mean VFE： voxel_features shape:", output['voxel_features'].shape)

Mean VFE： voxel_features shape: torch.Size([61791, 4])


In [7]:
print(f"current model infos:")
for key, value in model_info_dict.items():
    print(f"{key}:{value}")

current model infos:
module_list:[MeanVFE()]
training:True
raw_point_feature_dims:4
cur_point_feature_dims:4
point_cloud_range:[  0.  -40.   -3.   70.4  40.    1. ]
voxel_size:[0.05, 0.05, 0.1]
grid_size:[1408 1600   40]
class_names:['Car']


In [8]:
# mlp vfe just test
from basic.module.feature_extractor import MlpVFE

cfg = {'mlp_dims': [32, 64, 64, 128, 128],
       'input_channels': 4}
mlp_vfe_module = MlpVFE(cfg).cuda()
t = mlp_vfe_module(test_data)
print(f"Mlp VFE： voxel_features shape:", t.shape)

Mlp VFE： voxel_features shape: torch.Size([61791, 128])


Point Feature Extractor(PFE)
- 直接提取原始点云的特征
- 代表方法PointNet++的SetAbstract layer

In [9]:
#todo

# Backbone3D
- 经过原始点云的特征提取/编码后，一般会得到B,C,VH,VW,VD的体素张量特征矩阵。或B,C,H,W的点云特征张量矩阵。
根据特征张量维度选择用3D卷积还是2D卷积网络来进一步提取特征。
- 因为体素张量特征矩阵非常稀疏，多使用稀疏卷积。使用spconv库来进行稀疏3D卷积

In [10]:
from basic.module.backbone3d import VoxelBackBone8x

back3d_cfg = model_cfg.MODEL.BACKBONE3D
backbone3d_module = VoxelBackBone8x(back3d_cfg, model_info_dict).cuda()
output = backbone3d_module(output)
print(f"spconv_tensor_shape:", output['encoded_spconv_tensor'].dense().shape)

spconv_tensor_shape: torch.Size([4, 128, 2, 200, 176])


In [11]:
model_info_dict['module_list'].append(backbone3d_module)
model_info_dict['cur_point_feature_dims'] = backbone3d_module.output_feature_dims
model_info_dict['feature_map_size'] = backbone3d_module.output_feature_size
model_info_dict['backbone_channels'] = backbone3d_module.backbone_channels\
    if hasattr(backbone3d_module, 'backbone_channels') else None

In [12]:
print(f"current model infos:")
for key, value in model_info_dict.items():
    print(f"{key}:{value}")

current model infos:
module_list:[MeanVFE(), VoxelBackBone8x(
  (conv_input): SparseSequential(
    (0): SubMConv3d()
    (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv1): SparseSequential(
    (0): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv2): SparseSequential(
    (0): SparseSequential(
      (0): SparseConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv3): SparseSequential(
    (0)

# NECK
BackBone3D提取的特征向量依然处于3维空间内。目前一般不会在3维空间内提取ROIs。因为3DNMS，3DIOU等都很麻烦....。
因此直接在前视图FOV或在鸟瞰图BEV上提取ROIs。为此需要将3d特征转换为2d特征。
- 常用的Neck：
直接压缩：比如将B,C,D,H,W的特征压缩为B，C*H，D,W,此时的特征图可以认为是BEV视角下的二维特征图

In [13]:
from basic.module.neck import DimCompression

neck_cfg = model_cfg.MODEL.NECK
neck_module = DimCompression(module_cfg=neck_cfg, model_info_dict=model_info_dict)
output = neck_module(output)
print("直接压缩", output['spatial_features'].shape)

直接压缩 torch.Size([4, 256, 200, 176])


In [14]:
model_info_dict['module_list'].append(neck_module)
model_info_dict['cur_point_feature_dims'] = neck_module.output_feature_dims
model_info_dict['feature_map_size'] = neck_module.output_feature_size

In [15]:
print(f"current model infos:")
for key, value in model_info_dict.items():
    print(f"{key}:{value}")

current model infos:
module_list:[MeanVFE(), VoxelBackBone8x(
  (conv_input): SparseSequential(
    (0): SubMConv3d()
    (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv1): SparseSequential(
    (0): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv2): SparseSequential(
    (0): SparseSequential(
      (0): SparseConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv3): SparseSequential(
    (0)

# Backbone2D
与BackBone3D一样，根据输入的张量维度。选择用2D卷积网络提取特征。通常如果使用NECK 模块将3维空间内的特征压缩为2维空间的特征后
也会再次使用2D的卷积网络再次提取特征。

In [16]:
# from basic.module.backbone2d import BEVExtractor
# conv_channels = [32, 64, 128, 256]
# conv_kernel = [2, 2, 3, 3]
# backbone2d = BEVExtractor(128, conv_channels, conv_kernel)
# output = backbone2d(output)

In [17]:
from basic.module.backbone2d.base_bev_backbone import BaseBEVBackbone

backbone2d_cfg = model_cfg.MODEL.BACKBONE2D
backbone2d = BaseBEVBackbone(backbone2d_cfg, model_info_dict).cuda()
output = backbone2d(output)
output['spatial_features_2d'].shape

torch.Size([4, 512, 200, 176])

In [18]:
model_info_dict['module_list'].append(backbone2d)
model_info_dict['cur_point_feature_dims'] = backbone2d.output_feature_dims

In [19]:
print(f"current model infos:")
for key, value in model_info_dict.items():
    print(f"{key}:{value}")

current model infos:
module_list:[MeanVFE(), VoxelBackBone8x(
  (conv_input): SparseSequential(
    (0): SubMConv3d()
    (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv1): SparseSequential(
    (0): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv2): SparseSequential(
    (0): SparseSequential(
      (0): SparseConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): SparseSequential(
      (0): SubMConv3d()
      (1): BatchNorm1d(32, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (conv3): SparseSequential(
    (0)

截止目前为止：输入点云的shape变化为
- 原始点云->体素：183149, 5, 4
- VFE：183149, 4
- BackBone3D：12， 128， 2， 200， 176
- neck：12，256，200，176
- BackBone2D：12，256，200，176，shape未变因为卷积过后，又转置卷积回了原始大小
经过上面的各个模块，从原始点云中获取了能代表该点云的二维特征图。接下来是3D目标识别中最重要的部分：Dense Head 与 ROI head。

# Dense Head
BackBone2D的输出为用于Bbox回归的，和Bbox分类的两个likelihood矩阵

anchor generator（一）
- 枚举7种anchor可能用到的特征，即x，y，z，h，w，l，r。然后通过mesh gird产生所有anchors。其中根据anchor中心坐标xyz的取法不同又分为Range和Stride两种方案
    - Range：在点云范围内，给定每个轴的取值范围。每个轴按照特征图中对应的维度平均划分这些轴。比如特征图对应X轴的维度大小为176，就在X轴范围内平均划分176个。
    - Stride：给定xyz坐标下的原点坐标，分别以x stride，y stride，z stride沿着各个轴的正方向按步长获得anchor中心坐标xyz。
    - 代码接口虽然可以自定义Range和Stride。但是为了将特征图上的每个特征点与原图上的每个anchor关联起来，一定要平均划分！！即Range取值为点云的范围，而Stride取值为
  点云采样范围 / 特征图大小。即\[z_stride, x_stride, y_stride\]=\[z_len, x_len, y_len\] / \[H, W, L\]。这样看按Range还是Stride的方案取得的结果应该差距不大。。。
    - 实际上就是把特征图上的每个特征点，映射回了原始数据上对应区域的中心？假如原始点云下采样了8倍得到特征图，则特征图中\[0,0,0\]点对应原点云（点云原点坐标为000）中以\[8,8,8\]为中心，边长为8的正方形区域？

In [20]:
# anchor generator config
anchor_gen_cfg = model_cfg.MODEL.DENSE_HEAD.ANCHOR_GENERATOR_CONFIG
anchor_gen_cfg, model_info_dict

({'NAME': 'AnchorGenerator',
  'DEVICE': 'cuda',
  'CLASS_CONFIG': [{'class_name': 'Car',
    'mode': 'Range',
    'anchor_dims': 3,
    'boxes_size': [[3.9, 1.6, 1.56]],
    'rotations': [0, 1.57],
    'ratios': [1],
    'center_aligned': True,
    'road_plane_aligned': True,
    'road_plane_height': -0.035},
   {'class_name': 'Pedestrian',
    'mode': 'Range',
    'anchor_dims': 3,
    'boxes_size': [[0.8, 0.6, 1.73]],
    'rotations': [0, 1.57],
    'ratios': [1],
    'center_aligned': True,
    'road_plane_aligned': True,
    'road_plane_height': -1.2},
   {'class_name': 'Cyclist',
    'mode': 'Range',
    'anchor_dims': 3,
    'boxes_size': [[1.76, 0.6, 1.73]],
    'rotations': [0, 1.57],
    'ratios': [1],
    'center_aligned': True,
    'anchor_bottom_heights': [-0.6],
    'road_plane_aligned': True,
    'road_plane_height': -1.2}]},
 {'module_list': [MeanVFE(),
   VoxelBackBone8x(
     (conv_input): SparseSequential(
       (0): SubMConv3d()
       (1): BatchNorm1d(16, eps=0.00

In [21]:
from basic.module.dense_head.anchor_generator.anchor_gen_base import AnchorGenerator

anchor_generator = AnchorGenerator(anchor_gen_cfg, model_info_dict, class_type='Car', dtype=torch.float32)
anchors = anchor_generator.gen_anchors(flatten_output=False)
print("Range anchors shape:", anchors.shape)
print("Range stride:", anchors[1, 1] - anchors[0, 0])
print("begin:", anchors[0, 0])
print("end:", anchors[-1, -1])

Range anchors shape: torch.Size([176, 200, 1, 1, 2, 7])
Range stride: tensor([[[[0.4000, 0.4000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
          [0.4000, 0.4000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]],
       device='cuda:0')
begin: tensor([[[[ 2.0000e-01, -3.9800e+01, -3.5000e-02,  3.9000e+00,  1.6000e+00,
            1.5600e+00,  0.0000e+00],
          [ 2.0000e-01, -3.9800e+01, -3.5000e-02,  3.9000e+00,  1.6000e+00,
            1.5600e+00,  1.5700e+00]]]], device='cuda:0')
end: tensor([[[[ 7.0200e+01,  3.9800e+01, -3.5000e-02,  3.9000e+00,  1.6000e+00,
            1.5600e+00,  0.0000e+00],
          [ 7.0200e+01,  3.9800e+01, -3.5000e-02,  3.9000e+00,  1.6000e+00,
            1.5600e+00,  1.5700e+00]]]], device='cuda:0')


In [22]:
anchors[84,12,0,0,0]

tensor([ 33.8000, -35.0000,  -0.0350,   3.9000,   1.6000,   1.5600,   0.0000],
       device='cuda:0')

In [23]:
anchor_generator.set_mode('Stride')
anchors = anchor_generator.gen_anchors(flatten_output=False)
print("stride:", anchor_generator.stride)
print("Stride anchors shape:", anchors.shape)
print("begin:", anchors[0, 0])
print("end:", anchors[-1, -1])
final_anchors = anchors.view(-1, 7)
print("output anchor shape:", final_anchors.shape)

stride: tensor([0.4000, 0.4000, 4.0000])
Stride anchors shape: torch.Size([176, 200, 1, 1, 2, 7])
begin: tensor([[[[ 2.0000e-01, -3.9800e+01, -3.5000e-02,  3.9000e+00,  1.6000e+00,
            1.5600e+00,  0.0000e+00],
          [ 2.0000e-01, -3.9800e+01, -3.5000e-02,  3.9000e+00,  1.6000e+00,
            1.5600e+00,  1.5700e+00]]]], device='cuda:0')
end: tensor([[[[ 7.0200e+01,  3.9800e+01, -3.5000e-02,  3.9000e+00,  1.6000e+00,
            1.5600e+00,  0.0000e+00],
          [ 7.0200e+01,  3.9800e+01, -3.5000e-02,  3.9000e+00,  1.6000e+00,
            1.5600e+00,  1.5700e+00]]]], device='cuda:0')
output anchor shape: torch.Size([70400, 7])


1.注意输出anchors的shape为176, 200, 1, 1, 2, 7。最后一个维度代表anchor的特征向量xyzlwhr，
其他维度分别与x y z size rot的可枚举数量一致.当然最后输出的shape为(176x200x1x1x2, 7)
2.在对齐体素中心的情况下，Range和Stride两种方案的结果都是一样的。假如点云的x轴范围为\[0, 70.4\]，
而x轴对应的维度在特征图上大小为176.则均分后相邻点的距离为70.4 / 176 = 0.4。Range和Stride
枚举X坐标的核心代码如下

In [24]:
ranges = torch.linspace(0, 70.4, 176)
range_align_center = torch.linspace(0 + 0.2, 70.4 - 0.2, 176)
stride = torch.arange(0, 176) * 0.4
stride_align_center = stride + 0.4 / 2

In [25]:
anchor_generator.set_mode('Range')
model_info_dict['raw_anchor_shape'] = anchor_generator.shape

随机绘制100个anchor box看看
- 明显anchor 产生的全部BBox能覆盖整个点云cube范围

In [26]:
%matplotlib auto
anchors = anchor_generator.gen_anchors(flatten_output=True)
w = VisualWindow(mode='3d')
points = test_data['points']
test_pc = points[points[:, 0] == 0][:, 1:]
w.draw_point_cloud(pc=test_pc.cpu().numpy())
sample_ids = torch.randperm(anchors.size(0))[:100]
w.draw_boxes3d(boxes=anchors[sample_ids].cpu().numpy(), format='corner')

Using matplotlib backend: Qt5Agg


anchor generator（二）
- 在xyz坐标原点生成基本的anchors，然后通过stride。移动这些anchors

MultiClass Generator
在同一feature map上为每种类别生成对应的anchor。输出\[class_dim，xdim，ydim，zdim，size_dim,rot_dim,7\]

In [27]:
from basic.module.dense_head.anchor_generator.anchor_gen_base import MultiClsAnchorGenerator

mul_generator = MultiClsAnchorGenerator(anchor_gen_cfg, model_info_dict,
                                        feature_map_size=output['spatial_features_2d'].shape[2:],
                                        cls_list=['Car', 'Pedestrian', 'Cyclist'])
all_anchors = mul_generator.gen_anchors(flatten_output=False)
print("class_dim，xdim，ydim，zdim，size_dim,rot_dim,7:", all_anchors.shape)
print("Car:", all_anchors[0, 0, 0])
print("Pedestrian:", all_anchors[1, 0, 0])
print("Cyclist", all_anchors[2, 0, 0])

class_dim，xdim，ydim，zdim，size_dim,rot_dim,7: torch.Size([3, 176, 200, 1, 1, 2, 7])
Car: tensor([[[[ 2.0000e-01, -3.9800e+01, -3.5000e-02,  3.9000e+00,  1.6000e+00,
            1.5600e+00,  0.0000e+00],
          [ 2.0000e-01, -3.9800e+01, -3.5000e-02,  3.9000e+00,  1.6000e+00,
            1.5600e+00,  1.5700e+00]]]], device='cuda:0')
Pedestrian: tensor([[[[  0.2000, -39.8000,  -1.2000,   0.8000,   0.6000,   1.7300,   0.0000],
          [  0.2000, -39.8000,  -1.2000,   0.8000,   0.6000,   1.7300,   1.5700]]]],
       device='cuda:0')
Cyclist tensor([[[[  0.2000, -39.8000,  -1.2000,   1.7600,   0.6000,   1.7300,   0.0000],
          [  0.2000, -39.8000,  -1.2000,   1.7600,   0.6000,   1.7300,   1.5700]]]],
       device='cuda:0')


Target assigner
目的：1.训练时，为每个anchor指定类别标签和Boxes偏移量标签；
输入：1.Anchors\[K,7\];2.Ground Truth Boxes\[B,N,8\],其中8=xyzhwlr+class_ind

In [28]:
print("gt_box_temp:", test_data['gt_boxes'][0, 0])
assigner_cfg = model_cfg.MODEL.DENSE_HEAD.TARGET_ASSIGNER_CONFIG
assigner_cfg

gt_box_temp: tensor([26.9494, 11.7531, -0.7768,  3.6334,  1.6990,  1.6376,  0.3899,  1.0000],
       device='cuda:0')


{'NAME': 'MaxIouTargetAssigner',
 'DEVICE': 'cuda',
 'FORCE_MATCH': True,
 'POS_FRACTION': -1.0,
 'NORM_BY_NUM_EXAMPLES': False,
 'MATCH_HEIGHT': False,
 'CLASS_THRESHOLD': [{'class_name': 'Car',
   'pos_threshold': 0.55,
   'neg_threshold': 0.4},
  {'class_name': 'Pedestrian', 'pos_threshold': 0.5, 'neg_threshold': 0.35},
  {'class_name': 'Cyclist', 'pos_threshold': 0.5, 'neg_threshold': 0.35}],
 'IOU_CALCULATOR': {'NAME': 'Iou3DCalculator'},
 'BOX_ENCODER': {'NAME': 'ResidualCoder',
  'code_size': 7,
  'encode_angle_by_sincos': False},
 'SAMPLER': {'NAME': 'MaxSizeSubSampler', 'sample_size': 128}}

In [29]:
for key, value in model_info_dict.items():
    if key != 'module_list':
        print(key, value)

training True
raw_point_feature_dims 4
cur_point_feature_dims 512
point_cloud_range [  0.  -40.   -3.   70.4  40.    1. ]
voxel_size [0.05, 0.05, 0.1]
grid_size [1408 1600   40]
class_names ['Car']
feature_map_size [  1 200 176]
backbone_channels {'x_conv1': 16, 'x_conv2': 32, 'x_conv3': 64, 'x_conv4': 64}
raw_anchor_shape [176, 200, 1, 1, 2]


In [30]:
from basic.module.dense_head.target_assigner import MaxIouTargetAssigner
target_assigner = MaxIouTargetAssigner(assigner_cfg, model_info_dict)

In [31]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
print("labels:", test_data['gt_boxes'][:, :, -1])

labels: tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]], device='cuda:0')


In [32]:
# all_anchors = mul_generator.gen_anchors(flatten_output=True).cuda()
all_anchors = anchor_generator.gen_anchors(flatten_output=True).to(device)
target_assigner.force_match = True
# target_dict, batch_bbox_id_dict = target_assigner.assign(gts=test_data['gt_boxes'][..., :-1], bboxes=all_anchors, gt_labels=test_data['gt_boxes'][:, :, -1])
assign_ret = target_assigner.assign(gts=test_data['gt_boxes'][..., :-1], bboxes=all_anchors, gt_labels=test_data['gt_boxes'][..., -1])

In [54]:
pos_tuples = assign_ret.pos_tuples
neg_tuples = assign_ret.neg_tuples
pos_tuples

tensor([[    0,     4, 11874],
        [    0,     8, 16615],
        [    0,     7, 17133],
        [    0,     3, 20686],
        [    0,     5, 21850],
        [    0,    10, 23599],
        [    0,     0, 27058],
        [    0,     1, 32796],
        [    0,     6, 33041],
        [    0,     2, 35499],
        [    0,     9, 39104],
        [    1,     0, 14666],
        [    1,     4, 17090],
        [    1,     7, 19050],
        [    1,     9, 19838],
        [    1,     1, 20282],
        [    1,     5, 21185],
        [    1,     3, 26361],
        [    1,    10, 27517],
        [    1,     8, 29092],
        [    1,     2, 35454],
        [    1,     6, 37084],
        [    2,    10,  6691],
        [    2,     6, 13101],
        [    2,     5, 15555],
        [    2,     5, 15557],
        [    2,     0, 17064],
        [    2,     4, 21052],
        [    2,     3, 26609],
        [    2,     2, 27434],
        [    2,     1, 29490],
        [    2,     7, 32288],
        

In [59]:
t1, t2 = torch.where(assign_ret.pos_tuples_dense >= 0)
print(t1)
print(t1 == pos_tuples[:, 0])
print(t2)
print(t2 == pos_tuples[:, 2])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
       device='cuda:0')
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True],
       device='cuda:0')
tensor([11874, 16615, 17133, 20686, 21850, 23599, 27058, 32796, 33041, 35499,
        39104, 14666, 17090, 19050, 19838, 20282, 21185, 26361, 27517, 29092,
        35454, 37084,  6691, 13101, 15555, 15557, 17064, 21052, 26609, 27434,
        29490, 32288, 43518, 52260,     0,  5796, 13008, 20593, 23044, 25868,
        27836, 30656, 33046, 34362, 37044, 37904, 38256], device='cuda:0')
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True

In [68]:
t = assign_ret.bbox_targets.nonzero()[:, :2].unique(dim=0)
print(t)
print(t[:,0] == pos_tuples[:, 0])
print(t[:,1] == pos_tuples[:, 2])

tensor([[    0, 11874],
        [    0, 16615],
        [    0, 17133],
        [    0, 20686],
        [    0, 21850],
        [    0, 23599],
        [    0, 27058],
        [    0, 32796],
        [    0, 33041],
        [    0, 35499],
        [    0, 39104],
        [    1, 14666],
        [    1, 17090],
        [    1, 19050],
        [    1, 19838],
        [    1, 20282],
        [    1, 21185],
        [    1, 26361],
        [    1, 27517],
        [    1, 29092],
        [    1, 35454],
        [    1, 37084],
        [    2,  6691],
        [    2, 13101],
        [    2, 15555],
        [    2, 15557],
        [    2, 17064],
        [    2, 21052],
        [    2, 26609],
        [    2, 27434],
        [    2, 29490],
        [    2, 32288],
        [    2, 43518],
        [    2, 52260],
        [    3,     0],
        [    3,  5796],
        [    3, 13008],
        [    3, 20593],
        [    3, 23044],
        [    3, 25868],
        [    3, 27836],
        [    3, 

In [70]:
t = torch.where(assign_ret.bbox_weights == 1)[1]
print(t)
print(t == pos_tuples[:, -1])

tensor([11874, 16615, 17133, 20686, 21850, 23599, 27058, 32796, 33041, 35499,
        39104, 14666, 17090, 19050, 19838, 20282, 21185, 26361, 27517, 29092,
        35454, 37084,  6691, 13101, 15555, 15557, 17064, 21052, 26609, 27434,
        29490, 32288, 43518, 52260,     0,  5796, 13008, 20593, 23044, 25868,
        27836, 30656, 33046, 34362, 37044, 37904, 38256], device='cuda:0')
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True],
       device='cuda:0')


In [92]:
t = torch.where(assign_ret.cls_weights == 1)
print(t[1].sort()[0])
torch.cat([ pos_tuples[:, -1], neg_tuples[: ,-1]]).sort()[0]

tensor([    0,   389,   509,   678,   984,  1390,  1413,  1468,  1572,  1591,
         1630,  1761,  1784,  1941,  1994,  2431,  2556,  2596,  2697,  2808,
         2915,  3011,  3209,  3729,  3730,  4041,  4083,  4091,  4188,  4199,
         4395,  4465,  4472,  4541,  4894,  5003,  5116,  5191,  5326,  5382,
         5400,  5567,  5598,  5796,  6135,  6303,  6641,  6691,  6703,  7027,
         7049,  7373,  7977,  8062,  8069,  8177,  8277,  8389,  8426,  8438,
         8537,  8632,  8831,  8947,  9067,  9106,  9200,  9321,  9337,  9482,
         9636,  9735,  9787, 10596, 10702, 10818, 11035, 11054, 11055, 11117,
        11181, 11199, 11213, 11241, 11270, 11409, 11827, 11874, 11897, 12141,
        12405, 12577, 13008, 13101, 13306, 13368, 13376, 13422, 13541, 13592,
        13703, 13785, 13808, 13921, 14106, 14225, 14652, 14666, 14764, 14821,
        14848, 15065, 15083, 15537, 15555, 15557, 15795, 15886, 15988, 16064,
        16097, 16176, 16521, 16591, 16615, 17026, 17064, 17090, 

tensor([    0,   389,   509,   678,   984,  1390,  1413,  1468,  1572,  1591,
         1630,  1761,  1784,  1941,  1994,  2431,  2556,  2596,  2697,  2808,
         2915,  3011,  3209,  3729,  3730,  4041,  4083,  4091,  4188,  4199,
         4395,  4465,  4472,  4541,  4894,  5003,  5116,  5191,  5326,  5382,
         5400,  5567,  5598,  5796,  6135,  6303,  6641,  6691,  6703,  7027,
         7049,  7373,  7977,  8062,  8069,  8177,  8277,  8389,  8426,  8438,
         8537,  8632,  8831,  8947,  9067,  9106,  9200,  9321,  9337,  9482,
         9636,  9735,  9787, 10596, 10702, 10818, 11035, 11054, 11055, 11117,
        11181, 11199, 11213, 11241, 11270, 11409, 11827, 11874, 11897, 12141,
        12405, 12577, 13008, 13101, 13306, 13368, 13376, 13422, 13541, 13592,
        13703, 13785, 13808, 13921, 14106, 14225, 14652, 14666, 14764, 14821,
        14848, 15065, 15083, 15537, 15555, 15557, 15795, 15886, 15988, 16064,
        16097, 16176, 16521, 16591, 16615, 17026, 17064, 17090, 

可视化每个场景中，通过target assign匹配的anchor bbox

In [38]:
%matplotlib auto
batch_bbox = assign_ret.pos_tuples
batch_ids = batch_bbox[:, 0]
bbox_ids = batch_bbox[:, 2]
points = test_data['points']
for i in range(batch_size):
    mask = batch_ids == i
    frame_bbox_ids = bbox_ids[mask]
    if frame_bbox_ids.size(0) > 0:
        print(frame_bbox_ids)
        frame_pc = points[points[:, 0] == i][:, 1:]
        frame_bbox = all_anchors[frame_bbox_ids]
        frame_gt = test_data['gt_boxes'][i]
        w = VisualWindow(mode='3d')
        w.draw_point_cloud(frame_pc.cpu().numpy())
        w.draw_boxes3d(frame_gt[:,:7].cpu().numpy())
        w.draw_boxes3d(frame_bbox.cpu().numpy(), 'corner', c='r')
        # break

Using matplotlib backend: Qt5Agg
tensor([11874, 16615, 17133, 20686, 21850, 23599, 27058, 32796, 33041, 35499,
        39104], device='cuda:0')
tensor([14666, 17090, 19050, 19838, 20282, 21185, 26361, 27517, 29092, 35454,
        37084], device='cuda:0')
tensor([ 6691, 13101, 15555, 15557, 17064, 21052, 26609, 27434, 29490, 32288,
        43518, 52260], device='cuda:0')
tensor([    0,  5796, 13008, 20593, 23044, 25868, 27836, 30656, 33046, 34362,
        37044, 37904, 38256], device='cuda:0')


上面所有子模块组成基于anchor的Dense head：anchor head

In [39]:
from basic.module.dense_head.anchor_head.anchor_head_base import AnchorHeadBase
dense_head_cfg = model_cfg.MODEL.DENSE_HEAD
anchor_head = AnchorHeadBase(dense_head_cfg, model_info_dict).to(device)
output_dict = anchor_head(output)
output_dict

{'cls_pred': tensor([[[ 0.0535,  0.0362],
          [-0.0991,  0.0921],
          [-0.1281,  0.0632],
          ...,
          [ 0.0034,  0.1971],
          [ 0.0562,  0.0372],
          [-0.0977,  0.0159]],
 
         [[ 0.0535,  0.0362],
          [-0.0991,  0.0921],
          [-0.1281,  0.0632],
          ...,
          [-0.0010,  0.1844],
          [ 0.0756,  0.0342],
          [-0.0740,  0.0276]],
 
         [[ 0.0535,  0.0362],
          [-0.0991,  0.0921],
          [-0.1281,  0.0632],
          ...,
          [-0.0376,  0.0809],
          [ 0.0240,  0.0535],
          [-0.0754, -0.0015]],
 
         [[ 0.0535,  0.0362],
          [-0.0991,  0.0921],
          [-0.1281,  0.0632],
          ...,
          [-0.0186,  0.1782],
          [ 0.0634,  0.0500],
          [-0.0739,  0.0267]]], device='cuda:0', grad_fn=<UnsafeViewBackward>),
 'reg_pred': tensor([[[ 1.0238e-01,  3.4355e-02, -4.8905e-02,  ..., -1.3527e-01,
            4.9688e-02,  1.3377e-01],
          [-8.4775e-02,  2.827

最后直接用模型配置文档生成SECOND模型

In [40]:
from basic.model.second import SECOND

data_infos = dataloader.dataset.get_data_infos()
model = SECOND(model_cfg, data_infos).cuda()

In [41]:
loss = model(test_data)
loss['tol_loss']

tensor(2.3224, device='cuda:0', grad_fn=<AddBackward0>)

IOU

In [42]:
s = torch.randn(10, 1000, 4)
max_s,arg_s = s.max(dim=-1)
_,topk = max_s.topk(5, dim=1)

In [43]:
topk

tensor([[634, 136, 114, 423, 921],
        [822, 211, 518, 544, 639],
        [940, 481,   2, 476, 608],
        [605,  37,  92, 988, 412],
        [437, 205, 757, 176, 792],
        [605, 749, 573, 216,  56],
        [498, 470,   6, 981, 912],
        [780, 987, 165, 895, 208],
        [ 17, 416, 333, 111, 455],
        [171, 298,   4, 931, 157]])

In [44]:
torch.where(arg_s > 0)

(tensor([0, 0, 0,  ..., 9, 9, 9]),
 tensor([  0,   2,   4,  ..., 997, 998, 999]))

In [45]:
data = torch.arange(12).view(3,4)
torch.tensor([0,1,2,3]) in data

True

In [46]:
data == torch.tensor([0,1,2,3])

tensor([[ True,  True,  True,  True],
        [False, False, False, False],
        [False, False, False, False]])

In [47]:
for i in data:
    print(i)

tensor([0, 1, 2, 3])
tensor([4, 5, 6, 7])
tensor([ 8,  9, 10, 11])
