In [1]:
import math
import numpy as np
import torch
from kitti.kitti_dataset import get_dataloader
import yaml
from easydict import EasyDict
from pathlib import Path
from basic.utils.vis_utils import VisualWindow
# %matplotlib inline
# Release unused cached CUDA memory left over from earlier runs in this kernel, then pick the
# device string that the `.to(device)` calls in later cells rely on.
torch.cuda.empty_cache()
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# DataLoader

In [2]:
# Build the KITTI dataloader from the dataset yaml; only the 'Car' class is requested here.
batch_size = 4
dataset_cfg_path = Path("../kitti/cfg/kitti_dataset.yaml")
dataloader = get_dataloader(
    data_cfg_path=dataset_cfg_path,
    class_name_list=['Car'],  # other class names noted by the author: 'Pedestrian', 'Cyclist'
    batch_size=batch_size,
)

  cfg = EasyDict(yaml.load(f))


In [None]:
from basic.utils.common_utils import put_data_to_gpu

# Grab a single batch to drive the per-module tests below. next(iter(...)) pulls exactly one
# batch and fails immediately (StopIteration) if the loader is empty, instead of silently
# leaving `test_data` undefined for the following cells.
test_data = next(iter(dataloader))
# Move the batch through put_data_to_gpu; per the author's note, entries such as frame_id
# stay np.ndarray rather than becoming GPU tensors.
test_data = put_data_to_gpu(test_data)
# print(f"input voxels shape:", test_data['voxels'].shape)
test_data

# Model cfg
最终的目的是想用配置文档（yaml）生成模型，但是下面只是单独测试每一个模块。
- model cfg 中包含各个模块的配置：module cfg
- 在模型全局中使用model_info_dict记录一些必要的模型信息


In [None]:
from pprint import pprint
from basic.utils.config_utils import cfg_from_yaml_file

# Parse the SECOND yaml; the per-module configs are read from its MODEL node.
top_cfg = cfg_from_yaml_file('../basic/model/model_cfg/second.yaml')
model_cfg = top_cfg.MODEL

# Shared record of model-wide state: starts with an empty module list and the training flag,
# then is extended with whatever the dataset reports through get_data_infos().
data_infos = dataloader.dataset.get_data_infos()
model_info_dict = dict(module_list=[], training=True)
model_info_dict.update(data_infos)

pprint(top_cfg)

In [None]:
# Show only the MODEL node of the config (the part that holds the per-module sections).
pprint(model_cfg)

初始模型信息，注意经过每一个模块处理后,更新以下信息。
- 更新module_list记录的模块
- 当前特征图中每个点的特征维度
- 后面模块可能会使用到的当前模块的一些信息

In [None]:
# Dump every entry currently recorded in model_info_dict, one "key:value" per line.
for name in model_info_dict:
    print(f"{name}:{model_info_dict[name]}")

# Voxelize Layer

In [None]:
# Inspect the VOXELIZE_LAYER section of the model config.
voxelize_cfg = model_cfg.VOXELIZE_LAYER
pprint(voxelize_cfg)

In [None]:
from basic.module.voxelize import VoxelLayer

# Build the voxelization layer from its config section, place it on the selected device and
# run it on the test batch.
voxelize_cfg = model_cfg.VOXELIZE_LAYER
voxelize_module = VoxelLayer(model_info_dict=model_info_dict, **voxelize_cfg)
voxelize_module = voxelize_module.to(device)
output = voxelize_module(test_data, keep_points=True)

# Record the module so model_info_dict reflects the pipeline assembled so far.
model_info_dict['module_list'].append(voxelize_module)
output

In [None]:
# Print the shapes of the three voxel-related entries produced by the voxelization layer.
for entry in ('voxels', 'voxel_coords', 'voxel_num_points'):
    print(output[entry].shape)

In [None]:
# Show the grid_size and voxel_size attributes exposed by the voxelization module.
print(voxelize_module.grid_size)
print(voxelize_module.voxel_size)

# Feature Extractor/Encoding Module
点云特征提取模块目的是：从无序的原始点云数据中提取出有序的初步特征,或者说找到一种方式编码原始点云，
令其有序。其实就是把原始点云转换为有序的张量矩阵
常见PointNet的方式，就是为了提取有序的初步特征；而体素的方式，是为了用体素这种格式编码原始点云，令其有序
为什么要这样做？我的理解是，现有CNN只能处理有序的张量！！！不管是3d卷积还是2d卷积



Voxel Feature Extractor(VFE)
- 提取体素级别的特征
输入：体素，以及体素相关的信息
输出：提取的体素特征
- Mean VFE：取每个体素内所有点的平均值作为输出特征
- MLP VFE:对每个体素内的点集，做类似PointNet的操作。即用MLP + Max pooling 提取点集的特征

In [None]:
# Inspect the FEATURE_EXTRACTOR section of the model config.
fe_cfg = model_cfg.FEATURE_EXTRACTOR
pprint(fe_cfg)

In [None]:
# Mean VFE
from basic.module.feature_extractor import MeanVFE

# Place the module with .to(device) (the device chosen in the first cell) instead of a
# hard-coded .cuda(), so this cell matches the voxelize cell and also runs on CPU-only hosts.
mean_vfe_module = MeanVFE(model_info_dict, **fe_cfg).to(device)
# Feed the voxelization output through the extractor; `output` is rebound to the result.
output = mean_vfe_module(output)
model_info_dict['module_list'].append(mean_vfe_module)
print("Mean VFE： voxel_features shape:", output['voxel_features'].shape)
output

In [None]:
# Dump model_info_dict again to see what the VFE module added.
print("current model infos:")
for item in model_info_dict.items():
    print("{}:{}".format(*item))

In [None]:
# MLP VFE: stand-alone smoke test, not appended to module_list.
from basic.module.feature_extractor import MlpVFE

cfg = {'mlp_dims': [32, 64, 64, 128, 128],
       'input_channels': 4}
# Use .to(device) rather than a hard-coded .cuda() so the cell follows the device selected in
# the first cell and does not crash on CPU-only hosts.
mlp_vfe_module = MlpVFE(cfg).to(device)
# The module is run directly on the raw test batch, not on the pipeline `output`.
t = mlp_vfe_module(test_data)
print("Mlp VFE： voxel_features shape:", t.shape)

Point Feature Extractor(PFE)
- 直接提取原始点云的特征
- 代表方法PointNet++的SetAbstract layer

In [None]:
#todo

# Backbone3D
- 经过原始点云的特征提取/编码后，一般会得到B,C,VH,VW,VD的体素张量特征矩阵。或B,C,H,W的点云特征张量矩阵。
根据特征张量维度选择用3D卷积还是2D卷积网络来进一步提取特征。
- 因为体素张量特征矩阵非常稀疏，多使用稀疏卷积。使用spconv库来进行稀疏3D卷积

In [None]:
# Inspect the BACKBONE3D section of the model config.
back3d_cfg = model_cfg.BACKBONE3D
pprint(back3d_cfg)

In [None]:
from basic.module.backbone3d import VoxelBackBone8x

# Place the backbone with .to(device) instead of a hard-coded .cuda(), consistent with the
# device chosen in the first cell.
backbone3d_module = VoxelBackBone8x(model_info_dict, **back3d_cfg).to(device)
# Run the 3D backbone on the VFE output; `output` is rebound to the result.
output = backbone3d_module(output)
model_info_dict['module_list'].append(backbone3d_module)
# 'sp_feat3d' is densified only to report its shape.
print("spconv_tensor_shape:", output['sp_feat3d'].dense().shape)
output

In [None]:
# Dump model_info_dict again to see what the 3D backbone added.
print("current model infos:")
for info_name, info_value in model_info_dict.items():
    line = f"{info_name}:{info_value}"
    print(line)

# NECK
BackBone3D提取的特征向量依然处于3维空间内。目前一般不会在3维空间内提取ROIs。因为3DNMS，3DIOU等都很麻烦....。
因此直接在前视图FOV或在鸟瞰图BEV上提取ROIs。为此需要将3d特征转换为2d特征。
- 常用的Neck：
直接压缩：比如将B,C,D,H,W的特征压缩为B，C*D，H，W，此时的特征图可以认为是BEV视角下的二维特征图

In [None]:
# Inspect the NECK section of the model config.
neck_cfg = model_cfg.NECK
pprint(neck_cfg)

In [None]:
from basic.module.neck import DimCompression

# Build the neck from its config section and apply it to the 3D backbone output; the result
# exposes a 'dense_feat2d' entry whose shape is printed below.
neck_module = DimCompression(
    model_info_dict=model_info_dict,
    **neck_cfg
)
output = neck_module(output)
model_info_dict['module_list'].append(neck_module)

feat2d_shape = output['dense_feat2d'].shape
print("直接压缩", feat2d_shape)
output

In [None]:
# Dump model_info_dict again to see what the neck added.
print("current model infos:")
for key in model_info_dict:
    value = model_info_dict[key]
    print(f"{key}:{value}")

# Backbone2D
与BackBone3D一样，根据输入的张量维度选择用2D卷积网络提取特征。通常在使用NECK模块将3维空间内的特征压缩为2维空间的特征后，
也会再次使用2D的卷积网络进一步提取特征。

In [None]:
# Inspect the BACKBONE2D section of the model config.
backbone2d_cfg = model_cfg.BACKBONE2D
pprint(backbone2d_cfg)

In [None]:
from basic.module.backbone2d import SECONDFPN

# Place the 2D backbone with .to(device) instead of a hard-coded .cuda(), consistent with the
# device chosen in the first cell.
backbone2d = SECONDFPN(model_info_dict, **backbone2d_cfg).to(device)
# Run it on the neck output; `output` is rebound to the result.
output = backbone2d(output)
model_info_dict['module_list'].append(backbone2d)
output

In [None]:
# Shape of the 'dense_feat2d' entry after the 2D backbone.
output['dense_feat2d'].shape

In [None]:
# Dump model_info_dict again to see what the 2D backbone added.
print("current model infos:")
for entry_name, entry in model_info_dict.items():
    print("%s:%s" % (format(entry_name), format(entry)))

In [None]:
# from basic.module.backbone2d.base_bev_backbone import BaseBEVBackbone
# backbone2d_cfg = model_cfg.MODEL.BACKBONE2D
# backbone2d = BaseBEVBackbone(backbone2d_cfg, model_info_dict).cuda()
# output = backbone2d(output)
# output['spatial_features_2d'].shape

截止目前为止：输入点云的shape变化为
- 原始点云->体素：183149, 5, 4
- VFE：183149, 4
- BackBone3D：B， 128， 2， 200， 176
- neck：B，256，200，176
- BackBone2D：B，512，200，176，shape未变因为卷积过后，又转置卷积回了原始大小
经过上面的各个模块，从原始点云中获取了能代表该点云的二维特征图。接下来是3D目标识别中最重要的部分：Dense Head 与 ROI head。

anchor generator（一）
- 枚举anchor用到的7个特征的可能取值，即x，y，z，h，w，l，r，然后通过meshgrid产生所有anchors。其中根据anchor中心坐标xyz的取法不同又分为Range和Stride两种方案
    - Range：在点云范围内，给定每个轴的取值范围。每个轴按照特征图中对应的维度平均划分这些轴。比如特征图对应X轴的维度大小为176，就在X轴范围内平均划分176个。
    - Stride：给定xyz坐标下的原点坐标，分别以x stride，y stride，z stride沿着各个轴的正方向按步长获得anchor中心坐标xyz。
    - 代码接口虽然可以自定义Range和Stride。但是为了将特征图上的每个特征点与原图上的每个anchor关联起来，一定要平均划分！！即Range取值为点云的范围，而Stride取值为
  点云采样范围 / 特征图大小。即\[z_stride, x_stride, y_stride\]=\[z_len, x_len, y_len\] / \[H, W, L\]。这样看按Range还是Stride的方案取得的结果应该差距不大。。。
    - 实际上就是把特征图上的每个特征点，映射回了原始数据上对应区域的中心。假如原始点云下采样了8倍得到特征图，则特征图中\[0,0,0\]点对应原点云（点云原点坐标为000）中以\[4,4,4\]为中心、边长为8的立方体区域（即各轴范围为\[0,8\]）？

In [None]:
# Anchor generator config: read from DENSE_HEAD.ANCHOR_GENERATOR_CONFIG and printed for inspection.
anchor_gen_cfg = model_cfg.DENSE_HEAD.ANCHOR_GENERATOR_CONFIG
pprint(anchor_gen_cfg)

In [None]:
from basic.module.dense_head.anchor_generator.anchor_gen_base import AnchorGenerator

# NOTE(review): the notes in this notebook describe a 200 x 176 feature map, but 178 is passed
# here — confirm which value matches the backbone output.
bev_map_size = np.array([1, 200, 178])

# Generate the (unflattened) anchor grid for the 'Car' class and look at its first and last
# entries plus the difference between two neighbouring entries.
anchor_generator = AnchorGenerator(anchor_gen_cfg, model_info_dict, class_type='Car', dtype=torch.float32)
anchors = anchor_generator.gen_anchors(flatten_output=False, feature_map_size=bev_map_size)
first_anchor, last_anchor = anchors[0, 0], anchors[-1, -1]
print("Range anchors shape:", anchors.shape)
print("Range stride:", anchors[1, 1] - first_anchor)
print("begin:", first_anchor)
print("end:", last_anchor)

In [None]:
# Regenerate the anchors in 'Stride' mode so they can be compared with the previous cell.
anchor_generator.set_mode('Stride')
anchors = anchor_generator.gen_anchors(
    flatten_output=False,
    feature_map_size=np.array([1, 200, 178]),
)
stride_report = (
    ("stride:", anchor_generator.stride),
    ("Stride anchors shape:", anchors.shape),
    ("begin:", anchors[0, 0]),
    ("end:", anchors[-1, -1]),
)
for label, value in stride_report:
    print(label, value)

# Collapse all leading axes so that each row holds one 7-value anchor.
final_anchors = anchors.view(-1, 7)
print("output anchor shape:", final_anchors.shape)

1.注意输出anchors的shape为176, 200, 1, 1, 2, 7。最后一个维度代表anchor的特征向量xyzlwhr，
其他维度分别与x y z size rot的可枚举数量一致.当然最后输出的shape为(176x200x1x1x2, 7)
2.在对齐体素中心的情况下，Range和Stride两种方案的结果都是一样的。假如点云的x轴范围为\[0, 70.4\]，
而x轴对应的维度在特征图上大小为176.则均分后相邻点的距离为70.4 / 176 = 0.4。Range和Stride
枚举X坐标的核心代码如下

In [None]:
# Enumerate x-axis anchor centres for an x range of [0, 70.4] mapped onto 176 cells.
x_min, x_max, num_cells = 0, 70.4, 176
cell_size = 0.4  # 70.4 / 176
half_cell = cell_size / 2

# "Range" scheme: evenly spaced samples between two end points; the centre-aligned variant
# pulls both end points in by half a cell.
ranges = torch.linspace(x_min, x_max, num_cells)
range_align_center = torch.linspace(x_min + 0.2, x_max - 0.2, num_cells)

# "Stride" scheme: start at the origin and advance by a fixed step; the centre-aligned variant
# shifts every sample by half a cell.
stride = torch.arange(0, num_cells) * cell_size
stride_align_center = stride + half_cell

In [None]:
# Switch the generator back to 'Range' mode for the cells that follow.
anchor_generator.set_mode('Range')

随机绘制100个anchor box看看
- 明显anchor 产生的全部BBox能覆盖整个点云cube范围

In [None]:
%matplotlib auto
anchors = anchor_generator.gen_anchors(flatten_output=True, feature_map_size=np.array([1, 200, 178]))
w = VisualWindow(mode='3d')
points = test_data['points']
test_pc = points[points[:, 0] == 0][:, 1:]
w.draw_point_cloud(pc=test_pc.cpu().numpy())
sample_ids = torch.randperm(anchors.size(0))[:100]
w.draw_boxes3d(boxes=anchors[sample_ids].cpu().numpy(), format='corner')

anchor generator（二）
- 在xyz坐标原点生成基本的anchors，然后通过stride移动这些anchors

MultiClass Generator
在同一feature map上为每种类别生成对应的anchor。输出\[class_dim，xdim，ydim，zdim，size_dim,rot_dim,7\]

In [None]:
# from basic.module.dense_head.anchor_generator.anchor_gen_base import MultiClsAnchorGenerator
# mul_generator = MultiClsAnchorGenerator(anchor_gen_cfg, model_info_dict,
#                                         feature_map_size=np.array([1, 200, 178],
#                                         cls_list=['Car', 'Pedestrian', 'Cyclist'])
# all_anchors = mul_generator.gen_anchors(flatten_output=False)
# print("class_dim，xdim，ydim，zdim，size_dim,rot_dim,7:", all_anchors.shape)
# print("Car:", all_anchors[0, 0, 0])
# print("Pedestrian:", all_anchors[1, 0, 0])
# print("Cyclist", all_anchors[2, 0, 0])

Target assigner
目的：1.训练时，为每个anchor指定类别标签和Boxes偏移量标签；
输入：1.Anchors\[K,7\];2.Ground Truth Boxes\[B,N,8\],其中8=xyzhwlr+class_ind

In [None]:
# Show one ground-truth box entry (frame 0, box 0) and the target-assigner config section.
print("gt_box_temp:", test_data['gt_boxes'][0, 0])
assigner_cfg = model_cfg.DENSE_HEAD.TARGET_ASSIGNER_CONFIG
pprint(assigner_cfg)

In [None]:
# Print every model_info_dict entry except the (verbose) module list.
for key, value in model_info_dict.items():
    if key == 'module_list':
        continue
    print(key, value)

In [None]:
from basic.module.dense_head.target_assigner import MaxIouTargetAssigner
# Build the target assigner from its config section and the shared model info.
target_assigner = MaxIouTargetAssigner(assigner_cfg, model_info_dict)

In [None]:
import os
# Ask CUDA to launch kernels synchronously so errors surface at the offending call.
# NOTE(review): this variable is normally read when CUDA is initialised; earlier cells have
# already used the GPU, so setting it here may have no effect — confirm, and if needed set it
# before the first CUDA call.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# Print the last channel of gt_boxes (the values passed as gt_labels to the assigner below).
print("labels:", test_data['gt_boxes'][:, :, -1])

In [None]:
# Single-class anchors are used here; the multi-class generator variant is disabled above.
anchor_map_size = np.array([1, 200, 178])
all_anchors = anchor_generator.gen_anchors(flatten_output=True, feature_map_size=anchor_map_size).to(device)

target_assigner.force_match = True

# Split gt_boxes into the box part (everything but the last channel) and the last channel,
# which is handed over as the labels.
gt_boxes = test_data['gt_boxes']
assign_ret = target_assigner.assign(
    gts=gt_boxes[..., :-1],
    bboxes=all_anchors,
    gt_labels=gt_boxes[..., -1],
)

In [None]:
# Keep the positive / negative assignment tuples for the consistency checks in the next cells.
# NOTE(review): the column layout is not visible here; later cells read column 0 as the batch
# id and column 2 as the bbox id — confirm against the assigner.
pos_tuples = assign_ret.pos_tuples
neg_tuples = assign_ret.neg_tuples
pos_tuples

In [None]:
# Cross-check: the non-negative positions of pos_tuples_dense are compared element-wise with
# columns 0 and 2 of pos_tuples.
dense_hits = torch.where(assign_ret.pos_tuples_dense >= 0)
t1, t2 = dense_hits
for found, column in ((t1, 0), (t2, 2)):
    print(found)
    print(found == pos_tuples[:, column])

In [None]:
# Cross-check: the unique leading index pairs of the non-zero bbox_targets entries are compared
# element-wise with columns 0 and 2 of pos_tuples.
nonzero_idx = assign_ret.bbox_targets.nonzero()
t = torch.unique(nonzero_idx[:, :2], dim=0)
print(t)
for t_col, tuple_col in ((0, 0), (1, 2)):
    print(t[:, t_col] == pos_tuples[:, tuple_col])

In [None]:
# Cross-check: second-axis positions where bbox_weights equals 1, compared element-wise with
# the last column of pos_tuples.
weighted_positions = torch.where(assign_ret.bbox_weights == 1)
t = weighted_positions[1]
print(t)
print(t == pos_tuples[:, -1])

In [None]:
# Cross-check: sorted second-axis positions where cls_weights equals 1 (printed), against the
# sorted last columns of the positive and negative tuples (displayed as the cell result).
t = torch.where(assign_ret.cls_weights == 1)
print(torch.sort(t[1]).values)
assigned_ids = torch.cat([pos_tuples[:, -1], neg_tuples[:, -1]])
torch.sort(assigned_ids).values

可视化每个场景中，通过target assign匹配的anchor bbox

In [None]:
%matplotlib auto
batch_bbox = assign_ret.pos_tuples
batch_ids = batch_bbox[:, 0]
bbox_ids = batch_bbox[:, 2]
points = test_data['points']
for i in range(batch_size):
    mask = batch_ids == i
    frame_bbox_ids = bbox_ids[mask]
    if frame_bbox_ids.size(0) > 0:
        print(frame_bbox_ids)
        frame_pc = points[points[:, 0] == i][:, 1:]
        frame_bbox = all_anchors[frame_bbox_ids]
        frame_gt = test_data['gt_boxes'][i]
        w = VisualWindow(mode='3d')
        w.draw_point_cloud(frame_pc.cpu().numpy())
        w.draw_boxes3d(frame_gt[:,:7].cpu().numpy())
        w.draw_boxes3d(frame_bbox.cpu().numpy(), 'corner', c='r')
        # break

上面所有子模块组成基于anchor的Dense head：anchor head

In [None]:
# Inspect the whole DENSE_HEAD section of the model config.
dense_head_cfg = model_cfg.DENSE_HEAD
pprint(dense_head_cfg)

In [None]:
from basic.module.dense_head.anchor_head.anchor3d_head import Anchor3DHead
# NOTE(review): the head is given top_cfg, whereas the earlier modules receive their own
# sub-config (dense_head_cfg is available) — confirm this is what Anchor3DHead expects.
anchor_head = Anchor3DHead(top_cfg, model_info_dict).to(device)
# Run the head on the 2D backbone result currently held in `output`.
output_dict = anchor_head(output)
output_dict

最后直接用模型配置文档生成SECOND模型

In [None]:
from basic.model.second import SECOND

# Build the complete SECOND model straight from the top-level config and the dataset infos.
data_infos = dataloader.dataset.get_data_infos()
# Use .to(device) instead of a hard-coded .cuda() so the model follows the device selected in
# the first cell and the cell does not crash on CPU-only hosts.
model = SECOND(top_cfg, data_infos).to(device)

In [None]:
# Run the full model on the test batch and display its 'tol_loss' entry.
# NOTE(review): 'tol_loss' is presumably the total loss — confirm against the model.
loss = model(test_data)
loss['tol_loss']

IOU

In [None]:
# Scratch check of max/topk semantics on a random (10, 1000, 4) tensor.
s = torch.randn(10, 1000, 4)
# Reduce the last axis: max_s holds the maxima, arg_s the positions they came from.
max_s, arg_s = s.max(dim=-1)
# Keep only the indices of the 5 largest maxima along dim 1. Reading `.indices` avoids
# assigning the unused values to `_`, which would shadow IPython's "last output" variable.
topk = max_s.topk(5, dim=1).indices

In [None]:
# Display the top-5 indices computed in the previous cell.
topk

In [None]:
# Index tuples of the entries whose arg-max position along the last axis is not 0.
torch.where(arg_s > 0)

In [None]:
# Scratch check: is a given row present in a 2-D tensor?
data = torch.arange(12).view(3, 4)
row = torch.tensor([0, 1, 2, 3])
# `row in data` is not a row-membership test: it is true as soon as ANY single element matches
# after broadcasting (e.g. [0, 1, 2, 4] would also report True). Require a full-row match.
row_in_data = bool((data == row).all(dim=1).any())
row_in_data

In [None]:
# Element-wise comparison; the 1-D tensor is broadcast against every row of `data`.
data == torch.tensor([0,1,2,3])

In [None]:
# Iterating over a 2-D tensor walks its first axis, i.e. prints one row per step.
for i in data:
    print(i)

In [None]:
# Random (178, 200) integer grid used by the two flattening-order experiments below.
# Note that this rebinds `data`, which held the 3 x 4 tensor in the cells above.
data = torch.randint(0, 1000, (178, 200))

In [None]:
# Entry 1234 when the grid is flattened in its stored (row-major) order.
data.view(-1,1)[1234]

In [None]:
# Entry 1234 when the two axes are swapped before flattening, for comparison with the cell above.
data.permute(1,0).reshape(-1,1)[1234]

In [None]:
# NOTE(review): bare reference to torch.permute; it only displays the function object and looks
# like leftover scratch — consider removing.
torch.permute