In [1]:
#keep imports here
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from pcdet.ops.voxel import Voxelization

##Load data and voxelize
# kitti config file
DATASET: 'KittiDataset'
DATA_PATH: '../data/kitti'

POINT_CLOUD_RANGE: [0, -40, -3, 70.4, 40, 1]

DATA_SPLIT: {
    'train': train,
    'test': val
}

INFO_PATH: {
    'train': [kitti_infos_train.pkl],
    'test': [kitti_infos_val.pkl],
}

GET_ITEM_LIST: ["points"]
FOV_POINTS_ONLY: True

DATA_AUGMENTOR:
    DISABLE_AUG_LIST: ['placeholder']
    AUG_CONFIG_LIST:
        - NAME: gt_sampling
          USE_ROAD_PLANE: True
          DB_INFO_PATH:
              - kitti_dbinfos_train.pkl
          PREPARE: {
             filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'],
             filter_by_difficulty: [-1],
          }

          SAMPLE_GROUPS: ['Car:20','Pedestrian:15', 'Cyclist:15']
          NUM_POINT_FEATURES: 4
          DATABASE_WITH_FAKELIDAR: False
          REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0]
          LIMIT_WHOLE_SCENE: True

        - NAME: random_world_flip
          ALONG_AXIS_LIST: ['x']

        - NAME: random_world_rotation
          WORLD_ROT_ANGLE: [-0.78539816, 0.78539816]

        - NAME: random_world_scaling
          WORLD_SCALE_RANGE: [0.95, 1.05]


POINT_FEATURE_ENCODING: {
    encoding_type: absolute_coordinates_encoding,
    used_feature_list: ['x', 'y', 'z', 'intensity'],
    src_feature_list: ['x', 'y', 'z', 'intensity'],
}


DATA_PROCESSOR:
    - NAME: mask_points_and_boxes_outside_range
      REMOVE_OUTSIDE_BOXES: True

    - NAME: shuffle_points
      SHUFFLE_ENABLED: {
        'train': True,
        'test': False
      }

    - NAME: transform_points_to_voxels
      VOXEL_SIZE: [0.05, 0.05, 0.1]
      MAX_POINTS_PER_VOXEL: 5
      MAX_NUMBER_OF_VOXELS: {
        'train': 16000,
        'test': 40000
      }



In [1]:
class KittiDataset(Dataset):
    def __init__(self, cfg):
        super(KittiDataset,self).__init__()
    
    def __len__(self):
        return self.length
    
    def __getitem(self,x):
        return x

NameError: name 'Dataset' is not defined

In [9]:
from pcdet.models.detectors.pointpillar import PointPillar

In [5]:
import numpy as np
from pcdet.config import cfg, cfg_from_yaml_file
from eval_utils.eval_utils import load_data_to_gpu
import pickle
cfg = cfg_from_yaml_file("cfgs/kitti_models/spg.yaml", cfg)
model_cfg = cfg.MODEL

train_sample = pickle.load(open("train_sample.p", "rb"))
print (train_sample.keys())
train_sample["gt_classification"] = np.zeros(train_sample["batch_size"], train_sample["voxel_num_points"][0])
print (train_sample["batch_size"])
load_data_to_gpu(train_sample)

dict_keys(['frame_id', 'gt_boxes', 'points', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'image_shape', 'batch_size'])
1


In [8]:
for p in cfg.DATA_CONFIG.DATA_PROCESSOR:
    if p.NAME == "transform_points_to_smaller_voxels":
        small_voxel_size = p.VOXEL_SIZE

[0.16, 0.16, 0.1]


In [24]:
#SPG MODEL
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
model_info_dict = {'module_list': [], 
                   'num_rawpoint_features': 4, 
                   'num_point_features': 4, 
                   'grid_size': np.array([432, 496, 1]), 
                   'point_cloud_range': np.array([  0.  , -39.68,  -3.  ,  69.12,  39.68,   1.  ]), 
                   'voxel_size': [0.16, 0.16, 4], 
                   'z_voxel_size': 0.1,
                   'depth_downsample_factor': None}
model_info_dict["out_classes"] = int(abs(cfg.DATA_CONFIG.POINT_CLOUD_RANGE[2] - cfg.DATA_CONFIG.POINT_CLOUD_RANGE[5])/model_info_dict["z_voxel_size"])
model_info_dict

{'module_list': [],
 'num_rawpoint_features': 4,
 'num_point_features': 4,
 'grid_size': array([432, 496,   1]),
 'point_cloud_range': array([  0.  , -39.68,  -3.  ,  69.12,  39.68,   1.  ]),
 'voxel_size': [0.16, 0.16, 4],
 'z_voxel_size': 0.1,
 'depth_downsample_factor': None,
 'out_classes': 40}

In [25]:
class VFETemplate(nn.Module):
    def __init__(self, model_cfg, **kwargs):
        super().__init__()
        self.model_cfg = model_cfg

    def get_output_feature_dim(self):
        raise NotImplementedError

    def forward(self, **kwargs):
        """
        Args:
            **kwargs:

        Returns:
            batch_dict:
                ...
                vfe_features: (num_voxels, C)
        """
        raise NotImplementedError

class PFNLayer(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels,
                 use_norm=True,
                 last_layer=False):
        super().__init__()
        
        self.last_vfe = last_layer
        self.use_norm = use_norm
        if not self.last_vfe:
            out_channels = out_channels // 2

        if self.use_norm:
            self.linear = nn.Linear(in_channels, out_channels, bias=False)
            self.norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01)
        else:
            self.linear = nn.Linear(in_channels, out_channels, bias=True)

        self.part = 50000

    def forward(self, inputs):
        if inputs.shape[0] > self.part:
            # nn.Linear performs randomly when batch size is too large
            num_parts = inputs.shape[0] // self.part
            part_linear_out = [self.linear(inputs[num_part*self.part:(num_part+1)*self.part])
                               for num_part in range(num_parts+1)]
            x = torch.cat(part_linear_out, dim=0)
        else:
            x = self.linear(inputs)
        torch.backends.cudnn.enabled = False
        x = self.norm(x.permute(0, 2, 1)).permute(0, 2, 1) if self.use_norm else x
        torch.backends.cudnn.enabled = True
        x = F.relu(x)
        x_max = torch.max(x, dim=1, keepdim=True)[0]

        if self.last_vfe:
            return x_max
        else:
            x_repeat = x_max.repeat(1, inputs.shape[1], 1)
            x_concatenated = torch.cat([x, x_repeat], dim=2)
            return x_concatenated


class PillarVFE(VFETemplate):
    def __init__(self, model_cfg, num_point_features, voxel_size, point_cloud_range, **kwargs):
        super().__init__(model_cfg=model_cfg)

        self.use_norm = self.model_cfg.USE_NORM
        self.with_distance = self.model_cfg.WITH_DISTANCE
        self.use_absolute_xyz = self.model_cfg.USE_ABSLOTE_XYZ
        num_point_features += 6 if self.use_absolute_xyz else 3
        if self.with_distance:
            num_point_features += 1

        self.num_filters = self.model_cfg.NUM_FILTERS
        assert len(self.num_filters) > 0
        num_filters = [num_point_features] + list(self.num_filters)

        pfn_layers = []
        for i in range(len(num_filters) - 1):
            in_filters = num_filters[i]
            out_filters = num_filters[i + 1]
            pfn_layers.append(
                PFNLayer(in_filters, out_filters, self.use_norm, last_layer=(i >= len(num_filters) - 2))
            )
        self.pfn_layers = nn.ModuleList(pfn_layers)

        self.voxel_x = voxel_size[0]
        self.voxel_y = voxel_size[1]
        self.voxel_z = voxel_size[2]
        self.x_offset = self.voxel_x / 2 + point_cloud_range[0]
        self.y_offset = self.voxel_y / 2 + point_cloud_range[1]
        self.z_offset = self.voxel_z / 2 + point_cloud_range[2]

    def get_output_feature_dim(self):
        return self.num_filters[-1]

    def get_paddings_indicator(self, actual_num, max_num, axis=0):
        actual_num = torch.unsqueeze(actual_num, axis + 1)
        max_num_shape = [1] * len(actual_num.shape)
        max_num_shape[axis + 1] = -1
        max_num = torch.arange(max_num, dtype=torch.int, device=actual_num.device).view(max_num_shape)
        paddings_indicator = actual_num.int() > max_num
        return paddings_indicator

    def forward(self, batch_dict, **kwargs):
        voxel_features, voxel_num_points, coords = batch_dict['voxels'], batch_dict['voxel_num_points'], batch_dict['voxel_coords']
        points_mean = voxel_features[:, :, :3].sum(dim=1, keepdim=True) / voxel_num_points.type_as(voxel_features).view(-1, 1, 1)
        f_cluster = voxel_features[:, :, :3] - points_mean

        f_center = torch.zeros_like(voxel_features[:, :, :3])
        f_center[:, :, 0] = voxel_features[:, :, 0] - (coords[:, 3].to(voxel_features.dtype).unsqueeze(1) * self.voxel_x + self.x_offset)
        f_center[:, :, 1] = voxel_features[:, :, 1] - (coords[:, 2].to(voxel_features.dtype).unsqueeze(1) * self.voxel_y + self.y_offset)
        f_center[:, :, 2] = voxel_features[:, :, 2] - (coords[:, 1].to(voxel_features.dtype).unsqueeze(1) * self.voxel_z + self.z_offset)

        if self.use_absolute_xyz:
            features = [voxel_features, f_cluster, f_center]
        else:
            features = [voxel_features[..., 3:], f_cluster, f_center]

        if self.with_distance:
            points_dist = torch.norm(voxel_features[:, :, :3], 2, 2, keepdim=True)
            features.append(points_dist)
        features = torch.cat(features, dim=-1)

        voxel_count = features.shape[1]
        mask = self.get_paddings_indicator(voxel_num_points, voxel_count, axis=0)
        mask = torch.unsqueeze(mask, -1).type_as(voxel_features)
        features *= mask
        for pfn in self.pfn_layers:
            features = pfn(features)
        features = features.squeeze()
        batch_dict['pillar_features'] = features
        return batch_dict
    
class PointPillarScatter(nn.Module):
    def __init__(self, model_cfg, grid_size, **kwargs):
        super().__init__()

        self.model_cfg = model_cfg
        self.num_bev_features = self.model_cfg.NUM_BEV_FEATURES
        self.nx, self.ny, self.nz = grid_size
        assert self.nz == 1

    def forward(self, batch_dict, **kwargs):
        pillar_features, coords = batch_dict['pillar_features'], batch_dict['voxel_coords']
        batch_spatial_features = []
        batch_size = coords[:, 0].max().int().item() + 1
        for batch_idx in range(batch_size):
            spatial_feature = torch.zeros(
                self.num_bev_features,
                self.nz * self.nx * self.ny,
                dtype=pillar_features.dtype,
                device=pillar_features.device)

            batch_mask = coords[:, 0] == batch_idx
            this_coords = coords[batch_mask, :]
            indices = this_coords[:, 1] + this_coords[:, 2] * self.nx + this_coords[:, 3]
            indices = indices.type(torch.long)
            pillars = pillar_features[batch_mask, :]
            pillars = pillars.t()
            spatial_feature[:, indices] = pillars
            batch_spatial_features.append(spatial_feature)

        batch_spatial_features = torch.stack(batch_spatial_features, 0)
        batch_spatial_features = batch_spatial_features.view(batch_size, self.num_bev_features * self.nz, self.ny, self.nx)
        batch_dict['spatial_features'] = batch_spatial_features
        return batch_dict

class BaseBEVBackbone(nn.Module):
    def __init__(self, model_cfg, input_channels):
        super().__init__()
        self.model_cfg = model_cfg

        if self.model_cfg.get('LAYER_NUMS', None) is not None:
            assert len(self.model_cfg.LAYER_NUMS) == len(self.model_cfg.LAYER_STRIDES) == len(self.model_cfg.NUM_FILTERS)
            layer_nums = self.model_cfg.LAYER_NUMS
            layer_strides = self.model_cfg.LAYER_STRIDES
            num_filters = self.model_cfg.NUM_FILTERS
        else:
            layer_nums = layer_strides = num_filters = []

        if self.model_cfg.get('UPSAMPLE_STRIDES', None) is not None:
            assert len(self.model_cfg.UPSAMPLE_STRIDES) == len(self.model_cfg.NUM_UPSAMPLE_FILTERS)
            num_upsample_filters = self.model_cfg.NUM_UPSAMPLE_FILTERS
            upsample_strides = self.model_cfg.UPSAMPLE_STRIDES
        else:
            upsample_strides = num_upsample_filters = []

        num_levels = len(layer_nums)
        c_in_list = [input_channels, *num_filters[:-1]]
        self.blocks = nn.ModuleList()
        self.deblocks = nn.ModuleList()
        for idx in range(num_levels):
            cur_layers = [
                nn.ZeroPad2d(1),
                nn.Conv2d(
                    c_in_list[idx], num_filters[idx], kernel_size=3,
                    stride=layer_strides[idx], padding=0, bias=False
                ),
                nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01),
                nn.ReLU()
            ]
            for k in range(layer_nums[idx]):
                cur_layers.extend([
                    nn.Conv2d(num_filters[idx], num_filters[idx], kernel_size=3, padding=1, bias=False),
                    nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01),
                    nn.ReLU()
                ])
            self.blocks.append(nn.Sequential(*cur_layers))
            if len(upsample_strides) > 0:
                stride = upsample_strides[idx]
                if stride >= 1:
                    self.deblocks.append(nn.Sequential(
                        nn.ConvTranspose2d(
                            num_filters[idx], num_upsample_filters[idx],
                            upsample_strides[idx],
                            stride=upsample_strides[idx], bias=False
                        ),
                        nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, momentum=0.01),
                        nn.ReLU()
                    ))
                else:
                    stride = np.round(1 / stride).astype(np.int)
                    self.deblocks.append(nn.Sequential(
                        nn.Conv2d(
                            num_filters[idx], num_upsample_filters[idx],
                            stride,
                            stride=stride, bias=False
                        ),
                        nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, momentum=0.01),
                        nn.ReLU()
                    ))

        c_in = sum(num_upsample_filters)
        if len(upsample_strides) > num_levels:
            self.deblocks.append(nn.Sequential(
                nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], stride=upsample_strides[-1], bias=False),
                nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01),
                nn.ReLU(),
            ))

        self.num_bev_features = c_in

    def forward(self, data_dict):
        """
        Args:
            data_dict:
                spatial_features
        Returns:
        """
        spatial_features = data_dict['spatial_features']
        ups = []
        ret_dict = {}
        x = spatial_features
        for i in range(len(self.blocks)):
            x = self.blocks[i](x)

            stride = int(spatial_features.shape[2] / x.shape[2])
            ret_dict['spatial_features_%dx' % stride] = x
            if len(self.deblocks) > 0:
                ups.append(self.deblocks[i](x))
            else:
                ups.append(x)

        if len(ups) > 1:
            x = torch.cat(ups, dim=1)
        elif len(ups) == 1:
            x = ups[0]

        if len(self.deblocks) > len(self.blocks):
            x = self.deblocks[-1](x)

        data_dict['spatial_features_2d'] = x
        return data_dict



class ConvClassificationHead(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ConvClassificationHead, self).__init__()
        self.out_channels = out_channels
        self.conv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size = 4, padding = 1, stride = 2)
        self.act = nn.Sigmoid()  #what should it be
    def forward(self, batch_dict):
        breakpoint()

        spatial_features_2d = batch_dict["spatial_features_2d"]
        output_prob = self.conv(spatial_features_2d)
        if not self.training:
            output_prob = self.act(output_prob)
        batch_dict["output_prob"] = output_prob
        return batch_dict
        
    
class SPG_CLASSIFICATION(nn.Module):
    def __init__(self):
        super(SPG_CLASSIFICATION, self).__init__()
        self.vfe_module = PillarVFE(
            model_cfg = model_cfg.VFE,
            num_point_features=model_info_dict['num_rawpoint_features'],
            point_cloud_range=model_info_dict['point_cloud_range'],
            voxel_size=model_info_dict['voxel_size'],
            grid_size=model_info_dict['grid_size'],
            depth_downsample_factor=model_info_dict['depth_downsample_factor']
        )
        model_info_dict['num_point_features'] = self.vfe_module.get_output_feature_dim()
        model_info_dict['module_list'].append(self.vfe_module)
        
        self.map_to_bev_module = PointPillarScatter(
            model_cfg = model_cfg.MAP_TO_BEV,
            grid_size = model_info_dict['grid_size']
        )
        model_info_dict['module_list'].append(self.map_to_bev_module)
        model_info_dict['num_bev_features'] = self.map_to_bev_module.num_bev_features
        
        self.backbone_2d_module = BaseBEVBackbone(
            model_cfg=model_cfg.BACKBONE_2D,
            input_channels=model_info_dict['num_bev_features']
        )
        model_info_dict['module_list'].append(self.backbone_2d_module)
        model_info_dict['num_bev_features'] = self.backbone_2d_module.num_bev_features
        
        self.classification_head = ConvClassificationHead(model_info_dict["num_bev_features"],model_info_dict["out_classes"])
    def forward(self, batch_dict):
        batch_dict = self.vfe_module(batch_dict)
        batch_dict = self.map_to_bev_module(batch_dict)
        batch_dict = self.backbone_2d_module(batch_dict)
        batch_dict = self.classification_head(batch_dict)
        return batch_dict

        
        
model = SPG_CLASSIFICATION()
model.cuda()

SPG_CLASSIFICATION(
  (vfe_module): PillarVFE(
    (pfn_layers): ModuleList(
      (0): PFNLayer(
        (linear): Linear(in_features=10, out_features=64, bias=False)
        (norm): BatchNorm1d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      )
    )
  )
  (map_to_bev_module): PointPillarScatter()
  (backbone_2d_module): BaseBEVBackbone(
    (blocks): ModuleList(
      (0): Sequential(
        (0): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
        (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), bias=False)
        (2): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (3): ReLU()
        (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (5): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (6): ReLU()
        (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (8): BatchNorm2d(64, eps=0.001, mo

In [26]:
out_dict = model(train_sample)
out_dict.keys()
# train_sample["voce"]

> [0;32m<ipython-input-25-d1bdc77ea5a3>[0m(289)[0;36mforward[0;34m()[0m
[0;32m    287 [0;31m        [0mbreakpoint[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    288 [0;31m[0;34m[0m[0m
[0m[0;32m--> 289 [0;31m        [0mspatial_features_2d[0m [0;34m=[0m [0mbatch_dict[0m[0;34m[[0m[0;34m"spatial_features_2d"[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    290 [0;31m        [0moutput_prob[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mconv[0m[0;34m([0m[0mspatial_features_2d[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    291 [0;31m        [0;32mif[0m [0;32mnot[0m [0mself[0m[0;34m.[0m[0mtraining[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> 
ipdb> c


dict_keys(['frame_id', 'gt_boxes', 'points', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'image_shape', 'batch_size', 'gt_classification', 'pillar_features', 'spatial_features', 'spatial_features_2d', 'output_prob'])

In [27]:
out_dict["output_prob"].shape

torch.Size([1, 40, 496, 432])

(torch.return_types.min(
 values=tensor([ 0.,  0.,  0., 11.], device='cuda:0'),
 indices=tensor([  0,   0,  79, 136], device='cuda:0')),
 torch.return_types.max(
 values=tensor([  0.,   0., 262., 431.], device='cuda:0'),
 indices=tensor([   0,    0, 1549, 1268], device='cuda:0')))

In [115]:
train_sample["voxels"].shape

torch.Size([6050, 32, 4])

In [116]:
train_sample["voxel_coords"].shape

torch.Size([6050, 4])

In [139]:
train_sample = pickle.load(open("voxelized_train_sample.p", "rb"))
load_data_to_gpu(train_sample)

In [138]:
train_sample["voxel_coords"]
from pcdet.ops.roiaware_pool3d.roiaware_pool3d_utils import points_in_boxes_gpu

In [160]:
grid = train_sample["voxel_coords"]
batch = grid[grid[:,0] == 0][:,1:]
batch = batch.unsqueeze(0)
# batch.shape


In [None]:
boxes = train_sample["gt_boxes"]
# boxes = boxes[boxes[:,0] == 0][:,1:]
# boxes.shape
boxes

In [10]:
import kornia 
kornia.losses.BinaryFocalLossWithLogits


  and should_run_async(code)


kornia.losses.focal.BinaryFocalLossWithLogits

In [12]:
point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
voxel_size = [0.16, 0.16, 0.2]


3

In [24]:
centers = []
coords = []
for i in range(len(voxel_size)):
    centers.append([(x+voxel_size[i]/2) for x in np.arange(point_cloud_range[i], point_cloud_range[i+3], voxel_size[i])])
    coords.append(np.arange(0, len(centers[-1])).tolist())
centers = np.stack(np.meshgrid(*centers), -1).reshape(-1,3)
coords = np.stack(np.meshgrid(*coords), -1).reshape(-1,3)

In [28]:
coords[:,::-1]

array([[  0,   0,   0],
       [  1,   0,   0],
       [  2,   0,   0],
       ...,
       [ 17, 495, 431],
       [ 18, 495, 431],
       [ 19, 495, 431]])

In [29]:
centers.unsqueeze(0)

AttributeError: 'numpy.ndarray' object has no attribute 'unsqueeze'

In [31]:
np.expand_dims(centers, 0).shape

(1, 4285440, 3)