In [None]:
import mmcv
from pyquaternion import Quaternion
from nuscenes.utils.data_classes import Box
from nuscenes.nuscenes import NuScenes
import numpy as np
import cv2
import matplotlib.pyplot as plt

nusc = NuScenes(version='v1.0-trainval', dataroot='/media/hcis-s20/SRL/nuscenes/trainval/', verbose=True)
# bevformer_results = mmcv.load("/media/hcis-s20/SRL/det3d/BEVFormer/test/bevformer_base/Wed_Mar__6_21_42_43_2024/pts_bbox/results_nusc.json")

In [None]:
file_name = "/tmp/tmp4ywinw5c/results/pred_instances_3d/results_nusc.json"
file_name = "/media/hcis-s20/SRL/det3d/SparseBEV/submission/pts_bbox/results_nusc.json" 
bevformer_results = mmcv.load(file_name) 

In [None]:
def get_transformation_matrix(R, t, inv=False):
    pose = np.eye(4, dtype=np.float32)
    pose[:3, :3] = R if not inv else R.T
    pose[:3, -1] = t if not inv else R.T @ -t

    return pose


def get_pose(rotation, translation, inv=False, flat=False):
    if flat:
        yaw = Quaternion(rotation).yaw_pitch_roll[0]
        R = Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).rotation_matrix
    else:
        R = Quaternion(rotation).rotation_matrix

    t = np.array(translation, dtype=np.float32)

    return get_transformation_matrix(R, t, inv=inv)

def get_bev_from_box(
        nusc,
        batch,
        pred, 
        threshold,
        class_name = [  ['car', 'truck', 'bus','trailer', 'construction','motorcycle', 'bicycle',],
                        ['pedestrian']
                     ]
    ):
    def get_class_index(name):
        for i in range(len(class_name)):
            if name in class_name[i]:
                return i
        return -1
    token = batch['token'][0]

    sample = nusc.get('sample', token)
    sample_data_token = sample['data']['LIDAR_TOP']
    sd_record = nusc.get('sample_data', sample_data_token)
    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

    boxes = [Box(record['translation'], record['size'], Quaternion(record['rotation']),
                         name=record['detection_name'], token='predicted') for record in
                     pred['results'][token] if record['detection_score'] > threshold]
    pose_inv = get_pose(pose_record['rotation'],pose_record['translation'], inv=True)
    # pose_inv = batch['pose_inverse'][0].cpu().numpy()
    V = batch['view'][0].cpu().numpy()
    S = np.array([
            [1, 0, 0, 0],
            [0, 1, 0, 0],
            [0, 0, 0, 1],
        ]) 
    render = np.zeros((len(class_name), 200, 200), dtype=np.uint8)
    for box in boxes:
        index = get_class_index(box.name)
        if index == -1:
            continue

        p = box.bottom_corners()
        p = np.pad(p, ((0, 1), (0, 0)), constant_values=1.0)                        
        p = V @ S @ pose_inv @ p        
        # p = p[:2, :4]
        # p = V @ S  @ p        
        p = p[:2]
        cv2.fillPoly(render[index], [p.round().astype(np.int32).T], 1, cv2.LINE_8)
    
    return render


In [1]:
import torch
from common import get_cfg, prepare_val
import numpy as np
import cv2
import matplotlib.pyplot as plt

DATASET_DIR = '/media/hcis-s20/SRL/nuscenes/trainval/'
vehicle_idx = [4, 5, 6, 7, 8, 10, 11,12]
DYNAMIC = [
    'car', 'truck', 'bus',
    'trailer', 'construction_vehicle',
    'pedestrian',
    'motorcycle', 'bicycle',
    # 'emergency',
]

In [2]:
version = 'cvt_labels_nuscenes_v3'
cfg1 = get_cfg(DATASET_DIR, version, 'BEVSD') # cvt_nuscenes_multiclass nuscenes_detr3d
device = torch.device('cpu') # cuda:5
# best resnet: 0830_232653, origin cvt: 0824_024032
CHECKPOINT_PATH = None # '../logs/cross_view_transformers_test/0415_180119/checkpoints/last.ckpt'
model, network, loader, viz, _ = prepare_val(cfg1,device,CHECKPOINT_PATH,mode='val',batch_size=1)

  from .autonotebook import tqdm as notebook_tqdm


4
Dataset length: 6019


In [8]:
max_vehicles = 0.0
max_peds = 0.0
with torch.no_grad():
    for i,batch in enumerate(loader):
        print(f"Index: {i}, max_v: {max_vehicles}, max_p: {max_peds}", end='\r')
        vehicle_idx = [[4,5,6,7,8,10,11]]
        vehicles = [batch['bev'][:, idx].max(1, keepdim=True).values for idx in vehicle_idx]
        vehicles = torch.cat(vehicles, 1)
        vehicles = vehicles[0,0].bool() & batch['visibility'][0]

        ped_idx = [[9]]
        peds = [batch['bev'][:, idx].max(1, keepdim=True).values for idx in ped_idx]
        peds = torch.cat(peds, 1)
        peds = peds[0,0].bool() & batch['visibility_ped'][0]
        # if with_visibility:
        #     if 9 in class_index[0]:
        #         visibility = batch['visibility_ped'][0]
        #     else:
        #         visibility = batch['visibility'][0]

        max_vehicles = max(max_vehicles, vehicles.sum())
        max_peds = max(max_peds, peds.sum())
        # break

Index: 28129, max_v: 3587, max_p: 219

In [None]:
network.to(device)
network.eval()
model.to(device)
model.metrics.reset()
preds = []
scores_vehicles = []
scores_peds = []
threshold = 0.8
with torch.no_grad():
    for i,batch in enumerate(loader):
        print(i,end='\r')
        for k, v in batch.items():
            if isinstance(v, torch.Tensor):
                batch[k] = v.to(device)
            elif isinstance(v, list):
                if isinstance(v[0], torch.Tensor):
                    batch[k] = [i.to(device) for i in v]
            else:
                batch[k] = v
        # break
        # 
        bev_render = np.zeros((1,1,200,200),np.float32)
        for j in range(8):
            if j+4 == 9:
                continue
            label = [batch['bev'][:, idx].max(1, keepdim=True).values for idx in [[j+4]]]
            label = torch.cat(label, 1)[0,0]
            pts = map2points(label.cpu().numpy())
            if len(pts) == 0:
                continue
            clusters = apply_dbscan(pts,1.0,3)
            if len(clusters) == 0:
                continue
            for k in range(clusters.max()+1):
                # if i<clusters.max():
                #     continue
                tmp_index = np.where(clusters==k)[0]
                t_c = tmp_calculate()
                x1, x2, y1, y2 = t_c.compute(pts[tmp_index])
                score = label[y1:y2,x1:x2].sum()/((x2-x1)*(y2-y1))
                scores_vehicles.append(score)
                if score < threshold:
                    continue
                # bev_render[0,0,y1:y2,x1:x2] = 1.0
                cv2.rectangle(bev_render[0,0], (x1, y1), (x2, y2), 1, -1)
        
        ped_render = np.zeros((1,1,200,200),np.float32)
        label = [batch['bev'][:, idx].max(1, keepdim=True).values for idx in [[9]]]
        label = torch.cat(label, 1)[0,0]
        pts = map2points(label.cpu().numpy())
        if len(pts) == 0:
            continue
        clusters = apply_dbscan(pts,1.0,3)
        if len(clusters) == 0:
            continue
        for k in range(clusters.max()+1):
            # if i<clusters.max():
            #     continue
            tmp_index = np.where(clusters==k)[0]
            t_c = tmp_calculate()
            x1, x2, y1, y2 = t_c.compute(pts[tmp_index])
            score = label[y1:y2,x1:x2].sum()/((x2-x1)*(y2-y1))
            scores_peds.append(score)
            if score < threshold:
                continue
            # ped_render[0,0,y1:y2,x1:x2] = 1.0
            cv2.rectangle(ped_render[0,0], (x1, y1), (x2, y2), 1, -1)
            
        # pred = {}
        # pred['bev'] = torch.from_numpy(bev_render).to(device)
        # pred['ped'] = torch.from_numpy(ped_render).to(device)

        # model.metrics.update(pred,batch)


In [None]:
result = model.metrics.compute()
for s in ['iou_ped','iou_vehicle']:
    print(result[s]['@0.60'])

In [None]:
import plotly.express as px
# df = px.data.tips()
scores = torch.stack(scores_vehicles).cpu().numpy()
fig = px.histogram(scores, range_x=[0.0, 1.0], title='Train set Vehicles statistic')
fig.show()

In [None]:
import plotly.graph_objects as go
scores = torch.stack(scores_vehicles).cpu().numpy()
fig = go.Figure(data=[go.Histogram(x=scores, cumulative_enabled=True, histnorm='percent')])

# Set the title and labels
fig.update_layout(
    title_text='Val set Vehicles statistic', 
    xaxis_title_text='Score', 
    yaxis_title_text='Percentage', 
    bargap=0.2, 
    bargroupgap=0.1
)

fig.show()

In [None]:
import plotly.graph_objects as go
scores = torch.stack(scores_peds).cpu().numpy()
fig = go.Figure(data=[go.Histogram(x=scores, cumulative_enabled=True, histnorm='percent')])

# Set the title and labels
fig.update_layout(
    title_text='Val set Peds statistic', 
    xaxis_title_text='Score', 
    yaxis_title_text='Percentage', 
    bargap=0.2, 
    bargroupgap=0.1
)

fig.show()

In [10]:
import torch

def _set_cache_dense_coords():
    """Get, and / or, set dense coordinates used during training and validation."""
    spatial_range = 200, 200, 8
    spatial_bounds = [-50.0, -50.0, 0.0, 50.0, 50.0, 4.0]
    # Alias
    X, Y, Z = spatial_range
    XMIN, YMIN, ZMIN, XMAX, YMAX, ZMAX = spatial_bounds

    # Coordinates
    # (3, rX, rY, Z), r for reverse order.
    dense_vox_coords = torch.stack(
        torch.meshgrid(
            torch.linspace(XMIN, XMAX, X, dtype=torch.float64),
            torch.linspace(YMIN, YMAX, Y, dtype=torch.float64),
            torch.linspace(ZMIN, ZMAX, Z, dtype=torch.float64),
            indexing="ij",
        )
    ).flip(1, 2)
    dense_vox_coords = dense_vox_coords.float()

    # Indices
    dense_vox_idx = torch.stack(
        torch.meshgrid(
            torch.arange(X), torch.arange(Y), torch.arange(Z), indexing="ij"
        )
    ).flip(1, 2).int()
    return dense_vox_coords, dense_vox_idx
a, b = _set_cache_dense_coords()
print(a.shape,b.shape)
print(a[:,-1,-1,:].transpose(0,1))
print(b[:,-1,-1,:].transpose(0,1))
x1, y1, z1 = torch.meshgrid(
            torch.arange(X), torch.arange(Y), torch.arange(Z), indexing="ij"
        )

torch.Size([3, 200, 200, 8]) torch.Size([3, 200, 200, 8])
tensor([[-50.0000, -50.0000,   0.0000],
        [-50.0000, -50.0000,   0.5714],
        [-50.0000, -50.0000,   1.1429],
        [-50.0000, -50.0000,   1.7143],
        [-50.0000, -50.0000,   2.2857],
        [-50.0000, -50.0000,   2.8571],
        [-50.0000, -50.0000,   3.4286],
        [-50.0000, -50.0000,   4.0000]])
tensor([[0, 0, 0],
        [0, 0, 1],
        [0, 0, 2],
        [0, 0, 3],
        [0, 0, 4],
        [0, 0, 5],
        [0, 0, 6],
        [0, 0, 7]], dtype=torch.int32)


NameError: name 'X' is not defined

In [None]:
for th in [0.2]:# [0.2,0.3,0.4,0.5,0.6,0.7]:
    print(th)
    model.metrics.reset()
    for k,m in model.metrics.items():
        m.thresholds = m.thresholds.to(device)
        m.tp = m.tp.to(device)
        m.fp = m.fp.to(device)
        m.fn = m.fn.to(device)
    with torch.no_grad():
        for i,batch in enumerate(loader):
            print(i,end='\r')
            if i == 2:
                break
            continue
            render = get_bev_from_box(nusc,batch,bevformer_results,th)
            render = np.float32(render)
            if i == 2:
                break
            pred = {'bev':torch.from_numpy(render[0])[None,None].to(device),'ped':torch.from_numpy(render[1])[None,None].to(device)}
            batch['bev'] = batch['bev'].to(device)
            batch['visibility'] = batch['visibility'].to(device)
            batch['visibility_ped'] = batch['visibility_ped'].to(device)
            model.metrics.update(pred,batch)

    print()
    for k,m in model.metrics.items():
        print(k,':\n\t',m.compute(),'\n\t','='*50)

In [None]:
network.to(device)
network.eval()
model.to(device)
max_n = 0
with torch.no_grad():
    for i,batch in enumerate(loader):
        print(i,end='\r')
        # for k, v in batch.items():
        #     if isinstance(v, torch.Tensor):
        #         batch[k] = v.to(device)
        #     elif isinstance(v, list):
        #         if isinstance(v[0], torch.Tensor):
        #             batch[k] = [i.to(device) for i in v]
        #     else:
        #         batch[k] = v
        max_n = max(batch['labels'].shape[1],max_n)
        # pred = network(batch)
        # scores, labels = pred['pred_logits'].detach().softmax(-1)[:, :, :-1].max(-1)
        # if (labels == 2).cpu().numpy().any():
        #     print(labels)
        #     break
# render = np.zeros((200,200),dtype=np.uint8)
# for (x1,y1,x2,y2), label in zip(batch['boxes'][0],batch['labels'][0]):
#     if label >= 1:
#         continue
#     x1 = (x1 * 100) -50
#     x2 = (x2 * 100) -50
#     y1 = (y1 * 100) -50
#     y2 = (y2 * 100) -50
#     pts = np.array([[x1,y1,1],[x2,y2,1]]).transpose()
#     pts = batch['view'][0].cpu().numpy() @ pts
#     pts = pts.astype(np.uint8)
#     x1,y1 = pts[:2,0] 
#     x2,y2 = pts[:2,1]
#     cv2.rectangle(render, (x1, y1), (x2, y2), 1, -1)
# plt.imshow(render)
print(max_n)

In [None]:
max_n

In [None]:
type(batch['boxes'])

In [None]:
from cross_view_transformer.util.box_ops import box_cxcywh_to_xyxy, generalized_box_iou
labels = batch['labels']
boxes = batch['boxes']
tgt_ids = torch.cat([v for v in labels])
tgt_bbox = torch.cat([v for v in boxes])
out_bbox = pred["pred_boxes"].flatten(0, 1).cpu()
out_prob = pred["pred_logits"].flatten(0, 1).softmax(-1).cpu()
cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox))
cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
cost_class = -out_prob[:, tgt_ids]


In [None]:
a = torch.ones((2),dtype=torch.long)
test = out_prob[:5]
cost_class = -test[:, a]
print(test.shape)
print(cost_class.shape)

In [None]:
l = []
with torch.no_grad():
    for i,batch in enumerate(loader):
        l.append(batch)
        if i == 30:
            break

In [None]:
plt.imshow(batch['bev'][0,4].cpu().numpy())

In [None]:
with torch.no_grad():
    for i,batch in enumerate(loader):
        vehicle_boxes = []
        view_inv = batch['view'][0].inverse().cpu().numpy()
        for j in range(len(DYNAMIC)):
            box_list = get_bev_from_box(batch,[[j+4]],return_bb=True, with_visibility=True, view=view_inv)
            
            vehicle_boxes += box_list
        
        vehicle_boxes = np.array(vehicle_boxes)
        vehicle_boxes = np.pad(vehicle_boxes,[(0,0),(0,1)], mode='constant', constant_values=0)
        ped_boxes = get_bev_from_box(batch,[[9]],return_bb=True, with_visibility=True)
        ped_boxes = np.array(ped_boxes)
        ped_boxes = np.pad(ped_boxes,[(0,0),(0,1)], mode='constant', constant_values=1)
        result = np.concatenate((vehicle_boxes,ped_boxes),0)
        print(result)
        
        break
        

In [None]:
from sklearn.cluster import DBSCAN
from random import randrange
import matplotlib.pyplot as plt

def apply_dbscan(data, eps, min_samples):
    clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(data)
    labels = clustering.labels_
    return labels

def map2points(data):
    ys, xs = np.where(data == 1)
    return np.array((xs, ys)).transpose()

def generate_colors(number):
    return [(randrange(255), randrange(255), randrange(255)) for _ in range(number)]

def get_min_max(pts_list, h=200, w=200):
    min_x, max_x, min_y, max_y = h, -1, w, -1
    for (x,y) in pts_list:
        if x < min_x:
            min_x = x
        if y < min_y:
            min_y = y
        if x > max_x:
            max_x = x
        if y > max_y:
            max_y = y
    return (min_x, min_y), (max_x, max_y)

def get_bev_from_box(batch, class_index=[[4, 5, 6, 7, 8, 10, 11,12]], return_clusters=False, return_bb=False, with_visibility=False, view=None):
    label = [batch['bev'][:, idx].max(1, keepdim=True).values for idx in class_index]
    label = torch.cat(label, 1)
    if with_visibility:
        if 9 in class_index[0]:
            visibility = batch['visibility_ped'][0]
        else:
            visibility = batch['visibility'][0]
        label = label * (visibility>=2)
    render = np.zeros((200,200),np.uint8)
    bev_pts = map2points(label[0,0])

    if len(bev_pts) == 0:
        return [] if return_bb else render

    clusters = apply_dbscan(bev_pts,1.0,3)
    if return_clusters:
        return clusters
    if return_bb:
        box_list = []
    for j in range(clusters.max()+1):
        tmp_index = np.where(clusters==j)[0]
        (x1,y1),(x2,y2) = get_min_max(bev_pts[tmp_index])
        if return_bb:
            if view is not None:
                pts = np.array([[x1,y1,1],[x2,y2,1]]).transpose()
                # pts = np.array([[(x1+x2)/2,(y1+y2)/2,1]]).transpose()
                pts = view @ pts
                x1, y1, x2, y2 = pts[:2].transpose().reshape(-1)
                x1 = (x1+50) / 100
                x2 = (x2+50) / 100
                y1 = (y1+50) / 100
                y2 = (y2+50) / 100

            box_list.append([x1,y1,x2,y2])
        else:
            cv2.rectangle(render, (x1, y1), (x2, y2), 1, -1)

    return box_list if return_bb else render

def image_(pts,h=224,w=480):
    n = pts.shape[-1]
    if n == 0:
        return pts
    mask = torch.ones((n), dtype=bool)
    mask = mask & (pts[0, :] < w-1)
    mask = mask & (pts[0, :] > 1) 
    mask = mask & (pts[1, :] > 1)
    mask = mask & (pts[1, :] < h-1)
    return pts[:,mask]

def view_points(points, view) -> np.ndarray:
    """
    This is a helper class that maps 3d points to a 2d plane. It can be used to implement both perspective and
    orthographic projections. It first applies the dot product between the points and the view. By convention,
    the view should be such that the data is projected onto the first 2 axis. It then optionally applies a
    normalization along the third dimension.

    For a perspective projection the view should be a 3x3 camera matrix, and normalize=True
    For an orthographic projection with translation the view is a 3x4 matrix and normalize=False
    For an orthographic projection without translation the view is a 3x3 matrix (optionally 3x4 with last columns
     all zeros) and normalize=False

    :param points: <np.float32: 3, n> Matrix of points, where each point (x, y, z) is along each column.
    :param view: <np.float32: n, n>. Defines an arbitrary projection (n <= 4).
        The projection should be such that the corners are projected onto the first 2 axis.
    :param normalize: Whether to normalize the remaining coordinate (along the third axis).
    :return: <np.float32: 3, n>. Mapped point. If normalize=False, the third coordinate is the height.
    """

    viewpad = np.eye(4)
    viewpad[:view.shape[0], :view.shape[1]] = view

    nbr_points = points.shape[1]

    # Do operation in homogenous coordinates.
    points = np.concatenate((points, np.ones((1, nbr_points))))
    points = np.dot(viewpad, points)
    points = points[:3, :]

    points = points / points[2:3, :].repeat(3, 0).reshape(3, nbr_points)

    return points

def project_points(batch,cam_index,points,filter=False):
    intrinsics = batch['intrinsics'][0,cam_index].cpu().numpy()
    extrinsics = batch['extrinsics'][0,cam_index].cpu().numpy()
    points = extrinsics @ points
    depths = points[2]
    points = view_points(points[:3], intrinsics)
    points = points[:,depths > 1.0]
    print(points.shape)
    if not filter:
        return points
    points = image_(points)
    return points

class tmp_calculate:
    def __init__(self, h=200, w=200):
        self.x1s = []
        self.x2s = []
        self.y1s = []
        self.y2s = []
        self.h = h
        self.w = w

    def compute(self, pts_list):
        h = self.h
        w = self.w
        min_x, max_x, min_y, max_y = h, -1, w, -1
        for (x,y) in pts_list:
            if x < min_x:
                min_x = x
                self.x1s = []
                self.x1s.append([x,y])
            elif x == min_x:
                self.x1s.append([x,y])
            if y < min_y:
                min_y = y
                self.y1s = []
                self.y1s.append([x,y])
            elif y == min_y:
                self.y1s.append([x,y])
            if x > max_x:
                max_x = x
                self.x2s = []
                self.x2s.append([x,y])
            elif x == max_x:
                self.x2s.append([x,y])
            if y > max_y:
                max_y = y
                self.y2s = []
                self.y2s.append([x,y])
            elif y == max_y:
                self.y2s.append([x,y])

        return min_x, max_x, min_y, max_y # self.x1s, self.x2s, self.y1s, self.y2s

    def get_pts(self):
        def cal_distance(pt1, pt2):
            return np.sqrt((pt1[0]-pt2[0])**2 + (pt1[1]-pt2[1])**2)
        out = []
        used = []
        for pt_list in [self.x1s,self.y1s,self.x2s,self.y2s]:
            max_dis, max_pt = 0.0, None
            for pt in pt_list:
                if len(used) != 0:
                    for pt2 in used:
                        dis = cal_distance(pt,pt2)
                        if dis > max_dis:
                            max_dis = dis
                            max_pt = pt
                else:
                    max_pt = pt
                    break
            out.append(max_pt)
            used.append(max_pt)
            
        return out

In [None]:
class tmp_calculate:
    def __init__(self, h=200, w=200):
        self.x1s = []
        self.x2s = []
        self.y1s = []
        self.y2s = []
        self.h = h
        self.w = w

    def compute(self, pts_list):
        h = self.h
        w = self.w
        min_x, max_x, min_y, max_y = h, -1, w, -1
        for (x,y) in pts_list:
            if x < min_x:
                min_x = x
                self.x1s = []
                self.x1s.append([x,y])
            elif x == min_x:
                self.x1s.append([x,y])
            if y < min_y:
                min_y = y
                self.y1s = []
                self.y1s.append([x,y])
            elif y == min_y:
                self.y1s.append([x,y])
            if x > max_x:
                max_x = x
                self.x2s = []
                self.x2s.append([x,y])
            elif x == max_x:
                self.x2s.append([x,y])
            if y > max_y:
                max_y = y
                self.y2s = []
                self.y2s.append([x,y])
            elif y == max_y:
                self.y2s.append([x,y])

        return min_x, max_x, min_y, max_y # self.x1s, self.x2s, self.y1s, self.y2s

    def get_pts(self):
        def cal_distance(pt1, pt2):
            return np.sqrt((pt1[0]-pt2[0])**2 + (pt1[1]-pt2[1])**2)
        out = []
        used = []
        for pt_list in [self.x1s,self.y1s,self.x2s,self.y2s]:
            max_dis, max_pt = 0.0, None
            for pt in pt_list:
                if len(used) != 0:
                    for pt2 in used:
                        dis = cal_distance(pt,pt2)
                        if dis > max_dis:
                            max_dis = dis
                            max_pt = pt
                else:
                    max_pt = pt
                    break
            out.append(max_pt)
            used.append(max_pt)
            
        return out
render = np.zeros((200,200,3),np.uint8)
tmp = np.zeros((200,200),np.int32)
label = [batch['bev'][:, idx].max(1, keepdim=True).values for idx in [[4,5,6,7,8,10,11]]]
label = torch.cat(label, 1)[0,0]
pts = map2points(label.cpu().numpy())
clusters = apply_dbscan(pts,1.0,3)
scores = []
for i in range(clusters.max()+1):
    # if i<clusters.max():
    #     continue
    if i !=5:
        continue
    tmp_index = np.where(clusters==i)[0]
    t_c = tmp_calculate()
    x1, x2, y1, y2 = t_c.compute(pts[tmp_index])
    render[y1:y2,x1:x2] = 1
    score = label[y1:y2,x1:x2].sum()/((x2-x1)*(y2-y1))
    print(score)
    scores.append(score)
    cv2.rectangle(render, (x1, y1), (x2, y2), (0,255,255), -1)
    # out = t_c.get_pts()
    # out = np.array(out)
    # print(out)
    # cv2.fillPoly(render, [out], (0,255,255))
    # plt.scatter(out[:,0],out[:,1],s=4)
plt.title('Oriented Box')
plt.imshow(render)


In [None]:
import plotly.express as px
# df = px.data.tips()
fig = px.histogram(np.array(scores), range_x=[0.0, 1.0])
fig.show()

In [None]:
a = [[2,1]]
[1,2] in a

In [None]:
batch = l[1]
img_index = 4
img = np.ascontiguousarray((batch['image'][0,img_index].permute(1,2,0).cpu().numpy()*255).astype(np.uint8))

render = get_bev_from_box(batch,class_index=[[4]],return_bb=False, with_visibility=True)
plt.imshow(render)
plt.axis('off')

In [None]:
import torch.nn.functional as F

def draw_3d(img, pts, linewidth=3.0):
    pts = pts.transpose()
    for i in range(4):
        try:
            cv2.line(img,
                        (int(pts[2*i][0]), int(pts[2*i][1])),
                        (int(pts[2*i + 1][0]), int(pts[2*i + 1][1])),
                        linewidth)
        except:
            continue
    return img

def pad_points(pts, z_range=[0,2]):
    pts_new = []
    for pt in pts[:2].permute(1,0):
        # for i in range((z_range[1]-z_range[0])+1):
        #     pts_new.append(torch.Tensor([pt[0],pt[1],z_range[0]+i,1]))
        for z in z_range:
            pts_new.append(torch.Tensor([pt[0],pt[1],z,1]))
    return torch.stack(pts_new).transpose(0,1)

batch = l[1]
img_index = 4
img = np.ascontiguousarray((batch['image'][0,img_index].permute(1,2,0).cpu().numpy()*255).astype(np.uint8))
box_list = get_bev_from_box(batch,[[4]],return_bb=True, with_visibility=True)
pts_list = get_bev_from_box(batch, return_clusters=True)# ,[[4]]
colors = generate_colors(len(box_list))
view_inv = batch['view'][0].inverse()
if True:
    for i, (x1,y1,x2,y2) in enumerate(box_list):
        # bev -> lidar
        # pts = np.array([[x1,y1,1],[x2,y2,1]]).transpose()
        pts = np.array([[x1,y1,1],[x2,y1,1],[x1,y2,1],[x2,y2,1]]).transpose()
        # pts = np.array([[(x1+x2)/2,(y1+y2)/2,1]]).transpose()
        pts = view_inv @ pts
        pts = pad_points(pts).cpu().numpy()
        pts = project_points(batch,img_index,pts)#,filter=True)
        if pts.shape[-1] == 0:
            continue
        # pts = pts.astype(np.uint8)
        top_left, bottom_right = get_min_max(pts.transpose()[:,:2],480,224)
        # cv2.rectangle(img,(int(pts[0,5]),int(pts[1,5])),(int(pts[0,6]),int(pts[1,6])),(0, 0, 255), 1, cv2.LINE_AA)
        cv2.rectangle(img,(int(top_left[0]),int(top_left[1])),(int(bottom_right[0]),int(bottom_right[1])),colors[i], 2, cv2.LINE_AA)
        # plt.scatter(pts[0],pts[1],s=4.0)
        # img = draw_3d(img, pts)
else:
    for i, pts in enumerate(pts_list):
        # bev -> lidar
        # print(pts)
        # print(np.pad(pts, [(0, 0), (0, 1)], mode='constant',constant_values=1))
        pts = np.pad(pts, [(0, 0), (0, 1)], mode='constant',constant_values=1).transpose()
        pts = view_inv @ pts
        pts = pad_points(pts).cpu().numpy()
        # print(pts)
        pts = project_points(batch,img_index,pts,filter=True)
        if pts.shape[-1] == 0:
            continue
        # pts = pts.astype(np.uint8)
        # cv2.rectangle(img,(pts[1,0],pts[1,1]),(pts[2,0],pts[2,1]),(0, 0, 255), 1, cv2.LINE_AA)
        plt.scatter(pts[0],pts[1],s=4.0)
plt.imshow(img)
plt.axis('off')

In [None]:
render = np.zeros((200,200,3),np.uint8)
colors = generate_colors(len(box_list))
for i,(x1,y1,x2,y2) in enumerate(box_list):
    cv2.rectangle(render, (x1, y1), (x2, y2), colors[i], -1)
plt.imshow(render)
print(box_list)
print(box_list[6])

In [None]:
nusc.render_sample(l[1]['token'])

In [None]:
anns = nusc.get('sample', batch['token'][0])['anns']
for ann in anns:
    ann = nusc.get('sample_annotation', ann)
    if ann['token'] == 'a561a5a52e7d4069867284d2cd2a3ed3':
        break
    if 'pedestrian.adult' in ann['category_name'] and int(ann['visibility_token']) > 1:
        print(ann)

In [None]:
def view_points(points, view) -> np.ndarray:
    """
    This is a helper class that maps 3d points to a 2d plane. It can be used to implement both perspective and
    orthographic projections. It first applies the dot product between the points and the view. By convention,
    the view should be such that the data is projected onto the first 2 axis. It then optionally applies a
    normalization along the third dimension.

    For a perspective projection the view should be a 3x3 camera matrix, and normalize=True
    For an orthographic projection with translation the view is a 3x4 matrix and normalize=False
    For an orthographic projection without translation the view is a 3x3 matrix (optionally 3x4 with last columns
     all zeros) and normalize=False

    :param points: <np.float32: 3, n> Matrix of points, where each point (x, y, z) is along each column.
    :param view: <np.float32: n, n>. Defines an arbitrary projection (n <= 4).
        The projection should be such that the corners are projected onto the first 2 axis.
    :param normalize: Whether to normalize the remaining coordinate (along the third axis).
    :return: <np.float32: 3, n>. Mapped point. If normalize=False, the third coordinate is the height.
    """

    assert view.shape[0] <= 4
    assert view.shape[1] <= 4
    assert points.shape[0] == 3
    viewpad = np.eye(4)
    viewpad[:view.shape[0], :view.shape[1]] = view

    nbr_points = points.shape[1]

    # Do operation in homogenous coordinates.
    points = np.concatenate((points, np.ones((1, nbr_points))))
    points = np.dot(viewpad, points)
    points = points[:3, :]

    points = points / points[2:3, :].repeat(3, 0).reshape(3, nbr_points)

    return points

# GLOBAL box
ped_box = Box(ann['translation'], ann['size'], Quaternion(ann['rotation']))
sample = nusc.get('sample', batch['token'][0])
cam = nusc.get('sample_data', sample['data']['CAM_BACK_LEFT'])

# First step: transform from global into the ego vehicle frame for the timestamp of the image.
poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
ped_box.translate(-np.array(poserecord['translation']))
ped_box.rotate(Quaternion(poserecord['rotation']).inverse)

# Second step: transform from ego into the camera.
cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
ped_box.translate(-np.array(cs_record['translation']))
ped_box.rotate(Quaternion(cs_record['rotation']).inverse)

# Third step: Project to image -> 8 corners points
corners = view_points(ped_box.bottom_corners(), cs_record['camera_intrinsic'])[:2, :]

In [None]:
print(cs_record['camera_intrinsic'])
print(batch['intrinsics'][0,3])

In [None]:
nusc.render_annotation('a561a5a52e7d4069867284d2cd2a3ed3')

In [None]:
bbox_list = np.int32(np.array(bbox_list))
render = np.zeros((200,200),np.uint8)
for (y1,x1),(y2,x2) in bbox_list:
    cv2.rectangle(render, (x1, y1), (x2, y2), 1, -1)
plt.imshow(render)

In [None]:
model.metrics.reset()
for k,m in model.metrics.items():
    m.thresholds = m.thresholds.to(device)
    m.tp = m.tp.to(device)
    m.fp = m.fp.to(device)
    m.fn = m.fn.to(device)

max_veihcle = 0
max_ped = 0
max_box = 0
with torch.no_grad():
    for i,batch in enumerate(loader):
        print(i,end='\r')
        box_vehicle = get_bev_from_box(batch,return_clusters=True)
        box_ped = get_bev_from_box(batch,[[9]],return_clusters=True)
        max_veihcle = max(box_vehicle.max()+1,max_veihcle)
        max_ped = max(box_ped.max()+1,max_ped)
        max_box = max(box_ped.max()+box_vehicle.max()+2,max_box)
        continue
        pred = {'bev':torch.from_numpy(box_vehicle)[None,None].to(device),'ped':torch.from_numpy(box_ped)[None,None].to(device)}
        batch['bev'] = batch['bev'].to(device)
        batch['visibility'] = batch['visibility'].to(device)
        batch['visibility_ped'] = batch['visibility_ped'].to(device)
        model.metrics.update(pred,batch)
        # if i == 50:
        #     break
        
print()
for k,m in model.metrics.items():
    print(k,':\n\t',m.compute(),'\n\t','='*50)

In [None]:
print(max_veihcle,max_ped,max_box)

In [None]:
import tqdm
class helper:
    def __init__(self):
        self.n = 0.0
        self.translation = [0.0, 0.0, 0.0]
        self.size = [0.0, 0.0, 0.0]
        self.rotation = [0.0, 0.0, 0.0, 0.0]
    
    def forward(self, translation, size, rotation):
        n = self.n
        for i in range(len(translation)):
            v1 = self.n * self.translation[i]
            self.translation[i] = (v1 + translation[i]) / (n+1)
        for i in range(len(size)):
            v1 = self.n * self.size[i]
            self.size[i] = (v1 + size[i]) / (n+1)

        for i in range(len(rotation)):
            v1 = self.n * self.rotation[i]
            self.rotation[i] = (v1 + rotation[i]) / (n+1)
        
        self.n += 1
        # current_z_mean = self.z_mean * self.n
        # current_z_size_mean = self.z_size_mean * self.n
        # self.n += 1
        # self.z_mean = (current_z_mean + z)/ self.n
        # self.z_size_mean = (current_z_size_mean + z_size)/ self.n

    def __str__(self):
        return f"Translation mean: {self.translation}, Size mean: {self.size}, Rotation mean: {self.rotation}"

z_result = dict(
    car = helper(),
    truck = helper(),
    bus = helper(),
    trailer = helper(),
    construction_vehicle = helper(),
    pedestrian = helper(),
    motorcycle = helper(),
    bicycle = helper(),
)
total_ped = 0.0
count_ped = 0.0
for scene_record in tqdm.tqdm(nusc.scene):
    if scene_record['name'] not in train_splits:
        continue
    sample_token = scene_record['first_sample_token']
    while sample_token:
        sample_record = nusc.get('sample', sample_token)
        anns = sample_record['anns']
        for ann in anns:
            ann = nusc.get('sample_annotation', ann)
            if int(ann['visibility_token']) < 2:
                continue
            for k in z_result:
                s = k if k!= 'construction_vehicle' else 'construction'
                if s in ann['category_name']:
                    z_result[k].forward(ann['translation'],ann['size'],ann['rotation'])
            if 'motorcycle' in ann['category_name']:
                total_ped += 1
                if ann['size'][0] <0.5 or ann['size'][1] <0.5:
                    count_ped += 1
        sample_token = sample_record['next']
print(f"{count_ped}/{total_ped}")

In [None]:
for k,v in z_result.items():
    print(k,v)
# car Z mean: 0.9496268797600059, Height mean: 1.7372864269667732
# truck Z mean: 1.5562680559520412, Height mean: 2.8328014083165924
# bus Z mean: 1.862588283304612, Height mean: 3.5100965811190528
# trailer Z mean: 2.112663209226726, Height mean: 3.81640157906954
# construction Z mean: 1.37493974499088, Height mean: 2.527865719489937
# pedestrian Z mean: 1.0667289361085115, Height mean: 1.7676267494450986
# motorcycle Z mean: 0.8392319868995608, Height mean: 1.471776610261977
# bicycle Z mean: 0.7497402640263989, Height mean: 1.3034333333333443

In [None]:
file_name = '/media/hcis-s20/SRL/det3d/BEVFormer/test/bevformer_base/Wed_Mar_27_10_38_55_2024/pts_bbox/results_nusc.json'
bevformer_results = mmcv.load(file_name) 

In [None]:
a = [0,1,2]
b = [-1,5,3]
a + b

In [None]:
bevformer_results['meta']

In [None]:
for k,v in bevformer_results['results'].items():
    print(v[0])
    break

In [None]:
for scene_record in tqdm.tqdm(nusc.scene):
    if scene_record['name'] not in val_splits:
        continue
    if scene_record == '30e55a3ec6184d8cb1944b39ba19d622 ':
        print("HI!\n\n")

In [None]:
get_attribute = dict(
    car = 'vehicle.moving', 
    truck = 'vehicle.parked', 
    bus = 'vehicle.moving',
    trailer = 'vehicle.parked', 
    construction_vehicle = 'vehicle.parked',
    pedestrian = 'pedestrian.standing',
    motorcycle = 'cycle.with_rider', 
    bicycle = 'cycle.without_rider'
)

result = dict(
    meta={
        'use_lidar': False,
        'use_camera': True,
        'use_radar': False,
        'use_map': False,
        'use_external': True
    },
    results=dict()
)
with torch.no_grad():
    for i,batch in enumerate(loader):
        print(i,end='\r')
        tmp = []
        token = batch['token'][0]
        view_inv = batch['view'][0].inverse().cpu().numpy()
        pose = batch['pose'][0].cpu().numpy()
        for i, obj_class in enumerate(DYNAMIC):
            attribute_name = get_attribute[obj_class]
            z_trans, z_size = z_result[obj_class].z_mean, z_result[obj_class].z_size_mean
            box_list = get_bev_from_box(batch,[[i+4]],return_bb=True)
            for (x1,y1,x2,y2) in box_list:
                _x, _y = (x2-x1) / 2.0, (y2-y1) / 2.0
                if _x == 0.0:
                    _x = 0.1
                if _y == 0.0:
                    _y = 0.1
                center = view_inv @ np.array([(x1+x2)/2.0, (y1+y2)/2.0,1]).transpose() # bev
                center = pose @ np.array([center[0], center[1],1,1]).transpose() # world
                tmp.append(
                    dict(
                        sample_token=token,
                        translation=[center[0],center[1],z_trans],
                        size=[_x,_y,z_size],
                        rotation=[0.0,0.0,0.0,0.0],
                        velocity=[0.0,0.0],
                        detection_name=obj_class,
                        detection_score=0.9,
                        attribute_name=attribute_name
                    )
                )
        result['results'][token] = tmp

import json
with open('/media/hcis-s20/SRL/det3d/BEVFormer/test.json', 'w') as f:
    json.dump(result, f)

In [None]:
for i, scene_record in enumerate(nusc.scene):
    sample_token = scene_record['first_sample_token']
    while sample_token:
        sample_record = nusc.get('sample', sample_token)
        for ann in sample_record['anns']:
            nusc.get('sample_annotation', sample_token)
    break

In [2]:
import torch
x = torch.zeros((2,2,4))
view = torch.ones((2,3,3))
x0, y0, x1, y1 = x.unbind(-1)
p1, p2 = torch.stack([x0,y0], dim=-1), torch.stack([x1,y1], dim=-1)
p1, p2 = torch.nn.functional.pad(p1,(0,1), value=1), torch.nn.functional.pad(p2,(0,1), value=1)
p1 = torch.einsum('b i j, b n j -> b n i', view, p1)
print(p1)
print(p1.shape,p2.shape)

tensor([[[1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.]]])
torch.Size([2, 2, 3]) torch.Size([2, 2, 3])


In [3]:
b, h, w, n = 5, 200, 200, 100 # b: batch, h: height, w: width, n: number of bounding boxes
x = torch.rand((b,h,w))
box = torch.rand((b,n,4)) # bounding boxes in x1,y1,w,h
# for each batch: b
for i in range(b):
    # for each box prediction
    for j in range(n):
        # Set the value of x into 1 inside each bounding box region
        x1,y1,w,h = box[i,j]
        x[i][x1:x1+w,y1:,y1+h] = 1

IndexError: too many indices for tensor of dimension 2

In [10]:
import torch
h, w = 10, 10
x = torch.zeros((2,h,w))
box = torch.tensor([
    [[0,0,2,2],[4,3,5,8]],
    [[1,2,3,4],[2,3,6,6]],
])
print(x.shape, box.shape) # b h w, b n 4
xx, yy = torch.meshgrid(torch.arange(h), torch.arange(w))
xx, yy = xx.to(x.device), yy.to(x.device)

# Expand dimensions for xx and yy to match the dimensions of box
xx = xx[None, None, ...]
yy = yy[None, None, ...]
# print(xx.shape,yy.shape)

# Check if the coordinates are inside the boxes
masks = (xx >= box[..., 0, None, None]) & (xx <= box[..., 2, None, None]) & \
        (yy >= box[..., 1, None, None]) & (yy <= box[..., 3, None, None])

# Combine the masks for different boxes using logical OR
mask = masks.any(dim=1)

# Set the value of x into 1 inside each bounding box region
x[mask] = 1
print(mask)

torch.Size([2, 10, 10]) torch.Size([2, 2, 4])
tensor([[[ True,  True,  True, False, False, False, False, False, False, False],
         [ True,  True,  True, False, False, False, False, False, False, False],
         [ True,  True,  True, False, False, False, False, False, False, False],
         [False, False, False, False, False, False, False, False, False, False],
         [False, False, False,  True,  True,  True,  True,  True,  True, False],
         [False, False, False,  True,  True,  True,  True,  True,  True, False],
         [False, False, False, False, False, False, False, False, False, False],
         [False, False, False, False, False, False, False, False, False, False],
         [False, False, False, False, False, False, False, False, False, False],
         [False, False, False, False, False, False, False, False, False, False]],

        [[False, False, False, False, False, False, False, False, False, False],
         [False, False,  True,  True,  True, False, False, Fa

In [None]:
a = torch.ones((2,4,2))
new_points = torch.cat([a,a],dim=-1)
print(new_points.shape)

In [None]:
import torch
x = torch.randn((4,3))
print(x)
scores, labels = x.max(-1)
print(scores,labels)

In [None]:
import math
import torch.nn.functional as F
def generate_grid(height: int, width: int, z: int = 0):
    xs = torch.linspace(0, 1, width)
    ys = torch.linspace(0, 1, height)

    if z > 0 :
        zs = torch.linspace(0, 1, z)
        indices = torch.stack(torch.meshgrid((xs, ys, zs),indexing='xy'), 0)   
    else:
        indices = torch.stack(torch.meshgrid((xs, ys), indexing='xy'), 0)       # 2 h w
        indices = F.pad(indices, (0, 0, 0, 0, 0, 1), value=1)                   # 3 h w
    indices = indices[None]                                                 # 1 3 h w

    return indices

def positionalencoding2d(d_model, height, width, V):
    """
    :param d_model: dimension of the model
    :param height: height of the positions
    :param width: width of the positions
    :return: d_model*height*width position matrix
    """
    V_inv = torch.FloatTensor(V).inverse()
    pos = generate_grid(height, width)[0]
    pos[0] = 200 * pos[0]
    pos[1] = 200 * pos[1]
    pos = V_inv @ pos.reshape(3, height * width)
    pos = pos.reshape(3,height, width)[:2]
    pos_h, pos_w = pos[0,:,0].unsqueeze(1), pos[1,0,:].unsqueeze(1)
    print(pos_h)
    if d_model % 4 != 0:
        raise ValueError("Cannot use sin/cos positional encoding with "
                         "odd dimension (got dim={:d})".format(d_model))
    pe = torch.zeros(d_model, height, width)
    # Each dimension use half of d_model
    d_model = int(d_model / 2)
    div_term = torch.exp(torch.arange(0., d_model, 2) *
                         -(math.log(10000.0) / d_model)).unsqueeze(0)
    pe[0:d_model:2, :, :] = torch.sin(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
    pe[1:d_model:2, :, :] = torch.cos(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
    pe[d_model::2, :, :] = torch.sin(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
    pe[d_model + 1::2, :, :] = torch.cos(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
    return pe

pe = positionalencoding2d(128, 25, 25, l[0]['view'][0])

In [None]:
tp = 2126813
fn = 1615751
tp = 30084
fn = 85496
tp/(tp+fn)

In [None]:
import numpy as np

# Initialize the array a and the index array b
a = np.zeros((10,6))
z_stats = [
            [0.9496, 1.7372], # car
            [1.5563, 2.8328], # truck
            [1.8626, 3.5100], # bus
            [2.1127, 3.8164], # trailer
            [1.3749, 2.5279], # construction
            [1.0667, 1.7676], # pedestrian
            [0.8392, 1.4717], # motorcycle
            [0.7497, 1.3034], # bicycle
        ]
b = np.random.randint(len(z_stats), size=10)

# Vectorize the assigning operation
a[:, 4] = np.array(z_stats)[b, 0]

In [None]:
import torch
a = torch.rand((4,100,6,3)) # b q p 3
b = torch.rand((4,100,3)) # b q 3
c = a + b.unsqueeze(2)
c.shape

In [None]:
from einops import rearrange

In [None]:
H, W = 25, 25
bs = 2
ref_y, ref_x = torch.meshgrid(
                torch.linspace(
                    0.5, H - 0.5, H),
                torch.linspace(
                    0.5, W - 0.5, W)
            )
ref_y = ref_y.reshape(-1)[None] / H
ref_x = ref_x.reshape(-1)[None] / W
ref_2d = torch.stack((ref_x, ref_y), -1)
ref_2d = ref_2d.repeat(bs, 1, 1).unsqueeze(2)

In [None]:
ref_2d.shape

In [53]:
import torch
# pred_boxes: BxNx6
# coords = pred_boxes[..., :2] # pad 1 B N 3
b, N = 4, 100

pred_boxes_coords = torch.rand((b,N,2))
pred_logits = torch.rand((b,N,8))
box_feats = torch.rand((b,N,128))
view = torch.rand((b,3,3))

scores, _ = pred_logits.softmax(-1)[..., :-1].max(-1)
filter_idx = torch.topk(scores, k=50, dim=-1).indices
# Expand dimensions for filter_idx for matching with pred_boxes_coords
filter_idx_expand = filter_idx.unsqueeze(-1).expand(*filter_idx.shape, pred_boxes_coords.shape[-1])

# Use torch.gather to index pred_boxes_coords with filter_idx
pred_boxes_coords = torch.gather(pred_boxes_coords, 1, filter_idx_expand)

filter_idx_expand = filter_idx.unsqueeze(-1).expand(*filter_idx.shape, 128)
box_feats = torch.gather(box_feats, 1, filter_idx_expand)

pred_boxes_coords = torch.nn.functional.pad(pred_boxes_coords,(0, 1), value=1) # b filter_N 3
pred_boxes_coords = ((torch.einsum('b i j, b N j -> b N i', view, pred_boxes_coords)[..., :2])* 100).int()

batch_idx = torch.arange(0, b).view(-1,1,1).expand(-1,50,1).int()
pred_boxes_coords = torch.cat([batch_idx, pred_boxes_coords], dim = -1)


In [54]:
import spconv.pytorch as spconv

spconv.SparseConvTensor(box_feats.flatten(0,1), pred_boxes_coords.flatten(0,1), [200,200], b)

SparseConvTensor[shape=torch.Size([200, 128])]

In [36]:
pred_boxes_coords.flatten(0,1)[0]

tensor([122, 144], dtype=torch.int32)

In [180]:
import torch
B, H, W = 2, 10, 10
mask = torch.zeros([B,H,W])
# b = torch.arange(B)[:, None].to(device)
# mask[b, pred_boxes_coords[..., 1], pred_boxes_coords[..., 0]] = 1
mask[0, 2,3] = 1
mask[1, 1,1] = 1
print(mask)

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.,

In [181]:
mask = mask.unsqueeze(1)

patch_size = 3
kernel = torch.ones(
    (1, 1, patch_size, patch_size), dtype=torch.float64
)
augm_mask = torch.nn.functional.conv2d(
    mask.to(torch.float64), kernel, padding=(patch_size - 1) // 2
)
augm_mask = augm_mask.bool().squeeze(1)
print(augm_mask.shape)

torch.Size([2, 10, 10])


In [182]:
def _select_idx_to_keep(mask, N_pts):
    """Select final points to keep.
    Either we keep Nfine points ordered by their importance or we reinject random points when points are
    predicted as not important, otherwise we will have an artefact at the bottom due to the selection
    on uniform null points.
    """
    # Alias
    bt = mask.size(0)
    device = mask.device

    out_idx = []
    if N_pts == "dyna":
        for i in range(bt):
            # Numbers of activated elements
            activ_idx = torch.nonzero(mask[i]).squeeze(1)
            out_idx.append(activ_idx)
    else:
        # Reinject random points in batches
        for i in range(bt):
            # Numbers of activated elements
            activ_idx = torch.nonzero(mask[i]).squeeze(1)
            # How many points are not activated.
            n_activ = activ_idx.size(0)
            idle = N_pts - n_activ

            # Less detected points than N_pts
            if idle > 0:
                # Random selection
                allowed_idx = torch.nonzero(mask[i] == 0).squeeze(1)
                perm = torch.randperm(allowed_idx.size(0))
                augm_idx = allowed_idx[perm[:idle]]
            else:
                augm_idx = torch.empty([0], device=device, dtype=torch.int64)
                activ_idx = activ_idx[:N_pts]

            out_idx.append(torch.cat([activ_idx, augm_idx]))

    out_idx = torch.stack(out_idx)
    # xy_vox_idx = torch.stack([((out_idx // Y) % X), out_idx % Y], dim=-1)
    return out_idx



In [183]:
def index_4d_tensor(X, Y):
    B, N, _ = Y.shape
    b = torch.arange(B)[:, None].expand(-1, N)  # Create a tensor for batch indices
    h, w = Y.unbind(dim=-1)  # Split the last dimension of Y into separate tensors
    indexed_X = X[b, h, w]  # Use advanced indexing to index X

    return indexed_X


In [184]:
def _init_bev_layers(H=25, W=25, Z=8, num_points_in_pillar=4, **kwargs):
    # zs = torch.linspace(0.5, Z - 0.5, num_points_in_pillar
    #                         ).view(-1, 1, 1).expand(num_points_in_pillar, H, W) / Z
    xs = torch.linspace(0.5, W - 0.5, W
                        ).view(1, W).expand(H, W) / W
    ys = torch.linspace(0.5, H - 0.5, H
                        ).view(H, 1).expand(H, W) / H
    ref_3d = torch.stack((xs, ys), -1)
    return ref_3d

grid = _init_bev_layers(H,W)[None].expand(2,-1,-1,-1)

print(augm_mask.shape)
print(_select_idx_to_keep(augm_mask, 5).shape)
selected_grid = index_4d_tensor(grid, _select_idx_to_keep(augm_mask, 5))
print(selected_grid.shape) # b n 2

torch.Size([2, 10, 10])
torch.Size([2, 5, 2])
torch.Size([2, 5, 2])


In [161]:
from einops import rearrange, repeat
num_points_in_pillar = 4
N = 5
zs = torch.linspace(0.5, 8 - 0.5, num_points_in_pillar
                                ).view(-1, 1).expand(num_points_in_pillar, N) / 8
pad_zs = repeat(zs, 'p n -> b p n 1', b=B)
selected_grid = repeat(selected_grid, 'b n d -> b p n d', p=num_points_in_pillar)
print(pad_zs.shape,selected_grid.shape)
torch.cat([selected_grid, pad_zs],dim=-1).shape

torch.Size([2, 4, 5, 1]) torch.Size([2, 4, 5, 2])


torch.Size([2, 4, 5, 3])

In [149]:
print(pad_zs.shape,selected_grid.shape)

torch.Size([2, 4, 5, 1]) torch.Size([2, 4, 4, 2])
