In [1]:
%load_ext autoreload
%autoreload 2

import os
from pprint import pprint

import pickle
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.transforms import Affine2D

# New implementation
from detection_eval.detection_eval import DetectionEval
from detection_eval.filter import build_kitti_filters, ClassFilter, RangeFilter, CombinedFilter
from detection_eval.box_list import BoxList

# Print the current working directory of the kernel.
print(os.getcwd())

/home/matthew/git/cadc_testing/uncertainty_eval


In [2]:
# Root of the KITTI data; the image and lidar loaders in this notebook read from
# its `training` sub-directories.
# NOTE(review): these are absolute, machine-specific paths.
dataset_path = '/home/matthew/git/cadc_testing/WISEOpenLidarPerceptron/data/kitti'

# Clustering
# Directory holding the pickled ground truth and ensemble predictions.
logdir = '/home/matthew/git/cadc_testing/pcdet_output/output_pkls'
gts_path, preds_path = (
    os.path.join(logdir, pkl_name) for pkl_name in ('gts.pkl', 'ensemble_a.pkl')
)


In [3]:
def load_dicts():
    """Unpickle the ground-truth and prediction data.

    Reads the module-level `gts_path` first and `preds_path` second.

    NOTE: `pickle.load` can execute arbitrary code while loading, so these
    files must come from a trusted source.

    Returns:
        tuple: `(gt_dicts, pred_dicts)` exactly as stored in the two files.
    """
    def _unpickle(path):
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    # Tuple elements are evaluated left to right: ground truth, then predictions.
    return _unpickle(gts_path), _unpickle(preds_path)

def load_image(frame_id):
    """Read the image of one frame.

    Args:
        frame_id: Identifier interpolated into the file name `<frame_id>.png`.

    Returns:
        Whatever `plt.imread` returns for
        `<dataset_path>/training/image_2/<frame_id>.png`.
    """
    image_dir = os.path.join(dataset_path, 'training', 'image_2')
    return plt.imread(os.path.join(image_dir, f'{frame_id}.png'))

def load_lidar(frame_id, xlim, ylim):
    """Load one lidar scan and crop it to a region of interest.

    The file `<dataset_path>/training/velodyne/<frame_id>.bin` is read as flat
    float32 values and reshaped to four values per point. Columns 0, 1 and 2
    are compared against `xlim`, `ylim` and the fixed upper bound 4; column 3
    is carried along untouched (presumably intensity -- TODO confirm).

    Args:
        frame_id: Identifier interpolated into the file name.
        xlim: `(low, high)` exclusive bounds on column 0.
        ylim: `(low, high)` exclusive bounds on column 1.

    Returns:
        np.ndarray: The rows that satisfy every bound, shape `(M, 4)`.
    """
    lidar_path = os.path.join(dataset_path, 'training', 'velodyne', f'{frame_id}.bin')
    cloud = np.fromfile(lidar_path, dtype=np.float32).reshape(-1, 4)

    xs, ys, zs = cloud[:, 0], cloud[:, 1], cloud[:, 2]
    inside_x = (xs > xlim[0]) & (xs < xlim[1])
    inside_y = (ys > ylim[0]) & (ys < ylim[1])
    below_cap = zs < 4  # drop points at or above 4 in column 2

    return cloud[inside_x & inside_y & below_cap]

def add_box(ax, box, color=None):
    """Draw the rotated outline of one box on a matplotlib axes.

    An axis-aligned, unfilled rectangle centred on `(box[0], box[1])` with
    extents `box[3]` by `box[4]` is created and then rotated about that centre
    by `box[-1]`.

    Args:
        ax: Matplotlib axes that receives the patch.
        box: Sequence laid out as `[x, y, z, w, l, h, theta]`.
        color: Edge colour forwarded to `Rectangle` (`None` keeps its default).
    """
    # box: [x, y, z, w, l, h, theta]
    center_x, center_y = box[0], box[1]
    extent_x, extent_y = box[3], box[4]
    theta = box[-1]  # `rotate_around` expects radians, so no degree conversion

    # `Rectangle` is anchored at its lower-left corner, hence the half-extent shift.
    lower_left = (center_x - extent_x/2, center_y - extent_y/2)
    box_rect = Rectangle(
        lower_left, extent_x, extent_y, facecolor='none', edgecolor=color
    )
    # Rotate in data coordinates first, then map data -> display coordinates.
    rotation = Affine2D().rotate_around(center_x, center_y, theta) + ax.transData
    box_rect.set_transform(rotation)
    ax.add_patch(box_rect)

In [4]:
# Load example GT and prediction
# The frame is pinned to 0 so the printed output is reproducible.
# Other frames inspected earlier: idx = 2322, 291
idx = 0
print(idx)
gt_dicts, pred_dicts = load_dicts()

print('Example GT dict:')
print(gt_dicts[idx].keys())
print('Example pred dict:')
# At this point each frame entry of `pred_dicts` is itself indexable; show the
# keys of its first element.
print(pred_dicts[idx][0].keys())

0
Example GT dict:
dict_keys(['gt_boxes', 'gt_labels', 'gt_names', 'frame_id'])
Example pred dict:
dict_keys(['name', 'truncated', 'occluded', 'alpha', 'bbox', 'dimensions', 'location', 'rotation_y', 'score', 'score_all', 'boxes_lidar', 'pred_labels', 'target_labels', 'pred_vars', 'pred_head_ids', 'frame_id'])


In [5]:
# Cluster pred_dicts
# `cluster_preds` comes from the `cluster` module; what it does to each frame is
# not visible from this notebook.
from cluster import cluster_preds

# Passed as the second argument of `cluster_preds` below (presumably the minimum
# number of boxes a cluster needs to be kept -- TODO confirm in `cluster`).
MIN_CLUSTER_SIZE = 3

print("Before clustering")
print("Number of frames", len(pred_dicts))
print("Number of pred dicts per frame", len(pred_dicts[0]))

# NOTE(review): `pred_dicts` is rebound to the clustered result, so running this
# cell a second time feeds already-clustered data back into `cluster_preds`.
pred_dicts = cluster_preds(pred_dicts, MIN_CLUSTER_SIZE)

print("After clustering")
print("Number of frames", len(pred_dicts))
print("Is the output per frame now a single dict?", isinstance(pred_dicts[0], dict))

Before clustering
Number of frames 3769
Number of pred dicts per frame 4
EMPTY FRAME DETECTED
EMPTY FRAME DETECTED
EMPTY FRAME DETECTED
After clustering
Number of frames 3769
Is the output per frame now a single dict? True


In [7]:
def get_labels(data_dict):
    """Return the class labels stored in a GT or prediction dict.

    A `'gt_labels'` entry is returned as-is and takes precedence. Otherwise
    the class names under `'name'` are mapped to 1-based labels
    (Car -> 1, Pedestrian -> 2, Cyclist -> 3).

    Raises:
        ValueError: If neither key is present, or a name is not one of the
            three known classes.
    """
    if 'gt_labels' in data_dict:
        return data_dict['gt_labels']

    if 'name' not in data_dict:
        raise ValueError()

    classes = ['Car', 'Pedestrian', 'Cyclist']
    # Labels are 1-based: position in `classes` plus one.
    return np.array([1 + classes.index(class_name) for class_name in data_dict['name']])

def get_boxes(data_dict):
    """Return the boxes stored in a prediction or GT dict.

    `'boxes_lidar'` is checked first, then `'gt_boxes'`; the first key that is
    present wins and its value is returned unchanged.

    Raises:
        ValueError: If neither key is present.
    """
    for box_key in ('boxes_lidar', 'gt_boxes'):
        if box_key in data_dict:
            return data_dict[box_key]
    raise ValueError()

def get_scores(data_dict):
    """Return the value stored under `'score'` (raises `KeyError` if absent)."""
    scores = data_dict['score']
    return scores
    
def gt_processor(data_dict):
    """Split a ground-truth dict into the `(labels, boxes)` pair.

    Labels come from `get_labels` and boxes from `get_boxes`, in that order.
    """
    labels = get_labels(data_dict)
    boxes = get_boxes(data_dict)
    return labels, boxes

def pred_processor(data_dict):
    """Split a prediction dict into the `(labels, scores, boxes)` triple.

    The three parts come from `get_labels`, `get_scores` and `get_boxes`,
    called in that order.
    """
    labels = get_labels(data_dict)
    scores = get_scores(data_dict)
    boxes = get_boxes(data_dict)
    return labels, scores, boxes

def get_dist(data_dict):
    """Return, per box, the Euclidean norm of its first two coordinates.

    Boxes are obtained through `get_boxes` and indexed as a 2-D array, so the
    result has one entry per row.
    """
    planar_coords = get_boxes(data_dict)[:, :2]
    return np.linalg.norm(planar_coords, axis=1)

# Settings for both tests

In [8]:
################################################################################
# 
# Example: evaluating one sample
#
################################################################################

# Matching threshold per class label; a list or a dict both work as long as
# thresholds[label] yields the threshold for that label.
thresholds = dict([
    (0, 0.5),  # Just to not output error
    (1, 0.7),  # Car
    (2, 0.5),  # Pedestrian
    (3, 0.5),  # Cyclist
])

# Commonly used filters:
#   ClassFilter    - filter by class; needs the class label
#   RangeFilter    - filter a numerical value by a range
#   CombinedFilter - combine several filters with an `AND` clause

# Restrict the evaluation to class label 1, which the filter names 'Car'.
car_filter = ClassFilter(
    name='Car',
    label=1,
    gt_processor=gt_processor,
    pred_processor=pred_processor,
)

def attach_data(sample_idx, gt_dict, pred_dict, gt_list, pred_list):
    """Evaluation callback that stores the raw per-box values on the lists.

    Slot `i` of `gt_list.data` receives `{'gt_boxes': ...}` and slot `i` of
    `pred_list.data` receives the `'score_all'`, `'boxes_lidar'` and
    `'pred_vars'` entries, each taken at index `i` of the matching dict field.

    Args:
        sample_idx: Unused here; part of the callback signature.
        gt_dict: Ground-truth dict providing `'gt_boxes'`.
        pred_dict: Prediction dict providing the three fields listed above.
        gt_list: Sized object with an index-assignable `data` attribute.
        pred_list: Same, for the predictions.
    """
    for box_idx in range(len(gt_list)):
        gt_list.data[box_idx] = {'gt_boxes': gt_dict['gt_boxes'][box_idx]}

    pred_fields = ('score_all', 'boxes_lidar', 'pred_vars')
    for box_idx in range(len(pred_list)):
        pred_list.data[box_idx] = {
            field: pred_dict[field][box_idx] for field in pred_fields
        }


# Evaluate individual frames to check for weird matches

In [25]:
# Evaluate every frame on its own, so that a suspicious match can be attributed
# to the single frame that produced it. The frame count is taken from the data
# instead of being hard-coded.
for idx in range(min(len(gt_dicts), len(pred_dicts))):
    # `evaluate_all_samples` is given one-element lists, i.e. exactly one frame
    gt_list, pred_list = DetectionEval.evaluate_all_samples(
        [gt_dicts[idx]],
        [pred_dicts[idx]],
        thresholds,
        criterion='iou',
        epsilon=0.1,
        filta=car_filter,
        gt_processor=gt_processor,
        pred_processor=pred_processor,
        callback=attach_data
    )

    # A prediction box is either a TP or FP
    # TP is valid, localized and classified
    tp = pred_list.valid & pred_list.localized & pred_list.classified
    # FP is valid and either not localized or not classified correctly
    fp = pred_list.valid & ~(pred_list.localized & pred_list.classified)
    tp_list = pred_list[tp]

    # Flag every TP whose first box coordinate differs from that of its matched
    # GT box by more than 1000 (same units as the boxes).
    count = 0
    bad_idx = []
    for j in range(len(tp_list)):
        obj = tp_list[j]
        # `data` holds the raw values stored by the `attach_data` callback
        pred_x = obj.data['boxes_lidar'][0]
        gt_x = gt_list[int(obj.matched_idx)].data['gt_boxes'][0]
        if np.abs(pred_x - gt_x) > 1000:
            count += 1
            print('find object at index', j)
            bad_idx.append(j)
            print(obj)
            print(obj.data['boxes_lidar'])
            print(obj.matched_idx)
            print(gt_list[int(obj.matched_idx)].data['gt_boxes'])
    if count > 0:
        print('total count', count)
        print('bad_idx', bad_idx)

0
Eval with empty predictions!!
0
Eval with empty predictions!!
0
Eval with empty predictions!!


# Evaluate all frames to check for weird matches

In [28]:
# Frame window referenced by the commented-out slices in the call below.
first_frame = 1994
last_frame = 1997
gt_list, pred_list = DetectionEval.evaluate_all_samples(
    gt_dicts,  # [first_frame:last_frame],
    pred_dicts,  # [first_frame:last_frame],
    thresholds,
    filta=car_filter,
    criterion='iou',
    epsilon=0.1,
    gt_processor=gt_processor,
    pred_processor=pred_processor,
    callback=attach_data,
)

# Every valid prediction is either a TP (valid, localized and classified) ...
tp = pred_list.valid & pred_list.localized & pred_list.classified
# ... or an FP (valid, but not both localized and classified).
fp = pred_list.valid & ~(pred_list.localized & pred_list.classified)
tp_list = pred_list[tp]

# Report each TP whose first box coordinate is more than 1000 away from the
# first coordinate of the GT box it was matched to.
count = 0
bad_idx = []
for j in range(len(tp_list)):
    obj = tp_list[j]
    pred_x = obj.data['boxes_lidar'][0]
    gt_x = gt_list[int(obj.matched_idx)].data['gt_boxes'][0]
    if not np.abs(pred_x - gt_x) > 1000:
        continue
    count += 1
    print('find object at index', j)
    bad_idx.append(j)
    print('obj', obj)
    print('obj boxes_lidar', obj.data['boxes_lidar'])
    print('matched_idx', obj.matched_idx)
    print('gt_boxes', gt_list[int(obj.matched_idx)].data['gt_boxes'])

if count > 0:
    print('total count', count)
    print('bad_idx', bad_idx)

1994
Eval with empty predictions!!
2614
Eval with empty predictions!!
2807
Eval with empty predictions!!


# Searching for a small group of frames that gives invalid output

In [27]:
# Evaluate only frames 1994, 1995 and 1996 together.
gt_list, pred_list = DetectionEval.evaluate_all_samples(
    [gt_dicts[frame_no] for frame_no in (1994, 1995, 1996)],
    [pred_dicts[frame_no] for frame_no in (1994, 1995, 1996)],
    thresholds,
    filta=car_filter,
    criterion='iou',
    epsilon=0.1,
    gt_processor=gt_processor,
    pred_processor=pred_processor,
    callback=attach_data,
)

# Every valid prediction is either a TP (valid, localized and classified) ...
tp = pred_list.valid & pred_list.localized & pred_list.classified
# ... or an FP (valid, but not both localized and classified).
fp = pred_list.valid & ~(pred_list.localized & pred_list.classified)
tp_list = pred_list[tp]

# Report each TP whose first box coordinate is more than 1000 away from the
# first coordinate of the GT box it was matched to.
count = 0
bad_idx = []
for j in range(len(tp_list)):
    obj = tp_list[j]
    pred_x = obj.data['boxes_lidar'][0]
    gt_x = gt_list[int(obj.matched_idx)].data['gt_boxes'][0]
    if not np.abs(pred_x - gt_x) > 1000:
        continue
    count += 1
    print('find object at index', j)
    bad_idx.append(j)
    print('obj', obj)
    print('obj boxes_lidar', obj.data['boxes_lidar'])
    print('matched_idx', obj.matched_idx)
    print('gt_boxes', gt_list[int(obj.matched_idx)].data['gt_boxes'])

if count > 0:
    print('total count', count)
    print('bad_idx', bad_idx)

0
Eval with empty predictions!!


In [178]:
# Dump the raw GT and prediction dicts of the frames under investigation.
relevant_frames = [1994, 1995, 1996]
for frame in relevant_frames:
    for label, frame_dicts in (('gt_dicts:', gt_dicts), ('pred_dicts:', pred_dicts)):
        print(label, frame_dicts[frame])


gt_dicts: {'gt_boxes': array([[ 7.2815964e+01,  4.5350776e+00, -9.9642044e-01,  3.0500000e+00,
         1.7300000e+00,  1.6600000e+00, -7.9631805e-04],
       [ 7.6146431e+01,  8.7548447e-01, -9.8028213e-01,  1.2340000e+01,
         2.6300001e+00,  2.8499999e+00,  1.9203663e-02],
       [ 3.4917698e+01,  9.9610119e+00, -1.4251553e+00,  1.7040001e+01,
         2.6700001e+00,  3.1900001e+00, -1.0796428e-02]], dtype=float32), 'gt_labels': array([ 1, -1, -1]), 'gt_names': array(['Car', 'Truck', 'Tram'], dtype='<U5'), 'frame_id': '004040'}
pred_dicts: {'frame_id': '004040', 'name': array([], dtype=float64), 'pred_labels': array([], dtype=float64), 'score': array([], dtype=float64), 'score_all': array([], dtype=float64), 'boxes_lidar': array([], dtype=float64), 'pred_vars': array([], dtype=float64), 'cluster_size': array([], dtype=float64)}
gt_dicts: {'gt_boxes': array([[ 6.9097809e+01, -2.6458913e-01, -1.1359985e+00,  1.2340000e+01,
         2.6300001e+00,  2.8499999e+00,  9.2036724e-03],
 

# Testing whether the NLL is negative

In [13]:
import torch
from torch.distributions.multivariate_normal import MultivariateNormal
from torch.distributions.normal import Normal
from torch.distributions.von_mises import VonMises


# One GT box and one predicted box (mean and per-dimension variance), seven
# values each; the last value is modelled separately as an angle below.
gt_box_means = np.array([[31.132696, -6.887674, -1.7227516, 4.45, 1.68, 1.4, -3.6607962]])
pred_box_scores = [0.9150038, 0, 0, 0.08499616]
pred_box_means = np.array([[31.20313, -6.8782997, -1.7680302, 4.3111343, 1.6266472, 1.4391828, 5.7834845]])
pred_box_vars = np.array([[0.00546014, 0.00246465, 0.00197836, 0.0274806, 0.00508997, 0.00448636, 0.00132322]])

# tmp = gt_box_means
# gt_box_means = pred_box_means
# pred_box_means = tmp

# Diagonal covariance over the first six dimensions of every box.
pred_var_mat = [np.diag(i[:-1]) for i in pred_box_vars]

# Convert the list of matrices to one ndarray first: building a tensor straight
# from a list of ndarrays is slow and triggers a torch warning.
# 1e-2 is added to every variance on the diagonal.
pred_multivariate_normal_dists = MultivariateNormal(
        torch.tensor(pred_box_means[:,:-1]),
        torch.tensor(np.array(pred_var_mat)) + 1e-2 * torch.eye(pred_box_vars.shape[1]-1))

# The last dimension is modelled with a von Mises distribution whose
# concentration is the inverse of the same regularised variance.
pred_von_mises_dists = VonMises(
        torch.tensor(pred_box_means[:,-1:]),
        torch.tensor(1/(pred_box_vars[:,-1:]+ 1e-2 )))

print(pred_var_mat)
print('multivariate mean', pred_multivariate_normal_dists.mean)
print('multivariate variance', pred_multivariate_normal_dists.variance)
print('von-mises mean', pred_von_mises_dists.mean)
print('von-mises variance', pred_von_mises_dists.variance)

# The printed values are NEGATED log-probabilities, and are labelled as such.
print('multivariate normal negative log prob', -pred_multivariate_normal_dists.log_prob(torch.tensor(gt_box_means[:,:-1])))
print('von-mises negative log prob', -pred_von_mises_dists.log_prob(torch.tensor(gt_box_means[:,-1:])).squeeze())


[array([[0.00546014, 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.00246465, 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.00197836, 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.0274806 , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.00508997,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.00448636]])]
multivariate mean tensor([[31.2031, -6.8783, -1.7680,  4.3111,  1.6266,  1.4392]],
       dtype=torch.float64)
multivariate variance tensor([[0.0155, 0.0125, 0.0120, 0.0375, 0.0151, 0.0145]], dtype=torch.float64)
multivariate mean tensor([[5.7835]], dtype=torch.float64)
multivariate variance tensor([[0.0057]], dtype=torch.float64)
multivariate normal log prob tensor([-6.1779], dtype=torch.float64)
von-mises log prob tensor(175.2913, dtype=torch.float64)


In [197]:
# values for Ali

# NOTE: `gt_box_means` and `pred_box_means` are reassigned further down in this
# cell; `pred_box_scores` and `pred_box_vars` are not used again in it.
gt_box_means = np.array([[31.132696, -6.887674, -1.7227516, 4.45, 1.68, 1.4, -3.6607962]])
pred_box_scores = [0.9150038, 0, 0, 0.08499616]
pred_box_means = np.array([[31.20313, -6.8782997, -1.7680302, 4.3111343, 1.6266472, 1.4391828, 5.7834845]])
pred_box_vars = np.array([[0.00546014, 0.00246465, 0.00197836, 0.0274806, 0.00508997, 0.00448636, 0.00132322]])


# Three 4-D predictions: two with a diagonal covariance, one with a full one.
pred_box_means = np.array([[163.8247, 442.1709, 425.7526, 768.0000],
        [682.6232, 463.4604, 767.7873, 583.4254],
        [381.4330, 533.0671, 510.7216, 651.6105]])
pred_var_mat = np.array([[[ 9.2102e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  9.2160e-05,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  9.2102e-05,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  9.2160e-05]],
        [[ 9.2102e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  9.2160e-05,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  9.2102e-05,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  9.2160e-05]],
        [[ 1.7386e-01, -6.6875e-01,  1.2679e+00,  6.3513e-01],
         [-6.6875e-01,  2.5736e+00, -4.8791e+00, -2.4441e+00],
         [ 1.2679e+00, -4.8791e+00,  9.2504e+00,  4.6338e+00],
         [ 6.3513e-01, -2.4441e+00,  4.6338e+00,  2.3213e+00]]])
gt_box_means = np.array([[208.6400, 428.7997, 430.7200, 683.5200],
        [618.8800, 476.1600, 766.7200, 576.6398],
        [386.5600, 509.7975, 567.6800, 648.1351]])

# 1e-2 is added to the diagonal of every covariance matrix.
pred_multivariate_normal_dists = MultivariateNormal(
        torch.tensor(pred_box_means),
        torch.tensor(pred_var_mat) + 1e-2 * torch.eye(4))

# pred_von_mises_dists = VonMises(
#         torch.tensor(pred_box_means[:,-1:]),
#         torch.tensor(1/pred_box_vars[:,-1:]))

print(pred_var_mat)
print('multivariate mean', pred_multivariate_normal_dists.mean)
print('multivariate variance', pred_multivariate_normal_dists.variance)
# print('multivariate mean', pred_von_mises_dists.mean)
# print('multivariate variance', pred_von_mises_dists.variance)

# The printed value is the NEGATED log-probability, and is labelled as such.
print('multivariate normal negative log prob', -pred_multivariate_normal_dists.log_prob(torch.tensor(gt_box_means)))
# print('von-mises log prob', -pred_von_mises_dists.log_prob(torch.tensor(gt_box_means[:,-1:])).squeeze())


[[[ 9.2102e-05  0.0000e+00  0.0000e+00  0.0000e+00]
  [ 0.0000e+00  9.2160e-05  0.0000e+00  0.0000e+00]
  [ 0.0000e+00  0.0000e+00  9.2102e-05  0.0000e+00]
  [ 0.0000e+00  0.0000e+00  0.0000e+00  9.2160e-05]]

 [[ 9.2102e-05  0.0000e+00  0.0000e+00  0.0000e+00]
  [ 0.0000e+00  9.2160e-05  0.0000e+00  0.0000e+00]
  [ 0.0000e+00  0.0000e+00  9.2102e-05  0.0000e+00]
  [ 0.0000e+00  0.0000e+00  0.0000e+00  9.2160e-05]]

 [[ 1.7386e-01 -6.6875e-01  1.2679e+00  6.3513e-01]
  [-6.6875e-01  2.5736e+00 -4.8791e+00 -2.4441e+00]
  [ 1.2679e+00 -4.8791e+00  9.2504e+00  4.6338e+00]
  [ 6.3513e-01 -2.4441e+00  4.6338e+00  2.3213e+00]]]
multivariate mean tensor([[163.8247, 442.1709, 425.7526, 768.0000],
        [682.6232, 463.4604, 767.7873, 583.4254],
        [381.4330, 533.0671, 510.7216, 651.6105]], dtype=torch.float64)
multivariate variance tensor([[0.0101, 0.0101, 0.0101, 0.0101],
        [0.0101, 0.0101, 0.0101, 0.0101],
        [0.1839, 2.5836, 9.2604, 2.3313]], dtype=torch.float64)
multivaria

In [239]:
import math
gt_box_means = np.array([[0, 0, 0, 0, 0, 0, -3.16]])
# pred_box_means = np.array([[0, 0, 0, 0, 0, 0, 6.3]])
# Same angle as 6.3, shifted down by one full turn.
pred_box_means = np.array([[0, 0, 0, 0, 0, 0, 6.3-2*np.pi]])
print('pred_box_means', pred_box_means)
# pred_box_vars = np.array([[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01]])
pred_box_vars = np.array([[0.17, 0.1, 0.2, 0.2, 0.2, 0.2, 0.00293855]])

# Diagonal covariance over the first six dimensions of every box.
pred_var_mat = [np.diag(i[:-1]) for i in pred_box_vars]

# Convert the list of matrices to one ndarray first: building a tensor straight
# from a list of ndarrays is slow and triggers a torch warning.
# NOTE: 1e-20 is far below float64 resolution at these magnitudes, so the added
# term leaves the variances unchanged.
pred_multivariate_normal_dists = MultivariateNormal(
        torch.tensor(pred_box_means[:,:-1]),
        torch.tensor(np.array(pred_var_mat)) + 1e-20 * torch.eye(pred_box_vars.shape[1]-1))

# One independent 1-D normal per dimension (scale = sqrt(variance)), used to
# cross-check the multivariate result dimension by dimension.
normal_dist = [
    Normal(
        torch.tensor(pred_box_means[0][dim]),
        torch.tensor(math.sqrt(pred_box_vars[0][dim])))
    for dim in range(6)
]
print(normal_dist)

for i in range(6):
    print('normal negative log prob', -normal_dist[i].log_prob(torch.tensor(gt_box_means[0][i])))

# The last dimension is an angle: von Mises with concentration = 1 / variance.
pred_von_mises_dists = VonMises(
        torch.tensor(pred_box_means[:,-1:]),
        torch.tensor(1/pred_box_vars[:,-1:]))

print(pred_var_mat)
print('multivariate mean', pred_multivariate_normal_dists.mean)
print('multivariate variance', pred_multivariate_normal_dists.variance)
print('von-mises mean', pred_von_mises_dists.mean)
print('von-mises variance', pred_von_mises_dists.variance)

# The printed values are NEGATED log-probabilities, and are labelled as such.
print('multivariate normal negative log prob', -pred_multivariate_normal_dists.log_prob(torch.tensor(gt_box_means[:,:-1])))
print('von-mises negative log prob', -pred_von_mises_dists.log_prob(torch.tensor(gt_box_means[:,-1:])).squeeze())

pred_box_means [[0.         0.         0.         0.         0.         0.
  0.01681469]]
[Normal(loc: 0.0, scale: 0.41231057047843933), Normal(loc: 0.0, scale: 0.3162277638912201), Normal(loc: 0.0, scale: 0.4472135901451111), Normal(loc: 0.0, scale: 0.4472135901451111), Normal(loc: 0.0, scale: 0.4472135901451111), Normal(loc: 0.0, scale: 0.4472135901451111)]
normal log prob tensor(0.0330, dtype=torch.float64)
normal log prob tensor(-0.2324, dtype=torch.float64)
normal log prob tensor(0.1142, dtype=torch.float64)
normal log prob tensor(0.1142, dtype=torch.float64)
normal log prob tensor(0.1142, dtype=torch.float64)
normal log prob tensor(0.1142, dtype=torch.float64)
[array([[0.17, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.1 , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.2 , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.2 , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.2 , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.2 ]])]
multivariate mean tensor([[0., 0., 0., 0., 0., 0.]],

In [8]:
from scipy.stats import vonmises

# Ground-truth angle, used as the location of the von Mises distribution.
all_predicted_gt_current_dim = np.array([-3.14])

# Predicted angle and its variance; the concentration is 1 / variance.
all_predicted_means_current_dim = np.array([3.14])
all_predicted_covariances_current_dim = np.array([0.05])

# CDF evaluated at the predicted mean. For these inputs the recorded output is
# [1.49435355], i.e. the returned value is larger than 1.
all_predicted_scores = vonmises.cdf(
    all_predicted_means_current_dim,
    kappa=1/all_predicted_covariances_current_dim,
    loc=all_predicted_gt_current_dim,
)
print(all_predicted_scores)

[1.49435355]
