# Analysis Notebook

## NuScenes Data Analysis

### Load required packages

In [1]:
import json
import os
import numpy as np 
import copy
import tempfile
import warnings
from os import path as osp

import mmcv
import numpy as np
import pyquaternion
import torch
from nuscenes.utils.data_classes import Box as NuScenesBox
import mmcv
from collections import defaultdict

# from mmdet3d.core import bbox3d2result, box3d_multiclass_nms, xywhr2xyxyr
# from mmdet.datasets import CocoDataset

### Paths to val and training annotation files for image based 3D object detection

In [43]:
# COCO-format mono3D annotation files for the nuScenes val and train splits (parsed with json further down).
val_anno_file = '/home/anishmad/msr_thesis/mmdet3d-lt3d/data/nuscenes/nuscenes_infos_val_mono3d.coco.json'
train_anno_file = '/home/anishmad/msr_thesis/mmdet3d-lt3d/data/nuscenes/nuscenes_infos_train_mono3d.coco.json'

### Choosing only classes present in the val set

In [44]:
# Class list in category-id order: position i holds the class whose 'id' is i in data_val['categories'].
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 'motorcycle', 'emergency_vehicle',
'adult', 'child', 'police_officer', 'construction_worker', 'stroller', 'personal_mobility', 
'pushable_pullable', 'debris', 'traffic_cone', 'barrier'
]

In [45]:
# Parse the train/val annotation files; the `with` blocks close each file handle
# as soon as its JSON has been read instead of leaving it open for the kernel's lifetime.
with open(train_anno_file, 'r') as train_fh:
    data_train = json.load(train_fh)
with open(val_anno_file, 'r') as val_fh:
    data_val = json.load(val_fh)

From the paper: 

*Formally, for the regression branch, the detector predicts
3D attributes, including offsets ∆x, ∆y to the projected 3D center, depths d, 3D size w3D, l3D, h3D,
sin value of rotation θ, direction class Cθ, center-ness c, and distances to four sides of 2D boxes l,
r, t, b, for each location on the output dense map.*

**bbox_cam3d in annotations represents [∆x, ∆y to the projected 3D center, depths d, 3D size w3D, l3D, h3D]**  
This is why we multiply with classwise depth priors to first 2 indices of predictions. (see function `decode()` in `fcos3d_bbox_coder.py`)   

> **Warning**  
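Note: In the original mmdet3d config the order for base_dims is (l3D, h3D, w3D) — the same order as `bbox_cam3d[3:6]`, as the per-class statistics below confirm (e.g. adult: 0.73, 1.77, 0.67 vs. len 0.73 / height 1.77 / width 0.67 from `nusc.list_categories()`) — therefore we follow the same order to generate base_dims for classes not mentioned in the original mmdet3d config.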

In [46]:
data_val['categories']

[{'id': 0, 'name': 'car'},
 {'id': 1, 'name': 'truck'},
 {'id': 2, 'name': 'trailer'},
 {'id': 3, 'name': 'bus'},
 {'id': 4, 'name': 'construction_vehicle'},
 {'id': 5, 'name': 'bicycle'},
 {'id': 6, 'name': 'motorcycle'},
 {'id': 7, 'name': 'emergency_vehicle'},
 {'id': 8, 'name': 'adult'},
 {'id': 9, 'name': 'child'},
 {'id': 10, 'name': 'police_officer'},
 {'id': 11, 'name': 'construction_worker'},
 {'id': 12, 'name': 'stroller'},
 {'id': 13, 'name': 'personal_mobility'},
 {'id': 14, 'name': 'pushable_pullable'},
 {'id': 15, 'name': 'debris'},
 {'id': 16, 'name': 'traffic_cone'},
 {'id': 17, 'name': 'barrier'}]

In [5]:
data_train['annotations'][101]

{'file_name': 'samples/CAM_BACK/n015-2018-07-18-11-07-57+0800__CAM_BACK__1531883533537525.jpg',
 'image_id': '5ffc994537664bb9aefa37ee09068c79',
 'area': 1142.9049071452464,
 'category_name': 'car',
 'category_id': 0,
 'bbox': [723.0468713984644,
  489.1449417500972,
  37.854072475335215,
  30.19238967986695],
 'iscrowd': 0,
 'bbox_cam3d': [-4.6043109342135065,
  1.1319897855125702,
  42.28373715976227,
  4.25,
  1.44,
  1.638,
  -1.7497131449765695],
 'velo_cam3d': [-1.3292092789466072, 8.538864273706539],
 'center2d': [741.1028442382812, 503.4422912597656, 42.28373718261719],
 'attribute_name': 'vehicle.moving',
 'attribute_id': 5,
 'segmentation': [],
 'id': 101}

### Compute mean depth priors (classwise) for a data split

In [6]:

def get_mean_depths(data, class_names=None):
    """Compute the per-class mean and standard deviation of annotated depths.

    The depth of an annotation is read from ``anno['center2d'][2]``.

    Args:
        data: dict with an ``'annotations'`` list. Every annotation must have a
            ``'category_name'`` and a ``'center2d'`` sequence whose index 2 is
            the depth.
        class_names: optional iterable of category names. When given, the
            result holds one entry per name, in exactly that order, so it lines
            up with the class list. When omitted, entries follow the order in
            which categories first occur in ``data['annotations']`` (the
            original behaviour), which generally differs between splits.

    Returns:
        list of ``(mean, std)`` tuples, each value rounded to 3 decimals.
        The list is also printed.

    Raises:
        KeyError: if a name in ``class_names`` has no annotation in ``data``.
    """
    classwise_depths = defaultdict(list)
    for anno in data['annotations']:
        classwise_depths[anno['category_name']].append(anno['center2d'][2])

    if class_names is None:
        # Dict insertion order == order of first occurrence in the annotations.
        ordered_cats = list(classwise_depths)
    else:
        ordered_cats = list(class_names)
        # Check membership with `in` so the defaultdict does not silently
        # create empty lists (np.mean([]) would yield nan plus a warning).
        missing = [cat for cat in ordered_cats if cat not in classwise_depths]
        if missing:
            raise KeyError('no annotations found for classes: {}'.format(missing))

    mean_std_depths = []
    for cat in ordered_cats:
        cat_depths = classwise_depths[cat]
        mean, std = np.round(np.mean(cat_depths), 3), np.round(np.std(cat_depths), 3)
        mean_std_depths.append((mean, std))
    print(mean_std_depths)
    return mean_std_depths

# NOTE: get_mean_depths returns the (mean, std) pairs in the order in which categories first
# occur in each split's annotations, so the two printed lists are not index-aligned with each
# other, nor with class_names.
print('Train\n')
ms_depth_train = get_mean_depths(data_train)
print('Val\n')
ms_depth_val = get_mean_depths(data_val)

Train

[(37.152, 24.632), (31.99, 21.124), (20.606, 13.679), (23.893, 15.209), (20.571, 14.341), (34.157, 20.107), (27.457, 15.528), (22.736, 15.011), (22.193, 16.328), (24.278, 16.049), (22.348, 13.704), (40.911, 26.341), (39.687, 23.974), (22.298, 10.944), (24.985, 12.478), (29.132, 16.155), (18.995, 12.011), (29.624, 21.013)]
Val

[(27.356, 16.619), (18.817, 12.008), (33.185, 21.504), (22.562, 13.306), (38.762, 25.564), (38.81, 25.374), (20.296, 14.837), (25.255, 14.126), (28.419, 18.421), (39.775, 21.664), (24.895, 17.914), (21.214, 13.083), (21.788, 4.532), (41.683, 21.851), (27.188, 15.182), (20.188, 10.077), (32.358, 25.091), (17.598, 16.05)]


### Computing base dims for bbox sizes (length, height, width)

In [13]:
def get_mean_basedims(data):
    """Compute the per-class mean 3D box size from ``bbox_cam3d``.

    For every annotation the three size entries ``bbox_cam3d[3]``,
    ``bbox_cam3d[4]`` and ``bbox_cam3d[5]`` are collected per
    ``category_name`` and averaged.

    Axis order: comparing the train output of this function with
    ``nusc.list_categories()`` in this notebook (adult: len 0.73,
    height 1.77, width 0.67 vs. the computed (0.73, 1.77, 0.67)) shows that
    indices 3/4/5 hold (length, height, width) for these annotations. The
    local labels below follow that order.

    Args:
        data: dict with an ``'annotations'`` list. Every annotation must have a
            ``'category_name'`` and a ``'bbox_cam3d'`` sequence with at least
            six entries.

    Returns:
        dict mapping category name to a tuple
        ``(mean of index 3, mean of index 4, mean of index 5)``, each rounded
        to 2 decimals. Keys appear in order of first occurrence in the annotations.
    """
    size_keys = ('length', 'height', 'width')  # bbox_cam3d indices 3, 4, 5
    classwise_dims = defaultdict(lambda: defaultdict(list))
    for anno in data['annotations']:
        cat_dims = classwise_dims[anno['category_name']]
        cat_dims['length'].append(anno['bbox_cam3d'][3])
        cat_dims['height'].append(anno['bbox_cam3d'][4])
        cat_dims['width'].append(anno['bbox_cam3d'][5])

    mean_dims_dict = {}
    for cat, cat_dims in classwise_dims.items():
        # Only the means are returned; the previously computed (and discarded)
        # standard deviations are no longer evaluated.
        mean_dims_dict[cat] = tuple(np.round(np.mean(cat_dims[key]), 2) for key in size_keys)
    return mean_dims_dict

print('Train\n')
tr_mean_dims = get_mean_basedims(data_train)
print(tr_mean_dims)
# Re-order the per-class means to follow class_names (KeyError if a class has no train annotation).
ordered_tr_mean_dim_vals = [tr_mean_dims[x] for x in class_names]
print('Val\n')
# The val statistics are computed here but not printed.
val_mean_dims = get_mean_basedims(data_val)

Train

{'truck': (6.93, 2.83, 2.51), 'car': (4.62, 1.73, 1.96), 'traffic_cone': (0.41, 1.08, 0.41), 'construction_worker': (0.71, 1.73, 0.72), 'pushable_pullable': (0.66, 1.06, 0.6), 'construction_vehicle': (6.68, 3.21, 2.85), 'adult': (0.73, 1.77, 0.67), 'barrier': (0.5, 0.99, 2.52), 'debris': (0.9, 1.19, 0.97), 'motorcycle': (2.11, 1.46, 0.78), 'bicycle': (1.7, 1.29, 0.61), 'bus': (11.22, 3.5, 2.95), 'trailer': (12.56, 3.89, 2.94), 'child': (0.53, 1.38, 0.51), 'stroller': (0.94, 1.19, 0.62), 'police_officer': (0.69, 1.82, 0.73), 'personal_mobility': (1.18, 1.74, 0.62), 'emergency_vehicle': (5.06, 1.88, 2.04)}
Val



### Compute Base Dims for NuScenes dataset (used in PGD config)

In [14]:
%matplotlib inline
from nuscenes.nuscenes import NuScenes

# nuScenes devkit handle for the v1.0-trainval split; used below for nusc.list_categories().
nusc = NuScenes(version="v1.0-trainval", dataroot='/ssd0/nperi/nuScenes/', verbose=True)

Loading NuScenes tables for version v1.0-trainval...
23 category,
8 attribute,
4 visibility,
64386 instance,
12 sensor,
10200 calibrated_sensor,
2631083 ego_pose,
68 log,
850 scene,
34149 sample,
2631083 sample_data,
1166187 sample_annotation,
4 map,
Done loading in 38.391 seconds.
Reverse indexing ...
Done reverse indexing in 8.2 seconds.


### List Categories in the dataset along with statistics

In [15]:
nusc.list_categories()

Category stats for split v1.0-trainval:
animal                      n=  787, width= 0.37±0.13, len= 0.86±0.36, height= 0.60±0.20, lw_aspect= 2.35±0.69
human.pedestrian.adult      n=208240, width= 0.67±0.13, len= 0.73±0.19, height= 1.77±0.18, lw_aspect= 1.11±0.26
human.pedestrian.child      n= 2066, width= 0.51±0.14, len= 0.53±0.15, height= 1.38±0.25, lw_aspect= 1.05±0.23
human.pedestrian.constructi n= 9161, width= 0.72±0.20, len= 0.71±0.20, height= 1.74±0.30, lw_aspect= 1.02±0.29
human.pedestrian.personal_m n=  395, width= 0.62±0.12, len= 1.18±0.31, height= 1.71±0.27, lw_aspect= 1.98±0.64
human.pedestrian.police_off n=  727, width= 0.73±0.14, len= 0.69±0.13, height= 1.83±0.14, lw_aspect= 0.97±0.18
human.pedestrian.stroller   n= 1072, width= 0.63±0.13, len= 0.95±0.27, height= 1.17±0.15, lw_aspect= 1.58±0.68
human.pedestrian.wheelchair n=  503, width= 0.77±0.10, len= 1.09±0.23, height= 1.37±0.09, lw_aspect= 1.42±0.23
movable_object.barrier      n=152087, width= 2.53±0.64, len= 0.50±0.17,

### Manually copied over values

In [16]:
#### All classes listed as given in the config https://github.com/neeharperi/mmdet3d-lt3d/blob/main/configs/_base_/datasets/nus-mono3d.py
# class_names = [
# 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 'motorcycle', 'emergency_vehicle',
# 'adult', 'child', 'police_officer', 'construction_worker', 'stroller', 'personal_mobility', 
# 'pushable_pullable', 'debris', 'traffic_cone', 'barrier'
# ]

## Tuples follow the (len, height, width) convention. The values differ slightly from the train-only means,
## probably because they were taken from the trainval set and not only the train set.
## If the slightly different values are an issue, compute base_dims over the train set the same way as the base depths.

base_dims = { 'car': (4.62, 1.73, 1.96), 'truck': (6.93, 2.83, 2.51), 'trailer': (12.56, 3.89, 2.94), 'bus': (11.22, 3.50, 2.95),
            'construction_vehicle': (6.68, 3.21, 2.85), 'bicycle': (1.70, 1.28, 0.60), 'motorcycle': (2.11, 1.46, 0.78),
             'emergency_vehicle': (5.04, 1.85, 2.03), 'adult': (0.73, 1.77, 0.67), 'child': (0.53, 1.38, 0.51), 
             'police_officer': (0.69, 1.83, 0.73), 'construction_worker': (0.71, 1.74, 0.72), 'stroller': (0.95, 1.17, 0.63), 
             'personal_mobility': (1.18, 1.71, 0.62), 'pushable_pullable': (0.67, 1.06, 0.60), 'debris': (1.08, 1.26, 1.01), 
             'traffic_cone': (0.41, 1.07, 0.41), 'barrier':(0.5, 0.98, 2.53)
            }

# Classes not included:
## animal, wheelchair, bicycle_rack, bendy_bus (for emergency_vehicle only the police vehicle was taken, not the ambulance)

#### Check difference between values already computed for trainval set, and specifically when computed for train set (no val)

In [17]:
# Pair the manually copied trainval values with the train-only means; the pairing is positional and
# relies on base_dims having been written in class_names order.
list(zip(list(base_dims.values()), ordered_tr_mean_dim_vals))

[((4.62, 1.73, 1.96), (4.62, 1.73, 1.96)),
 ((6.93, 2.83, 2.51), (6.93, 2.83, 2.51)),
 ((12.56, 3.89, 2.94), (12.56, 3.89, 2.94)),
 ((11.22, 3.5, 2.95), (11.22, 3.5, 2.95)),
 ((6.68, 3.21, 2.85), (6.68, 3.21, 2.85)),
 ((1.7, 1.28, 0.6), (1.7, 1.29, 0.61)),
 ((2.11, 1.46, 0.78), (2.11, 1.46, 0.78)),
 ((5.04, 1.85, 2.03), (5.06, 1.88, 2.04)),
 ((0.73, 1.77, 0.67), (0.73, 1.77, 0.67)),
 ((0.53, 1.38, 0.51), (0.53, 1.38, 0.51)),
 ((0.69, 1.83, 0.73), (0.69, 1.82, 0.73)),
 ((0.71, 1.74, 0.72), (0.71, 1.73, 0.72)),
 ((0.95, 1.17, 0.63), (0.94, 1.19, 0.62)),
 ((1.18, 1.71, 0.62), (1.18, 1.74, 0.62)),
 ((0.67, 1.06, 0.6), (0.66, 1.06, 0.6)),
 ((1.08, 1.26, 1.01), (0.9, 1.19, 0.97)),
 ((0.41, 1.07, 0.41), (0.41, 1.08, 0.41)),
 ((0.5, 0.98, 2.53), (0.5, 0.99, 2.52))]

#### The correct one to use is the one computed using only the train set

In [18]:
ordered_tr_mean_dim_vals

[(4.62, 1.73, 1.96),
 (6.93, 2.83, 2.51),
 (12.56, 3.89, 2.94),
 (11.22, 3.5, 2.95),
 (6.68, 3.21, 2.85),
 (1.7, 1.29, 0.61),
 (2.11, 1.46, 0.78),
 (5.06, 1.88, 2.04),
 (0.73, 1.77, 0.67),
 (0.53, 1.38, 0.51),
 (0.69, 1.82, 0.73),
 (0.71, 1.73, 0.72),
 (0.94, 1.19, 0.62),
 (1.18, 1.74, 0.62),
 (0.66, 1.06, 0.6),
 (0.9, 1.19, 0.97),
 (0.41, 1.08, 0.41),
 (0.5, 0.99, 2.52)]

### Create COCO Structure using Symlinks

In [54]:
# Build the COCO-style directory layout expected by GLIP and link the nuScenes
# train annotation file into it.
new_anno_path = '/home/anishmad/msr_thesis/glip/DATASET/nuscenes/annotations/'
os.makedirs(new_anno_path, exist_ok=True)
new_imgs_path = '/home/anishmad/msr_thesis/glip/DATASET/nuscenes/images/'
os.makedirs(new_imgs_path, exist_ok=True)
# os.symlink('/ssd0/nperi/nuScenes/samples', '/home/anishmad/msr_thesis/glip/DATASET/nuscenes/images/samples', target_is_directory=True)
# os.symlink('/home/anishmad/msr_thesis/mmdet3d-lt3d/data/nuscenes/nuscenes_infos_val_mono3d.coco.json','/home/anishmad/msr_thesis/glip/DATASET/nuscenes/annotations/nuscenes_infos_val_mono3d.coco.json')
train_anno_src = '/home/anishmad/msr_thesis/mmdet3d-lt3d/data/nuscenes/nuscenes_infos_train_mono3d.coco.json'
train_anno_link = '/home/anishmad/msr_thesis/glip/DATASET/nuscenes/annotations/nuscenes_infos_train_mono3d.coco.json'
# os.symlink raises FileExistsError if the link path already exists, which made this
# cell fail on every re-run. lexists() is also True for a dangling symlink.
if not os.path.lexists(train_anno_link):
    os.symlink(train_anno_src, train_anno_link)

In [5]:
# Load the saved GLIP zero-shot COCO val2017 predictions.
# NOTE: torch.load unpickles the file, so only use it on prediction files from a trusted source.
preds = torch.load('/home/anishmad/msr_thesis/glip/results/coco/zero-shot-coco-eval/eval/glip_tiny_model_o365_goldg_cc_sbu/inference/coco_2017_val/predictions.pth')


In [8]:
dir(preds[0])

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_copy_extra_fields',
 '_jit_unwrap',
 '_jit_wrap',
 '_split_into_xyxy',
 'add_field',
 'area',
 'bbox',
 'clip_to_image',
 'concate_box_list',
 'convert',
 'copy_with_fields',
 'crop',
 'extra_fields',
 'fields',
 'get_field',
 'has_field',
 'mode',
 'resize',
 'size',
 'to',
 'transpose']

In [22]:
preds[0].bbox

tensor([[ 409.4594,   11.5031,  486.7885,   73.2527],
        [ 543.0848,   64.0387,  612.6340,  116.1780],
        [ 948.3429,  157.3511, 1001.9305,  252.7081],
        [ 841.1035,  225.6266,  864.5202,  265.3079],
        [ 894.3657,  256.1796,  984.2666,  326.8865],
        [ 931.5273,  265.4379,  984.4628,  326.2958],
        [ 893.7115,  256.3721,  983.4470,  326.7967],
        [ 893.7115,  256.3721,  983.4470,  326.7967],
        [ 922.8558,  287.8022,  941.9806,  322.8975],
        [ 932.4453,  288.8634,  942.5273,  323.3798],
        [ 437.8874,  260.3157,  497.8688,  398.8068],
        [ 722.8879,  323.7018,  750.4893,  387.0290],
        [ 437.6594,  328.6328,  497.5798,  397.9185],
        [ 438.0580,  328.3372,  497.5044,  398.3215],
        [ 624.9235,  331.3458,  710.9141,  416.6017],
        [ 589.0894,  359.7681,  606.6123,  401.2081],
        [ 589.0894,  359.7681,  606.6123,  401.2081],
        [ 589.0894,  359.7681,  606.6123,  401.2081],
        [ 625.5745,  352.846

## Argoverse 2 Dataset (use trinity 2-9 for accessing this)

In [20]:
# Root directory that contains the Argoverse 2 annotation files used below.
av2_data_path = '/ssd0/nperi/Sensor/'

In [21]:
# COCO-format mono3D annotation files for the Argoverse 2 val and train splits.
av2_val_anno_file = '/ssd0/nperi/Sensor/av2_mmdet3d_trainval/av2_infos_val_mono3d.coco.json'
av2_train_anno_file = '/ssd0/nperi/Sensor/av2_mmdet3d_trainval/av2_infos_train_mono3d.coco.json'

In [22]:
# AV2 class list in category-id order: position i holds the class whose 'id' is i in av2_data_val['categories'].
av2_class_names = [
    'REGULAR_VEHICLE', 'PEDESTRIAN', 'BICYCLIST', 'MOTORCYCLIST', 'WHEELED_RIDER',
    'BOLLARD', 'CONSTRUCTION_CONE', 'SIGN', 'CONSTRUCTION_BARREL', 'STOP_SIGN', 'MOBILE_PEDESTRIAN_CROSSING_SIGN',
    'LARGE_VEHICLE', 'BUS', 'BOX_TRUCK', 'TRUCK', 'VEHICULAR_TRAILER', 'TRUCK_CAB', 'SCHOOL_BUS', 'ARTICULATED_BUS',
    'MESSAGE_BOARD_TRAILER', 'BICYCLE', 'MOTORCYCLE', 'WHEELED_DEVICE', 'WHEELCHAIR', 'STROLLER', 'DOG'
]

In [23]:
# Parse the AV2 train/val annotation files; the `with` blocks close each file
# handle as soon as its JSON has been read.
with open(av2_train_anno_file, 'r') as av2_train_fh:
    av2_data_train = json.load(av2_train_fh)
with open(av2_val_anno_file, 'r') as av2_val_fh:
    av2_data_val = json.load(av2_val_fh)

In [24]:
av2_data_val.keys()

dict_keys(['annotations', 'images', 'categories'])

In [25]:
def get_av2_mean_depths(data, class_names=None):
    """Compute the per-class mean and standard deviation of annotated depths (AV2 layout).

    Unlike the nuScenes file, ``data['annotations']`` is iterated here as a
    list of lists: each outer entry is itself an iterable of annotation dicts.
    The depth of an annotation is read from ``anno['center2d'][2]``.

    Args:
        data: dict with a nested ``'annotations'`` list as described above.
            Every annotation must have a ``'category_name'`` and a
            ``'center2d'`` sequence whose index 2 is the depth.
        class_names: optional iterable of category names. When given, the
            result holds one entry per name, in exactly that order. When
            omitted, entries follow the order in which categories first occur
            in the annotations (the original behaviour), which generally
            differs between splits.

    Returns:
        list of ``(mean, std)`` tuples, each value rounded to 3 decimals.
        The list is also printed.

    Raises:
        KeyError: if a name in ``class_names`` has no annotation in ``data``.
    """
    classwise_depths = defaultdict(list)
    for annotations in data['annotations']:
        for anno in annotations:
            classwise_depths[anno['category_name']].append(anno['center2d'][2])

    if class_names is None:
        # Dict insertion order == order of first occurrence in the annotations.
        ordered_cats = list(classwise_depths)
    else:
        ordered_cats = list(class_names)
        # Check membership with `in` so the defaultdict does not silently
        # create empty lists (np.mean([]) would yield nan plus a warning).
        missing = [cat for cat in ordered_cats if cat not in classwise_depths]
        if missing:
            raise KeyError('no annotations found for classes: {}'.format(missing))

    mean_std_depths = []
    for cat in ordered_cats:
        cat_depths = classwise_depths[cat]
        mean, std = np.round(np.mean(cat_depths), 3), np.round(np.std(cat_depths), 3)
        mean_std_depths.append((mean, std))
    print(mean_std_depths)
    return mean_std_depths

# print('Train\n')
# ms_depth_train = get_mean_depths(data_train)
# print('Val\n')
# ms_depth_val = get_mean_depths(data_val)

In [26]:
# NOTE: get_av2_mean_depths returns the (mean, std) pairs in the order in which categories first
# occur in each split's annotations, so the two printed lists are not index-aligned with each
# other, nor with av2_class_names.
print('Train\n')
av2_mean_depth_train = get_av2_mean_depths(av2_data_train)
print('Val\n')
av2_mean_depth_val = get_av2_mean_depths(av2_data_val)

Train

[(67.916, 49.937), (46.656, 33.858), (56.606, 40.822), (38.339, 28.203), (76.658, 50.601), (54.712, 40.125), (73.337, 48.471), (37.717, 30.003), (55.527, 41.531), (37.9, 28.919), (63.728, 45.957), (56.305, 41.804), (56.094, 42.341), (70.247, 51.26), (35.152, 24.898), (44.353, 30.747), (42.142, 32.147), (48.5, 36.594), (39.861, 28.927), (29.193, 19.689), (61.041, 46.659), (62.564, 47.949), (50.827, 39.056), (43.389, 31.502), (71.797, 52.945), (97.331, 50.898)]
Val

[(47.375, 34.212), (57.413, 41.079), (56.439, 40.694), (41.256, 31.124), (61.638, 38.635), (40.423, 28.701), (69.278, 48.788), (43.791, 30.888), (56.478, 41.139), (84.919, 53.948), (40.12, 30.18), (61.358, 46.853), (58.87, 50.133), (56.71, 49.369), (37.246, 22.41), (72.569, 51.317), (32.471, 24.265), (39.031, 26.855), (42.536, 28.846), (60.113, 42.958), (75.775, 54.038), (40.235, 30.475), (52.551, 37.513), (62.898, 4.456), (58.963, 39.803), (61.372, 36.554)]


In [27]:
def get_av2_mean_basedims(data):
    """Compute the per-class mean 3D box size from ``bbox_cam3d`` (AV2 layout).

    ``data['annotations']`` is iterated as a list of lists: each outer entry is
    itself an iterable of annotation dicts. For every annotation the entries
    ``bbox_cam3d[3]``, ``bbox_cam3d[4]`` and ``bbox_cam3d[5]`` are collected
    per ``category_name`` and averaged.

    Args:
        data: dict with a nested ``'annotations'`` list as described above.
            Every annotation must have a ``'category_name'`` and a
            ``'bbox_cam3d'`` sequence with at least six entries.

    Returns:
        dict mapping category name to a tuple
        ``(mean of index 3, mean of index 4, mean of index 5)``, each rounded
        to 2 decimals. Keys appear in order of first occurrence in the annotations.

    NOTE(review): indices 3/4/5 are labelled length/width/height here, as in
    the original code. The AV2 train output in this notebook (e.g. PEDESTRIAN
    (0.7, 0.77, 1.76)) looks consistent with that labelling, but confirm it
    against the AV2 annotation converter.
    """
    size_keys = ('length', 'width', 'height')  # bbox_cam3d indices 3, 4, 5
    classwise_dims = defaultdict(lambda: defaultdict(list))
    for annotations in data['annotations']:
        for anno in annotations:
            cat_dims = classwise_dims[anno['category_name']]
            cat_dims['length'].append(anno['bbox_cam3d'][3])
            cat_dims['width'].append(anno['bbox_cam3d'][4])
            cat_dims['height'].append(anno['bbox_cam3d'][5])

    mean_dims_dict = {}
    for cat, cat_dims in classwise_dims.items():
        # Only the means are returned; the previously computed (and discarded)
        # standard deviations are no longer evaluated.
        mean_dims_dict[cat] = tuple(np.round(np.mean(cat_dims[key]), 2) for key in size_keys)
    return mean_dims_dict

print('AV2 Train\n')
av2_tr_mean_dims = get_av2_mean_basedims(av2_data_train)
print(av2_tr_mean_dims)
# Re-order the per-class means to follow av2_class_names (KeyError if a class has no train annotation).
ordered_av2_tr_mean_dim_vals = [av2_tr_mean_dims[x] for x in av2_class_names]
# Val statistics are currently disabled:
# print('Val\n')
# av2_val_mean_dims = get_av2_mean_basedims(av2_data_val)

AV2 Train

{'BUS': (11.67, 2.97, 3.3), 'PEDESTRIAN': (0.7, 0.77, 1.76), 'REGULAR_VEHICLE': (4.48, 1.94, 1.73), 'WHEELCHAIR': (0.98, 0.76, 1.15), 'LARGE_VEHICLE': (6.66, 2.68, 3.05), 'SIGN': (0.44, 1.51, 2.6), 'BOX_TRUCK': (7.7, 2.81, 3.5), 'CONSTRUCTION_CONE': (0.36, 0.33, 0.88), 'CONSTRUCTION_BARREL': (0.69, 0.66, 1.09), 'STROLLER': (0.87, 0.65, 1.2), 'TRUCK': (9.79, 2.85, 3.35), 'VEHICULAR_TRAILER': (7.42, 2.87, 3.26), 'STOP_SIGN': (0.36, 0.98, 3.09), 'TRUCK_CAB': (7.64, 3.32, 3.65), 'WHEELED_DEVICE': (1.26, 0.6, 1.38), 'BOLLARD': (0.36, 0.31, 1.06), 'MOTORCYCLE': (1.86, 0.73, 1.34), 'MOTORCYCLIST': (1.26, 0.85, 1.63), 'BICYCLE': (1.65, 0.62, 1.23), 'DOG': (1.0, 0.45, 0.8), 'ARTICULATED_BUS': (10.54, 2.94, 3.29), 'SCHOOL_BUS': (8.93, 2.79, 3.1), 'BICYCLIST': (1.12, 0.81, 1.77), 'WHEELED_RIDER': (0.82, 0.73, 1.81), 'MOBILE_PEDESTRIAN_CROSSING_SIGN': (0.32, 0.99, 1.42), 'MESSAGE_BOARD_TRAILER': (3.25, 3.2, 3.75)}


In [28]:
ordered_av2_tr_mean_dim_vals

[(4.48, 1.94, 1.73),
 (0.7, 0.77, 1.76),
 (1.12, 0.81, 1.77),
 (1.26, 0.85, 1.63),
 (0.82, 0.73, 1.81),
 (0.36, 0.31, 1.06),
 (0.36, 0.33, 0.88),
 (0.44, 1.51, 2.6),
 (0.69, 0.66, 1.09),
 (0.36, 0.98, 3.09),
 (0.32, 0.99, 1.42),
 (6.66, 2.68, 3.05),
 (11.67, 2.97, 3.3),
 (7.7, 2.81, 3.5),
 (9.79, 2.85, 3.35),
 (7.42, 2.87, 3.26),
 (7.64, 3.32, 3.65),
 (8.93, 2.79, 3.1),
 (10.54, 2.94, 3.29),
 (3.25, 3.2, 3.75),
 (1.65, 0.62, 1.23),
 (1.86, 0.73, 1.34),
 (1.26, 0.6, 1.38),
 (0.98, 0.76, 1.15),
 (0.87, 0.65, 1.2),
 (1.0, 0.45, 0.8)]

In [29]:
av2_data_val['categories']

[{'id': 0, 'name': 'REGULAR_VEHICLE'},
 {'id': 1, 'name': 'PEDESTRIAN'},
 {'id': 2, 'name': 'BICYCLIST'},
 {'id': 3, 'name': 'MOTORCYCLIST'},
 {'id': 4, 'name': 'WHEELED_RIDER'},
 {'id': 5, 'name': 'BOLLARD'},
 {'id': 6, 'name': 'CONSTRUCTION_CONE'},
 {'id': 7, 'name': 'SIGN'},
 {'id': 8, 'name': 'CONSTRUCTION_BARREL'},
 {'id': 9, 'name': 'STOP_SIGN'},
 {'id': 10, 'name': 'MOBILE_PEDESTRIAN_CROSSING_SIGN'},
 {'id': 11, 'name': 'LARGE_VEHICLE'},
 {'id': 12, 'name': 'BUS'},
 {'id': 13, 'name': 'BOX_TRUCK'},
 {'id': 14, 'name': 'TRUCK'},
 {'id': 15, 'name': 'VEHICULAR_TRAILER'},
 {'id': 16, 'name': 'TRUCK_CAB'},
 {'id': 17, 'name': 'SCHOOL_BUS'},
 {'id': 18, 'name': 'ARTICULATED_BUS'},
 {'id': 19, 'name': 'MESSAGE_BOARD_TRAILER'},
 {'id': 20, 'name': 'BICYCLE'},
 {'id': 21, 'name': 'MOTORCYCLE'},
 {'id': 22, 'name': 'WHEELED_DEVICE'},
 {'id': 23, 'name': 'WHEELCHAIR'},
 {'id': 24, 'name': 'STROLLER'},
 {'id': 25, 'name': 'DOG'}]

In [30]:
# av2_data_val['annotations'][0]

In [31]:
# av2_data_val['images'][0]

## COCO Dataset Analysis

In [29]:
# COCO val2017 instance annotations (parsed with json in the next cell).
val_coco_anno_file = '/home/anishmad/msr_thesis/glip/coco_data/annotations/instances_val2017.json'


In [30]:
# Parse the COCO val annotations; the `with` block closes the file handle once the JSON is read.
with open(val_coco_anno_file, 'r') as coco_fh:
    data_val_coco = json.load(coco_fh)

In [42]:
data_val_coco['categories']

[{'supercategory': 'person', 'id': 1, 'name': 'person'},
 {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'},
 {'supercategory': 'vehicle', 'id': 3, 'name': 'car'},
 {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'},
 {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
 {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'},
 {'supercategory': 'vehicle', 'id': 7, 'name': 'train'},
 {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'},
 {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'},
 {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'},
 {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'},
 {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'},
 {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'},
 {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'},
 {'supercategory': 'animal', 'id': 16, 'name': 'bird'},
 {'supercategory': 'animal', 'id': 17, 'name': 'cat'},
 {'supercategory': 'animal', 'id': 18, 'name': 'dog'},

In [31]:
data_val_coco['annotations'][10]

{'segmentation': [[304.09,
   266.18,
   308.95,
   263.56,
   313.06,
   262.81,
   318.3,
   262.81,
   322.04,
   262.81,
   336.25,
   264.68,
   338.87,
   264.68,
   344.85,
   259.07,
   353.83,
   252.34,
   352.7,
   258.32,
   344.1,
   269.17,
   352.33,
   274.4,
   357.94,
   281.88,
   357.94,
   293.1,
   356.07,
   300.58,
   356.44,
   308.06,
   354.57,
   319.28,
   353.45,
   326.01,
   351.96,
   338.73,
   355.32,
   345.08,
   354.95,
   346.21,
   350.09,
   346.21,
   341.86,
   346.21,
   341.11,
   345.46,
   343.73,
   334.24,
   344.85,
   319.65,
   344.48,
   313.3,
   343.73,
   326.01,
   341.86,
   340.6,
   339.62,
   348.82,
   341.49,
   352.94,
   344.1,
   355.56,
   343.36,
   357.42,
   341.11,
   357.8,
   338.49,
   359.67,
   336.25,
   360.79,
   334.75,
   360.79,
   331.01,
   360.79,
   328.77,
   359.67,
   327.27,
   356.68,
   329.14,
   354.43,
   329.14,
   352.56,
   328.02,
   351.44,
   328.77,
   348.45,
   328.77,
   344.34,
   

In [32]:
data_val_coco['images'][1220]

{'license': 4,
 'file_name': '000000326542.jpg',
 'coco_url': 'http://images.cocodataset.org/val2017/000000326542.jpg',
 'height': 480,
 'width': 640,
 'date_captured': '2013-11-19 17:58:52',
 'flickr_url': 'http://farm4.staticflickr.com/3343/3451646252_bf663fdb0d_z.jpg',
 'id': 326542}

In [33]:
# Index the COCO image records by their image id for direct lookup.
img_id_mapping = {
    image_record['id']: image_record
    for image_record in data_val_coco['images']
}

In [36]:
data_val_coco['annotations'][0]

{'segmentation': [[510.66,
   423.01,
   511.72,
   420.03,
   510.45,
   416.0,
   510.34,
   413.02,
   510.77,
   410.26,
   510.77,
   407.5,
   510.34,
   405.16,
   511.51,
   402.83,
   511.41,
   400.49,
   510.24,
   398.16,
   509.39,
   397.31,
   504.61,
   399.22,
   502.17,
   399.64,
   500.89,
   401.66,
   500.47,
   402.08,
   499.09,
   401.87,
   495.79,
   401.98,
   490.59,
   401.77,
   488.79,
   401.77,
   485.39,
   398.58,
   483.9,
   397.31,
   481.56,
   396.35,
   478.48,
   395.93,
   476.68,
   396.03,
   475.4,
   396.77,
   473.92,
   398.79,
   473.28,
   399.96,
   473.49,
   401.87,
   474.56,
   403.47,
   473.07,
   405.59,
   473.39,
   407.71,
   476.68,
   409.41,
   479.23,
   409.73,
   481.56,
   410.69,
   480.4,
   411.85,
   481.35,
   414.93,
   479.86,
   418.65,
   477.32,
   420.03,
   476.04,
   422.58,
   479.02,
   422.58,
   480.29,
   423.01,
   483.79,
   419.93,
   486.66,
   416.21,
   490.06,
   415.57,
   492.18,
   416.85,

In [40]:
# Map every image file name to the indices (into data_val_coco['annotations']) of its
# annotations. All images are seeded with an empty list first, so images without any
# annotation still appear in the map (a plain defaultdict(list) would omit them).
img_anno_map = {img_data['file_name']: [] for img_data in data_val_coco['images']}

for idx, dict_info in enumerate(data_val_coco['annotations']):
    owner_file_name = img_id_mapping[dict_info['image_id']]['file_name']
    img_anno_map[owner_file_name].append(idx)


In [41]:
print(len(img_anno_map))

5000
