# Visualizing Bounding Boxes

This notebook is a simple demonstration of how to use Rekall and Vgrid to visualize bounding boxes on a video dataset. This is an abbreviated version of the [Rekall basics tutorial](https://github.com/scanner-research/rekall/blob/master/tutorials/Basics.ipynb), on a different dataset (Intel cyclist detection dataset). We'll be visualizing maskrcnn object detections as well as manually-annotated cyclist detections.

In [50]:
import urllib3, requests, json, os, pickle
import numpy as np

# HACK: I don't want to see certificate warnings from olimar.stanford.edu
urllib3.disable_warnings()

In [51]:
# location of the video metadata file.
# It is assumed that video data is located relative to this file.
VIDEO_COLLECTION_BASEURL = "http://olimar.stanford.edu/hdd/intel_self_driving/" 
VIDEO_METADATA_FILENAME = "intel_metadata.json"

In [52]:
# Grab the metadata (width, height, number of frames, FPS) of my video collection from olimar
req = requests.get(os.path.join(VIDEO_COLLECTION_BASEURL, VIDEO_METADATA_FILENAME), verify=False)
video_collection = sorted(req.json(), key=lambda vm: vm['filename'])
print("The video collection has %d videos." % len(video_collection))

The video collection has 5 videos.


In [53]:
# Names of the maskrcnn files
maskrcnn_bbox_files = [ 'maskrcnn_bboxes_0001.pkl', 'maskrcnn_bboxes_0002.pkl', 'maskrcnn_bboxes_0003.pkl',
                  'maskrcnn_bboxes_0004.pkl', 'maskrcnn_bboxes_0005.pkl' ]

# Names of the cyclist files
cyclist_bbox_files = [ 'cyclist_labels_0001.pkl', 'cyclist_labels_0002.pkl', 'cyclist_labels_0003.pkl',
                 'cyclist_labels_0004.pkl', 'cyclist_labels_0005.pkl' ]

In [54]:
# Now, load the bounding boxes from olimar.
# We load a list of lists of bboxes from olimar, and put those into a sorted list by bbox file name.
maskrcnn_bboxes = []
for bbox_file in maskrcnn_bbox_files:
    req = requests.get(os.path.join(VIDEO_COLLECTION_BASEURL, bbox_file), verify=False)
    maskrcnn_bboxes.append(pickle.loads(req.content))
    
cyclist_bboxes = []
for bbox_file in cyclist_bbox_files:
    req = requests.get(os.path.join(VIDEO_COLLECTION_BASEURL, bbox_file), verify=False)
    cyclist_bboxes.append(pickle.loads(req.content))

In [55]:
from rekall import Interval, IntervalSet, IntervalSetMapping, Bounds3D
from vgrid import VGridSpec, VideoMetadata, VideoBlockFormat
from vgrid_jupyter import VGridWidget

# Load the video metadata into VideoMetadata objects, using filename for the id
video_metadata = [
    VideoMetadata(v["filename"], v["filename"], v["fps"], v["num_frames"], v["width"], v["height"])
    for v in video_collection
]

# Load the maskrcnn bboxes into Rekall, using video id as key
# Units of Bounds are seconds for time, relative units for X and Y
maskrcnn_bboxes_ism = IntervalSetMapping({
    vm.id: IntervalSet([
        Interval(
            Bounds3D(
                t1 = frame_num / vm.fps,
                t2 = (frame_num + 1) / vm.fps,
                x1 = bbox[0] / vm.width,
                x2 = bbox[2] / vm.width,
                y1 = bbox[1] / vm.height,
                y2 = bbox[3] / vm.height
            ),
            payload = {
                'class': bbox[4],
                'score': bbox[5]
            }
        )
        for frame_num, bboxes_in_frame in enumerate(maskrcnn_frame_list)
        for bbox in bboxes_in_frame
    ])
    for vm, maskrcnn_frame_list in zip(video_metadata, maskrcnn_bboxes)
})

# Load the cyclist bboxes into Rekall, using video id as key
# Units of Bounds are seconds for time, relative units for X and Y
cyclist_bboxes_ism = IntervalSetMapping({
    vm.id: IntervalSet([
        Interval(
            Bounds3D(
                t1 = frame_num / vm.fps,
                t2 = (frame_num + 1) / vm.fps,
                x1 = bbox[0] / vm.width,
                x2 = bbox[2] / vm.width,
                y1 = bbox[1] / vm.height,
                y2 = bbox[3] / vm.height
            ),
            payload = {
                'class': bbox[4],
                'score': bbox[5]
            }
        )
        for frame_num, bboxes_in_frame in enumerate(cyclist_frame_list)
        for bbox in bboxes_in_frame
    ])
    for vm, cyclist_frame_list in zip(video_metadata, cyclist_bboxes)
})

In [56]:
cyclist_bboxes_ism.get_grouped_intervals().keys()

dict_keys(['0003.mp4', '0004.mp4', '0002.mp4', '0001.mp4', '0005.mp4'])

In [57]:
X_train_paths = np.load('/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/X_paths_train.npy')
X_dev_paths = np.load('/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/X_paths_dev_strided.npy')
X_test_paths = np.load('/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/X_paths_test_strided.npy')

In [58]:
noisy_train = np.load(
    '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/noisy_labels_tows_custom_train_tuned.npy')
noisy_dev = np.load(
    '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/noisy_labels_tows_custom_dev_tuned.npy')
noisy_test = np.load(
    '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/noisy_labels_tows_custom_test_tuned.npy')

In [59]:
dp_train = np.load(
    '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/noisy_labels_dp_train.npy')
dp_dev = np.load(
    '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/noisy_labels_dp_dev.npy')
dp_test = np.load(
    '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/noisy_labels_dp_test.npy')

In [60]:
X_train_paths[:10]

array(['/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/Autumn 1242x375/Autumn 0001 1242x375/images/000000001.png',
       '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/Autumn 1242x375/Autumn 0001 1242x375/images/000000002.png',
       '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/Autumn 1242x375/Autumn 0001 1242x375/images/000000003.png',
       '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/Autumn 1242x375/Autumn 0001 1242x375/images/000000004.png',
       '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/Autumn 1242x375/Autumn 0001 1242x375/images/000000005.png',
       '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/Autumn 1242x375/Autumn 0001 1242x375/images/000000006.png',
       '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/Autumn 1242x375/Autumn 0001 1242x375/images/000000007.png',
       '/dfs/scratch0/danfu/sequential_ws/2019-05-17_intel_self_driving/Autu

In [61]:
id_to_fps = {
    vm.id: vm.fps
    for vm in video_metadata
}

In [62]:
def path_to_id(path):
    return os.path.split(os.path.split(os.path.split(path)[0])[0])[1].split(' ')[1] + '.mp4'

In [63]:
def path_to_framenum(path):
    return int(os.path.basename(path).split('.')[0]) - 1

In [64]:
path_to_id(X_train_paths[0])

'0001.mp4'

In [65]:
path_to_framenum(X_train_paths[0])

0

In [66]:
train_parsed = [
    (path_to_id(path), path_to_framenum(path), label)
    for path, label in zip(X_train_paths, noisy_train)
]

In [67]:
dev_parsed = [
    (path_to_id(path), path_to_framenum(path), label)
    for path, label in zip(X_dev_paths, noisy_dev)
]

In [68]:
test_parsed = [
    (path_to_id(path), path_to_framenum(path), label)
    for path, label in zip(X_test_paths, noisy_test)
]

In [69]:
train_parsed_dp = [
    (path_to_id(path), path_to_framenum(path), label)
    for path, label in zip(X_train_paths, dp_train)
]
dev_parsed_dp = [
    (path_to_id(path), path_to_framenum(path), label)
    for path, label in zip(X_dev_paths, dp_dev)
]
test_parsed_dp = [
    (path_to_id(path), path_to_framenum(path), label)
    for path, label in zip(X_test_paths, dp_test)
]

In [70]:
print(len(train_parsed_dp), len(dev_parsed_dp), len(test_parsed_dp))

23925 3350 3345


In [71]:
len(train_parsed)

23925

In [72]:
len(dev_parsed)

3350

In [73]:
len(test_parsed)

3345

In [74]:
train_ids = set([
    tup[0]
    for tup in train_parsed
])


In [75]:
dev_ids = set([
    tup[0]
    for tup in dev_parsed
])


In [76]:
test_ids = set([
    tup[0]
    for tup in test_parsed
])

In [77]:
train_labels = IntervalSetMapping({
    vid_id: IntervalSet([
        Interval(Bounds3D(
            tup[1] / id_to_fps[tup[0]],
            (tup[1] + 1) / id_to_fps[tup[0]],
        ), payload = tup[2])
        for tup in train_parsed if tup[0] == vid_id
    ])
    for vid_id in train_ids
})


In [78]:
dev_labels = IntervalSetMapping({
    vid_id: IntervalSet([
        Interval(Bounds3D(
            tup[1] / id_to_fps[tup[0]],
            (tup[1] + 1) / id_to_fps[tup[0]],
        ), payload = tup[2])
        for tup in dev_parsed if tup[0] == vid_id
    ])
    for vid_id in dev_ids
})

In [79]:
test_labels = IntervalSetMapping({
    vid_id: IntervalSet([
        Interval(Bounds3D(
            tup[1] / id_to_fps[tup[0]],
            (tup[1] + 1) / id_to_fps[tup[0]],
        ), payload = tup[2])
        for tup in test_parsed if tup[0] == vid_id
    ])
    for vid_id in test_ids
})

In [80]:
train_labels_dp = IntervalSetMapping({
    vid_id: IntervalSet([
        Interval(Bounds3D(
            tup[1] / id_to_fps[tup[0]],
            (tup[1] + 1) / id_to_fps[tup[0]],
        ), payload = tup[2])
        for tup in train_parsed_dp if tup[0] == vid_id
    ])
    for vid_id in train_ids
})
dev_labels_dp = IntervalSetMapping({
    vid_id: IntervalSet([
        Interval(Bounds3D(
            tup[1] / id_to_fps[tup[0]],
            (tup[1] + 1) / id_to_fps[tup[0]],
        ), payload = tup[2])
        for tup in dev_parsed_dp if tup[0] == vid_id
    ])
    for vid_id in dev_ids
})
test_labels_dp = IntervalSetMapping({
    vid_id: IntervalSet([
        Interval(Bounds3D(
            tup[1] / id_to_fps[tup[0]],
            (tup[1] + 1) / id_to_fps[tup[0]],
        ), payload = tup[2])
        for tup in test_parsed_dp if tup[0] == vid_id
    ])
    for vid_id in test_ids
})

In [81]:
all_noisy_labels = train_labels.union(dev_labels).union(test_labels)

In [82]:
all_dp_labels = train_labels_dp.union(dev_labels_dp).union(test_labels_dp)

In [83]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist')),
        ('mask_rcnn_bounding_boxes', maskrcnn_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'bicycle')),
        ('noisy_labels', all_noisy_labels.filter(
            lambda intrvl: intrvl['payload'] >= 0.5)),
        ('persons', maskrcnn_bboxes_ism.filter(
             lambda intrvl: intrvl['payload']['class'] == 'person'))
    ]),
    video_endpoint = VIDEO_COLLECTION_BASEURL
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'data': b'x\x9c\xc4\xbd\xdb\x8e5\xdbq\x9d\xf9*\x06\xaf\ta\x9e\x0f}\xd9\xaf\xd0}\xd7\x1…

In [84]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist')),
        ('mask_rcnn_bounding_boxes', maskrcnn_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'bicycle')),
        ('persons', maskrcnn_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'person')),
#         ('noisy_labels', all_noisy_labels.filter(
#             lambda intrvl: intrvl['payload'] >= 0.99)),
#         ('dp_labels', all_dp_labels.filter(
#             lambda intrvl: intrvl['payload'] >= 0.5))
    ]),
    video_endpoint = VIDEO_COLLECTION_BASEURL
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'data': b'x\x9c\xcc\xbd\xdb\xae.\xcbq\x9d\xf9*\x02\xaf\t!\xcf\x87\xbe\xecW\xe8\xbe3\x0…

In [36]:
# Just the dev/test set
cyclist_bboxes_ism_dt = IntervalSetMapping({'0002.mp4': cyclist_bboxes_ism.get_grouped_intervals()['0002.mp4']})
maskrcnn_bboxes_ism_dt = IntervalSetMapping({'0002.mp4': maskrcnn_bboxes_ism.get_grouped_intervals()['0002.mp4']})
noisy_labels_ism_dt = IntervalSetMapping({'0002.mp4': all_noisy_labels.get_grouped_intervals()['0002.mp4']})
dp_labels_ism_dt = IntervalSetMapping({'0002.mp4': all_dp_labels.get_grouped_intervals()['0002.mp4']})

In [37]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist')),
        ('mask_rcnn_bounding_boxes', maskrcnn_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'bicycle')),
        ('noisy_labels', noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.5)),
#         ('persons', maskrcnn_bboxes_ism.filter(
#             lambda intrvl: intrvl['payload']['class'] == 'person'))
    ]),
    video_endpoint = VIDEO_COLLECTION_BASEURL
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'data': b'x\x9c\xd4\xbd\xdd\xae$\xbb\x91\xa5\xf9*\r]\x0b\x05\xe7?}.\xe7\x15f\xee\xe6\x…

In [38]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist')),
        ('mask_rcnn_bounding_boxes', maskrcnn_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'bicycle')),
        ('noisy_labels', noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.9)),
#         ('persons', maskrcnn_bboxes_ism.filter(
#             lambda intrvl: intrvl['payload']['class'] == 'person'))
    ]),
    video_endpoint = VIDEO_COLLECTION_BASEURL
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'data': b'x\x9c\xd4\xbd\xdd\xae$\xbb\x91\xa5\xf9*\r]\x0b\x05\xe7?}.\xe7\x15f\xee\xe6\x…

In [39]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist')),
        ('mask_rcnn_bounding_boxes', maskrcnn_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'bicycle')),
        ('noisy_labels', noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.3)),
#         ('persons', maskrcnn_bboxes_ism.filter(
#             lambda intrvl: intrvl['payload']['class'] == 'person'))
    ]),
    video_endpoint = VIDEO_COLLECTION_BASEURL
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'data': b'x\x9c\xd4\xbd\xdd\xae$\xbb\x91\xa5\xf9*\r]\x0b\x05\xe7?}.\xe7\x15f\xee\xe6\x…

In [40]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist')),
        ('mask_rcnn_bounding_boxes', maskrcnn_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'bicycle')),
        ('noisy_labels', noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99)),
#         ('persons', maskrcnn_bboxes_ism.filter(
#             lambda intrvl: intrvl['payload']['class'] == 'person'))
    ]),
    video_endpoint = VIDEO_COLLECTION_BASEURL
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'data': b'x\x9c\xd4\xbd\xdd\xae$\xbb\x91\xa5\xf9*\r]\x0b\x05\xe7?}.\xe7\x15f\xee\xe6\x…

In [41]:
# DATA PROGRAMMING
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist')),
        ('mask_rcnn_bounding_boxes', dp_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99999)),
        ('noisy_labels', noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99)),
        ('persons', maskrcnn_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'person'))
    ]),
    video_endpoint = VIDEO_COLLECTION_BASEURL
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'data': b'x\x9c\xd4\xbd\xdd\xae$\xbb\x91\xa5\xf9*\r]\x0b\x05\xe7?}.\xe7\x15f\xee\xe6\x…

In [42]:
from rekall.predicates import *
equal()

<function rekall.predicates.equal.<locals>.<lambda>(intrvl1, intrvl2)>

In [None]:
def tp(gt, test):
    return gt.join(
        test,
        predicate = Bounds3D.T(equal()), merge_op = lambda intrvl1, intrvl2: intrvl2, window=0.0
    ).group_by_axis(('t1', 't2'), Bounds3D(0, 1, 0, 1, 0, 1))

In [44]:
def print_precision_recall_f1(gt, test):
    true_positives = tp(gt, test).size()['0002.mp4']
    precision = true_positives / gt.group_by_axis(('t1', 't2'), Bounds3D(0, 1, 0, 1, 0, 1)).size()['0002.mp4']
    recall = true_positives / test.size()['0002.mp4']
    
    print('Precision: {}'.format(precision))
    print('Recall: {}'.format(recall))
    print('F1: {}'.format(2 * precision * recall / (precision + recall)))

In [45]:
print_precision_recall_f1(
    cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist'),
    noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99)
)

Precision: 0.6618705035971223
Recall: 0.9638095238095238
F1: 0.7848003101977511


In [46]:
print_precision_recall_f1(
    cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist'),
    maskrcnn_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'bicycle'
    ).group_by_axis(('t1', 't2'), Bounds3D(0, 1, 0, 1, 0, 1))
)

Precision: 0.5853499018966645
Recall: 0.9955506117908788
F1: 0.7372322899505765


In [47]:
coalesced_filtered_bikes = maskrcnn_bboxes_ism_dt.filter(
    lambda intrvl: intrvl['payload']['class'] == 'bicycle'
).group_by_axis(
    ('t1', 't2'), Bounds3D(0, 1, 0, 1, 0, 1)
).dilate(10).coalesce(
    ('t1', 't2'), lambda b1, b2: b1.span(b2)
).dilate(-10)

In [48]:
coalesced_filtered_bikes.filter_size(min_size=0.0).size()

{'0002.mp4': 9}

In [49]:
coalesced_filtered_bikes.size()

{'0002.mp4': 9}

In [50]:
print_precision_recall_f1(
    cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist'),
    coalesced_filtered_bikes.filter_size(min_size=1.1).split(lambda intrvl: IntervalSet([
        Interval(Bounds3D(i / 10, (i + 1) / 10))
        for i in range(int(intrvl['t1'] * 10), int(intrvl['t2'] * 10))
    ]))
)

Precision: 0.8227599738391105
Recall: 0.9063400576368876
F1: 0.8625299965718203


In [51]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist')),
        ('mask_rcnn_bounding_boxes', coalesced_filtered_bikes),#.filter_size(min_size=2.0)),
        ('noisy_labels', noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99)),
#         ('persons', maskrcnn_bboxes_ism.filter(
#             lambda intrvl: intrvl['payload']['class'] == 'person'))
    ]),
    video_endpoint = VIDEO_COLLECTION_BASEURL
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'data': b'x\x9c\xd4\xbd\xdd\xae$\xbb\x91\xa5\xf9*\r]\x0b\x05\xe7?}.\xe7\x15f\xee\xe6\x…

In [52]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist')),
        ('mask_rcnn_bounding_boxes', coalesced_filtered_bikes.filter_size(min_size=1.1)),
        ('noisy_labels', noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99)),
#         ('persons', maskrcnn_bboxes_ism.filter(
#             lambda intrvl: intrvl['payload']['class'] == 'person'))
    ]),
    video_endpoint = VIDEO_COLLECTION_BASEURL
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'data': b'x\x9c\xd4\xbd\xdd\xae$\xbb\x91\xa5\xf9*\r]\x0b\x05\xe7?}.\xe7\x15f\xee\xe6\x…

In [53]:
true_positives = tp(
    cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist'),
    noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99)
).size()['0002.mp4']

In [54]:
cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist').size()

{'0002.mp4': 3652}

In [55]:
tp_intrvls = tp(
    cyclist_bboxes_ism_dt.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist'),
    noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99)
)

In [57]:
tp_intrvls.size()['0002.mp4']

1012

In [58]:
true_positives = tp_intrvls.size()['0002.mp4']

In [59]:
true_positives / cyclist_bboxes_ism_dt.filter(
    lambda intrvl: intrvl['payload']['class'] == 'Cyclist'
).group_by_axis(('t1', 't2'), Bounds3D(0, 1, 0, 1, 0, 1)).size()['0002.mp4']

0.6618705035971223

In [119]:
true_positives / noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99).size()['0002.mp4']

0.984

In [118]:
noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99).size()['0002.mp4']

1000

In [95]:
true_positives / noisy_labels_ism_dt.filter(
            lambda intrvl: intrvl['payload'] >= 0.99).size()['0002.mp4']

2.715

# If the above videos are not displaying, download the videos locally

In [36]:
for video in video_metadata:
    print("Downloading {}".format(os.path.join(VIDEO_COLLECTION_BASEURL, video.path)))
    req = requests.get(os.path.join(VIDEO_COLLECTION_BASEURL, video.path), verify=False)
    with open(video.path, 'wb') as f:
        f.write(req.content)

Downloading http://olimar.stanford.edu/hdd/intel_self_driving/0001.mp4
Downloading http://olimar.stanford.edu/hdd/intel_self_driving/0002.mp4
Downloading http://olimar.stanford.edu/hdd/intel_self_driving/0003.mp4
Downloading http://olimar.stanford.edu/hdd/intel_self_driving/0004.mp4
Downloading http://olimar.stanford.edu/hdd/intel_self_driving/0005.mp4


Or alternatively, navigate to a folder of your choosing and run the following command:
```
wget --no-check-certificate  https://olimar.stanford.edu/hdd/intel_self_driving/0001.mp4 \
    https://olimar.stanford.edu/hdd/intel_self_driving/0002.mp4 \
    https://olimar.stanford.edu/hdd/intel_self_driving/0003.mp4 \
    https://olimar.stanford.edu/hdd/intel_self_driving/0004.mp4 \
    https://olimar.stanford.edu/hdd/intel_self_driving/0005.mp4
```

# You'll need to start up a local fileserver to serve the videos.

Navigate to your esperlight folder (or to the folder where you downloaded the videos, and run:

`python3 -m http.server [PORTNUMBER]`

Where `[PORTNUMBER]` is a port of your choosing. Be sure to specify it correctly in the `video_endpoint` argument below.

In [42]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('mask_rcnn_bounding_boxes', maskrcnn_bboxes_ism),
        ('cyclist_bounding_boxes', cyclist_bboxes_ism)
    ]),
    video_endpoint = 'http://localhost:8080'
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xc4\xbd\xcb\xb2lKr]\xf7+\xb4jK\xb4x?\xd8\xd4/H=5h\…

In [43]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism),
        ('mask_rcnn_bounding_boxes', maskrcnn_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'person'))
    ]),
    video_endpoint = 'http://localhost:8080'
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xd4\xbd\xdb\xce4\xcbu\x1c\xf8*\x06\xaf\r!\xcf\x87\…

In [54]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'Cyclist')),
        ('mask_rcnn_bounding_boxes', maskrcnn_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'bicycle')),
        ('persons', maskrcnn_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'person'))
    ]),
    video_endpoint = 'http://localhost:8080'
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xc4\xbd\xd9\x8ewIv\xdd\xf7*B_\x0bD\xcc\x83/\xf5\n\…

In [48]:
# Visualize Mask-RCNN bboxes, and cyclist bboxes
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('cyclist_bounding_boxes', cyclist_bboxes_ism),
        ('mask_rcnn_bounding_boxes', maskrcnn_bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'woman'))
    ]),
    video_endpoint = 'http://localhost:8080'
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xdc\xbd\xdd\xae$9r\xa5\xfb*\x83\xbe\x16\x04\xe7?9\…