In [32]:
%load_ext autoreload
%autoreload 2
from rekall import Interval, IntervalSet, IntervalSetMapping, Bounds3D
from rekall.predicates import *
from vgrid import VGridSpec, VideoMetadata, VideoBlockFormat, FlatFormat
from vgrid_jupyter import VGridWidget
import urllib3, requests, os
import pickle
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
# Hack to disable warnings about olimar's certificate.
# NOTE: the requests below pass verify=False, so the server's TLS certificate
# is not validated at all.
urllib3.disable_warnings()

VIDEO_COLLECTION_BASEURL = "https://olimar.stanford.edu/hdd/parking_lot/user_study"
VIDEO_METADATA_FILENAME = 'metadata.json'

# Load video file metadata.
# URL segments are joined with '/' instead of os.path.join: os.path.join uses
# the platform path separator (a backslash on Windows) and would then produce
# an invalid URL.
metadata_response = requests.get(
    "/".join((VIDEO_COLLECTION_BASEURL, VIDEO_METADATA_FILENAME)),
    verify=False
)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
metadata_response.raise_for_status()
video_metadata = [ VideoMetadata(v['filename'], id=v['id'], fps=v['fps'],
                                 num_frames=v['num_frames'], width=v['width'],
                                 height=v['height'])
                  for v in metadata_response.json() ]

# Sub-folders of the collection that later cells download from.
VIDEO_FOLDER = 'videos'
BBOX_FOLDER = 'bboxes'
GT_FOLDER = 'empty_spaces'

In [34]:
def _fetch_name_list(filename):
    """Download a text file from the video collection and return its lines.

    The body is decoded as UTF-8, stripped of surrounding whitespace and split
    on newlines. Raises requests.HTTPError if the server answers with an error
    status, so an error page is never mistaken for a list of names.
    """
    # Join with '/' rather than os.path.join, which would insert a backslash
    # on Windows and break the URL.
    response = requests.get(
        "/".join((VIDEO_COLLECTION_BASEURL, filename)), verify=False
    )
    response.raise_for_status()
    return response.content.decode('utf-8').strip().split('\n')


# Names listed in the collection's dev and test split files.
dev_set = _fetch_name_list('dev.txt')
test_set = _fetch_name_list('test.txt')

In [35]:
# Metadata of the videos that the rest of the notebook downloads and evaluates.
# NOTE(review): the variable is named `..._dev`, but the filter keeps videos
# whose path is in `test_set` (not `dev_set`). Confirm which split is intended
# before trusting the metrics computed below.
video_metadata_dev = [
    vm
    for vm in video_metadata if vm.path in test_set
]

In [36]:
# Sampling step: only every `interval`-th entry of each per-video list is kept,
# and each kept entry becomes an interval that spans `interval` entries.
interval = 30

# SECURITY NOTE: pickle.loads can execute arbitrary code contained in the
# payload, and these files are downloaded with TLS verification disabled
# (verify=False). Only run this against a server you trust.
bboxes = []
for vm in video_metadata_dev:
    # vm.path[:-4] drops the last four characters of the path (presumably a
    # file extension such as '.mp4' -- confirm against the metadata).
    # Segments are joined with '/' instead of os.path.join, which would use
    # the platform path separator (a backslash on Windows).
    bbox_response = requests.get(
        "/".join((VIDEO_COLLECTION_BASEURL, BBOX_FOLDER,
                  vm.path[:-4], 'bboxes.pkl')),
        verify=False
    )
    # Fail loudly on HTTP errors rather than unpickling an error page.
    bbox_response.raise_for_status()
    bboxes.append(pickle.loads(bbox_response.content))

# One IntervalSet per video id. Box coordinates are divided by the frame
# width/height; bbox[4] is stored as the class and bbox[5] as the score.
# NOTE(review): the factor 30 in the time conversion is hard-coded; presumably
# each list entry corresponds to 30 source frames -- confirm.
bboxes_ism = IntervalSetMapping({
    metadata.id: IntervalSet([
        Interval(
            Bounds3D(
                t1 = 30 * i / metadata.fps,
                t2 = 30 * (i + interval) / metadata.fps,
                x1 = bbox[0] / metadata.width,
                x2 = bbox[2] / metadata.width,
                y1 = bbox[1] / metadata.height,
                y2 = bbox[3] / metadata.height
            ),
            payload = { 'class': bbox[4], 'score': bbox[5] }
        )
        for i, frame in enumerate(bbox_frame_list) if (i % interval == 0)
        for bbox in frame
    ])
    for bbox_frame_list, metadata in tqdm(zip(bboxes, video_metadata_dev),
                                          total = len(bboxes))
})

100%|██████████| 2/2 [00:00<00:00, 125.40it/s]


In [37]:
# SECURITY NOTE: pickle.loads can execute arbitrary code contained in the
# payload, and these files are downloaded with TLS verification disabled
# (verify=False). Only run this against a server you trust.
empty_parking_spaces = []
for vm in video_metadata_dev:
    # vm.path[:-4] drops the last four characters of the path (presumably a
    # file extension -- confirm). Segments are joined with '/' instead of
    # os.path.join, which would insert a backslash on Windows.
    gt_response = requests.get(
        "/".join((VIDEO_COLLECTION_BASEURL, GT_FOLDER,
                  vm.path[:-4], 'gt.pkl')),
        verify=False
    )
    # Fail loudly on HTTP errors rather than unpickling an error page.
    gt_response.raise_for_status()
    empty_parking_spaces.append(pickle.loads(gt_response.content))

# One IntervalSet of ground-truth boxes per video id, built the same way as
# the detections: every `interval`-th entry, coordinates divided by the frame
# width/height. These intervals carry no payload.
gt_ism = IntervalSetMapping({
    metadata.id: IntervalSet([
        Interval(
            Bounds3D(
                t1 = 30 * i / metadata.fps,
                t2 = 30 * (i + interval) / metadata.fps,
                x1 = bbox[0] / metadata.width,
                x2 = bbox[2] / metadata.width,
                y1 = bbox[1] / metadata.height,
                y2 = bbox[3] / metadata.height
            )
        )
        for i, frame in enumerate(space_frame_list) if (i % interval == 0)
        for bbox in frame
    ])
    # The progress-bar total is the length of the list actually iterated
    # (it previously used len(bboxes), a different list).
    for space_frame_list, metadata in tqdm(zip(empty_parking_spaces, video_metadata_dev),
                                          total = len(empty_parking_spaces))
})

100%|██████████| 2/2 [00:00<00:00, 1631.71it/s]


In [38]:
# Visualize the 'car' detections together with the ground-truth empty spaces.
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'car'
        )),
        ('empty_spaces', gt_ism)
    ]),
    # Joined with '/' rather than os.path.join: this is a URL, and
    # os.path.join would insert a backslash on Windows.
    video_endpoint = "/".join((VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER))
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xcc\xbd\xdd\xaee\xcbq\xa5\xf7*\r^\xdbF\xfe\xff\xf8…

In [39]:
# Show which video ids have bounding-box intervals loaded.
bboxes_ism.get_grouped_intervals().keys()

dict_keys([3, 1])

In [40]:
# Use the smallest video id as the reference video.
sorted_video_ids = sorted(bboxes_ism.get_grouped_intervals())
first_key = sorted_video_ids[0]

In [41]:
def _is_initial_car(intrvl):
    """Return True for 'car' detections whose interval starts at time 0.0."""
    return intrvl['t1'] == 0.0 and intrvl['payload']['class'] == 'car'


# The cars detected at the very start of the reference video are taken as the
# candidate parking-spot locations.
parking_spot_candidates = bboxes_ism[first_key].filter(_is_initial_car)

In [42]:
# Merge each video's detections along the time axis, taking the span of the
# merged bounds; the resulting t2 is used later as the video's duration.
time_axis = ('t1', 't2')
video_durations = bboxes_ism.coalesce(time_axis, bounds_merge_op=Bounds3D.span)

In [43]:
# Number of coalesced intervals per video id.
video_durations.size()

{3: 1, 1: 1}

In [44]:
def _window_starts(key):
    """Start times 0, interval, 2*interval, ... below the t2 of video `key`."""
    last_t2 = video_durations[key].get_intervals()[0]['t2']
    return range(0, int(last_t2), interval)


def _replicated_spots(key):
    """Copy every candidate spot into each `interval`-long window of video `key`."""
    return IntervalSet([
        Interval(Bounds3D(
            t1 = start,
            t2 = start + interval,
            x1 = candidate['x1'],
            x2 = candidate['x2'],
            y1 = candidate['y1'],
            y2 = candidate['y2']
        ))
        for candidate in parking_spot_candidates.get_intervals()
        for start in _window_starts(key)
    ])


# The candidate spots come from the reference video but are replicated for
# every video in `video_durations`.
parking_spots = IntervalSetMapping({
    key: _replicated_spots(key)
    for key in video_durations
})

In [45]:
# Number of generated parking-spot intervals per video id.
parking_spots.size()

{1: 572, 3: 1222}

In [46]:
# A parking spot is predicted empty when subtracting the car detections leaves
# it in place. A car is subtracted from a spot only if the two overlap in
# time, X and Y and their IoU is at least 0.25.
car_detections = bboxes_ism.filter(
    lambda intrvl: intrvl['payload']['class'] == 'car'
)
car_occupies_spot = and_pred(
    Bounds3D.T(overlaps()),
    Bounds3D.X(overlaps()),
    Bounds3D.Y(overlaps()),
    iou_at_least(0.25)
)
empty_parking_spots = parking_spots.minus(
    car_detections,
    predicate = car_occupies_spot,
    window = 0.0,
    progress_bar = True
)

100%|██████████| 2/2 [00:09<00:00,  3.52s/it]


In [47]:
# Visualize the 'car' detections together with the raw empty-spot predictions.
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'car'
        )),
        # Uncomment to overlay the ground truth as well:
#         ('empty_spaces', gt_ism),
        ('predicted_spaces', empty_parking_spots)
    ]),
    # Joined with '/' rather than os.path.join: this is a URL, and
    # os.path.join would insert a backslash on Windows.
    video_endpoint = "/".join((VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER))
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xdc\x9d\xdd\x8em\xc9q\x9c_\xc5\xe0\xb5m\xd4\xff\x8…

In [48]:
# Drop predictions for spots that stay empty for less than four minutes.
# Step 1: give every prediction a payload holding a one-element list with the
# original interval, so the originals survive the merge below.
wrapped_predictions = empty_parking_spots.map(
    lambda intrvl: Interval(intrvl['bounds'], [intrvl])
)
# Step 2: merge predictions along time when their IoU is at least 0.5,
# concatenating the payload lists.
merged_runs = wrapped_predictions.coalesce(
    ('t1', 't2'),
    bounds_merge_op = Bounds3D.span,
    payload_merge_op = lambda p1, p2: p1 + p2,
    predicate = iou_at_least(0.5)
)
# Step 3: keep only merged runs of size at least 240 (the four minutes above).
long_runs = merged_runs.filter_size(min_size=240)
# Step 4: expand each surviving run back into its original intervals.
empty_spots_coalesced_and_filtered = long_runs.split(
    lambda intrvl: IntervalSet(intrvl['payload'])
)

In [49]:
# Visualize the 'car' detections together with the predictions that survived
# the coalesce-and-filter step.
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'car'
        )),
        # Uncomment to overlay the ground truth as well:
#         ('empty_spaces', gt_ism),
        ('predicted_spaces', empty_spots_coalesced_and_filtered)
    ]),
    # Joined with '/' rather than os.path.join: this is a URL, and
    # os.path.join would insert a backslash on Windows.
    video_endpoint = "/".join((VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER))
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xcc\x9d\xdd\x8e-\xcbq\x9c_E\xe0\xb5m\xd4\xff\x8f/\…

In [50]:
# Get rid of predictions that have empty predictions on both sides
def _same_time_and_box():
    """Build the predicate 'equal time bounds and IoU of at least 0.5'."""
    return and_pred(
        Bounds3D.T(equal()),
        iou_at_least(0.5)
    )


def _neighbor_on_left(spot1, spot2):
    """spot2's x2 edge is within half of spot1's width of spot1's x1 edge."""
    half_width = (spot1['x2'] - spot1['x1']) / 2
    return abs(spot1['x1'] - spot2['x2']) < half_width


def _neighbor_on_right(spot1, spot2):
    """spot2's x1 edge is within half of spot1's width of spot1's x2 edge."""
    half_width = (spot1['x2'] - spot1['x1']) / 2
    return abs(spot1['x2'] - spot2['x1']) < half_width


# Options shared by every join in this cell.
join_options = {'window': 0.0, 'progress_bar': True}

# Raw predictions that have a filtered prediction next to them at the same time.
spots_with_left = empty_parking_spots.filter_against(
    empty_spots_coalesced_and_filtered,
    predicate = and_pred(Bounds3D.T(equal()), _neighbor_on_left),
    **join_options
)
spots_with_right = empty_parking_spots.filter_against(
    empty_spots_coalesced_and_filtered,
    predicate = and_pred(Bounds3D.T(equal()), _neighbor_on_right),
    **join_options
)
# Spots that appear in both of the sets above.
spots_with_left_and_right = spots_with_left.filter_against(
    spots_with_right,
    predicate = _same_time_and_box(),
    **join_options
)
# Remove those spots from the filtered predictions.
empty_spots_without_left_and_right = empty_spots_coalesced_and_filtered.minus(
    spots_with_left_and_right,
    predicate = _same_time_and_box(),
    **join_options
)

100%|██████████| 2/2 [00:00<00:00, 46.00it/s]
100%|██████████| 2/2 [00:00<00:00, 45.85it/s]
100%|██████████| 1/1 [00:00<00:00, 3320.91it/s]
100%|██████████| 1/1 [00:00<00:00, 1430.04it/s]


In [51]:
# Visualize the 'car' detections together with the predictions left after
# removing spots that have empty predictions on both sides.
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'car'
        )),
        # Uncomment to overlay the ground truth as well:
#         ('empty_spaces', gt_ism),
        ('predicted_spaces', empty_spots_without_left_and_right)
    ]),
    # Joined with '/' rather than os.path.join: this is a URL, and
    # os.path.join would insert a backslash on Windows.
    video_endpoint = "/".join((VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER))
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xcc\x9d\xdd\x8e-\xcbq\x9c_E\xe0\xb5m\xd4\xff\x8f/\…

In [52]:
def _adjacent_on_either_side(spot1, spot2):
    """spot2 lies within half of spot1's width of spot1's x2 edge or x1 edge."""
    half_width = (spot1['x2'] - spot1['x1']) / 2
    if abs(spot1['x2'] - spot2['x1']) < half_width:
        return True
    return abs(spot1['x1'] - spot2['x2']) < half_width


# Options shared by both joins in this cell.
shared_kwargs = {'window': 0.0, 'progress_bar': True}

# Remaining predictions that sit next to a spot removed in the previous step.
spots_next_to_removed_spots = empty_spots_without_left_and_right.filter_against(
    spots_with_left_and_right,
    predicate = and_pred(Bounds3D.T(equal()), _adjacent_on_either_side),
    **shared_kwargs
)
# Stricter prediction set: drop those neighbours as well.
matches_neighbour = and_pred(
    Bounds3D.T(equal()),
    iou_at_least(0.5)
)
empty_spots_stricter = empty_spots_without_left_and_right.minus(
    spots_next_to_removed_spots,
    predicate = matches_neighbour,
    **shared_kwargs
)

100%|██████████| 1/1 [00:00<00:00, 5053.38it/s]
100%|██████████| 1/1 [00:00<00:00, 1373.83it/s]


# Compute Precision/Recall

In [53]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def compute_precision_recall(predictions, gt):
    """Score `predictions` against `gt` and return precision, recall and F1.

    Predictions and ground truth are matched with the predicate "equal time
    bounds and IoU of at least 0.5":

    * true positives  -- predictions kept by filtering against `gt`;
    * false positives -- predictions left after subtracting the true positives;
    * false negatives -- ground truth left after subtracting the predictions.

    Args:
        predictions: mapping of predicted intervals (must provide
            `filter_against`, `minus` and `size`).
        gt: mapping of ground-truth intervals with the same interface.

    Returns:
        Tuple `(precision, recall, f1, tp_count, fp_count, fn_count)`.
        A ratio whose denominator is zero (no predictions, no ground truth,
        or no true positives at all) is reported as 0.0 instead of raising
        ZeroDivisionError.

    Side effects:
        Prints the per-video false-positive counts.
    """
    def match_predicate():
        # Built fresh for every join, as in the original code.
        return and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        )

    true_positives = predictions.filter_against(
        gt,
        predicate = match_predicate(),
        window = 0.0,
        progress_bar = True
    )
    false_positives = predictions.minus(
        true_positives,
        predicate = match_predicate(),
        window = 0.0,
        progress_bar = True
    )
    false_negatives = gt.minus(
        predictions,
        predicate = match_predicate(),
        window = 0.0,
        progress_bar = True
    )

    # Per-video breakdown of the false positives, useful when debugging.
    print(false_positives.size())

    tp_count = sum(true_positives.size().values())
    fp_count = sum(false_positives.size().values())
    fn_count = sum(false_negatives.size().values())

    # Guard every division: empty inputs would otherwise divide by zero.
    precision = _safe_ratio(tp_count, tp_count + fp_count)
    recall = _safe_ratio(tp_count, tp_count + fn_count)
    f1 = _safe_ratio(2 * precision * recall, precision + recall)

    return precision, recall, f1, tp_count, fp_count, fn_count

In [54]:
# Score the raw (unfiltered) empty-spot predictions against the ground truth.
compute_precision_recall(empty_parking_spots, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 73.21it/s]
100%|██████████| 2/2 [00:00<00:00, 10.08it/s]
100%|██████████| 2/2 [00:00<00:00, 13.54it/s]

{1: 19, 3: 57}





(0.6591928251121076, 1.0, 0.7945945945945947, 147, 76, 0)

In [55]:
# Score the predictions left after coalescing and dropping short-lived ones.
compute_precision_recall(empty_spots_coalesced_and_filtered, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 121.92it/s]
100%|██████████| 1/1 [00:00<00:00,  7.03it/s]
100%|██████████| 2/2 [00:00<00:00, 15.89it/s]

{3: 14}





(0.9130434782608695, 1.0, 0.9545454545454545, 147, 14, 0)

In [56]:
# Score the predictions left after removing spots with empty predictions on both sides.
compute_precision_recall(empty_spots_without_left_and_right, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 122.94it/s]
100%|██████████| 1/1 [00:00<00:00,  7.06it/s]
100%|██████████| 2/2 [00:00<00:00, 15.79it/s]

{3: 14}





(0.9130434782608695, 1.0, 0.9545454545454545, 147, 14, 0)

In [57]:
# Score the strictest set, which also drops spots next to the removed ones.
compute_precision_recall(empty_spots_stricter, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 122.42it/s]
100%|██████████| 1/1 [00:00<00:00,  6.20it/s]
100%|██████████| 2/2 [00:00<00:00, 16.00it/s]

{3: 14}





(0.9130434782608695, 1.0, 0.9545454545454545, 147, 14, 0)

# Calculate AP

In [58]:
def compute_ap(predictions, gt):
    """Return sklearn's average-precision score of `predictions` against `gt`.

    Predictions and ground truth are matched with the predicate "equal time
    bounds and IoU of at least 0.5". Each true positive and each false
    positive is given score 1; each false negative (missed ground truth) is
    given score 0. True positives and false negatives are labelled 1, false
    positives 0.

    NOTE(review): the scores are binary rather than detector confidences, so
    the curve has very few operating points; in the runs recorded in this
    notebook the result equals the precision.
    """
    from sklearn.metrics import average_precision_score
    import numpy as np

    def matching():
        # Fresh predicate per join, mirroring one construction per call.
        return and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        )

    join_opts = {'window': 0.0, 'progress_bar': True}

    hits = predictions.filter_against(gt, predicate = matching(), **join_opts)
    spurious = predictions.minus(hits, predicate = matching(), **join_opts)
    missed = gt.minus(predictions, predicate = matching(), **join_opts)

    n_hit = sum(hits.size().values())
    n_spurious = sum(spurious.size().values())
    n_missed = sum(missed.size().values())

    # Layout of both vectors: [true positives | false negatives | false positives].
    group_sizes = [n_hit, n_missed, n_spurious]
    y_true = np.repeat([1.0, 1.0, 0.0], group_sizes)
    y_scores = np.repeat([1.0, 0.0, 1.0], group_sizes)

    return average_precision_score(y_true, y_scores)

In [59]:
# AP of the raw (unfiltered) empty-spot predictions.
compute_ap(empty_parking_spots, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 75.35it/s]
100%|██████████| 2/2 [00:00<00:00, 10.39it/s]
100%|██████████| 2/2 [00:00<00:00, 13.33it/s]


0.6591928251121076

In [60]:
# AP of the predictions left after coalescing and dropping short-lived ones.
compute_ap(empty_spots_coalesced_and_filtered, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 122.67it/s]
100%|██████████| 1/1 [00:00<00:00,  6.41it/s]
100%|██████████| 2/2 [00:00<00:00, 15.60it/s]


0.9130434782608695

In [61]:
# AP of the predictions left after removing spots with empty predictions on both sides.
compute_ap(empty_spots_without_left_and_right, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 122.48it/s]
100%|██████████| 1/1 [00:00<00:00,  7.11it/s]
100%|██████████| 2/2 [00:00<00:00, 12.80it/s]


0.9130434782608695

In [62]:
# AP of the strictest set, which also drops spots next to the removed ones.
compute_ap(empty_spots_stricter, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 124.53it/s]
100%|██████████| 1/1 [00:00<00:00,  7.03it/s]
100%|██████████| 2/2 [00:00<00:00, 15.97it/s]


0.9130434782608695