In [2]:
%load_ext autoreload
%autoreload 2
from rekall import Interval, IntervalSet, IntervalSetMapping, Bounds3D
from rekall.predicates import *
from vgrid import VGridSpec, VideoMetadata, VideoBlockFormat, FlatFormat
from vgrid_jupyter import VGridWidget
import urllib3, requests, os
import pickle
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt

In [9]:
# Hack to disable warnings about olimar's certificate
urllib3.disable_warnings()

VIDEO_COLLECTION_BASEURL = "https://olimar.stanford.edu/hdd/parking_lot/user_study"
VIDEO_METADATA_FILENAME = 'metadata.json'

# Load video file metadata.
# The URL is joined with '/' explicitly: os.path.join uses the platform path
# separator and would produce backslashes (an invalid URL) on Windows.
# NOTE(security): verify=False skips TLS certificate validation; only
# acceptable because the server is trusted.
metadata_response = requests.get(
    '/'.join([VIDEO_COLLECTION_BASEURL, VIDEO_METADATA_FILENAME]),
    verify=False)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
metadata_response.raise_for_status()
video_metadata = [ VideoMetadata(v['filename'], id=v['id'], fps=v['fps'],
                                 num_frames=v['num_frames'], width=v['width'],
                                 height=v['height'])
                  for v in metadata_response.json() ]

VIDEO_FOLDER = 'videos'
BBOX_FOLDER = 'bboxes'
GT_FOLDER = 'empty_spaces'

In [16]:
def _fetch_video_list(filename):
    """Download a newline-separated list of video paths from the collection.

    Args:
        filename: Name of the text file under VIDEO_COLLECTION_BASEURL.

    Returns:
        List of the lines in the file (surrounding whitespace of the whole
        document stripped). An empty file yields an empty list.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Join with '/' rather than os.path.join so the URL is valid on every OS.
    response = requests.get(
        '/'.join([VIDEO_COLLECTION_BASEURL, filename]), verify=False)
    response.raise_for_status()
    # splitlines() also copes with '\r\n' line endings, which split('\n')
    # would leave attached to every entry and break the membership tests.
    return response.content.decode('utf-8').strip().splitlines()


dev_set = _fetch_video_list('dev.txt')
test_set = _fetch_video_list('test.txt')

In [18]:
def _videos_listed_in(paths):
    """Return the entries of video_metadata whose ``path`` appears in ``paths``."""
    return [meta for meta in video_metadata if meta.path in paths]


# Split the metadata into the development and test subsets.
video_metadata_dev = _videos_listed_in(dev_set)
video_metadata_test = _videos_listed_in(test_set)

In [21]:
# Keep only every `interval`-th entry of each per-video detection list.
interval = 30

# Download the pickled detections for every dev video.
# URLs are joined with '/' explicitly; os.path.join would use the platform
# separator and break on Windows.
# NOTE(security): pickle.loads can execute arbitrary code contained in the
# payload, and verify=False disables TLS validation. Only run this against a
# trusted server.
bbox_base_url = '/'.join([VIDEO_COLLECTION_BASEURL, BBOX_FOLDER])
bboxes = [
    pickle.loads(requests.get(
        # vm.path[:-4] drops the last four characters of the path
        # (presumably a '.mp4'-style extension -- TODO confirm).
        '/'.join([bbox_base_url, vm.path[:-4], 'bboxes.pkl']),
        verify=False
    ).content)
    for vm in video_metadata_dev
]

# Build one IntervalSet of detections per video id.
# Each bbox is read as (x1, y1, x2, y2, class, score); coordinates are
# normalized by the video's width/height.
# NOTE(review): the constant 30 in t1/t2 presumably reflects detections being
# stored once every 30 frames -- confirm against the data producer.
bboxes_ism = IntervalSetMapping({
    metadata.id: IntervalSet([
        Interval(
            Bounds3D(
                t1 = 30 * i / metadata.fps,
                t2 = 30 * (i + interval) / metadata.fps,
                x1 = bbox[0] / metadata.width,
                x2 = bbox[2] / metadata.width,
                y1 = bbox[1] / metadata.height,
                y2 = bbox[3] / metadata.height
            ),
            payload = { 'class': bbox[4], 'score': bbox[5] }
        )
        for i, frame in enumerate(bbox_frame_list) if (i % interval == 0)
        for bbox in frame
    ])
    for bbox_frame_list, metadata in tqdm(zip(bboxes, video_metadata_dev),
                                          total = len(bboxes))
})

100%|██████████| 2/2 [00:00<00:00, 64.13it/s]


In [22]:
# Download the pickled ground-truth empty spaces for every dev video.
# URLs are joined with '/' explicitly; os.path.join would use the platform
# separator and break on Windows.
# NOTE(security): pickle.loads can execute arbitrary code contained in the
# payload, and verify=False disables TLS validation. Only run this against a
# trusted server.
gt_base_url = '/'.join([VIDEO_COLLECTION_BASEURL, GT_FOLDER])
empty_parking_spaces = [
    pickle.loads(requests.get(
        # vm.path[:-4] drops the last four characters of the path
        # (presumably a '.mp4'-style extension -- TODO confirm).
        '/'.join([gt_base_url, vm.path[:-4], 'gt.pkl']),
        verify=False
    ).content)
    for vm in video_metadata_dev
]

# Build one IntervalSet of ground-truth empty spaces per video id, using the
# same time/space normalization as the detection intervals.
gt_ism = IntervalSetMapping({
    metadata.id: IntervalSet([
        Interval(
            Bounds3D(
                t1 = 30 * i / metadata.fps,
                t2 = 30 * (i + interval) / metadata.fps,
                x1 = bbox[0] / metadata.width,
                x2 = bbox[2] / metadata.width,
                y1 = bbox[1] / metadata.height,
                y2 = bbox[3] / metadata.height
            )
        )
        for i, frame in enumerate(space_frame_list) if (i % interval == 0)
        for bbox in frame
    ])
    # The progress-bar total is taken from the list actually being iterated,
    # not from the unrelated `bboxes` list of another cell.
    for space_frame_list, metadata in tqdm(zip(empty_parking_spaces, video_metadata_dev),
                                          total = len(empty_parking_spaces))
})

100%|██████████| 2/2 [00:00<00:00, 880.69it/s]


In [23]:
# Visualize car detections together with the ground-truth empty spaces.
car_bboxes = bboxes_ism.filter(
    lambda intrvl: intrvl['payload']['class'] == 'car'
)
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', car_bboxes),
        ('empty_spaces', gt_ism)
    ]),
    # Join the URL with '/' explicitly; os.path.join would insert a
    # backslash on Windows and produce an invalid endpoint.
    video_endpoint = '/'.join([VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER])
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xc4\x9d\xeb\x8ee\xc9q\x9d_\xc5\xe0o[\xc8\xfb\xc5?\…

In [24]:
# Show the keys of the detection mapping (built from metadata.id above).
bboxes_ism.get_grouped_intervals().keys()

dict_keys([2, 0])

In [27]:
# Smallest key of the detection mapping; iterating the grouped dict yields its keys.
first_key = sorted(bboxes_ism.get_grouped_intervals())[0]

In [28]:
def _is_car_at_start(intrvl):
    """Select detections labelled 'car' whose interval starts at t1 == 0.0."""
    return intrvl['t1'] == 0.0 and intrvl['payload']['class'] == 'car'


# Cars detected at the very start of the first video serve as candidate spots.
parking_spot_candidates = bboxes_ism[first_key].filter(_is_car_at_start)

In [29]:
# Coalesce each video's detections along the time axis, merging bounds with
# Bounds3D.span. The size() check in the next cell shows one interval per
# video; its 't2' is read later as the end time of the video.
video_durations = bboxes_ism.coalesce(
    axis=('t1', 't2'),
    bounds_merge_op=Bounds3D.span
)

In [30]:
# Sanity check: number of coalesced intervals per key.
video_durations.size()

{2: 1, 0: 1}

In [31]:
# Repeat every candidate spot across each video's timeline in steps of
# `interval`, keeping the spot's spatial bounds unchanged.
spots_by_video = {}
for key in video_durations:
    tiled_spots = []
    for parking_spot in parking_spot_candidates.get_intervals():
        # End of the timeline: 't2' of the video's first coalesced interval.
        timeline_end = int(video_durations[key].get_intervals()[0]['t2'])
        for t in range(0, timeline_end, interval):
            tiled_spots.append(Interval(Bounds3D(
                t1 = t,
                t2 = t + interval,
                x1 = parking_spot['x1'],
                x2 = parking_spot['x2'],
                y1 = parking_spot['y1'],
                y2 = parking_spot['y2']
            )))
    spots_by_video[key] = IntervalSet(tiled_spots)

parking_spots = IntervalSetMapping(spots_by_video)

In [32]:
# Number of tiled candidate-spot intervals per key.
parking_spots.size()

{0: 572, 2: 1222}

In [33]:
# A candidate spot counts as empty when no car detection overlaps it in time
# and space with an IoU of at least 0.25.
car_detections = bboxes_ism.filter(
    lambda intrvl: intrvl['payload']['class'] == 'car'
)
covered_by_car = and_pred(
    Bounds3D.T(overlaps()),
    Bounds3D.X(overlaps()),
    Bounds3D.Y(overlaps()),
    iou_at_least(0.25)
)
empty_parking_spots = parking_spots.minus(
    car_detections,
    predicate = covered_by_car,
    window = 0.0,
    progress_bar = True
)

100%|██████████| 2/2 [00:08<00:00,  3.45s/it]


In [34]:
# Visualize car detections together with the raw empty-spot predictions.
# Add ('empty_spaces', gt_ism) to the imaps list to overlay the ground truth.
car_bboxes = bboxes_ism.filter(
    lambda intrvl: intrvl['payload']['class'] == 'car'
)
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', car_bboxes),
        ('predicted_spaces', empty_parking_spots)
    ]),
    # Join the URL with '/' explicitly; os.path.join would insert a
    # backslash on Windows and produce an invalid endpoint.
    video_endpoint = '/'.join([VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER])
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xdc\xbd\xdd\x8em\xc9u\x9d\xf9*\x06\xaf\xdbB\xfc\xf…

In [35]:
# Get rid of predictions that are empty for less than four minutes
# Step 1: carry each prediction inside its own payload list so the original
# intervals survive the merge.
wrapped_spots = empty_parking_spots.map(
    lambda intrvl: Interval(intrvl['bounds'], [intrvl])
)
# Step 2: merge predictions along time when their boxes have IoU >= 0.5,
# concatenating the payload lists.
merged_runs = wrapped_spots.coalesce(
    ('t1', 't2'),
    bounds_merge_op = Bounds3D.span,
    payload_merge_op = lambda p1, p2: p1 + p2,
    predicate = iou_at_least(0.5)
)
# Step 3: keep only merged runs of size at least 240.
long_runs = merged_runs.filter_size(min_size=240)
# Step 4: expand the surviving runs back into their original intervals.
empty_spots_coalesced_and_filtered = long_runs.split(
    lambda intrvl: IntervalSet(intrvl['payload'])
)

In [36]:
# Visualize car detections together with the duration-filtered predictions.
# Add ('empty_spaces', gt_ism) to the imaps list to overlay the ground truth.
car_bboxes = bboxes_ism.filter(
    lambda intrvl: intrvl['payload']['class'] == 'car'
)
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', car_bboxes),
        ('predicted_spaces', empty_spots_coalesced_and_filtered)
    ]),
    # Join the URL with '/' explicitly; os.path.join would insert a
    # backslash on Windows and produce an invalid endpoint.
    video_endpoint = '/'.join([VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER])
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xc4\x9d\xeb\x8ee\xc9q\x9d_\xc5\x98\xdf\xb6\x90\xf7…

In [37]:
# Get rid of predictions that have empty predictions on both sides
def _half_width(spot):
    """Half of the spot's horizontal extent."""
    return (spot['x2'] - spot['x1']) / 2


def _touches_on_left(spot1, spot2):
    """spot2's right edge lies within half a spot width of spot1's left edge."""
    return abs(spot1['x1'] - spot2['x2']) < _half_width(spot1)


def _touches_on_right(spot1, spot2):
    """spot2's left edge lies within half a spot width of spot1's right edge."""
    return abs(spot1['x2'] - spot2['x1']) < _half_width(spot1)


def _same_time_same_box():
    """Fresh predicate: identical time bounds and box IoU of at least 0.5."""
    return and_pred(
        Bounds3D.T(equal()),
        iou_at_least(0.5)
    )


# Raw predictions that have a filtered prediction directly to their left.
spots_with_left = empty_parking_spots.filter_against(
    empty_spots_coalesced_and_filtered,
    predicate = and_pred(Bounds3D.T(equal()), _touches_on_left),
    window = 0.0,
    progress_bar = True
)
# Raw predictions that have a filtered prediction directly to their right.
spots_with_right = empty_parking_spots.filter_against(
    empty_spots_coalesced_and_filtered,
    predicate = and_pred(Bounds3D.T(equal()), _touches_on_right),
    window = 0.0,
    progress_bar = True
)
# Spots present in both sets, i.e. flanked on both sides.
spots_with_left_and_right = spots_with_left.filter_against(
    spots_with_right,
    predicate = _same_time_same_box(),
    window = 0.0,
    progress_bar = True
)
# Remove the flanked spots from the filtered predictions.
empty_spots_without_left_and_right = empty_spots_coalesced_and_filtered.minus(
    spots_with_left_and_right,
    predicate = _same_time_same_box(),
    window = 0.0,
    progress_bar = True
)

100%|██████████| 2/2 [00:00<00:00, 24.84it/s]
100%|██████████| 2/2 [00:00<00:00, 24.62it/s]
100%|██████████| 1/1 [00:00<00:00, 916.79it/s]
100%|██████████| 1/1 [00:00<00:00, 282.71it/s]


In [38]:
# Visualize car detections together with the neighbor-filtered predictions.
# Add ('empty_spaces', gt_ism) to the imaps list to overlay the ground truth.
car_bboxes = bboxes_ism.filter(
    lambda intrvl: intrvl['payload']['class'] == 'car'
)
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', car_bboxes),
        ('predicted_spaces', empty_spots_without_left_and_right)
    ]),
    # Join the URL with '/' explicitly; os.path.join would insert a
    # backslash on Windows and produce an invalid endpoint.
    video_endpoint = '/'.join([VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER])
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xc4\x9d\xeb\x8ee\xc9q\x9d_\xc5\x98\xdf\xb6\x90\xf7…

In [39]:
def _adjacent_to(spot1, spot2):
    """spot2 sits within half of spot1's width on spot1's right or left side."""
    half_width = (spot1['x2'] - spot1['x1']) / 2
    return (abs(spot1['x2'] - spot2['x1']) < half_width or
            abs(spot1['x1'] - spot2['x2']) < half_width)


# Remaining predictions that sit directly beside a spot removed in the
# previous step.
spots_next_to_removed_spots = empty_spots_without_left_and_right.filter_against(
    spots_with_left_and_right,
    predicate = and_pred(Bounds3D.T(equal()), _adjacent_to),
    window = 0.0,
    progress_bar = True
)
# Drop those as well to obtain the strictest prediction set.
same_time_same_box = and_pred(
    Bounds3D.T(equal()),
    iou_at_least(0.5)
)
empty_spots_stricter = empty_spots_without_left_and_right.minus(
    spots_next_to_removed_spots,
    predicate = same_time_same_box,
    window = 0.0,
    progress_bar = True
)

100%|██████████| 1/1 [00:00<00:00, 1567.96it/s]
100%|██████████| 1/1 [00:00<00:00, 183.72it/s]


# Compute Precision/Recall

In [40]:
def prf_from_counts(tp_count, fp_count, fn_count):
    """Compute precision, recall and F1 from raw match counts.

    Any metric whose denominator is zero (no predictions, no ground truth,
    or no true positives) is reported as 0.0 instead of raising
    ZeroDivisionError.

    Args:
        tp_count: Number of true positives.
        fp_count: Number of false positives.
        fn_count: Number of false negatives.

    Returns:
        Tuple ``(precision, recall, f1)``.
    """
    predicted_total = tp_count + fp_count
    actual_total = tp_count + fn_count
    precision = tp_count / predicted_total if predicted_total else 0.0
    recall = tp_count / actual_total if actual_total else 0.0
    pr_sum = precision + recall
    f1 = 2 * precision * recall / pr_sum if pr_sum else 0.0
    return precision, recall, f1


def compute_precision_recall(predictions, gt):
    """Score predicted intervals against ground-truth intervals.

    A prediction matches a ground-truth interval when both have equal time
    bounds and their boxes have an IoU of at least 0.5.

    Args:
        predictions: Mapping of predicted intervals per video.
        gt: Mapping of ground-truth intervals per video.

    Returns:
        Tuple ``(precision, recall, f1, tp_count, fp_count, fn_count)``.
        Metrics with a zero denominator are returned as 0.0.

    Side effects:
        Prints the per-video false-positive counts.
    """
    true_positives = predictions.filter_against(
        gt,
        predicate = and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        ),
        window = 0.0,
        progress_bar = True
    )
    # Predictions that matched nothing in the ground truth.
    false_positives = predictions.minus(
        true_positives,
        predicate = and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        ),
        window = 0.0,
        progress_bar = True
    )
    # Ground-truth intervals that no prediction matched.
    false_negatives = gt.minus(
        predictions,
        predicate = and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        ),
        window = 0.0,
        progress_bar = True
    )

    print(false_positives.size())

    tp_count = sum(true_positives.size().values())
    fp_count = sum(false_positives.size().values())
    fn_count = sum(false_negatives.size().values())

    precision, recall, f1 = prf_from_counts(tp_count, fp_count, fn_count)

    return precision, recall, f1, tp_count, fp_count, fn_count

In [41]:
# Baseline: candidate spots with overlapping car detections removed, no further filtering.
compute_precision_recall(empty_parking_spots, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 42.62it/s]
100%|██████████| 2/2 [00:00<00:00,  5.16it/s]
100%|██████████| 2/2 [00:00<00:00,  5.07it/s]

{0: 13, 2: 79}





(0.7097791798107256, 1.0, 0.8302583025830258, 225, 92, 0)

In [42]:
# After dropping predictions whose merged run is shorter than the minimum size.
compute_precision_recall(empty_spots_coalesced_and_filtered, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 50.70it/s]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s]
100%|██████████| 2/2 [00:00<00:00,  6.23it/s]

{2: 33}





(0.872093023255814, 1.0, 0.9316770186335404, 225, 33, 0)

In [43]:
# After additionally removing spots flanked by predictions on both sides.
compute_precision_recall(empty_spots_without_left_and_right, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 52.30it/s]
100%|██████████| 1/1 [00:00<00:00,  2.61it/s]
100%|██████████| 2/2 [00:00<00:00,  6.10it/s]

{2: 33}





(0.872093023255814, 1.0, 0.9316770186335404, 225, 33, 0)

In [44]:
# After additionally removing spots adjacent to the removed ones.
compute_precision_recall(empty_spots_stricter, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 54.11it/s]
100%|██████████| 1/1 [00:00<00:00,  2.98it/s]
100%|██████████| 2/2 [00:00<00:00,  6.26it/s]

{2: 30}





(0.8809523809523809, 0.9866666666666667, 0.930817610062893, 222, 30, 3)

# Calculate AP

In [45]:
def compute_ap(predictions, gt):
    """Average precision of ``predictions`` against ``gt`` with binary scores.

    Predictions and ground-truth intervals match when their time bounds are
    equal and their boxes have an IoU of at least 0.5. Every prediction
    (matched or not) gets score 1 and every missed ground-truth interval gets
    score 0; the result is sklearn's ``average_precision_score`` over those
    labels and scores.

    Args:
        predictions: Mapping of predicted intervals per video.
        gt: Mapping of ground-truth intervals per video.

    Returns:
        The average precision value returned by sklearn.
    """
    from sklearn.metrics import average_precision_score
    import numpy as np

    def matches():
        # Build a fresh predicate for every set operation.
        return and_pred(Bounds3D.T(equal()), iou_at_least(0.5))

    hits = predictions.filter_against(
        gt, predicate = matches(), window = 0.0, progress_bar = True)
    false_alarms = predictions.minus(
        hits, predicate = matches(), window = 0.0, progress_bar = True)
    misses = gt.minus(
        predictions, predicate = matches(), window = 0.0, progress_bar = True)

    n_tp, n_fp, n_fn = (
        sum(group.size().values()) for group in (hits, false_alarms, misses)
    )

    # Layout: [true positives | false negatives | false positives].
    labels = np.concatenate([np.ones(n_tp + n_fn), np.zeros(n_fp)])
    scores = np.concatenate([np.ones(n_tp), np.zeros(n_fn), np.ones(n_fp)])

    return average_precision_score(labels, scores)

In [46]:
# Baseline: candidate spots with overlapping car detections removed, no further filtering.
compute_ap(empty_parking_spots, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 42.84it/s]
100%|██████████| 2/2 [00:00<00:00,  5.29it/s]
100%|██████████| 2/2 [00:00<00:00,  5.14it/s]


0.7097791798107256

In [47]:
# After dropping predictions whose merged run is shorter than the minimum size.
compute_ap(empty_spots_coalesced_and_filtered, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 49.38it/s]
100%|██████████| 1/1 [00:00<00:00,  2.89it/s]
100%|██████████| 2/2 [00:00<00:00,  6.44it/s]


0.872093023255814

In [48]:
# After additionally removing spots flanked by predictions on both sides.
compute_ap(empty_spots_without_left_and_right, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 52.15it/s]
100%|██████████| 1/1 [00:00<00:00,  2.90it/s]
100%|██████████| 2/2 [00:00<00:00,  6.52it/s]


0.872093023255814

In [49]:
# After additionally removing spots adjacent to the removed ones.
compute_ap(empty_spots_stricter, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 53.05it/s]
100%|██████████| 1/1 [00:00<00:00,  3.04it/s]
100%|██████████| 2/2 [00:00<00:00,  6.46it/s]


0.8809710550887021

In [54]:
def random_score_arrays(tp_count, fn_count, fp_count, seed=None):
    """Build label and score arrays for average precision with random scores.

    The arrays are laid out as [true positives | false negatives | false
    positives]. Predictions (true and false positives) receive a uniform
    random score in [0.5, 1); missed ground truth receives score 0.

    Args:
        tp_count: Number of true positives.
        fn_count: Number of false negatives.
        fp_count: Number of false positives.
        seed: Optional seed. When given, scores come from a private
            ``numpy.random.RandomState(seed)`` and are reproducible. When
            None, the global ``numpy.random`` state is used.

    Returns:
        Tuple ``(y_true, y_scores)`` of 1-D float arrays of equal length.
    """
    import numpy as np

    rand = np.random.rand if seed is None else np.random.RandomState(seed).rand

    y_true = np.concatenate([
        np.ones(tp_count),
        np.ones(fn_count),
        np.zeros(fp_count)
    ])
    # Draw true-positive scores before false-positive scores so the sequence
    # of random draws has a fixed order.
    y_scores = np.concatenate([
        (rand(tp_count) + 1) / 2,
        np.zeros(fn_count),
        (rand(fp_count) + 1) / 2
    ])
    return y_true, y_scores


# Compute average precision, but use random scores from 0.5 to 1 for positives
def compute_ap_random_scores(prediction, gt, seed=None):
    """Average precision of ``prediction`` against ``gt`` with random scores.

    Predictions and ground-truth intervals match when their time bounds are
    equal and their boxes have an IoU of at least 0.5.

    Args:
        prediction: Mapping of predicted intervals per video.
        gt: Mapping of ground-truth intervals per video.
        seed: Optional seed for the random scores. Pass a fixed value to get
            the same result on every run; the default (None) keeps the
            previous unseeded behavior.

    Returns:
        The value of sklearn's ``average_precision_score``.
    """
    from sklearn.metrics import average_precision_score

    true_positives = prediction.filter_against(
        gt,
        predicate = and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        ),
        window = 0.0,
        progress_bar = True
    )
    # Predictions that matched nothing in the ground truth.
    false_positives = prediction.minus(
        true_positives,
        predicate = and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        ),
        window = 0.0,
        progress_bar = True
    )
    # Ground-truth intervals that no prediction matched.
    false_negatives = gt.minus(
        prediction,
        predicate = and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        ),
        window = 0.0,
        progress_bar = True
    )

    tp_count = sum(true_positives.size().values())
    fp_count = sum(false_positives.size().values())
    fn_count = sum(false_negatives.size().values())

    y_true, y_scores = random_score_arrays(
        tp_count, fn_count, fp_count, seed=seed)

    return average_precision_score(y_true, y_scores)

In [57]:
# Baseline predictions. Scores are drawn at random, so this value changes between runs.
compute_ap_random_scores(empty_parking_spots, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 42.34it/s]
100%|██████████| 2/2 [00:00<00:00,  4.97it/s]
100%|██████████| 2/2 [00:00<00:00,  5.09it/s]


0.7010711551187949

In [58]:
# Duration-filtered predictions. Scores are drawn at random, so this value changes between runs.
compute_ap_random_scores(empty_spots_coalesced_and_filtered, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 51.29it/s]
100%|██████████| 1/1 [00:00<00:00,  2.89it/s]
100%|██████████| 2/2 [00:00<00:00,  6.45it/s]


0.9211044268202175

In [61]:
# Neighbor-filtered predictions. Scores are drawn at random, so this value changes between runs.
compute_ap_random_scores(empty_spots_without_left_and_right, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 51.15it/s]
100%|██████████| 1/1 [00:00<00:00,  2.82it/s]
100%|██████████| 2/2 [00:00<00:00,  5.71it/s]


0.890337579963384

In [69]:
# Strictest predictions. Scores are drawn at random, so this value changes between runs.
compute_ap_random_scores(empty_spots_stricter, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 53.49it/s]
100%|██████████| 1/1 [00:00<00:00,  2.71it/s]
100%|██████████| 2/2 [00:00<00:00,  6.46it/s]


0.8763985981882618