In [1]:
%load_ext autoreload
%autoreload 2
from rekall import Interval, IntervalSet, IntervalSetMapping, Bounds3D
from rekall.predicates import *
from vgrid import VGridSpec, VideoMetadata, VideoBlockFormat, FlatFormat
from vgrid_jupyter import VGridWidget
import urllib3, requests, os
import pickle
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt

In [2]:
# Hack to disable warnings about olimar's certificate
# (the requests in this notebook pass verify=False, i.e. skip TLS verification).
urllib3.disable_warnings()

VIDEO_COLLECTION_BASEURL = "https://olimar.stanford.edu/hdd/parking_lot/user_study"
VIDEO_METADATA_FILENAME = 'metadata.json'

# Load video file metadata.
# The URL is assembled with an explicit '/' instead of os.path.join: the latter
# inserts the platform's path separator (a backslash on Windows), which would
# produce an invalid URL.
video_metadata = [
    VideoMetadata(v['filename'], id=v['id'], fps=v['fps'],
                  num_frames=v['num_frames'], width=v['width'],
                  height=v['height'])
    for v in requests.get(
        '/'.join((VIDEO_COLLECTION_BASEURL, VIDEO_METADATA_FILENAME)),
        verify=False
    ).json()
]

# Sub-folders of the collection: videos, per-video `bboxes.pkl` files and
# per-video `gt.pkl` files respectively.
VIDEO_FOLDER = 'videos'
BBOX_FOLDER = 'bboxes'
GT_FOLDER = 'empty_spaces'

In [3]:
# Download the dev and test split listings (one entry per line; the entries
# are compared against VideoMetadata.path further down).
# URLs are joined with '/' because os.path.join is platform-dependent and
# would emit backslashes on Windows.
dev_set = requests.get(
    '/'.join((VIDEO_COLLECTION_BASEURL, 'dev.txt')), verify=False
).content.decode('utf-8').strip().split('\n')
test_set = requests.get(
    '/'.join((VIDEO_COLLECTION_BASEURL, 'test.txt')), verify=False
).content.decode('utf-8').strip().split('\n')

In [4]:
# Keep only the videos whose path appears in the test split listing.
# NOTE(review): the variable is named `_dev` but is filtered with `test_set`
# -- presumably this notebook evaluates on the test split; confirm.
video_metadata_dev = [meta for meta in video_metadata if meta.path in test_set]

In [5]:
# Stride over the entries of each per-video detection list: only every
# `interval`-th entry is turned into intervals below.
interval = 30

# Download one pickled detection list per selected video.
# NOTE(security): pickle.loads can execute arbitrary code contained in the
# payload, and verify=False disables TLS certificate checks -- only run this
# against a host you trust.
# The URL is joined with '/' (os.path.join would use '\\' on Windows).
# vm.path[:-4] drops the last four characters of the path (presumably a
# 4-character extension such as '.mp4' -- confirm).
bboxes = [
    pickle.loads(requests.get(
        '/'.join((VIDEO_COLLECTION_BASEURL, BBOX_FOLDER,
                  vm.path[:-4], 'bboxes.pkl')),
        verify=False
    ).content)
    for vm in video_metadata_dev
]

# Build video id -> IntervalSet of detections.
# Each bbox is read as (x1, y1, x2, y2, class, score); coordinates are divided
# by the frame width/height, class and score go into the payload.
# NOTE(review): the constant 30 in t1/t2 presumably converts a list index into
# a frame number before dividing by fps -- TODO confirm.
bboxes_ism = IntervalSetMapping({
    metadata.id: IntervalSet([
        Interval(
            Bounds3D(
                t1 = 30 * i / metadata.fps,
                t2 = 30 * (i + interval) / metadata.fps,
                x1 = bbox[0] / metadata.width,
                x2 = bbox[2] / metadata.width,
                y1 = bbox[1] / metadata.height,
                y2 = bbox[3] / metadata.height
            ),
            payload = { 'class': bbox[4], 'score': bbox[5] }
        )
        for i, frame in enumerate(bbox_frame_list) if (i % interval == 0)
        for bbox in frame
    ])
    for bbox_frame_list, metadata in tqdm(zip(bboxes, video_metadata_dev),
                                          total = len(bboxes))
})

100%|██████████| 2/2 [00:00<00:00, 10.26it/s]


In [6]:
# Download one pickled ground-truth list of empty spaces per selected video.
# NOTE(security): pickle.loads can execute arbitrary code contained in the
# payload, and verify=False disables TLS certificate checks -- only run this
# against a host you trust.
# The URL is joined with '/' (os.path.join would use '\\' on Windows).
empty_parking_spaces = [
    pickle.loads(requests.get(
        '/'.join((VIDEO_COLLECTION_BASEURL, GT_FOLDER,
                  vm.path[:-4], 'gt.pkl')),
        verify=False
    ).content)
    for vm in video_metadata_dev
]

# Build video id -> IntervalSet of ground-truth empty spaces, sampled with the
# same `interval` stride and time/coordinate conversion as the detections.
# Each box is read as (x1, y1, x2, y2); no payload is attached.
gt_ism = IntervalSetMapping({
    metadata.id: IntervalSet([
        Interval(
            Bounds3D(
                t1 = 30 * i / metadata.fps,
                t2 = 30 * (i + interval) / metadata.fps,
                x1 = bbox[0] / metadata.width,
                x2 = bbox[2] / metadata.width,
                y1 = bbox[1] / metadata.height,
                y2 = bbox[3] / metadata.height
            )
        )
        for i, frame in enumerate(space_frame_list) if (i % interval == 0)
        for bbox in frame
    ])
    # The progress-bar total is the length of the list actually iterated here
    # (it used to be copied from the detection cell as len(bboxes)).
    for space_frame_list, metadata in tqdm(zip(empty_parking_spaces, video_metadata_dev),
                                           total = len(empty_parking_spaces))
})

100%|██████████| 2/2 [00:00<00:00, 136.66it/s]


In [7]:
# Show the car detections together with the ground-truth empty spaces.
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'car'
        )),
        ('empty_spaces', gt_ism)
    ]),
    # Joined with '/': os.path.join would use '\\' on Windows and break the URL.
    video_endpoint = '/'.join((VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER))
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xcc\xbd\xdd\xaee\xcbq\xa5\xf7*\r^\xdbF\xfe\xff\xf8…

In [8]:
# List the video ids for which detections were loaded.
bboxes_ism.get_grouped_intervals().keys()

dict_keys([3, 1])

In [9]:
# Smallest video id among the loaded detections; that video supplies the
# parking-spot candidates below.
first_key = sorted(bboxes_ism.get_grouped_intervals().keys())[0]

In [10]:
# Candidate parking spots: the 'car' detections whose interval starts at
# t1 == 0.0 in the video with the smallest id.
# NOTE(review): presumably every spot is occupied at the start of that video,
# so its car boxes outline the spots -- confirm.
parking_spot_candidates = bboxes_ism[first_key].filter(
    lambda intrvl: intrvl['t1'] == 0.0 and intrvl['payload']['class'] == 'car'
)

In [11]:
# Merge the detections of each video along the time axis, combining bounds
# with Bounds3D.span; the t2 of the first merged interval is used below as
# the end time of the video.
video_durations = bboxes_ism.coalesce(
    axis=('t1', 't2'),
    bounds_merge_op=Bounds3D.span
)

In [12]:
# Number of merged intervals per video id.
video_durations.size()

{3: 1, 1: 1}

In [13]:
# Replicate every candidate spot through time for each video: one interval
# [t, t + interval] per step of `interval`, from 0 up to the integer part of
# the t2 of that video's first coalesced interval. The candidates come from
# the `first_key` video and are reused for every video.
# NOTE(review): `interval` was introduced as a stride over detection-list
# entries but is used here in the units of t1/t2 -- TODO confirm they agree.
parking_spots = IntervalSetMapping({
    key: IntervalSet([
        Interval(Bounds3D(
            t1 = t,
            t2 = t + interval,
            x1 = parking_spot['x1'],
            x2 = parking_spot['x2'],
            y1 = parking_spot['y1'],
            y2 = parking_spot['y2']
        ))
        for parking_spot in parking_spot_candidates.get_intervals()
        for t in range(0, int(video_durations[key].get_intervals()[0]['t2']), interval)
    ])
    for key in video_durations
})

In [14]:
# Number of replicated spot intervals per video id.
parking_spots.size()

{1: 572, 3: 1222}

In [15]:
# Predict empty spots by subtracting the detections from the replicated
# spots. A detection counts against a spot when it overlaps the spot in time,
# in X and in Y and additionally satisfies iou_at_least(0.25).
# (See the rekall documentation for the exact semantics of `minus`.)
# NOTE(review): `filter(lambda intrvl: True)` keeps every detection of every
# class -- looks like a leftover; confirm whether a class filter was intended.
empty_parking_spots = parking_spots.minus(
    bboxes_ism.filter(
        lambda intrvl: True
    ),
    predicate = and_pred(
        Bounds3D.T(overlaps()),
        Bounds3D.X(overlaps()),
        Bounds3D.Y(overlaps()),
        iou_at_least(0.25)
    ),
    window=0.0,
    progress_bar=True
)

100%|██████████| 2/2 [00:33<00:00, 13.05s/it]


In [16]:
# Show the car detections together with the raw empty-spot predictions.
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'car'
        )),
#         ('empty_spaces', gt_ism),
        ('predicted_spaces', empty_parking_spots)
    ]),
    # Joined with '/': os.path.join would use '\\' on Windows and break the URL.
    video_endpoint = '/'.join((VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER))
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xd4\x9d\xdd\x8em\xc9q\x9c_E\xe0\xb5m\xd4\xff\x8f/\…

In [17]:
# Get rid of predictions that are empty for less than four minutes.
#   1. map:      wrap each interval so its payload is a one-element list
#                holding the original interval;
#   2. coalesce: merge along time the intervals that satisfy
#                iou_at_least(0.5), spanning their bounds and concatenating
#                the payload lists;
#   3. filter_size(min_size=240): drop the merged runs below that size
#                (240 presumably means seconds, i.e. four minutes -- confirm);
#   4. split:    expand every surviving run back into the original intervals
#                stored in its payload.
empty_spots_coalesced_and_filtered = empty_parking_spots.map(
    lambda intrvl: Interval(intrvl['bounds'], [intrvl])
).coalesce(
    ('t1', 't2'),
    bounds_merge_op = Bounds3D.span,
    payload_merge_op = lambda p1, p2: p1 + p2,
    predicate = iou_at_least(0.5)
).filter_size(min_size=240).split(
    lambda intrvl: IntervalSet(intrvl['payload'])
)

In [18]:
# Show the car detections together with the duration-filtered predictions.
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'car'
        )),
#         ('empty_spaces', gt_ism),
        ('predicted_spaces', empty_spots_coalesced_and_filtered)
    ]),
    # Joined with '/': os.path.join would use '\\' on Windows and break the URL.
    video_endpoint = '/'.join((VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER))
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xcc\x9d\xdd\x8e-\xcbq\x9c_E\xe0\xb5m\xd4\xff\x8f/\…

In [19]:
# Get rid of predictions that have empty predictions on both sides.

# Raw predictions with a duration-filtered prediction at the same time whose
# right edge (x2) lies within half the spot's width of the spot's left edge.
spots_with_left = empty_parking_spots.filter_against(
    empty_spots_coalesced_and_filtered,
    predicate = and_pred(
        Bounds3D.T(equal()),
        lambda spot1, spot2: 
            abs(spot1['x1'] - spot2['x2']) < (spot1['x2'] - spot1['x1']) / 2
    ),
    window = 0.0,
    progress_bar = True
)
# Mirror image: a filtered prediction whose left edge (x1) lies within half
# the spot's width of the spot's right edge.
spots_with_right = empty_parking_spots.filter_against(
    empty_spots_coalesced_and_filtered,
    predicate = and_pred(
        Bounds3D.T(equal()),
        lambda spot1, spot2:
            abs(spot1['x2'] - spot2['x1']) < (spot1['x2'] - spot1['x1']) / 2
    ),
    window = 0.0,
    progress_bar = True
)
# Spots present in both sets (same time bounds and iou_at_least(0.5)).
spots_with_left_and_right = spots_with_left.filter_against(
    spots_with_right,
    predicate = and_pred(
        Bounds3D.T(equal()),
        iou_at_least(0.5)
    ),
    window = 0.0,
    progress_bar = True
)
# Subtract those spots from the duration-filtered predictions.
empty_spots_without_left_and_right = empty_spots_coalesced_and_filtered.minus(
    spots_with_left_and_right,
    predicate = and_pred(
        Bounds3D.T(equal()),
        iou_at_least(0.5)
    ),
    window = 0.0,
    progress_bar = True
)

100%|██████████| 2/2 [00:00<00:00, 10.85it/s]
100%|██████████| 2/2 [00:00<00:00, 21.13it/s]
100%|██████████| 1/1 [00:00<00:00, 6710.89it/s]
100%|██████████| 1/1 [00:00<00:00, 306.53it/s]


In [20]:
# Show the car detections together with the predictions that remain after
# removing spots with empty neighbours on both sides.
vgrid_spec = VGridSpec(
    video_meta = video_metadata,
    vis_format = VideoBlockFormat(imaps = [
        ('bounding_boxes', bboxes_ism.filter(
            lambda intrvl: intrvl['payload']['class'] == 'car'
        )),
#         ('empty_spaces', gt_ism),
        ('predicted_spaces', empty_spots_without_left_and_right)
    ]),
    # Joined with '/': os.path.join would use '\\' on Windows and break the URL.
    video_endpoint = '/'.join((VIDEO_COLLECTION_BASEURL, VIDEO_FOLDER))
)
VGridWidget(vgrid_spec = vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xcc\x9d\xdd\x8e-\xcbq\x9c_E\xe0\xb5m\xd4\xff\x8f/\…

In [21]:
# Remaining predictions that sit directly next to a removed spot: at the same
# time, one of their edges lies within half their own width of the facing
# edge of a spot from spots_with_left_and_right.
spots_next_to_removed_spots = empty_spots_without_left_and_right.filter_against(
    spots_with_left_and_right,
    predicate = and_pred(
        Bounds3D.T(equal()),
        lambda spot1, spot2:
            (abs(spot1['x2'] - spot2['x1']) < (spot1['x2'] - spot1['x1']) / 2 or
            abs(spot1['x1'] - spot2['x2']) < (spot1['x2'] - spot1['x1']) / 2)
    ),
    window = 0.0,
    progress_bar = True
)
# Stricter prediction set: subtract those neighbouring spots as well.
empty_spots_stricter = empty_spots_without_left_and_right.minus(
    spots_next_to_removed_spots,
    predicate = and_pred(
        Bounds3D.T(equal()),
        iou_at_least(0.5)
    ),
    window = 0.0,
    progress_bar = True
)

100%|██████████| 1/1 [00:00<00:00, 2286.97it/s]
100%|██████████| 1/1 [00:00<00:00, 494.26it/s]


# Compute Precision/Recall

In [22]:
def compute_precision_recall(predictions, gt):
    """Score `predictions` against the ground truth `gt`.

    Both arguments must provide `filter_against`, `minus` and `size` the way
    the interval-set mappings in this notebook do. A prediction and a
    ground-truth interval match when they have equal time bounds and satisfy
    `iou_at_least(0.5)`.

    * true positives  -- predictions that match some ground-truth interval
    * false positives -- predictions left after subtracting the true positives
    * false negatives -- ground truth left after subtracting the predictions

    The per-video false-positive counts are printed as a side effect.

    Returns:
        Tuple `(precision, recall, f1, tp_count, fp_count, fn_count)`.
        A ratio whose denominator is zero (no predictions, no ground truth,
        or no true positives) is reported as 0.0 instead of raising
        ZeroDivisionError.
    """
    def _match():
        # Built anew for every query, exactly one predicate per call.
        return and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        )

    def _ratio(numerator, denominator):
        # Guarded division: an empty denominator yields 0.0.
        return numerator / denominator if denominator else 0.0

    true_positives = predictions.filter_against(
        gt,
        predicate = _match(),
        window = 0.0,
        progress_bar = True
    )
    false_positives = predictions.minus(
        true_positives,
        predicate = _match(),
        window = 0.0,
        progress_bar = True
    )
    false_negatives = gt.minus(
        predictions,
        predicate = _match(),
        window = 0.0,
        progress_bar = True
    )

    # Kept on purpose: the saved cell outputs show this per-video breakdown.
    print(false_positives.size())

    tp_count = sum(true_positives.size().values())
    fp_count = sum(false_positives.size().values())
    fn_count = sum(false_negatives.size().values())

    precision = _ratio(tp_count, tp_count + fp_count)
    recall = _ratio(tp_count, tp_count + fn_count)
    f1 = _ratio(2 * precision * recall, precision + recall)

    return precision, recall, f1, tp_count, fp_count, fn_count

In [23]:
# Baseline: score the unfiltered empty-spot predictions against the ground truth.
compute_precision_recall(empty_parking_spots, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 29.84it/s]
100%|██████████| 2/2 [00:00<00:00,  3.69it/s]
100%|██████████| 2/2 [00:00<00:00,  4.55it/s]

{1: 2, 3: 7}





(0.9423076923076923, 1.0, 0.9702970297029703, 147, 9, 0)

In [24]:
# Score the predictions that survive the minimum-duration filter.
compute_precision_recall(empty_spots_coalesced_and_filtered, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 49.73it/s]
100%|██████████| 1/1 [00:00<00:00,  2.27it/s]
100%|██████████| 2/2 [00:00<00:00,  5.04it/s]

{3: 1}





(0.9932432432432432, 1.0, 0.9966101694915254, 147, 1, 0)

In [25]:
# Score after also removing spots with empty predictions on both sides.
compute_precision_recall(empty_spots_without_left_and_right, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 27.22it/s]
100%|██████████| 1/1 [00:00<00:00,  1.68it/s]
100%|██████████| 2/2 [00:00<00:00,  5.23it/s]

{3: 1}





(0.9932432432432432, 1.0, 0.9966101694915254, 147, 1, 0)

In [26]:
# Score the strictest set, which also drops spots next to the removed ones.
compute_precision_recall(empty_spots_stricter, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 49.74it/s]
100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
100%|██████████| 2/2 [00:00<00:00,  4.39it/s]

{3: 1}





(0.9932432432432432, 1.0, 0.9966101694915254, 147, 1, 0)

# Calculate AP

In [27]:
def compute_ap(predictions, gt):
    """Average precision of unscored `predictions` against `gt`.

    Matching uses equal time bounds plus `iou_at_least(0.5)`. Every predicted
    interval (true or false positive) is given score 1; every missed
    ground-truth interval is added as a positive with score 0. The resulting
    label/score vectors are passed to sklearn's `average_precision_score`,
    whose value is returned.
    """
    from sklearn.metrics import average_precision_score
    import numpy as np

    def matcher():
        # A fresh predicate for each query.
        return and_pred(Bounds3D.T(equal()), iou_at_least(0.5))

    query_options = dict(window = 0.0, progress_bar = True)

    hits = predictions.filter_against(gt, predicate = matcher(), **query_options)
    spurious = predictions.minus(hits, predicate = matcher(), **query_options)
    missed = gt.minus(predictions, predicate = matcher(), **query_options)

    n_hits, n_spurious, n_missed = (
        sum(mapping.size().values()) for mapping in (hits, spurious, missed)
    )

    # Layout of both vectors: [true positives | false negatives | false positives]
    labels = np.concatenate([np.ones(n_hits), np.ones(n_missed), np.zeros(n_spurious)])
    scores = np.concatenate([np.ones(n_hits), np.zeros(n_missed), np.ones(n_spurious)])

    return average_precision_score(labels, scores)

In [28]:
# AP of the unfiltered empty-spot predictions.
compute_ap(empty_parking_spots, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 32.72it/s]
100%|██████████| 2/2 [00:00<00:00,  4.92it/s]
100%|██████████| 2/2 [00:00<00:00,  4.70it/s]


0.9423076923076923

In [29]:
# AP of the predictions that survive the minimum-duration filter.
compute_ap(empty_spots_coalesced_and_filtered, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 26.87it/s]
100%|██████████| 1/1 [00:00<00:00,  2.15it/s]
100%|██████████| 2/2 [00:00<00:00,  5.22it/s]


0.9932432432432432

In [30]:
# AP after also removing spots with empty predictions on both sides.
compute_ap(empty_spots_without_left_and_right, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 22.79it/s]
100%|██████████| 1/1 [00:00<00:00,  2.52it/s]
100%|██████████| 2/2 [00:00<00:00,  5.15it/s]


0.9932432432432432

In [31]:
# AP of the strictest set, which also drops spots next to the removed ones.
compute_ap(empty_spots_stricter, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 26.00it/s]
100%|██████████| 1/1 [00:00<00:00,  1.77it/s]
100%|██████████| 2/2 [00:00<00:00,  5.01it/s]


0.9932432432432432

In [32]:
# Compute average precision, but use random scores from 0.5 to 1 for positives
def compute_ap_random_scores(prediction, gt, seed=None):
    """Average precision with random confidence scores for the predictions.

    Matching uses equal time bounds plus `iou_at_least(0.5)`. Each predicted
    interval (true or false positive) gets a score drawn as
    `(uniform[0, 1) + 1) / 2`, i.e. in [0.5, 1); each missed ground-truth
    interval is added as a positive with score 0.

    Args:
        prediction: predicted intervals (must provide `filter_against`,
            `minus` and `size`).
        gt: ground-truth intervals (must provide `minus`).
        seed: optional seed. With the default `None` the scores come from
            NumPy's global random state, as before, so the result changes
            from call to call unless that state was seeded. Any other value
            seeds a private generator and makes the result repeatable.

    Returns:
        The value of sklearn's `average_precision_score` for these labels
        and scores.
    """
    from sklearn.metrics import average_precision_score
    import numpy as np

    # `np.random` (global state) and `RandomState` both expose `.rand(n)`.
    rng = np.random if seed is None else np.random.RandomState(seed)

    true_positives = prediction.filter_against(
        gt,
        predicate = and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        ),
        window = 0.0,
        progress_bar = True
    )
    false_positives = prediction.minus(
        true_positives,
        predicate = and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        ),
        window = 0.0,
        progress_bar = True
    )
    false_negatives = gt.minus(
        prediction,
        predicate = and_pred(
            Bounds3D.T(equal()),
            iou_at_least(0.5)
        ),
        window = 0.0,
        progress_bar = True
    )

    tp_count = sum(true_positives.size().values())
    fp_count = sum(false_positives.size().values())
    fn_count = sum(false_negatives.size().values())

    # Layout of both vectors: [true positives | false negatives | false positives]
    y_true = np.concatenate([
        np.ones(tp_count),
        np.ones(fn_count),
        np.zeros(fp_count)
    ])
    y_scores = np.concatenate([
        (rng.rand(tp_count) + 1) / 2,
        np.zeros(fn_count),
        (rng.rand(fp_count) + 1) / 2
    ])

    return average_precision_score(y_true, y_scores)

In [33]:
# Random-score AP of the unfiltered predictions. The scores are drawn from
# NumPy's global random state, so the value varies between runs unless that
# state is seeded.
compute_ap_random_scores(empty_parking_spots, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 20.10it/s]
100%|██████████| 2/2 [00:00<00:00,  4.88it/s]
100%|██████████| 2/2 [00:00<00:00,  5.20it/s]


0.9396547513897743

In [34]:
# Random-score AP of the predictions that survive the minimum-duration filter
# (value varies between runs unless NumPy's global random state is seeded).
compute_ap_random_scores(empty_spots_coalesced_and_filtered, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 49.59it/s]
100%|██████████| 1/1 [00:00<00:00,  2.59it/s]
100%|██████████| 2/2 [00:00<00:00,  5.26it/s]


0.9925265498350218

In [35]:
# Random-score AP after also removing spots with empty predictions on both
# sides (value varies between runs unless NumPy's global random state is seeded).
compute_ap_random_scores(empty_spots_without_left_and_right, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 29.00it/s]
100%|██████████| 1/1 [00:00<00:00,  2.12it/s]
100%|██████████| 2/2 [00:00<00:00,  5.27it/s]


0.9992756256329199

In [36]:
# Random-score AP of the strictest set
# (value varies between runs unless NumPy's global random state is seeded).
compute_ap_random_scores(empty_spots_stricter, gt_ism)

100%|██████████| 2/2 [00:00<00:00, 24.62it/s]
100%|██████████| 1/1 [00:00<00:00,  2.17it/s]
100%|██████████| 2/2 [00:00<00:00,  5.06it/s]


0.9931811420487281