In [7]:
# Show the value of every top-level expression statement in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'  # default is 'last_expr'

# Load the autoreload extension in mode 2 so that edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import sys

# Make the CameraTraps checkout importable (presumably where the `data_management` and
# `visualization` packages imported by this notebook live — confirm).
# The membership guard keeps sys.path free of duplicate entries when this cell is re-run.
camera_traps_repo = '/data/home/marmot/camtrap/PyCharm/CameraTraps-benchmark'
if camera_traps_repo not in sys.path:
    sys.path.append(camera_traps_repo)

In [111]:
import json
import os

from IPython.display import display
from PIL import Image
from tqdm import tqdm

from data_management.cct_json_utils import CameraTrapJsonUtils, IndexedJsonDb
from visualization.visualization_utils import plot_stacked_bar_chart, render_db_bounding_boxes, resize_image

## Exclude insects and distant birds from SS bbox annotations

A small number of images containing insects or distant birds still have bounding boxes around them. This notebook manually filters those boxes out of `SnapshotSerengetiBboxes_20190409.json` to form the `20190903` version of the annotations.

In [6]:
# Read the 20190409 bounding-box database into memory as a plain dict.
original_json_path = '/beaver_disk/camtrap/ss_season1/original/SnapshotSerengetiBboxes_20190409.json'
with open(original_json_path) as json_file:
    original = json.loads(json_file.read())

In [11]:
# Quick look at the loaded database: number of image entries, number of
# annotation entries, the category list and the info block.
len(original['images'])
len(original['annotations'])
original['categories']
original['info']

82938

147026

[{'id': 1, 'name': 'animal'},
 {'id': 2, 'name': 'person'},
 {'id': 3, 'name': 'group'},
 {'id': 4, 'name': 'vehicle'}]

{'contributor': 'SMB',
 'date_created': '2019-04-14',
 'description': 'Reprocessed bounding box annotations for Snapshot Serengeti seasons 1 to 6.',
 'version': '20190409',
 'year': 2018}

In [8]:
# Root directory of the Snapshot Serengeti images; an annotation's image_id plus '.JPG' is joined onto it to locate the file.
ss_dir = '/home/marmot/camtrap/mnt/snapshot-serengeti-v2/SER'

In [77]:
# Exclusive lower and upper bounds of the bbox-area band (width * height) examined in this pass.
area_threshold_pixels = 300
upper_area_threshold_pixels = 400

problem_annos = []
num_group = 0

for anno in tqdm(original['annotations']):
    category_id = anno['category_id']
    if category_id == 3:
        # Category 3 ('group') boxes are only counted, never considered as candidates.
        num_group += 1
    else:
        assert category_id in [1, 2]

        _x, _y, box_width, box_height = anno['bbox']
        # Keep boxes whose area falls strictly inside the band.
        if area_threshold_pixels < box_width * box_height < upper_area_threshold_pixels:
            problem_annos.append(anno)
len(problem_annos)

100%|██████████| 147026/147026 [00:00<00:00, 1013764.18it/s]


614

upper_area_threshold_pixels, cumulative num of problem_annos

100, 575

200, 1226

300, 1226 + 601 = 1827

400, 1827 + 614 = 2441

In [78]:
# For every candidate annotation, load its image, draw the single box on it and
# keep a resized copy alongside the annotation and image IDs for manual review.
rendered_problem_annos = []

for anno in tqdm(problem_annos):
    image_path = os.path.join(ss_dir, anno['image_id'] + '.JPG')
    image = Image.open(image_path)
    # The return value is unused, so this presumably draws onto `image` in place — confirm.
    render_db_bounding_boxes(
        [anno['bbox']], [anno['category_id']], image,
        original_size=None, label_map=None, thickness=4
    )
    # NOTE(review): 1000 is presumably the target width in pixels — confirm against resize_image.
    image = resize_image(image, 1000)
    rendered_problem_annos.append((anno['id'], anno['image_id'], image))

100%|██████████| 614/614 [02:34<00:00,  3.23it/s]


In [79]:
# Number of (annotation id, image id, rendered image) tuples collected for review.
len(rendered_problem_annos)

614

In [None]:
# Page through the rendered candidates for manual review, starting at the given
# index, printing the annotation ID and image ID above each image.
# A bare `im` expression inside a loop body is never shown: IPython only
# auto-displays top-level expression statements, even with
# ast_node_interactivity = 'all'. display() is therefore called explicitly.
first_index_to_show = 550
for anno_id, image_id, im in rendered_problem_annos[first_index_to_show:]:
    print(anno_id)
    print(image_id)
    display(im)
    print('')

In [22]:
# Print the first annotation entry whose ID matches, then stop scanning.
target_anno_id = 'jGmjS1533032672368'
for anno in tqdm(original['annotations']):
    if anno['id'] != target_anno_id:
        continue
    print(anno)
    break

  0%|          | 0/147026 [00:00<?, ?it/s]

{'id': 'jGmjS1533032672368', 'category_id': 1, 'image_id': 'S1/J04/J04_R2/S1_J04_R2_PICT0968', 'bbox': [271.9229798279782, 345.93088361620994, 10.652652818003354, 14.577314382531071]}





## Update database JSON

In [94]:
# Annotation IDs of the annotation entries to delete, one per line of the file.
annos_to_del_path = '/beaver_disk/camtrap/ss_season1/manual_filter/SS_annos_to_delete.csv'
with open(annos_to_del_path) as csv_file:
    annos_to_del_text = csv_file.read()
annos_to_del = annos_to_del_text.splitlines()

In [97]:
# Count before and after discarding the empty lines read from the file.
len(annos_to_del)
annos_to_del = [anno_id for anno_id in annos_to_del if anno_id]
len(annos_to_del)

380

353

In [101]:
# Image IDs, one per line: on each of these images, every bounding box smaller
# than 400 sq pixels can be deleted.
images_annos_to_del_path = '/beaver_disk/camtrap/ss_season1/manual_filter/SS_annos_to_delete_image_with_small_boxes.csv'
with open(images_annos_to_del_path) as csv_file:
    images_annos_to_del_text = csv_file.read()
images_annos_to_del = images_annos_to_del_text.splitlines()

In [103]:
# Count before and after discarding the empty lines read from the file.
len(images_annos_to_del)
images_annos_to_del = [image_id for image_id in images_annos_to_del if image_id]
len(images_annos_to_del)

213

127

In [106]:
# Turn both deletion lists into sets: duplicates collapse and the membership
# tests in the filtering loop become constant-time.
annos_to_del, images_annos_to_del = set(annos_to_del), set(images_annos_to_del)

len(annos_to_del)
len(images_annos_to_del)

353

124

Exclude annotations manually filtered out from the new version of annotation entries.

In [108]:
# Build the new annotation list by dropping the manually filtered entries.
#
# An annotation is excluded when either
#   1. its ID is listed in `annos_to_del`, or
#   2. it is not a 'group' box (category 3), its area (width * height) is below
#      the small-box threshold, and its image is listed in `images_annos_to_del`.
# Everything else is kept unchanged.
#
# `num_bboxes_excluded` counts BOTH kinds of exclusion. It previously counted
# only the first kind and so under-reported the number of removed boxes.
small_box_area_threshold = 400  # sq pixels

print('Originally had {} annotation entries.'.format(len(original['annotations'])))

new_annotations = []
num_bboxes_excluded = 0

for a in tqdm(original['annotations']):
    # if in list of annotation IDs, exclude this entry
    if a['id'] in annos_to_del:
        num_bboxes_excluded += 1
        continue

    # group boxes are never subject to the small-box rule
    if a['category_id'] != 3:
        assert a['category_id'] in [1, 2]

        _, _, w, h = a['bbox']
        area = w * h
        if area < small_box_area_threshold and a['image_id'] in images_annos_to_del:
            num_bboxes_excluded += 1
            continue

    new_annotations.append(a)

# Every original entry must be either kept or counted as excluded.
assert len(new_annotations) + num_bboxes_excluded == len(original['annotations'])

print('After the exclusions, now have {} annotation entries.'.format(len(new_annotations)))
print('Excluded {} annotation entries in total.'.format(num_bboxes_excluded))

 42%|████▏     | 62446/147026 [00:00<00:00, 624448.69it/s]

Originally had 147026 annotation entries.


100%|██████████| 147026/147026 [00:00<00:00, 612427.62it/s]

After the exclusions, now have 146359 annotation entries.





In [109]:
# Number of bboxes excluded, computed from the data so it cannot go stale the
# way counts copied from printed output do.
len(original['annotations']) - len(new_annotations)

667

Make no changes to the image entries. If an image no longer has annotation entries, it is confirmed empty.

In [114]:
# Info block for the 20190903 version of the database.
new_info = dict(
    contributor='Sara Beery, this version updated by Siyu Yang',
    date_created='2019-09-03',
    description='Reprocessed bounding box annotations for Snapshot Serengeti seasons 1 to 6, with remaining small insect and distant bird bboxes smaller than 400 sq pixel excluded manually.',
    version='20190903',
    year=2019,
)

In [115]:
# Assemble the new database: new info block and filtered annotations, with the
# categories and image entries carried over unchanged from the original.
new_db = {}
new_db['info'] = new_info
new_db['categories'] = original['categories']
new_db['annotations'] = new_annotations
new_db['images'] = original['images']

In [119]:
# Check the sizes of the assembled database: annotation count, then image count.
len(new_db['annotations'])
len(new_db['images'])

146359

82938

In [120]:
# NOTE(review): order_db_keys comes from data_management.cct_json_utils; presumably it
# returns the database with its keys arranged in a canonical order — confirm.
new_db = CameraTrapJsonUtils.order_db_keys(new_db)

In [121]:
# Write the 20190903 database to disk as JSON with one-space indentation.
output_json_path = '/beaver_disk/camtrap/ss_season1/original/SnapshotSerengetiBboxes_20190903.json'
with open(output_json_path, 'w') as out_file:
    json.dump(new_db, out_file, indent=1)