In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'  # default is 'last_expr'

%load_ext autoreload
%autoreload 2

import sys
sys.path.append('/Users/siyuyang/Source/repos/GitHub_MSFT/CameraTraps')  # append this repo to PYTHONPATH

import json
import os
from collections import Counter, defaultdict
from random import sample
import math

from tqdm import tqdm

from data_management.megadb.schema import sequences_schema_check
from data_management.annotations.add_bounding_boxes_to_megadb import *
from data_management.megadb.converters.cct_to_megadb import make_cct_embedded, process_sequences, write_json

In [None]:
# Single directory that holds every input and output json of this conversion;
# change this one value to relocate the whole run.
data_dir = '/Users/siyuyang/Source/temp_data/CameraTrap/megadb_from_cct/island_conservation_200529'

# Output MegaDB json files: one for the public (non-human) sequences and one for
# the private (human) sequences.
path_to_output_public = data_dir + '/island_conservation_200529_megadb.json'
path_to_output_private = data_dir + '/island_conservation_200529_private_megadb.json'

# Values written to the `dataset` field of the public and private sequences.
dataset_name = 'islandconservation_200529'
dataset_private_name = 'islandconservation_200529_private'

# Maps this dataset's bbox labels to bbox categories; the conversion cell below
# falls back to 'animal' for any label that is not listed here.
label_map = {'human': 'person', 'vehicle': 'vehicle', 'empty': 'empty'}

# Path to the CCT json, or a loaded json object
# (for this dataset the image-level and bbox-level databases are the same file)
path_to_image_cct = data_dir + '/island_conservation.json'
path_to_bbox_cct = data_dir + '/island_conservation.json'
# At least one of the two CCT sources must be provided.
assert not (path_to_image_cct is None and path_to_bbox_cct is None)

In [None]:
# Build the "embedded" CCT representation from the image-level and bbox-level
# CCT databases (presumably annotations nested inside their image entries —
# see data_management/megadb/converters/cct_to_megadb.py to confirm).
embedded = make_cct_embedded(image_db=path_to_image_cct, bbox_db=path_to_bbox_cct)

In [None]:
# Turn the embedded CCT entries into a list of sequence objects for the public
# dataset name; every later cell works on (and mutates) this list.
sequences = process_sequences(embedded, dataset_name)

In [None]:
# by Vardhan:
# Push the `bbox`, `class` and `conf` fields that sit on the sequence object down
# into the sequence's first image, and drop the sequence-level `id`.
# NOTE(review): only images[0] is filled in — presumably every sequence in this
# dataset holds exactly one image; confirm before reusing on other datasets.
# This cell mutates `sequences` in place and raises KeyError if it is run twice.
for seq in sequences:
    del seq['id']

    boxes = seq['bbox']
    for box in boxes:
        original_label = box['category']
        box['class'] = original_label  # keep the dataset's own label
        box['category'] = label_map.get(original_label, 'animal')  # unmapped labels become 'animal'

    first_image = seq['images'][0]
    first_image['bbox'] = boxes
    del seq['bbox']
    first_image['class'] = seq.pop('class')
    first_image['conf'] = seq.pop('conf')

In [None]:
# Spot-check one converted sequence near each end of the list; both values are
# displayed because ast_node_interactivity is set to 'all' in the first cell.
sequences[100]
sequences[-100]

Drop the `conf` field

In [None]:
# Remove the `conf` key from every image of every sequence.
# `del` is strict on purpose: an image without `conf` raises KeyError.
for image in (im for seq in sequences for im in seq['images']):
    del image['conf']

In [None]:
# Spot-check a sequence after dropping `conf`; assumes the list has more than 13000 entries.
sequences[13000]

In [None]:
# Partition the sequences into a private (human) set and a public (non-human)
# set, because the images of the two sets are stored in different containers.
# Only the first image's `class` value is inspected for each sequence.
sequences_non_human = []
sequences_human = []

for sequence in sequences:
    first_image_classes = sequence['images'][0]['class']
    destination = sequences_human if 'human' in first_image_classes else sequences_non_human
    destination.append(sequence)

# Show the size of each set.
len(sequences_non_human)
len(sequences_human)

There are exactly 6178 images in the 'human' folder in the private container, so the human-sequence count above looks good.

In [None]:
# The human sequences belong to the private dataset, so overwrite their
# `dataset` field with the private dataset name.
for human_seq in sequences_human:
    human_seq['dataset'] = dataset_private_name

In [None]:
# Spot-check one entry from each set (e.g. to confirm the `dataset` field of the
# human set); assumes each list has more than ~1000 entries.
sequences_non_human[1001]
sequences_human[1002]

In [None]:
# Run the MegaDB sequences schema check on the public (non-human) set before writing it out.
sequences_schema_check.sequences_schema_check(sequences_non_human)

In [None]:
# Run the MegaDB sequences schema check on the private (human) set before writing it out.
sequences_schema_check.sequences_schema_check(sequences_human)

In [None]:
# Write the public (non-human) sequences to the public output json.
write_json(path_to_output_public, sequences_non_human)

In [None]:
# Write the private (human) sequences to the private output json.
write_json(path_to_output_private, sequences_human)

## Add width and height back in for the non-human set
Use the width and height already recorded in the CCT json to avoid downloading each image.

In [None]:
from data_management.cct_json_utils import IndexedJsonDb

In [None]:
# Load the image-level CCT json into an IndexedJsonDb; its image_id_to_image
# mapping is used below to look up each image's width and height.
cct_json_db = IndexedJsonDb(path_to_image_cct)

In [None]:
# Index the CCT image entries by file name so that each MegaDB image can look up
# its entry (and from it, `width` and `height`). If two entries share a file
# name, the later one wins.
file_name_to_dims = {
    image_entry['file_name']: image_entry
    for image_entry in cct_json_db.image_id_to_image.values()
}

In [None]:
def round_to_int(f):
    """Round `f` to the nearest whole number and return it as a built-in `int`.

    Rounding is delegated to the built-in `round`, so for Python floats an exact
    half goes to the nearest even integer (0.5 -> 0, 1.5 -> 2, 2.5 -> 2).
    """
    nearest = round(f)
    return int(nearest)

In [None]:
# Re-attach each image's pixel dimensions (looked up by file name in the CCT
# database) and convert its boxes from relative [x, y, w, h] fractions to
# absolute pixel values.
# NOTE: this mutates sequences_non_human in place. Run it only once on a freshly
# built list; a second run would multiply the pixel coordinates by the image
# size again.
for seq in sequences_non_human:
    for im in seq['images']:
        item = file_name_to_dims[im['file']]
        image_width = item['width']
        image_height = item['height']
        im['width'] = image_width
        im['height'] = image_height

        for b in im['bbox']:
            coords = b['bbox']
            # The origin is clamped so that it is never negative.
            x = max(round_to_int(coords[0] * image_width), 0)
            y = max(round_to_int(coords[1] * image_height), 0)
            # Cap the size by the room left after the (clamped) origin, so that
            # x + box_w and y + box_h cannot extend past the image edge. Capping
            # by the full image size alone lets an offset box overflow.
            box_w = min(round_to_int(coords[2] * image_width), image_width - x)
            box_h = min(round_to_int(coords[3] * image_height), image_height - y)

            b['bbox'] = [x, y, box_w, box_h]

In [None]:
# Write the non-human sequences, now carrying width/height and absolute pixel
# boxes, to the file consumed by the megadb_to_cct.py command shown below.
write_json('/Users/siyuyang/Source/temp_data/CameraTrap/megadb_from_cct/island_conservation_200529/megadb_to_cct_files/island_conservation_200529_megadb_abs.json', sequences_non_human)

Command for converting from these MegaDB entries back to CCT ("one source of truth"):

```
python data_management/megadb/converters/megadb_to_cct.py "islandconservation_200529" /Users/siyuyang/Source/temp_data/CameraTrap/megadb_from_cct/island_conservation_200529/megadb_to_cct_files/island_conservation_200529_megadb_abs.json /Users/siyuyang/Source/temp_data/CameraTrap/megadb_from_cct/island_conservation_200529/megadb_to_cct_files/island_conservation_200529_megadb_to_cct.json
```

```
...
Final CCT DB has 154379 image entries, and 213562 annotation entries.
```