In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from PIL import Image
import cv2

import os
import json
import math
import random

In [None]:
# Seed the stdlib and NumPy global generators with the same fixed value so
# that the random.shuffle calls later in the notebook are repeatable.
SEED = 42
for seed_fn in (random.seed, np.random.seed):
    seed_fn(SEED)

In [None]:
# Directory containing the image files; `path` is reused further down to
# build the absolute image paths written to the train/valid lists.
path = os.path.join('..', 'data', 'images')
images = os.listdir(path)

# Report how many directory entries were found.
n_images = len(images)
print(n_images)

In [None]:
# Root folder for every generated artefact, and the sub-folder that receives
# one label file per image.
save_path = os.path.join('..', 'data')
label_path = os.path.join(save_path, 'labels')

# exist_ok=True makes this safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
os.makedirs(label_path, exist_ok=True)

In [None]:
# Parse the training annotation file into `annot`.
annotations_file = os.path.join('..', 'data_original', 'iwildcam2020_train_annotations.json')
with open(annotations_file, 'r') as f:
    annot = json.load(f)

In [None]:
categories = annot['categories']

# category id -> {'name': ..., 'idx': position of the category in the list}
category_map = {
    category['id']: {'name': category['name'], 'idx': position}
    for position, category in enumerate(categories)
}

# integer category id -> position of the category in the list
category_idx_map = {
    int(category['id']): position
    for position, category in enumerate(categories)
}

In [None]:
# Load the raw detector results; they are reformatted into a more readable
# mapping afterwards.
detector_results_file = os.path.join('..', 'data_original', 'iwildcam2020_megadetector_results.json')
with open(detector_results_file, 'r') as f:
    detections = json.load(f)

In [None]:
# image id -> list of the 'bbox' values of every detection for that image
# (an image with no detections maps to an empty list).
write_detections = {
    image['id']: [detection['bbox'] for detection in image['detections']]
    for image in detections['images']
}

In [None]:
# Persist the simplified {image id: [bbox, ...]} mapping alongside the other
# generated files.
detections_out_file = os.path.join(save_path, 'megadetector_results.json')
with open(detections_out_file, 'w') as out:
    json.dump(write_detections, out)

In [None]:
# Per-category accumulators, keyed by category id:
#   cat_images -> list of image records, cat_count -> number of records.
cat_images, cat_count = {}, {}

In [None]:
# Group the annotated images by category id and count them per category.
annotations = annot['annotations']
_images = annot['images']

# Start from empty accumulators so that re-running this cell does not append
# the same records a second time and double the counts.
cat_images = dict()
cat_count = dict()

for annotation in annotations:
    _img = annotation['image_id']
    cat = annotation['category_id']

    # Images without an entry in the detector results (e.g. corrupt files)
    # are not considered for further processing. A membership test is used
    # instead of binding the lookup to `detections`, which would overwrite the
    # raw detector JSON loaded earlier and break re-running the cell that
    # builds `write_detections`.
    if _img not in write_detections:
        continue

    cat_images.setdefault(cat, []).append({'image_id': _img, 'category': cat})
    cat_count[cat] = cat_count.get(cat, 0) + 1

In [None]:
# Per-category split sizes: 70% of the images (rounded down, but never fewer
# than one) go to training, the remainder to validation.
n_train = {
    cat: max(1, math.floor(count * 0.70))
    for cat, count in cat_count.items()
}
n_val = {cat: count - n_train[cat] for cat, count in cat_count.items()}

In [None]:
# Build the train / validation record lists one category at a time.
train_images, val_images = [], []
for cat, records in cat_images.items():
    # Shuffle in place, then cut at this category's training size.
    random.shuffle(records)
    cut = n_train[cat]
    train_images.extend(records[:cut])
    val_images.extend(records[cut:])

In [None]:
# Absolute image directory, resolved once instead of once per record.
image_root = os.path.abspath(path)


def _image_file(record):
    """Return the absolute '<image_id>.jpg' path for one image record."""
    return os.path.join(image_root, '{}.jpg'.format(record['image_id']))


# Path lists for both splits, each shuffled so categories are interleaved.
train_image_list = list(map(_image_file, train_images))
random.shuffle(train_image_list)
val_image_list = list(map(_image_file, val_images))
random.shuffle(val_image_list)

In [None]:
# Write one image path per line for each split (no trailing newline).
for list_name, image_list in (('train.txt', train_image_list),
                              ('valid.txt', val_image_list)):
    with open(os.path.join(save_path, list_name), 'w') as out:
        out.write('\n'.join(image_list))

In [None]:
# Tabular view of both splits: one row per image record
# (columns come from the record keys 'image_id' and 'category').
train_images_df, val_images_df = (
    pd.DataFrame(records) for records in (train_images, val_images)
)

In [None]:
# Export both split tables as CSV without the index column.
for csv_name, frame in (('train_images.csv', train_images_df),
                        ('val_images.csv', val_images_df)):
    frame.to_csv(os.path.join(save_path, csv_name), index=False)

In [None]:
# Save the category id -> index mapping. JSON object keys are always strings,
# so the integer ids come back as strings when this file is read again.
idx_map_file = os.path.join(save_path, 'category_idx_map.json')
with open(idx_map_file, 'w') as out:
    json.dump(category_idx_map, out)

In [None]:
# Save the category id -> {'name', 'idx'} mapping.
category_map_file = os.path.join(save_path, 'category_map.json')
with open(category_map_file, 'w') as out:
    json.dump(category_map, out)

In [None]:
# Save class names in YOLO compatible format: one name per line, placed at
# the line matching the category's 'idx'.
names_by_idx = [''] * len(category_map)
for entry in category_map.values():
    names_by_idx[entry['idx']] = entry['name']

class_names = '\n'.join(names_by_idx)
with open(os.path.join(save_path, 'iWildCam2020.names'), 'w') as out:
    out.write(class_names)

In [None]:
# Total number of image records across both splits.
print(sum(len(frame) for frame in (train_images_df, val_images_df)))

In [None]:
for image in train_images:
    img = image['image_id']
    cls = category_idx_map[image['category']]
    try:
        detections = write_detections[img]
    except KeyError:
        continue
    labels = []
    if len(detections) == 0:
        detections = [[0., 0., 0., 0.]]
    for detection in detections:
        x = detection[0]
        y = detection[1]
        w = detection[2]
        h = detection[3]
        
        x_center = x + w / 2.
        y_center = y + h / 2.
        labels.append('{} {} {} {} {}'.format(cls, x_center, y_center, w, h))
    labels = '\n'.join(labels)
    with open(os.path.join(label_path, '{}.txt'.format(img)), 'w') as f:
        f.write(labels)

In [None]:
for image in val_images:
    img = image['image_id']
    cls = category_idx_map[image['category']]
    try:
        detections = write_detections[img]
    except KeyError:
        continue
    labels = []
    if len(detections) == 0:
        detections = [[0., 0., 0., 0.]]
    for detection in detections:
        x = detection[0]
        y = detection[1]
        w = detection[2]
        h = detection[3]
        
        x_center = x + w / 2.
        y_center = y + h / 2.
        labels.append('{} {} {} {} {}'.format(cls, x_center, y_center, w, h))
    labels = '\n'.join(labels)
    with open(os.path.join(label_path, '{}.txt'.format(img)), 'w') as f:
        f.write(labels)

In [None]:
import pickle as pkl
import json

In [None]:
# Sanity check: read the saved category index map back from disk.
save_path = os.path.join('..', 'data')
idx_map_file = os.path.join(save_path, 'category_idx_map.json')
with open(idx_map_file, 'r') as f:
    d = json.load(f)

In [None]:
# Number of entries in the reloaded map.
len(d)