### load dataset ground truth

In [None]:
import json
from collections import Counter

import pandas as pd
from tabulate import tabulate

GT_PATH = '/path/to/dronewaste/dronewaste_v1.0.json'

# JSON text is UTF-8; be explicit instead of relying on the platform default encoding
with open(GT_PATH, 'r', encoding='utf-8') as f:
    gt = json.load(f)

# enumerate categories (ordered list of names, also used as table headers by later cells)
categories = [c['name'] for c in gt['categories']]

# map category id -> name, so a category_id is resolved through the id it refers to
# instead of being used as a position in the list (which is only correct when ids
# are 0-based, contiguous and listed in order)
category_names = {c['id']: c['name'] for c in gt['categories']}

# number of annotations per image id, computed in a single pass over the annotations
# (a Counter returns 0 for image ids that never appear, i.e. images without annotations)
annots_per_image = Counter(a['image_id'] for a in gt['annotations'])

# one row per image: id, site, number of annotations
images = [
    [img['id'], img['site'], annots_per_image[img['id']]]
    for img in gt['images']
]

# images dataframe
dfi = pd.DataFrame(images, columns=['id', 'site', 'annots'])

# map image id -> site; setdefault keeps the first entry if an image id is repeated
site_by_image = {}
for img in gt['images']:
    site_by_image.setdefault(img['id'], img['site'])

# one row per annotation, enriched with its category name and the site of its image
annotations = [
    [
        ann['id'],
        ann['image_id'],
        category_names[ann['category_id']],
        site_by_image[ann['image_id']],
        ann['area'],
        ann['iscrowd'],
    ]
    for ann in gt['annotations']
]

# annotations dataframe
dfa = pd.DataFrame(annotations, columns=['id', 'image_id', 'category', 'site', 'area', 'iscrowd'])

print(GT_PATH)
dfa.head()

### compute general statistics

In [None]:
# dataset-wide totals; image counts come from dfi, annotation counts from dfa
n_sites = dfi['site'].nunique(dropna=False)
n_cats = dfa['category'].nunique(dropna=False)

# images, split by whether they carry at least one annotation
annots_per_img = dfi['annots']
n_imgs = dfi.shape[0]
n_empty = int((annots_per_img == 0).sum())
n_annotated = int((annots_per_img > 0).sum())

# annotations, split by the iscrowd flag (0 = instance, 1 = crowd)
crowd_flag = dfa['iscrowd']
n_annots = dfa.shape[0]
n_instances = int((crowd_flag == 0).sum())
n_crowds = int((crowd_flag == 1).sum())

print(f'sites: {n_sites}, cats: {n_cats}')
print(f'imgs: {n_imgs}, empty: {n_empty}, annotated: {n_annotated}')
print(f'annots: {n_annots}, instances: {n_instances}, crowds: {n_crowds}')

### count instance and crowd annotations

In [None]:
# tally annotations, instances and crowds for every category;
# each row starts with its own label, followed by one count per category
is_instance = dfa['iscrowd'] == 0
is_crowd = dfa['iscrowd'] == 1

annots = ['annots']
instances = ['instances']
crowds = ['crowds']
for cat in categories:
    # boolean mask selecting the annotations of the current category
    in_cat = dfa['category'] == cat
    annots.append(int(in_cat.sum()))
    instances.append(int((in_cat & is_instance).sum()))
    crowds.append(int((in_cat & is_crowd).sum()))

print(tabulate([annots, instances, crowds], headers=categories))

### count instance (non-crowd) annotations by site and by category

In [None]:
# keep only instance annotations (iscrowd == 0); crowd annotations are left out
dfnc = dfa[dfa['iscrowd'] == 0]

# build one table row per site: the site label followed by one count per category
counts = []
for site in sorted(dfi['site'].unique()):
    # boolean mask selecting the instance annotations of the current site
    at_site = dfnc['site'] == site
    per_category = [
        int((at_site & (dfnc['category'] == cat)).sum())
        for cat in categories
    ]
    counts.append([site] + per_category)

print(tabulate(counts, headers=['site'] + categories))