In [14]:
from collections import defaultdict

def get_info_from_coco(coco_dict, include_crowdedness=False):
    """Collect per-box aspect ratios and sizes, optionally with per-image box counts.

    Args:
        coco_dict: dict with an 'annotations' list. Each annotation needs a
            'bbox' of (left, top, width, height) and, when
            ``include_crowdedness`` is set, an 'image_id'.
        include_crowdedness: also return how many annotations each image has.
            Counts are keyed by 'image_id', so when several files have been
            merged their image ids must not overlap, otherwise distinct
            images are pooled into one count.

    Returns:
        ``(ars, sizes)`` or ``(ars, sizes, crowdedness)``:
        ars: height/width of every valid box (anchor aspect-ratio convention).
        sizes: sqrt(width * height) of every valid box.
        crowdedness: annotation count per image id seen in 'annotations'
            (images without any annotation do not appear).
    """
    counts = defaultdict(int) if include_crowdedness else None
    ars = []
    sizes = []
    for annot in coco_dict['annotations']:
        if include_crowdedness:
            # Every annotation counts towards its image, even a degenerate one.
            counts[annot['image_id']] += 1
        _left, _top, w, h = annot['bbox']
        if w <= 0 or h <= 0:
            # Degenerate box: w == 0 would raise ZeroDivisionError and a
            # negative area would turn ** 0.5 into a complex number, so it
            # is left out of the shape statistics.
            continue
        ars.append(h / w)  # anchor aspect ratios are height/width
        sizes.append((w * h) ** 0.5)
    if include_crowdedness:
        return ars, sizes, list(counts.values())
    return ars, sizes

In [15]:
from sklearn.cluster import KMeans 
import numpy as np

def get_clusters(juice_list, k=3, random_state=None):
    """Cluster scalar values with 1-D k-means and return the sorted centers.

    Args:
        juice_list: sequence of numbers (e.g. aspect ratios or box sizes).
        k: number of clusters to fit.
        random_state: forwarded to ``KMeans``; pass an int to make the
            centers reproducible between runs. ``None`` keeps the previous
            non-deterministic behaviour.

    Returns:
        List of ``k`` cluster centers as floats, in ascending order. The
        centers are also printed, rounded to one decimal, as ``[[a, b, ...]]``.
    """
    # KMeans expects a 2-D (n_samples, n_features) array; here n_features == 1.
    juice = np.array(juice_list).reshape(-1, 1)
    kmeans = KMeans(n_clusters=k, random_state=random_state)
    clusters = kmeans.fit(juice)
    cluster_centers = sorted(clusters.cluster_centers_.flatten().tolist())
    # Build the format from the actual number of centers so any k works
    # (a fixed three-slot template fails for k < 3 and truncates for k > 3).
    formatted = ', '.join('{:.1f}'.format(center) for center in cluster_centers)
    print('[[{}]]'.format(formatted))
    return cluster_centers

In [16]:
from scipy.stats import describe 
from statistics import median

def describe_stats(juice_list):
    """Print and return range, mean, median and standard deviation of the values.

    Args:
        juice_list: sequence of numbers to summarise.

    Returns:
        Tuple ``(minmax, mean, median, sd)`` where ``minmax`` and ``mean`` come
        from ``scipy.stats.describe``, ``median`` from ``statistics.median`` and
        ``sd`` is the square root of the variance reported by ``describe``.
    """
    summary = describe(juice_list)
    value_range, average = summary.minmax, summary.mean
    middle = median(juice_list)
    # describe() only exposes the variance; take its root for the deviation.
    spread = summary.variance ** 0.5
    print(f'minmax {value_range}, mean {average}, median {middle}, sd {spread}')
    return value_range, average, middle, spread

In [17]:
def process(coco_dict, include_crowdedness):
    """Cluster box aspect ratios and sizes; optionally summarise crowdedness.

    Args:
        coco_dict: COCO-style dict passed straight to ``get_info_from_coco``.
        include_crowdedness: when true, per-image annotation counts are also
            gathered and summarised with ``describe_stats``.

    Returns:
        ``(ars_clusters, size_clusters)``, followed by
        ``crowd_minmax, crowd_mean, crowd_med, crowd_sd`` when
        ``include_crowdedness`` is true.
    """
    info = get_info_from_coco(coco_dict, include_crowdedness=include_crowdedness)
    aspect_ratios, box_sizes = info[0], info[1]
    # Aspect ratios are clustered first so the printed lines keep their order.
    ars_clusters = get_clusters(aspect_ratios)
    size_clusters = get_clusters(box_sizes)
    if not include_crowdedness:
        return ars_clusters, size_clusters
    crowd_minmax, crowd_mean, crowd_med, crowd_sd = describe_stats(info[2])
    return (ars_clusters, size_clusters,
            crowd_minmax, crowd_mean, crowd_med, crowd_sd)

In [18]:
import json
def load_coco_jsons(json_list):
    """Load COCO-style JSON files and concatenate their images and annotations.

    Args:
        json_list: iterable of paths to JSON files, each holding an 'images'
            list and an 'annotations' list.

    Returns:
        Dict with two keys, 'images' and 'annotations', each the
        concatenation (in file order) of the corresponding lists. Other
        top-level keys of the input files are not carried over.

    Warns:
        UserWarning: when an image 'id' of one file was already seen in an
            earlier file. Entries are merged unchanged, so any per-image
            statistic keyed by 'image_id' would pool those images together.
    """
    import warnings  # local import: only needed for the id-collision notice

    all_coco_dict = {'images': [], 'annotations': []}
    seen_image_ids = set()
    colliding_files = []
    for p in json_list:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(p, 'r', encoding='utf-8') as f:
            coco_dict = json.load(f)
        file_image_ids = {img['id'] for img in coco_dict['images'] if 'id' in img}
        if not seen_image_ids.isdisjoint(file_image_ids):
            colliding_files.append(p)
        seen_image_ids |= file_image_ids
        all_coco_dict['images'].extend(coco_dict['images'])
        all_coco_dict['annotations'].extend(coco_dict['annotations'])
    if colliding_files:
        warnings.warn(
            'image ids overlap with earlier files in: {}; per-image statistics '
            'on the merged dict will mix distinct images'.format(
                ', '.join(str(path) for path in colliding_files)),
            stacklevel=2,
        )
    return all_coco_dict

In [20]:
# Annotation files that are merged for the combined run below: one file under
# ECP_day and one under ECP_night. The paths are absolute and specific to the
# machine the notebook was authored on.
all_jsons = [
    '/media/dh/HDD/persdet/EuroCity_Persons/data/ECP_day/day_all.json',
    '/media/dh/HDD/persdet/EuroCity_Persons/data/ECP_night/night_all.json'
]

In [21]:
# Merge every file listed in all_jsons, then cluster box aspect ratios and
# sizes. Crowdedness statistics are not requested for the merged set.
all_coco_dict = load_coco_jsons(all_jsons)
ars_clusters, size_clusters = process(all_coco_dict, include_crowdedness=False)
# process() already prints the rounded centers; this shows the unrounded aspect-ratio centers.
print(ars_clusters)

[[2.4, 3.3, 8.3]]
[[40.7, 114.8, 270.6]]
[2.3570537479387808, 3.3154328892114195, 8.26113462254997]


In [22]:
# ECP_day file on its own, including per-image annotation-count statistics.
# The bare process(...) call on the last line makes its return tuple the cell output.
coco_dict = load_coco_jsons(['/media/dh/HDD/persdet/EuroCity_Persons/data/ECP_day/day_all.json'])
process(coco_dict, include_crowdedness=True)

[[2.3, 3.3, 8.5]]
[[39.7, 112.7, 269.2]]
minmax (1, 68), mean 6.18631583429131, median 4, sd 6.1519086317902305


([2.3464482017587267, 3.3133079412540143, 8.467081401730354],
 [39.70070069899383, 112.66212355417952, 269.20293951200006],
 (1, 68),
 6.18631583429131,
 4,
 6.1519086317902305)

In [24]:
# ECP_night file on its own, including per-image annotation-count statistics.
# Rebinds the notebook-global coco_dict used by the previous cell.
coco_dict = load_coco_jsons(['/media/dh/HDD/persdet/EuroCity_Persons/data/ECP_night/night_all.json'])
process(coco_dict, include_crowdedness=True)

[[2.2, 2.9, 3.7]]
[[45.2, 118.9, 267.6]]
minmax (1, 48), mean 6.662465339047139, median 4, sd 6.55515105222178


([2.1565155047185782, 2.878856071602998, 3.7134438242302177],
 [45.21649622309776, 118.8915159434832, 267.63120321083375],
 (1, 48),
 6.662465339047139,
 4,
 6.55515105222178)

In [19]:
# Same analysis on instances_val2017.json, including per-image annotation-count statistics.
coco_dict = load_coco_jsons(['/media/dh/HDD/coco/annotations/instances_val2017.json'])
process(coco_dict, include_crowdedness=True)

[[0.9, 2.4, 5.2]]
[[41.3, 166.2, 371.6]]
minmax (1, 63), mean 7.427504038772213, median 4.0, sd 7.439433952242311


([0.8677967583504549, 2.377234663758716, 5.209625996250921],
 [41.26471535368576, 166.2468774628651, 371.55026392354694],
 (1, 63),
 7.427504038772213,
 4.0,
 7.439433952242311)

In [12]:
# Same analysis on instances_train2017.json; only the aspect-ratio and size clusters are computed.
coco_dict = load_coco_jsons(['/media/dh/HDD/coco/annotations/instances_train2017.json'])
process(coco_dict, include_crowdedness=False)

[[0.9, 2.5, 5.4]]
[[41.9, 170.0, 377.1]]


([0.8752519263080673, 2.4507500093109047, 5.443212345500358],
 [41.85086036093929, 170.02619322870692, 377.0692509236443])

In [27]:
# person_keypoints train2017 and val2017 annotation files, merged into one dict
# before clustering. Crowdedness statistics are not requested for the merged set.
coco_persons = [
    '/media/dh/HDD/coco/annotations/person_keypoints_train2017.json',
    '/media/dh/HDD/coco/annotations/person_keypoints_val2017.json'
]
# Rebinds the notebook-global all_coco_dict, ars_clusters and size_clusters set by an earlier cell.
all_coco_dict = load_coco_jsons(coco_persons)
ars_clusters, size_clusters = process(all_coco_dict, include_crowdedness=False)

[[1.2, 2.5, 4.4]]
[[41.7, 171.2, 355.6]]
