This notebook uses the K-means algorithm to compute k anchor box sizes by outputting the average width and height of the bounding boxes in each cluster.

In [1]:
import os

# Directory that holds the annotation files parsed in the next cell.
path = '../annotations_train'

# os.listdir returns entries in arbitrary order; sort them so the order of the
# collected boxes (and therefore the seeded K-means runs) is the same on every
# machine and every re-run. Sub-directories are skipped via os.path.isfile.
ann_files = [
    os.path.join(path, f)
    for f in sorted(os.listdir(path))
    if os.path.isfile(os.path.join(path, f))
]

In [2]:
import xml.etree.ElementTree as ET

# Widths and heights of every annotated bounding box, in the units used by the
# annotation files. Both lists are filled in parallel (same index = same box).
ws = []
hs = []
for file in ann_files:
    # Parse inside a context manager so the file handle is released as soon as
    # the tree is built, even if parsing raises. Previously every annotation
    # file stayed open for the lifetime of the kernel.
    with open(file) as in_file:
        tree = ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        xmlbox = obj.find('bndbox')
        # int(float(...)) accepts coordinates written with a decimal point and
        # truncates them to whole numbers.
        xn = int(float(xmlbox.find('xmin').text))
        xx = int(float(xmlbox.find('xmax').text))
        yn = int(float(xmlbox.find('ymin').text))
        yx = int(float(xmlbox.find('ymax').text))

        w = xx - xn
        h = yx - yn
        ws.append(w)
        hs.append(h)

In [3]:
import pandas as pd

# Put the collected widths and heights side by side, one row per box,
# with the width column first.
d = dict(w=ws, h=hs)
df = pd.DataFrame(d)

# Print a summary (row count, dtypes, non-null counts) as a sanity check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 2 columns):
w    800 non-null int64
h    800 non-null int64
dtypes: int64(2)
memory usage: 12.6 KB


In [4]:
# Number of grid cells along each axis.
n_grid = 13

# Overall width and height that the grid is laid over.
# NOTE(review): presumably the image size in pixels -- confirm against the data.
W, H = 800, 533

# Size of a single grid cell along each axis; the clustering cell divides the
# cluster centres by these to express them in grid-cell units.
grid_w, grid_h = W / n_grid, H / n_grid

In [5]:
import numpy as np
from sklearn import cluster

# Sweep the number of clusters from 1 to 10 and report, for each k:
#   * the cluster centres rescaled to grid-cell units,
#   * the width/height ratio of each centre,
#   * the mean distance from each box to its nearest centre.
for k in range(1, 11):
    model = cluster.KMeans(n_clusters=k, random_state=0)
    model.fit(df)

    # transform() gives the distance from every sample to every centre;
    # the row-wise minimum is the distance to the assigned (nearest) centre.
    dist = model.transform(df)
    min_dist = dist.min(axis=1)

    centers = model.cluster_centers_
    print('k = {}'.format(k))
    print(centers / np.array([grid_w, grid_h]))
    print(centers[:, 0] / centers[:, 1])
    print('avg dist = {}'.format(np.average(min_dist)))

k = 1
[[6.88670937 5.81518293]]
[1.77750632]
avg dist = 98.3352180436405
k = 2
[[8.20402848 6.80191417]
 [5.60191667 4.85281542]]
[1.81033434 1.73262928]
avg dist = 60.96775771899808
k = 3
[[4.98782143 4.27317073]
 [8.66702778 7.09566396]
 [6.63071094 5.74672256]]
[1.75195695 1.83332697 1.73181957]
avg dist = 49.24330466926945
k = 4
[[6.03461977 5.34628582]
 [4.68806481 3.86142728]
 [9.12093558 7.36360916]
 [7.54251569 6.37871211]]
[1.69418377 1.82225237 1.85913718 1.77478602]
avg dist = 42.43370298899423
k = 5
[[7.59069137 6.17332182]
 [4.58666667 3.76943923]
 [9.1336034  7.31074977]
 [5.80571078 8.03538977]
 [6.07170594 5.00739704]]
[1.84554736 1.82634664 1.8751802  1.08445423 1.81995848]
avg dist = 38.71412432156099
k = 6
[[8.22476744 6.71809416]
 [5.72353261 8.09703075]
 [5.87025246 4.89859426]
 [4.52478972 3.70731707]
 [9.50658854 7.53328252]
 [7.06237926 5.73794346]]
[1.83755488 1.06096523 1.79865591 1.83189867 1.89410138 1.84738558]
avg dist = 34.43185951444302
k = 7
[[4.5257023