In [1]:
import glob
import xml.etree.ElementTree as ET

import numpy as np

In [2]:
# Directory containing the Pascal VOC 2007 trainval annotation XML files.
# The path is relative to the notebook's working directory.
ANNOTATIONS_PATH = "./data/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations/"

In [3]:
def load_dataset(path):
    """
    Loads normalized bounding-box sizes from a directory of Pascal VOC XML annotations.

    Every `*.xml` file directly inside `path` is parsed. For each `object` element the
    bounding-box corners are divided by the image width/height declared in the same
    file, so the returned sizes are fractions of the image dimensions.

    Args:
        path: Directory that holds the annotation files (a trailing slash is accepted).

    Returns:
        Float numpy array of shape (n_boxes, 2) with one [width, height] row per
        annotated object. The shape is (0, 2) when no boxes are found.
    """
    dataset = []
    # glob returns files in arbitrary order; sorting keeps the row order stable
    # between runs and machines.
    for xml_file in sorted(glob.glob("{}/*.xml".format(path))):
        tree = ET.parse(xml_file)

        height = int(tree.findtext("./size/height"))
        width = int(tree.findtext("./size/width"))

        for obj in tree.iter("object"):
            xmin = int(obj.findtext("bndbox/xmin")) / width
            ymin = int(obj.findtext("bndbox/ymin")) / height
            xmax = int(obj.findtext("bndbox/xmax")) / width
            ymax = int(obj.findtext("bndbox/ymax")) / height

            dataset.append([xmax - xmin, ymax - ymin])

    # reshape keeps the result two-dimensional even when no box was collected.
    return np.array(dataset, dtype=float).reshape(-1, 2)

In [4]:
# One [width, height] row per annotated object, normalized by the image size.
data = load_dataset(ANNOTATIONS_PATH)

In [5]:
# Sanity check: (number of boxes, 2).
print(data.shape)

(15662, 2)


In [6]:
def iou(box, clusters):
    """
    Computes the Intersection over Union (IoU) of a single box against each of k clusters.

    Only sizes are compared: the overlap is the smaller of the two widths times the
    smaller of the two heights.

    Args:
        box: Sequence whose first two entries are read as (width, height).
        clusters: 2-D numpy array; columns 0 and 1 are read as width and height.

    Returns:
        Numpy array with one IoU value per cluster.

    Raises:
        ValueError: If any overlap width or height is exactly zero.
    """
    cluster_w, cluster_h = clusters[:, 0], clusters[:, 1]

    overlap_w = np.minimum(cluster_w, box[0])
    overlap_h = np.minimum(cluster_h, box[1])
    if np.any(overlap_w == 0) or np.any(overlap_h == 0):
        raise ValueError("Box has no area")

    overlap_area = overlap_w * overlap_h
    union_area = box[0] * box[1] + cluster_w * cluster_h - overlap_area

    return overlap_area / union_area

In [7]:
def avg_iou(boxes, clusters):
    """
    Returns the mean, over all boxes, of each box's best IoU with any of the k clusters.

    Args:
        boxes: 2-D numpy array with one box per row.
        clusters: 2-D numpy array with one cluster per row.
    """
    best_per_box = []
    for idx in range(boxes.shape[0]):
        # Score of a box = IoU with the cluster that fits it best.
        best_per_box.append(np.max(iou(boxes[idx], clusters)))
    return np.mean(best_per_box)

In [8]:
def translate_boxes(boxes):
    """
    Translates all the boxes to the origin.

    Args:
        boxes: 2-D numpy array with at least four columns. Columns (0, 1) and (2, 3)
            are treated as two corner points of each box (presumably
            x1, y1, x2, y2 - confirm against the caller).

    Returns:
        A new array without the first two columns; its first two columns hold
        |col2 - col0| and |col3 - col1|. Any further columns are carried over
        unchanged. The input array is not modified.
    """
    new_boxes = boxes.copy()
    # Whole-column arithmetic replaces the per-row Python loop.
    new_boxes[:, 2] = np.abs(new_boxes[:, 2] - new_boxes[:, 0])
    new_boxes[:, 3] = np.abs(new_boxes[:, 3] - new_boxes[:, 1])
    return np.delete(new_boxes, [0, 1], axis=1)

In [9]:
def kmeans(boxes, k, dist=np.median, seed=None):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.

    Each box is assigned to the cluster with the smallest `1 - IoU` distance, then
    every cluster centre is recomputed with `dist`; this repeats until the
    assignment no longer changes.

    Args:
        boxes: 2-D numpy array with one box per row.
        k: Number of clusters; must satisfy 1 <= k <= number of boxes.
        dist: Callable invoked as `dist(members, axis=0)` to compute a cluster centre
            from its member boxes.
        seed: Optional seed for the choice of the initial centroids. With the default
            `None` the global `np.random` state is used.

    Returns:
        Numpy array with k rows, one centroid per cluster.

    Raises:
        ValueError: If `k` is not between 1 and the number of boxes.
    """
    rows = boxes.shape[0]
    if not 0 < k <= rows:
        raise ValueError("k must be between 1 and the number of boxes ({}), got {}".format(rows, k))

    rng = np.random if seed is None else np.random.RandomState(seed)

    distances = np.empty((rows, k))
    # -1 is never a valid cluster index, so the first pass can never be mistaken
    # for convergence (an all-zero start would match an all-zero first assignment,
    # e.g. whenever k == 1, and return the untouched initial centroids).
    last_clusters = np.full(rows, -1)

    # initiate centroids from k distinct rows (fancy indexing yields a copy)
    clusters = boxes[rng.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # A cluster can lose all its members (e.g. duplicate initial centroids);
            # keep its previous centre instead of reducing an empty slice to NaN.
            if members.shape[0] > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters

In [10]:
# Cluster the box sizes into 5 groups. kmeans picks its initial centroids with
# np.random, so the centroids can differ from run to run.
out_5 = kmeans(data, k=5)

In [11]:
# Width / height ratio of every centroid, rounded to two decimals.
ratios = np.around(out_5[:, 0] / out_5[:, 1], decimals=2).tolist()

# The "Accuracy" figure is the mean best IoU between each box and the centroids.
print("Accuracy: {:.2f}%".format(100 * avg_iou(data, out_5)))
print("Boxes:\n {}".format(out_5))
print("Ratios:\n {}".format(sorted(ratios)))

Accuracy: 60.07%
Boxes:
 [[0.342      0.49866667]
 [0.162      0.3042328 ]
 [0.09       0.14418605]
 [0.75466667 0.76276276]
 [0.038      0.06933333]]
Ratios:
 [0.53, 0.55, 0.62, 0.69, 0.99]


In [None]:
# Same clustering with 9 groups. kmeans picks its initial centroids with
# np.random, so the centroids can differ from run to run.
out_9 = kmeans(data, k=9)

In [22]:
# Width / height ratio of every centroid, rounded to two decimals.
ratios = np.around(out_9[:, 0] / out_9[:, 1], decimals=2).tolist()

# The "Accuracy" figure is the mean best IoU between each box and the centroids.
print("Accuracy: {:.2f}%".format(100 * avg_iou(data, out_9)))
print("Boxes:\n {}".format(out_9))
print("Ratios:\n {}".format(sorted(ratios)))

Accuracy: 66.99%
Boxes:
 [[0.238      0.23466667]
 [0.122      0.10933333]
 [0.312      0.65066667]
 [0.092      0.25866667]
 [0.052      0.13855422]
 [0.52533333 0.40533333]
 [0.038      0.05866667]
 [0.78       0.80225   ]
 [0.168      0.42133333]]
Ratios:
 [0.36, 0.38, 0.4, 0.48, 0.65, 0.97, 1.01, 1.12, 1.3]
