In [33]:
import numpy as np

# Network input dimensions used when scaling anchors. write_anchors_to_file
# multiplies each centroid's width/height by (value / 32) before writing it out.
# NOTE(review): presumably these mirror the width=/height= entries of the
# network .cfg file, as the names suggest — confirm against the cfg in use.
width_in_cfg_file = 416.
height_in_cfg_file = 416.

def IOU(x, centroids):
    """Intersection over union of one (w, h) box against every centroid.

    Only widths and heights are compared, i.e. the boxes are treated as if
    they were aligned on a common corner; positions play no role.

    Parameters
    ----------
    x : pair (w, h)
        Dimensions of the box being scored.
    centroids : iterable of pairs (c_w, c_h)
        Dimensions of the cluster centres.

    Returns
    -------
    numpy.ndarray
        One similarity value per centroid, in the order the centroids
        were given (shape ``(k,)`` for ``k`` centroids).
    """
    def _overlap_ratio(centroid):
        cen_w, cen_h = centroid
        box_w, box_h = x
        if cen_w >= box_w:
            if cen_h >= box_h:
                # the centroid fully contains the box
                return box_w * box_h / (cen_w * cen_h)
            if cen_h <= box_h:
                # centroid is wider but shorter than the box
                return box_w * cen_h / (box_w * box_h + (cen_w - box_w) * cen_h)
        if cen_w <= box_w and cen_h >= box_h:
            # centroid is narrower but taller than the box
            return cen_w * box_h / (box_w * box_h + cen_w * (cen_h - box_h))
        # remaining case: the box is bigger than the centroid in both dimensions
        return (cen_w * cen_h) / (box_w * box_h)

    return np.array([_overlap_ratio(centroid) for centroid in centroids])


def avg_IOU(X,centroids):
    """Average best-match IOU of the boxes in X against the centroids.

    For every row of the 2-D array X the IOU with each centroid is computed
    via IOU() and the largest value is kept; the function returns the mean
    of those per-row maxima.
    """
    row_count, _ = X.shape
    best_per_row = [max(IOU(X[row], centroids)) for row in range(row_count)]

    # accumulate left to right starting from a float zero
    total = 0.
    for best in best_per_row:
        total += best
    return total / row_count


def write_anchors_to_file(centroids,X,anchor_file):
    """Scale the centroids into anchors, print them and save them to a file.

    Parameters
    ----------
    centroids : numpy.ndarray, shape (k, 2)
        Cluster centres as (width, height) rows. Not modified.
    X : numpy.ndarray, shape (N, 2)
        The boxes the centroids were fitted to; only used to compute the
        average IOU written on the second line of the file.
    anchor_file : str
        Path of the output file. It is created or overwritten.

    The file receives two lines: the anchors sorted by ascending width,
    formatted as ``w,h, w,h, ...`` with two decimals and no trailing comma,
    followed by the value of ``avg_IOU(X, centroids)``.
    """
    # Work on a copy so the caller's centroids stay in their original units.
    # Width/height are multiplied by (cfg size / 32); the 32 is hard-coded
    # here — presumably the network's downsampling stride (TODO confirm).
    anchors = centroids.copy()
    anchors[:, 0] *= width_in_cfg_file / 32.
    anchors[:, 1] *= height_in_cfg_file / 32.

    anchors = anchors[np.argsort(anchors[:, 0])]
    print('Anchors = ', anchors)

    # The context manager guarantees the file is flushed and closed even if
    # avg_IOU raises. Joining the formatted pairs avoids special-casing the
    # last anchor (and formatting a 1-element array as a scalar).
    with open(anchor_file, 'w') as f:
        f.write(', '.join('%0.2f,%0.2f' % (w, h) for w, h in anchors) + '\n')
        f.write('%f\n' % (avg_IOU(X, centroids)))
    print()

    
def kmeans(X,centroids,eps,anchor_file):
    """Cluster (w, h) boxes with k-means using ``1 - IOU`` as the distance.

    Parameters
    ----------
    X : numpy.ndarray, shape (N, 2)
        Box dimensions to cluster.
    centroids : numpy.ndarray, shape (k, 2)
        Initial cluster centres. Updated IN PLACE on every iteration, so the
        caller's array holds the final centroids when the function returns.
    eps : float
        Accepted for interface compatibility; not used. Iteration stops when
        the assignments no longer change between two consecutive passes.
    anchor_file : str
        Output path forwarded to write_anchors_to_file on convergence.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If X has no rows or centroids has no rows.
    """
    N = X.shape[0]
    k, dim = centroids.shape
    if N == 0 or k == 0:
        raise ValueError("kmeans needs at least one sample and one centroid")

    prev_assignments = np.full(N, -1)  # -1 never matches a real cluster index
    iteration = 0
    old_D = np.zeros((N, k))

    while True:
        iteration += 1
        # distance of every sample to every centroid, shape (N, k)
        D = np.array([1 - IOU(X[i], centroids) for i in range(N)])

        print("iter {}: dists = {}".format(iteration,np.sum(np.abs(old_D-D))))

        # assign each sample to its nearest centroid, shape (N,)
        assignments = np.argmin(D, axis=1)

        if (assignments == prev_assignments).all():
            print("Centroids = ",centroids)
            write_anchors_to_file(centroids,X,anchor_file)
            return

        # Per-cluster sums and sizes. np.add.at accumulates unbuffered, so
        # repeated indices in `assignments` are all added (unlike `a[idx] += b`).
        centroid_sums = np.zeros((k, dim), dtype=float)
        np.add.at(centroid_sums, assignments, X)
        counts = np.bincount(assignments, minlength=k)

        for j in range(k):
            # An empty cluster keeps its previous centroid; dividing by a zero
            # count would produce NaN, and a NaN centroid then wins every
            # argmin and breaks the clustering.
            if counts[j] > 0:
                centroids[j] = centroid_sums[j] / counts[j]

        prev_assignments = assignments
        old_D = D

In [35]:
import os

num_clusters = 5
# Fixed seed so the random centroid initialisation is reproducible on re-run.
random_seed = 0

pwd = os.getcwd()
output_dir = os.path.join(pwd, 'anchor')
filelist = os.path.join(pwd, 'train_list.txt')
anchor_file = os.path.join(output_dir, 'anchors{}.txt'.format(num_clusters))

# Creates missing parents too and is a no-op when the directory already exists.
os.makedirs(output_dir, exist_ok=True)

# One image path per line; surrounding whitespace and blank lines are dropped.
with open(filelist) as f:
    lines = [line.strip() for line in f if line.strip()]

# a list of bbox normalized w, h
annotation_dims = []
for image_path in lines:
    # The label file sits next to the image and has the same stem; swap only
    # the extension instead of replacing every '.jpg' substring in the path.
    label_path = os.path.splitext(image_path)[0] + '.txt'
    with open(label_path) as f2:
        for label_line in f2:
            fields = label_line.split()
            if not fields:
                continue  # tolerate blank lines in the label file
            # Fields from index 3 onward must be exactly (w, h).
            # NOTE(review): assumes 5-column "class x y w h" label lines — confirm.
            w, h = fields[3:]
            annotation_dims.append([w, h])
annotation_dims = np.array(annotation_dims).astype(float)

if len(annotation_dims) < num_clusters:
    raise ValueError('need at least {} boxes to initialise {} clusters, found {}'.format(
        num_clusters, num_clusters, len(annotation_dims)))

# Randomly pick DISTINCT boxes as the initial centroids. Sampling with
# replacement could select the same box twice, giving duplicate centroids
# and therefore clusters that never receive any sample.
rng = np.random.RandomState(random_seed)
indices = rng.choice(len(annotation_dims), size=num_clusters, replace=False)
centroids = annotation_dims[indices]  # fancy indexing copies; kmeans updates this array in place
eps = 0.005

# kmeans with standard expectation-maximization; distance metric is customized
kmeans(annotation_dims, centroids, eps, anchor_file)

print('centroids.shape', centroids.shape)


iter 1: dists = 43.38348313998492
iter 2: dists = 3.208040763680277
iter 3: dists = 1.4467973205071707
iter 4: dists = 1.2595827121477354
iter 5: dists = 0.7616529709335693
iter 6: dists = 0.7964806104079969
iter 7: dists = 0.9021189805007712
iter 8: dists = 0.311434288526269
iter 9: dists = 0.6397421200427144
Centroids =  [[ 0.67669206  0.67680459]
 [ 0.7569162   0.75712671]
 [ 0.69275869  0.6928935 ]
 [ 0.73028852  0.7307415 ]
 [ 0.71164821  0.71179706]]
Anchors =  [[ 8.79699674  8.79845962]
 [ 9.00586293  9.00761556]
 [ 9.25142672  9.25336178]
 [ 9.49375075  9.49963947]
 [ 9.83991054  9.84264725]]

centroids.shape (5, 2)
