In [1]:
import numpy as np
from sklearn.cluster import KMeans
import os

In [2]:
def scan_files(directory, prefix=None, postfix=None):
    """Recursively list the files under ``directory``.

    Args:
        directory: Root directory handed to ``os.walk``.
        prefix: If truthy, keep only files whose base name starts with it.
        postfix: If truthy, keep only files whose base name ends with it.

    Returns:
        A list of paths (``os.path.join(root, name)``) for every file that
        satisfies all the filters that were supplied. With no filter, every
        file is returned. Names are visited in sorted order so the result
        does not depend on the filesystem's listing order.
    """
    files_list = []
    for root, sub_dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend in a deterministic order.
        sub_dirs.sort()
        for file_name in sorted(files):
            # Both filters are honoured when both are given; previously the
            # prefix was silently ignored whenever a postfix was supplied.
            if prefix and not file_name.startswith(prefix):
                continue
            if postfix and not file_name.endswith(postfix):
                continue
            files_list.append(os.path.join(root, file_name))
    return files_list


def collect_sizes(txt_fname, size):
    """Read one whitespace-separated label file and return scaled size pairs.

    For every non-blank line, the 4th and 5th tokens are parsed as floats,
    multiplied by ``size`` and truncated to ``int``.
    NOTE(review): presumably these are the normalised box width and height of
    a YOLO-style ``class x y w h`` label line - confirm against the dataset.

    Args:
        txt_fname: Path of the label file to read.
        size: Scale factor applied to both values.

    Returns:
        A list of ``[int, int]`` pairs, one per non-blank line, in file order.

    Raises:
        IndexError: If a non-blank line has fewer than five tokens.
        ValueError: If the 4th or 5th token is not a valid float.
    """
    sizes = []
    with open(txt_fname, 'r') as f:
        # Iterate lazily instead of loading the whole file with readlines().
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank lines (e.g. a trailing empty line) carry no box and
                # used to crash with IndexError; skip them.
                continue
            sizes.append([int(float(tokens[3]) * size), int(float(tokens[4]) * size)])
    return sizes

def main(data_path, size=608):
    """Gather the scaled size pairs from every ``.txt`` file under ``data_path``.

    Args:
        data_path: Directory searched recursively via ``scan_files``.
        size: Scale factor forwarded to ``collect_sizes`` (defaults to 608).

    Returns:
        One flat list holding the pairs of all label files, concatenated in
        the order the files were found.
    """
    collected = []
    for label_file in scan_files(data_path, postfix=".txt"):
        collected.extend(collect_sizes(label_file, size))
    return collected

In [3]:
# Root directory of the label files. It can be overridden without editing the
# notebook by setting the ANCHOR_DATA_PATH environment variable; when the
# variable is unset the original location is used.
data_path = os.environ.get("ANCHOR_DATA_PATH", "/home/ssd0/Develop/liyu/batch6_neg_608/train")
# Scaled size pairs collected from every .txt file found under data_path.
sizes = main(data_path)

In [4]:
# One row per collected size pair.
X = np.array(sizes)
# Cluster the pairs into 9 groups; the fixed random_state keeps runs repeatable.
kmeans = KMeans(n_clusters=9, random_state=1)
kmeans.fit(X)

In [5]:
# Keep the fitted cluster centres in a plain list and echo each one.
centers = list(kmeans.cluster_centers_)
for center in centers:
    print("du", center)

du [213.80019964 268.88452785]
du [44.45136256 45.32691747]
du [471.90860044 349.92002539]
du [179.26771081 187.89928628]
du [251.26895833 199.33808333]
du [300.49402692 272.63864106]
du [122.15934141 119.00551095]
du [550.04358974 574.90769231]
du [312.08120096 401.42766974]


In [6]:
# Index every centre by the integer-truncated product of its two components
# so the centres can be ordered by that value afterwards.
# NOTE: two centres with the same truncated product share a key, in which
# case the later one replaces the earlier one.
tosort = {}
for center in centers:
    area = int(center[0] * center[1])
    tosort[area] = center
print(tosort)

{316224: array([550.04358974, 574.90769231]), 33684: array([179.26771081, 187.89928628]), 125278: array([312.08120096, 401.42766974]), 81926: array([300.49402692, 272.63864106]), 50087: array([251.26895833, 199.33808333]), 14537: array([122.15934141, 119.00551095]), 165130: array([471.90860044, 349.92002539]), 2014: array([44.45136256, 45.32691747]), 57487: array([213.80019964, 268.88452785])}


In [7]:
# Order the (key, centre) pairs by ascending key; dict keys are unique, so
# sorting on the key alone gives the same order as sorting the tuples.
hassorted = sorted(tosort.items(), key=lambda item: item[0])
print(hassorted)
# Render each centre as "a,b" with both components truncated to int, then
# join the pairs into a single line.
anchor_pairs = []
for _area, center in hassorted:
    anchor_pairs.append("{},{}".format(int(center[0]), int(center[1])))
print(",  ".join(anchor_pairs))

[(2014, array([44.45136256, 45.32691747])), (14537, array([122.15934141, 119.00551095])), (33684, array([179.26771081, 187.89928628])), (50087, array([251.26895833, 199.33808333])), (57487, array([213.80019964, 268.88452785])), (81926, array([300.49402692, 272.63864106])), (125278, array([312.08120096, 401.42766974])), (165130, array([471.90860044, 349.92002539])), (316224, array([550.04358974, 574.90769231]))]
44,45,  122,119,  179,187,  251,199,  213,268,  300,272,  312,401,  471,349,  550,574
