In [1]:
import numpy as np
from sklearn.cluster import KMeans
import os

In [2]:
def scan_files(directory, prefix=None, postfix=None):
    """Recursively list the files under ``directory``, optionally filtered by name.

    Args:
        directory: Root directory handed to ``os.walk``.
        prefix: If truthy, keep only files whose name starts with it.
        postfix: If truthy, keep only files whose name ends with it.

    Returns:
        A list of paths (the walked directory joined with the file name), in
        the order ``os.walk`` yields them. When both filters are supplied a
        file must satisfy both; when neither is supplied every file is kept.
    """
    files_list = []
    for root, _sub_dirs, files in os.walk(directory):
        for file_name in files:
            # The filters are independent guards so that passing both narrows
            # the result instead of the prefix being silently ignored.
            if prefix and not file_name.startswith(prefix):
                continue
            if postfix and not file_name.endswith(postfix):
                continue
            files_list.append(os.path.join(root, file_name))
    return files_list


def collect_sizes(txt_fname, size):
    """Read one whitespace-separated text file and return its scaled size pairs.

    Every non-blank line is split on whitespace; the fields at index 3 and 4
    are parsed as floats, multiplied by ``size`` and truncated to ``int``.
    Presumably these are the normalised width and height of a YOLO-style
    annotation line -- TODO confirm against the label format.

    Args:
        txt_fname: Path of the text file to read.
        size: Factor each of the two fields is multiplied by.

    Returns:
        A list of ``[int, int]`` pairs, one per non-blank line.

    Raises:
        IndexError: A non-blank line has fewer than five fields.
        ValueError: Field 3 or 4 is not a valid float.
    """
    sizes = []
    with open(txt_fname, 'r') as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank lines (e.g. a trailing empty line) hold no fields;
                # indexing them would raise IndexError.
                continue
            sizes.append([int(float(tokens[3]) * size), int(float(tokens[4]) * size)])
    return sizes

def main(data_path, size=608):
    """Collect the size pairs from every ``.txt`` file found under ``data_path``.

    Args:
        data_path: Directory that is searched recursively via ``scan_files``.
        size: Scale factor forwarded to ``collect_sizes`` (defaults to 608).

    Returns:
        One flat list holding the pairs of all files, in the order the files
        were found.
    """
    return [
        pair
        for label_file in scan_files(data_path, postfix=".txt")
        for pair in collect_sizes(label_file, size)
    ]

In [3]:
# Root directory that is searched recursively for ".txt" files.
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# that directory exists; consider making it configurable.
data_path = "/home/cnn/Documents/batch6_1216/train_hls"
# Gather every size pair under data_path, using main()'s default size of 608.
sizes = main(data_path)

In [4]:
# One row per collected size pair.
X = np.array(sizes)
# Cluster the pairs into 9 groups with a pinned random_state so reruns use the
# same seed. Presumably one cluster per anchor box -- TODO confirm.
kmeans = KMeans(n_clusters = 9, random_state= 1).fit(X)

In [5]:
# Keep the fitted cluster centres as a plain list (one entry per centre row)
# and echo each of them.
centers = list(kmeans.cluster_centers_)
for center in centers:
    print("du", center)

du [140.17495512  92.32626431]
du [12.82156588 12.80892892]
du [521.15207101 262.14970414]
du [126.20308345 163.62991447]
du [31.32352743 32.25011069]
du [65.86080909 62.26409286]
du [ 88.58248586 100.39295845]
du [227.01268849 207.28811144]
du [322.93981083 562.21410146]


In [6]:
# Map each centre's truncated area (first component times second) to the
# centre itself. Centres whose truncated areas are equal share a key, so the
# later one replaces the earlier one in this mapping.
tosort = dict((int(center[0] * center[1]), center) for center in centers)
print(tosort)

{47057: array([227.01268849, 207.28811144]), 1010: array([31.32352743, 32.25011069]), 164: array([12.82156588, 12.80892892]), 181561: array([322.93981083, 562.21410146]), 4100: array([65.86080909, 62.26409286]), 20650: array([126.20308345, 163.62991447]), 136619: array([521.15207101, 262.14970414]), 12941: array([140.17495512,  92.32626431]), 8893: array([ 88.58248586, 100.39295845])}


In [7]:
# Order the centres by truncated area, smallest first. The (area, centre)
# pairs are built straight from `centers` rather than from the area-keyed dict
# `tosort`: a dict keeps a single entry per key, so two centres with the same
# truncated area would collapse into one and an anchor would be lost.
hassorted = sorted(
    ((int(center[0] * center[1]), center) for center in centers),
    # Compare on the area only; plain tuple comparison would fall through to
    # comparing the arrays on a tie, which raises.
    key=lambda pair: pair[0],
)
print(hassorted)
# Emit the centres as "w,h" integer pairs separated by ",  ".
print(",  ".join(["{},{}".format(int(value[1][0]),int(value[1][1])) for value in hassorted]))

[(164, array([12.82156588, 12.80892892])), (1010, array([31.32352743, 32.25011069])), (4100, array([65.86080909, 62.26409286])), (8893, array([ 88.58248586, 100.39295845])), (12941, array([140.17495512,  92.32626431])), (20650, array([126.20308345, 163.62991447])), (47057, array([227.01268849, 207.28811144])), (136619, array([521.15207101, 262.14970414])), (181561, array([322.93981083, 562.21410146]))]
12,12,  31,32,  65,62,  88,100,  140,92,  126,163,  227,207,  521,262,  322,562
