# kmeans.ipynb

This notebook generates the k-means++ centroids ($c_j$) of the HF Bayesian uncertainty maps.

Please run this notebook after training and running inference with the SR space generation model in [./flow](./flow).

This notebook assumes you already have the HF Bayesian uncertainty maps of the training set as `.npy` files, e.g., in `./flow/experiments/train_vmap`.

Please modify the path in the code if necessary.

In [None]:
import numpy as np
import glob
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import os

In [None]:
# Load every uncertainty map matched by the glob pattern and stack them
# into a single array with one map per entry along axis 0.
file_path = './flow/experiments/train_vmap/*.npy' # modify if necessary
file_list = sorted(glob.glob(file_path))  # sorted => deterministic stacking order
if not file_list:
    # Without this check np.stack([]) fails with the opaque message
    # "need at least one array to stack", which hides the real cause.
    raise FileNotFoundError(
        f"No .npy files match {file_path!r}. "
        "Generate the uncertainty maps first or modify file_path."
    )
vmaps = np.stack([np.load(f) for f in file_list])
print(vmaps.shape, vmaps.dtype)

In [None]:
# Zero the central 40x40 window (indices 140..179 on both spatial axes) of
# every map; this masks out the LF region. Note: `vmaps` is modified in place.
lf_window = slice(160 - 20, 160 + 20)
vmaps[:, lf_window, lf_window] = 0

# Flatten each 320x320 map into one row of length 102400.
vmaps_flatten = vmaps.reshape(-1, 320 * 320)

# Scale every row to unit L2 norm so clustering compares the shape of each
# map rather than its overall magnitude.
norms = np.linalg.norm(vmaps_flatten, axis=1)
vmaps_fn = vmaps_flatten / norms[:, None]

Now select the number of candidates $J$ (`n_clusters`).

In [None]:
# Cluster the unit-norm uncertainty maps into J groups and display the centroids.
# Outputs used by later cells:
#   centroids : (J, 320*320) array of cluster centers
#   labels    : cluster index assigned to every training map
#   ms        : centroids reshaped to (J, 320, 320) images
n_clusters = 3 # this is J in the paper
if n_clusters == 1:
    # With a single cluster the centroid is simply the mean of all maps.
    centroids = np.expand_dims( vmaps_fn.mean(axis=0), axis=0)
    # Every map belongs to cluster 0. Defining `labels` here keeps later
    # cells (e.g. the population count) usable for any J.
    labels = np.zeros(len(vmaps_fn), dtype=int)
else:
    kmeans = KMeans(n_clusters=n_clusters, random_state=1).fit(vmaps_fn)
    labels = kmeans.labels_
    centroids = kmeans.cluster_centers_
ms = centroids.reshape(-1,320,320)

if n_clusters == 1:
    # Single centroid: shown on a linear scale.
    plt.imshow(ms[0])
    plt.title('centroid 1 of 1')
else:
    fig, axs = plt.subplots(1, ms.shape[0], figsize=(10, 10))
    # Centroid entries that are exactly zero (e.g. inside the masked LF window)
    # give log(0) = -inf; silence the resulting divide-by-zero RuntimeWarning.
    with np.errstate(divide='ignore'):
        log_ms = np.log(ms)
    for i in range(len(ms)):
        axs[i].imshow(log_ms[i])
        axs[i].set_title('centroid %d of %d (log)' % (i + 1, n_clusters))
    plt.show()

In [None]:
# If you want to know the population of each cluster, uncomment the following lines.
# print(centroids.shape) # (J, 102400)
# print((labels==0).sum(),(labels==1).sum(),(labels==2).sum()) # assumes J = 3
# print(np.bincount(labels, minlength=n_clusters)) # works for any J

Now define the output directory for the centroids of the HF uncertainty.

In [None]:
# Save each centroid image as <DIR_MASKS>/vmap_kmeans_<j>of<J>.npy (j is 1-based).
DIR_MASKS = './mask'
# exist_ok=True makes the cell safe to re-run and avoids the isdir/mkdir race.
os.makedirs(DIR_MASKS, exist_ok=True)
for i in range(n_clusters):
    out_name = 'vmap_kmeans_' + str(i+1) + 'of' + str(n_clusters) + '.npy'
    # os.path.join inserts the path separator; plain string concatenation
    # wrote './maskvmap_kmeans_*.npy' next to the directory instead of inside it.
    np.save(os.path.join(DIR_MASKS, out_name), ms[i])