In [9]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [10]:
# Read the sample points from the CSV file into a DataFrame.
df = pd.read_csv("data/k-means_points.csv")
# NumPy array of the same table; the functions below read its columns 0 and 1
# as the x and y coordinates of each point.
dat = df.to_numpy()

In [11]:
def Voronoi(cents, data, iter=None, basic=False):
    """Plot the data points, optionally over the Voronoi cells of ``cents``.

    The plot is drawn on matplotlib figure 1 (which is cleared first) and
    saved to ``imgs/k_means_iter{iter}.png``. The ``imgs`` directory is
    created if it does not exist yet.

    Parameters
    ----------
    cents : sequence of (x, y) pairs
        Centroid coordinates. Ignored when ``basic`` is true, so any
        placeholder value may be passed in that case.
    data : numpy.ndarray
        2-D array whose columns 0 and 1 hold the x and y coordinates.
    iter : object, optional
        Value substituted into the output file name. The name shadows the
        built-in ``iter``; it is kept so existing keyword calls still work.
    basic : bool, optional
        If true, draw only the black scatter of ``data``: no coloured cells,
        no centroid markers, and the axis ticks are left as they are.

    Returns
    -------
    None
    """
    pad = 0.5  # margin added around the bounding box of the data
    x_min, x_max = data[:, 0].min() - pad, data[:, 0].max() + pad
    y_min, y_max = data[:, 1].min() - pad, data[:, 1].max() + pad

    plt.figure(1)
    plt.clf()

    if not basic:
        centroids = np.asarray(cents, dtype=float)
        incr = 0.01  # grid spacing used to rasterise the cells
        xx, yy = np.meshgrid(
            np.arange(x_min, x_max, incr),
            np.arange(y_min, y_max, incr),
        )
        # One vectorised pass per centroid instead of a Python loop over every
        # grid point. Squared distances are enough for picking the nearest
        # centroid, and argmin returns the first index on ties, like
        # list.index(min(...)) did.
        sq_dists = np.stack(
            [(xx - cx) ** 2 + (yy - cy) ** 2 for cx, cy in centroids]
        )
        regions = sq_dists.argmin(axis=0)
        plt.imshow(
            regions,
            interpolation="nearest",
            extent=(xx.min(), xx.max(), yy.min(), yy.max()),
            cmap=plt.cm.Paired,
            aspect="auto",
            origin="lower",
        )
        plt.tick_params(
            left=False,
            right=False,
            labelleft=False,
            labelbottom=False,
            bottom=False,
        )

    # Data points go on top of the cell image; centroid markers go on top of both.
    plt.scatter(data[:, 0], data[:, 1], c="black")
    if not basic:
        plt.scatter(
            centroids[:, 0],
            centroids[:, 1],
            c="white",
            marker="x",
            linewidths=2,
            s=100,
        )

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs("imgs", exist_ok=True)
    plt.savefig("imgs/k_means_iter{0}.png".format(iter))

def BasicKMeansPlots(k, data):
    """Run basic (Lloyd's) k-means on ``data``, plotting before every iteration.

    Initial centroids are ``k`` distinct data points drawn with NumPy's global
    random generator (``np.random.randint``), so ``np.random.seed`` controls
    the result. At the start of each iteration ``Voronoi(cents, data,
    iter=count)`` is called with the current centroids. Iteration stops once
    an assignment step leaves every label unchanged.

    Parameters
    ----------
    k : int
        Number of clusters. Must be at least 1 and at most the number of
        distinct points in ``data``.
    data : numpy.ndarray
        2-D array with one point per row.

    Returns
    -------
    list of numpy.ndarray
        The ``k`` centroids after the last update step.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or larger than the number of distinct
        points. Without this check the initial sampling loop could never
        collect ``k`` different centroids and would spin forever.
    """
    n = len(data)

    # Count distinct points the same way the sampling loop below does (as
    # tuples in a set) so the check and the loop cannot disagree.
    n_distinct = len({tuple(p) for p in data})
    if not 1 <= k <= n_distinct:
        raise ValueError(
            "k must be between 1 and the number of distinct data points "
            "({0}); got {1}".format(n_distinct, k)
        )

    # Randomly pick k distinct data points to be the initial centroids.
    seeds = set()
    while len(seeds) < k:
        seeds.add(tuple(data[np.random.randint(0, n)]))
    cents = [np.asarray(p, dtype=float) for p in seeds]

    points = np.asarray(data, dtype=float)
    # -1 can never be a real label, so the first comparison below cannot
    # report convergence by accident.
    labels = np.full(n, -1)
    count = 0

    while True:
        Voronoi(cents, data, iter=count)
        prev_labels = labels

        # Assignment step: nearest centroid for every point (first index on ties).
        centroid_arr = np.stack(cents)
        sq_dists = ((points[:, None, :] - centroid_arr[None, :, :]) ** 2).sum(axis=2)
        labels = sq_dists.argmin(axis=1)

        # Update step: move each centroid to the mean of its members.
        for j in range(k):
            members = points[labels == j]
            # A cluster that lost all its points keeps its previous centroid
            # instead of dividing by zero.
            if len(members):
                cents[j] = members.mean(axis=0)

        count += 1
        if np.array_equal(labels, prev_labels):
            break
    return cents

In [12]:
# Seed NumPy's global generator so the random choice of starting centroids,
# and therefore the saved iteration plots, is the same on every fresh run.
np.random.seed(0)
BasicKMeansPlots(4, dat)

# Plain scatter of the raw points, saved as iteration -1. The centroid argument
# is ignored when basic=True, so None is passed as the placeholder.
Voronoi(None, dat, iter=-1, basic=True)