In [1]:
import numpy as np
import matplotlib.pyplot as plt 

N = 100
M = int(1e7)
#voronoi = np.load("voronoi.npy")

### Kmeans for the first initiation

In [2]:
from sklearn.cluster import KMeans
y = np.random.normal(size=(M, 4))
kmeans = KMeans(n_clusters=100,
                init='k-means++',
                n_init=10,
                max_iter=300,
                tol=0.0001,
                precompute_distances=True,
                verbose=0,
                random_state=None,
                copy_x=True,
                n_jobs=-1,
                algorithm='auto'
               ).fit(y)
np.save("voronoi", kmeans.cluster_centers_)



KeyboardInterrupt: 

### Competitive Learning Vector Quantization (CLVQ) for a approximate

In [6]:
M = int(1e7)
def CLVQ(voronoi):
    y = np.random.normal(size=(M, 4))
    for j in range(M):
        index = np.argmin(np.linalg.norm(voronoi - y[j], axis=1))
        voronoi[index] -= (voronoi[index] - y[j]) / (j + 1)
    return voronoi

In [10]:
def proba_CLVQ(voronoi, y):
    sommes_y = np.copy(voronoi)
    sommes_ind = np.ones(N)
    # The sum of each cellule
    indice = Parallel(n_jobs=-1)(delayed(nearest_neighbor)(voronoi, y[j]) for j in range(M))
    for j in range(M):
        sommes_y[indice[j]] += y[j]
        sommes_ind[indice[j]] += 1
    # Take the average
    new_voronoi = (sommes_y.transpose() / sommes_ind).transpose()
    return new_voronoi, sommes_ind / M
voronoi = np.load("voronoi_0.npy")
_, proba = proba_CLVQ(voronoi, y)
np.save("proba_0", proba)

In [14]:
%%time
voronoi = np.copy(kmeans.cluster_centers_)
voronoi = CLVQ(voronoi)
np.save("voronoi_0", voronoi)

CPU times: user 2min 7s, sys: 4.13 s, total: 2min 11s
Wall time: 2min 7s


### Lloyd's algorithm for refine the centers

In [25]:
# Parallel
from joblib import Parallel, delayed

def nearest_neighbor(voronoi, elem):
    return np.argmin(np.linalg.norm(voronoi - elem, axis=1))

def iteration_Lloyd(voronoi, y):
    sommes_y = np.copy(voronoi)
    sommes_ind = np.ones(N)
    # The sum of each cellule
    indice = Parallel(n_jobs=-1)(delayed(nearest_neighbor)(voronoi, y[j]) for j in range(M))
    distortion = 0.
    for j in range(M):
        sommes_y[indice[j]] += y[j]
        sommes_ind[indice[j]] += 1
    # Take the average
    sommes_ind /= M
    new_voronoi = (sommes_y.transpose() / sommes_ind).transpose()
    for j in range(M):
        distortion += np.linalg.norm(voronoi[indice[j]] - y[j]) ** 2 * sommes_ind[indice[j]]
    print(distortion)
    return new_voronoi, sommes_ind

In [26]:
M = int(1e7)
from tqdm.notebook import tqdm
last = 79
voronoi = np.load("voronoi_{}.npy".format(last))
for i in range(last + 21, 5000):
    y = np.random.normal(size=(M, 4))
    voronoi, proba = iteration_Lloyd(voronoi, y)
    np.save("voronoi_{}".format(i), voronoi)
    np.save("proba_{}".format(i), proba)

59201.32275620663


KeyboardInterrupt: 

In [3]:
import numpy as np
i = 50
with open("{}_4_nopti".format(i), "r") as f:
    lines = f.readlines()
voronoi = []
proba = []
for line in lines:
    values = list(map(float, line.split("  ")[1:6]))
    if values[0] == 0:
        break
    proba.append(values[0])
    voronoi.append(values[1:])
proba = np.array(proba)
voronoi = np.array(voronoi)
np.save("voronoi_{}".format(i), voronoi)
np.save("proba_{}".format(i), proba)

## Distortion

In [2]:
voronoi = np.load("voronoi.npy")
proba = np.load("proba.npy")

In [3]:
M = int(1e7)

# Parallel
from joblib import Parallel, delayed

def nearest_neighbor(voronoi, elem):
    return np.argmin(np.linalg.norm(voronoi - elem, axis=1))

def iteration_Lloyd(voronoi, y):
    sommes_y = np.copy(voronoi)
    sommes_ind = np.ones(N)
    # The sum of each cellule
    indice = Parallel(n_jobs=-1)(delayed(nearest_neighbor)(voronoi, y[j]) for j in range(M))
    distortion = 0.
    for j in range(M):
        sommes_y[indice[j]] += y[j]
        sommes_ind[indice[j]] += 1
    # Take the average
    sommes_ind /= M
    new_voronoi = (sommes_y.transpose() / sommes_ind).transpose()
    for j in range(M):
        distortion += np.linalg.norm(voronoi[indice[j]] - y[j]) ** 2 * sommes_ind[indice[j]]
    print(distortion)
    return new_voronoi, sommes_ind

def distortion(voronoi):
    y = np.random.normal(size=(M, 4))
    indice = Parallel(n_jobs=-1)(delayed(nearest_neighbor)(voronoi, y[j]) for j in range(M))
    distortion_ = 0.
    for j in range(M):
        distortion_ += np.linalg.norm(voronoi[indice[j]] - y[j]) ** 2
    distortion_ /= M
    return distortion_

In [4]:
distortion(voronoi)

0.5973656354172222

In [None]:
base_distortion = 0.5973656354172222
ind = 0
while True:
    candidate = iteration_Lloyd(voronoi, np.random.normal(size=(M, 4)))
    new_distortion = distortion(candidate)
    if new_distortion < base_distortion:
        base_distortion = new_distortion
        voronoi = np.copy(candidate)
        print(ind, new_distortion)
        ind += 1
        np.save("voronoi_{}".format(ind), voronoi)