In [1]:
import numpy as np
import sklearn.cluster as clr

In [2]:
rng = np.random.default_rng(3985)

## API Usage

In [49]:
N_CLRS = 3
N_FEATS = 3
N_PTS = 10

In [50]:
points = rng.integers(0, 10, [N_PTS, N_FEATS])
points

array([[1, 4, 8],
       [6, 0, 0],
       [7, 2, 6],
       [8, 4, 8],
       [1, 3, 9],
       [5, 7, 5],
       [6, 4, 2],
       [8, 5, 3],
       [7, 5, 8],
       [5, 7, 2]])

In [52]:
init_idxs = rng.integers(0, N_PTS, N_CLRS)
init_pts = points[init_idxs]
init_pts

array([[7, 2, 6],
       [5, 7, 2],
       [8, 5, 3]])

In [53]:
km = clr.KMeans(
    n_clusters=N_CLRS, init=init_pts, n_init=1, max_iter=300,
    random_state=3985, algorithm='lloyd', verbose=1
)

In [54]:
km.fit(points)

Initialization complete
Iteration 0, inertia 165.0.
Iteration 1, inertia 84.63333333333334.
Converged at iteration 1: strict convergence.


In [55]:
km.cluster_centers_

array([[4.8       , 3.6       , 7.8       ],
       [5.        , 7.        , 3.5       ],
       [6.66666667, 3.        , 1.66666667]])

## Test Determinism

In [82]:
N_FEATS = 1
points = rng.integers(0, 10, [N_CLRS, N_FEATS])
points *= 100
points = np.concatenate([points, points - 1, points + 1, points - 2, points + 2])
points

array([[400],
       [300],
       [100],
       [399],
       [299],
       [ 99],
       [401],
       [301],
       [101],
       [398],
       [298],
       [ 98],
       [402],
       [302],
       [102]])

In [83]:
N_PTS = points.shape[0]
init_pts = points[:N_CLRS, :]
init_pts

array([[400],
       [300],
       [100]])

### First Time

In [84]:
km = clr.KMeans(
    n_clusters=N_CLRS, init=init_pts, n_init=1, max_iter=300,
    random_state=3985, algorithm='lloyd', verbose=1
)
km.fit(points)

Initialization complete
Iteration 0, inertia 30.0.
Converged at iteration 0: center shift 8.077935669463161e-28 within tolerance 1.5557555555555558.


In [85]:
km.cluster_centers_

array([[400.],
       [300.],
       [100.]])

### Second Time

In [86]:
km = clr.KMeans(
    n_clusters=N_CLRS, init=init_pts, n_init=1, max_iter=300,
    random_state=3985, algorithm='lloyd', verbose=1
)
km.fit(points)

Initialization complete
Iteration 0, inertia 30.0.
Converged at iteration 0: center shift 8.077935669463161e-28 within tolerance 1.5557555555555558.


In [88]:
km.cluster_centers_

array([[400.],
       [300.],
       [100.]])

## Generate Testcases

In [53]:
N_PTS = 1024
N_FEATS = 128
N_CLRS = 8

In [54]:
points = rng.random([N_PTS, N_FEATS])
points

array([[0.56317041, 0.95547997, 0.18312246, ..., 0.24811908, 0.36786775,
        0.47234641],
       [0.54234708, 0.78222908, 0.36823254, ..., 0.14482314, 0.75532335,
        0.61035202],
       [0.43228993, 0.93069523, 0.69397088, ..., 0.76438538, 0.76332209,
        0.72476161],
       ...,
       [0.21329565, 0.49538786, 0.8798866 , ..., 0.83336527, 0.15184803,
        0.21125614],
       [0.07781889, 0.66145335, 0.30461088, ..., 0.8156401 , 0.66564087,
        0.90632432],
       [0.4581499 , 0.01187021, 0.62477843, ..., 0.79474108, 0.08554994,
        0.47615746]])

In [55]:
np.savetxt('tcase2.txt', points, fmt='%.8f', header=f'{N_PTS} {N_FEATS}')

In [56]:
init_idxs = rng.integers(0, N_PTS, N_CLRS)
init_idxs

array([414, 822, 844, 114, 551, 299, 363, 235])

In [57]:
np.savetxt('tcase2_cfg.txt', init_idxs, fmt='%d', header=f'{N_CLRS}')

In [58]:
km = clr.KMeans(
    n_clusters=N_CLRS, init=points[init_idxs], n_init=1, max_iter=300,
    random_state=3985, algorithm='lloyd', verbose=1
)
km.fit(points)

Initialization complete
Iteration 0, inertia 18957.27895195155.
Iteration 1, inertia 10628.102346552223.
Iteration 2, inertia 10591.752657807843.
Iteration 3, inertia 10574.047927286092.
Iteration 4, inertia 10563.121857697406.
Iteration 5, inertia 10555.615746436339.
Iteration 6, inertia 10551.849081694167.
Iteration 7, inertia 10548.228375788902.
Iteration 8, inertia 10546.484426903466.
Iteration 9, inertia 10545.101010073846.
Iteration 10, inertia 10543.771685159825.
Iteration 11, inertia 10543.606925631455.
Converged at iteration 11: strict convergence.


In [60]:
km.inertia_

10543.606925631455

### Simple Testcase

In [41]:
N_PTS = 10
N_CLRS = 3
N_FEATS = 2

In [42]:
points = rng.integers(0, 10, [N_CLRS, N_FEATS])
points *= 100
points = np.concatenate([points, points - 1, points + 1, points - 2, points + 2])
points

array([[300, 600],
       [300, 500],
       [500, 700],
       [299, 599],
       [299, 499],
       [499, 699],
       [301, 601],
       [301, 501],
       [501, 701],
       [298, 598],
       [298, 498],
       [498, 698],
       [302, 602],
       [302, 502],
       [502, 702]])

In [43]:
N_PTS = points.shape[0]
init_idxs = np.arange(N_CLRS)
init_idxs

array([0, 1, 2])

In [44]:
np.savetxt('tcase1.txt', points, fmt='%.8f', header=f'{N_PTS} {N_FEATS}')
np.savetxt('tcase1_cfg.txt', init_idxs, fmt='%d', header=f'{N_CLRS}')

In [45]:
km = clr.KMeans(
    n_clusters=N_CLRS, init=points[init_idxs], n_init=1, max_iter=300,
    random_state=3985, algorithm='lloyd', verbose=1
)
km.fit(points)

Initialization complete
Iteration 0, inertia 60.0.
Converged at iteration 0: center shift 0.0 within tolerance 0.7779777777777778.


In [46]:
np.int32(km.cluster_centers_)

array([[300, 600],
       [300, 500],
       [500, 700]], dtype=int32)

In [50]:
np.savetxt('tcase1_gt.txt', km.cluster_centers_, fmt='%.8f', header=f'{N_CLRS} {N_FEATS}')

In [48]:
!cat tcase1_gt.txt

300.00000000 600.00000000
300.00000000 500.00000000
500.00000000 700.00000000
