In [2]:
import numpy as np

# Load the SIFT descriptors: the file is read as raw unsigned bytes, converted
# to float32, and arranged as one 128-value row per descriptor.
X = np.fromfile('./data/SIFT/SIFT.dat', dtype=np.uint8)
X = X.astype(np.float32).reshape((-1, 128))
print(f'Descriptors format: {X.shape}')

Descriptors format: (2097152, 128)


In [None]:
from sklearn.cluster import KMeans, MiniBatchKMeans


# Clustering configuration. Everything tunable lives here so the init array and
# the estimator below can never disagree about the number of clusters.
N_CENTERS = 32000
SEED = 17
BATCH_SIZE = 15000  # number of samples per mini-batch
MAX_ITER = 30       # forwarded as max_iter (sklearn documents it as passes over the full dataset)

# Initialize centers (we use uniform random initialization): each coordinate is
# drawn independently between the global minimum and maximum of X.
# The global NumPy RNG is reseeded right here so re-running this cell yields
# the same starting centers.
np.random.seed(SEED)
centers = np.random.uniform(
    np.min(X), np.max(X),
    size=(N_CENTERS, X.shape[1])
).astype(X.dtype)  # match the dtype of the data being clustered
print('Init shape: ', centers.shape)

# Fit k-means
km = MiniBatchKMeans(
    n_clusters=N_CENTERS,   # must equal centers.shape[0]
    init=centers,           # explicit starting centers computed above
    n_init=1,               # a single run, since the init is given explicitly
    max_iter=MAX_ITER,
    random_state=SEED,
    batch_size=BATCH_SIZE,
    verbose=10,             # logs per-minibatch inertia; output is long
)
km.fit(X)

Init shape:  (32000, 128)
Init 1/1 with method: [[6.27636452e+01 1.13014977e+02 4.07939262e+01 ... 1.89180328e+02
  4.01684380e+01 1.66571915e+02]
 [1.37156265e+02 1.03389984e+02 1.97886566e+02 ... 1.82938675e+02
  7.14931641e+01 2.87881069e+01]
 [1.29865494e+02 1.40626846e+02 2.08107834e+02 ... 1.99609619e+02
  9.64705200e+01 1.94848190e+02]
 ...
 [1.99190491e+02 1.96180435e+02 1.87824005e+02 ... 4.10881186e+00
  8.92299194e+01 8.22306595e+01]
 [7.38449326e+01 1.48732500e+01 6.93937912e+01 ... 1.85481415e+02
  6.01717873e+01 8.73280334e+01]
 [3.08515968e+01 6.37604147e-02 1.76212425e+01 ... 9.85245514e+01
  1.83178497e+02 9.96589355e+01]]
Inertia for init 1/1: 4575646720.000000
Minibatch iteration 1/4200: mean batch inertia: 101678.532267, ewa inertia: 101678.532267 
Minibatch iteration 2/4200: mean batch inertia: 94175.317333, ewa inertia: 101571.197967 
Minibatch iteration 3/4200: mean batch inertia: 91390.438400, ewa inertia: 101425.561103 
Minibatch iteration 4/4200: mean batch in

Minibatch iteration 80/4200: mean batch inertia: 61442.227200, ewa inertia: 77819.435087 
Minibatch iteration 81/4200: mean batch inertia: 61477.115733, ewa inertia: 77585.656445 
Minibatch iteration 82/4200: mean batch inertia: 61507.123200, ewa inertia: 77355.651292 
Minibatch iteration 83/4200: mean batch inertia: 61391.662933, ewa inertia: 77127.284716 
Minibatch iteration 84/4200: mean batch inertia: 61366.877867, ewa inertia: 76901.830395 
Minibatch iteration 85/4200: mean batch inertia: 61426.705067, ewa inertia: 76680.457057 
Minibatch iteration 86/4200: mean batch inertia: 61265.152000, ewa inertia: 76459.939454 
Minibatch iteration 87/4200: mean batch inertia: 61071.872000, ewa inertia: 76239.811488 
Minibatch iteration 88/4200: mean batch inertia: 61331.481600, ewa inertia: 76026.546220 
Minibatch iteration 89/4200: mean batch inertia: 61110.092800, ewa inertia: 75813.164744 
[MiniBatchKMeans] Reassigning 7500 cluster centers.
Minibatch iteration 90/4200: mean batch inertia:

Minibatch iteration 167/4200: mean batch inertia: 58687.018667, ewa inertia: 64651.886273 
Minibatch iteration 168/4200: mean batch inertia: 58168.622933, ewa inertia: 64559.142491 
[MiniBatchKMeans] Reassigning 7500 cluster centers.
Minibatch iteration 169/4200: mean batch inertia: 58214.720000, ewa inertia: 64468.384842 
Minibatch iteration 170/4200: mean batch inertia: 58337.088000, ewa inertia: 64380.675979 
Minibatch iteration 171/4200: mean batch inertia: 58069.282133, ewa inertia: 64290.390809 
Minibatch iteration 172/4200: mean batch inertia: 58279.786667, ewa inertia: 64204.408468 
Minibatch iteration 173/4200: mean batch inertia: 58384.716800, ewa inertia: 64121.157151 
Minibatch iteration 174/4200: mean batch inertia: 58240.494933, ewa inertia: 64037.033643 
Minibatch iteration 175/4200: mean batch inertia: 58337.126400, ewa inertia: 63955.495855 
Minibatch iteration 176/4200: mean batch inertia: 58173.495467, ewa inertia: 63872.783715 
Minibatch iteration 177/4200: mean bat

Minibatch iteration 254/4200: mean batch inertia: 57166.523733, ewa inertia: 59621.612972 
Minibatch iteration 255/4200: mean batch inertia: 57011.660800, ewa inertia: 59584.277324 
Minibatch iteration 256/4200: mean batch inertia: 57079.786667, ewa inertia: 59548.450315 
Minibatch iteration 257/4200: mean batch inertia: 57327.978667, ewa inertia: 59516.686229 
Minibatch iteration 258/4200: mean batch inertia: 57115.878400, ewa inertia: 59482.342413 
Minibatch iteration 259/4200: mean batch inertia: 57244.177067, ewa inertia: 59450.325217 
[MiniBatchKMeans] Reassigning 7500 cluster centers.
Minibatch iteration 260/4200: mean batch inertia: 57234.926933, ewa inertia: 59418.633705 
Minibatch iteration 261/4200: mean batch inertia: 56993.672533, ewa inertia: 59383.944374 
Minibatch iteration 262/4200: mean batch inertia: 57237.947733, ewa inertia: 59353.245660 
Minibatch iteration 263/4200: mean batch inertia: 57178.197333, ewa inertia: 59322.131359 
Minibatch iteration 264/4200: mean bat