In [1]:
from sklearn.cluster._kmeans import _kmeans_plusplus as _sk_kmeans_plusplus

In [2]:
%load_ext cython

In [4]:
np.random.seed(10)
np.random.rand(), uniforms(1)[0]

(0.771320643266746, 0.7773891045592656)

In [33]:
%%cython -f --compile-args=-fopenmp --link-args=-fopenmp --annotate
# cython: profile=True
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False

import numpy as np
cimport numpy as np
cimport cython
from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
from libc.stdint cimport uint16_t, uint64_t
from numpy.random cimport bitgen_t
from numpy.random import PCG64
from numpy.random.c_distributions cimport (
    random_standard_uniform_fill,
    random_standard_uniform_fill_f
)
from cython cimport floating, integral

DEF FLOAT_INF = 1e36

from sklearn.utils._cython_blas cimport (
    BLAS_Order,
    BLAS_Trans,
    ColMajor,
    NoTrans,
    RowMajor,
    Trans,
    _gemm,
)


cpdef uniforms(Py_ssize_t n):
    """
    Create an array of `n` uniformly distributed doubles.
    A 'real' distribution would want to process the values into
    some non-uniform distribution
    """
    cdef Py_ssize_t i
    cdef bitgen_t *rng
    cdef const char *capsule_name = "BitGenerator"
    cdef double[::1] random_values

    x = PCG64()
    capsule = x.capsule
    # Optional check that the capsule if from a BitGenerator
    if not PyCapsule_IsValid(capsule, capsule_name):
        raise ValueError("Invalid pointer to anon_func_state")
    # Cast the pointer
    rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name)
    random_values = np.empty(n, dtype='float64')
    with x.lock, nogil:
        for i in range(n):
            # Call the function
            random_values[i] = rng.next_double(rng.state)
    randoms = np.asarray(random_values)

    return randoms


cdef inline floating squarred_norm(
    const floating[:, ::1] centroids,
    integral i,
) nogil:
    cdef:
        integral j
        floating sq_norm = 0 
    
    for j in range(centroids.shape[1]):
        sq_norm += centroids[i, j] * centroids[i, j]
        
    return sq_norm
    

cdef int _cumulated_euclidean_distances(
    const floating[:, ::1] centroids,   # IN
    const floating[:, ::1] X,           # IN
    const floating[::1] X_sq_norms,     # IN
    floating[:, ::1] cum_sum_dist_sq,   # OUT
) nogil except -1:
    cdef:
        int i, j
        floating i_sq_norm
            
    # Careful: LDA, LDB and LDC are given for F-ordered arrays.
    # Here, we use their counterpart values as indicated in the documentation.
    # See the documentation of parameters here:
    # https://www.netlib.org/lapack/explore-html/db/dc9/group__single__blas__level3_gafe51bacb54592ff5de056acabd83c260.html
    #
    # cum_sum_dist_sq = -2 * centroids.dot(X.T)
    _gemm(RowMajor, NoTrans, Trans,
          centroids.shape[0], X.shape[0], centroids.shape[1],
          -2.0,
          &centroids[0, 0], centroids.shape[1],
          &X[0, 0], centroids.shape[1], 0,
          &cum_sum_dist_sq[0, 0], X.shape[0])
    
    # Computing the cumulative sum
    for i in range(centroids.shape[0]):
        i_sq_norm = squarred_norm(centroids, i)
        cum_sum_dist_sq[i, 0] += i_sq_norm + X_sq_norms[0]
        for j in range(0, X.shape[0] - 1):
            cum_sum_dist_sq[i, j + 1] += cum_sum_dist_sq[i, j] + i_sq_norm + X_sq_norms[j + 1]
    
    return 0
    
            
cpdef int _kmeans_plusplus_inner(
    const floating[:, ::1] X,            # IN
    integral n_clusters,                 # IN
    const floating[::1] X_sq_norms,      # IN
    integral n_local_trials,             # IN
    integral[:, ::1] indices,            # OUT
    floating[:, ::1] centers,            # OUT
) except -1:
    cdef:
        int_dtype = np.int32 if integral is int else np.int64
        float_dtype = np.float32 if floating is float else np.float64
        
        integral n_samples = X.shape[0]
        integral n_features = X.shape[1]
        integral c
        integral best_candidate
        floating current_pot
        floating[:, ::1] cum_sum_dist_sq = np.zeros((n_local_trials, n_samples), dtype=float_dtype)
        floating[::1] rand_vals = np.zeros((n_local_trials, ), dtype=float_dtype)
        integral[::1] candidate_ids = np.zeros((n_local_trials, ), dtype=float_dtype)
    
    _cumulated_euclidean_distances(
        X[0:n_local_trials, :],
        X,
        X_sq_norms,
        cum_sum_dist_sq,
    )
    
    best_candidate = 0
    current_pot = cum_sum_dist_sq[best_candidate, X.shape[0] - 1]

    # Pick the remaining n_clusters-1 points
    for c in range(1, n_clusters):
        # Choose center candidates by sampling with probability proportional
        # to the squared distance to the closest existing center
        
        rand_vals = uniforms(n_local_trials) * current_pot
        
        # TODO: find a way to perform efficient binary search
        candidate_ids = np.searchsorted(cum_sum_dist_sq[best_candidate, :], rand_vals)

        # Compute distances to center candidates
        # TODO: Cython compilation fails here
        # TODO: think of how one would chunk this
        _cumulated_euclidean_distances(
            np.asarray(X)[candidate_ids, :],
            X,
            X_sq_norms,
            cum_sum_dist_sq
        )


        best_candidate = 0
        current_pot = cum_sum_dist_sq[best_candidate, X.shape[0] - 1]
        for i in range(1, n_local_trials):
            if cum_sum_dist_sq[i, X.shape[0] - 1] < current_pot:
                best_candidate = i

        # Permanently add best center candidate found in local tries
        centers[c] = X[candidate_ids[best_candidate]]
        indices[c] = candidate_ids[best_candidate]

    return centers, indices

def kmeans_plusplus(
    floating[:, ::1] X,
    integral n_clusters,
    integral n_local_trials,
    bint use_cython=True):
    cdef:
        int_dtype = np.int32 if integral is int else np.int64
        float_dtype = np.float32 if floating is float else np.float64
        
        integral n_samples = X.shape[0]
        integral n_features = X.shape[1]
        floating[:, ::1] centers = np.empty((n_clusters, n_features), dtype=float_dtype)
        floating[::1] X_sq_norms = np.einsum('ij,ij->i', X, X)
        integral[:, ::1] indices = np.empty((n_clusters,), dtype=int_dtype)

    
    _kmeans_plusplus_inner(X, n_clusters, X_sq_norms, n_local_trials, indices, centers)
    
    return centers, indices 
    


Error compiling Cython file:
------------------------------------------------------------
...
        candidate_ids = np.searchsorted(cum_sum_dist_sq[best_candidate, :], rand_vals)

        # Compute distances to center candidates
        # TODO: think of how one would chunk this
        _cumulated_euclidean_distances(
            <floating[:, ::1]> np.asarray(&X)[candidate_ids, :],
                                         ^
------------------------------------------------------------

/home/jsquared/.cache/ipython/cython/_cython_magic_61720c15c8a257f2f1c219f74f98ca171c23f8b2.pyx:151:42: Cannot take address of memoryview slice

Error compiling Cython file:
------------------------------------------------------------
...
        candidate_ids = np.searchsorted(cum_sum_dist_sq[best_candidate, :], rand_vals)

        # Compute distances to center candidates
        # TODO: think of how one would chunk this
        _cumulated_euclidean_distances(
            <floating[:, ::1]> np.asarray(