Merge pull request #3683 from richardtsai/vq_rewrite

ENH: cluster: rewrite and optimize `vq` in Cython
scipy · Jun 1, 2014 · 99aff1e · 99aff1e
2 parents 93656a8 + dd8a93f
commit 99aff1e
Show file tree

Hide file tree

Showing 15 changed files with 305 additions and 581 deletions.
diff --git a/.gitignore b/.gitignore
@@ -104,7 +104,7 @@ doc/source/generated
 # Things specific to this project #
 ###################################
 scipy/__config__.py
-scipy/cluster/_vq_rewrite.c
+scipy/cluster/_vq.c
 scipy/fftpack/_fftpackmodule.c
 scipy/fftpack/convolvemodule.c
 scipy/fftpack/src/dct.c

diff --git a/scipy/cluster/_vq.pyx b/scipy/cluster/_vq.pyx
@@ -0,0 +1,229 @@
+"""
+Cython rewrite of the vector quantization module, originally written
+in C at src/vq.c and the wrapper at src/vq_module.c. This should be
+easier to maintain than old SWIG output.
+
+Original C version by Damian Eads.
+Translated to Cython by David Warde-Farley, October 2009.
+"""
+
+import numpy as np
+cimport numpy as np
+from cluster_blas cimport *
+
+cdef extern from "math.h":
+    float sqrtf(float num)
+    double sqrt(double num)
+
+cdef extern from "numpy/npy_math.h":
+    cdef enum:
+        NPY_INFINITY
+
+ctypedef np.float64_t float64_t
+ctypedef np.float32_t float32_t
+ctypedef np.int32_t int32_t
+
+# Use Cython fused types for templating
+# Define supported data types as vq_type
+ctypedef fused vq_type:
+    float32_t
+    float64_t
+
+# When the number of features is less than this number,
+# switch back to the naive algorithm to avoid high overhead.
+DEF NFEATURES_CUTOFF=5
+
+# Initialize the NumPy C API
+np.import_array()
+
+
+cdef inline vq_type _sqrt(vq_type x):
+    if vq_type is float32_t:
+        return sqrtf(x)
+    else:
+        return sqrt(x)
+
+
+cdef inline vq_type vec_sqr(int n, vq_type *p):
+    cdef vq_type result = 0.0
+    cdef int i
+    for i in range(n):
+        result += p[i] * p[i]
+    return result
+
+
+cdef inline void cal_M(int nobs, int ncodes, int nfeat, vq_type *obs,
+                       vq_type *code_book, vq_type *M):
+    """
+    Calculate M = obs * code_book.T
+    """
+    cdef vq_type alpha = -2.0, beta = 0.0
+
+    # Call BLAS functions with Fortran ABI
+    # Note that BLAS Fortran ABI uses column-major order
+    if vq_type is float32_t:
+        f_sgemm("T", "N", &ncodes, &nobs, &nfeat,
+                &alpha, code_book, &nfeat, obs, &nfeat, &beta, M, &ncodes)
+    else:
+        f_dgemm("T", "N", &ncodes, &nobs, &nfeat,
+                &alpha, code_book, &nfeat, obs, &nfeat, &beta, M, &ncodes)
+
+
+cdef void _vq(vq_type *obs, vq_type *code_book,
+              int ncodes, int nfeat, int nobs,
+              int32_t *codes, vq_type *low_dist):
+    # Naive algorithm is prefered when nfeat is small
+    if nfeat < NFEATURES_CUTOFF:
+        _vq_small_nf(obs, code_book, ncodes, nfeat, nobs, codes, low_dist)
+        return
+
+    cdef int obs_index, code_index
+    cdef vq_type *p_obs
+    cdef vq_type *p_codes
+    cdef vq_type dist_sqr
+    cdef np.ndarray[vq_type, ndim=1] obs_sqr, codes_sqr
+    cdef np.ndarray[vq_type, ndim=2] M
+
+    if vq_type is float32_t:
+        obs_sqr = np.ndarray(nobs, np.float32)
+        codes_sqr = np.ndarray(ncodes, np.float32)
+        M = np.ndarray((nobs, ncodes), np.float32)
+    else:
+        obs_sqr = np.ndarray(nobs, np.float64)
+        codes_sqr = np.ndarray(ncodes, np.float64)
+        M = np.ndarray((nobs, ncodes), np.float64)
+
+    p_obs = obs
+    for obs_index in range(nobs):
+        # obs_sqr[i] is the inner product of the i-th observation with itself
+        obs_sqr[obs_index] = vec_sqr(nfeat, p_obs)
+        p_obs += nfeat
+
+    p_codes = code_book
+    for code_index in range(ncodes):
+        # codes_sqr[i] is the inner product of the i-th code with itself
+        codes_sqr[code_index] = vec_sqr(nfeat, p_codes)
+        p_codes += nfeat
+
+    # M[i][j] is the inner product of the i-th obs and j-th code
+    # M = obs * codes.T
+    cal_M(nobs, ncodes, nfeat, obs, code_book, <vq_type *>M.data)
+
+    for obs_index in range(nobs):
+        for code_index in range(ncodes):
+            dist_sqr = (M[obs_index, code_index] +
+                    obs_sqr[obs_index] + codes_sqr[code_index])
+            if dist_sqr < low_dist[obs_index]:
+                codes[obs_index] = code_index
+                low_dist[obs_index] = dist_sqr
+
+        # dist_sqr may be negative due to float point errors
+        if low_dist[obs_index] > 0:
+            low_dist[obs_index] = _sqrt(low_dist[obs_index])
+        else:
+            low_dist[obs_index] = 0
+
+
+cdef void _vq_small_nf(vq_type *obs, vq_type *code_book,
+                       int ncodes, int nfeat, int nobs,
+                       int32_t *codes, vq_type *low_dist):
+    """
+    Vector quantization for float32 using naive algorithm.
+    This is prefered when nfeat is small.
+    """
+    # Temporary variables
+    cdef vq_type dist_sqr, diff
+    cdef int32_t obs_index, code_index, feature
+    cdef int32_t offset = 0
+
+    # Index and pointer to keep track of the current position in
+    # both arrays so that we don't have to always do index * nfeat.
+    cdef int codebook_pos
+    cdef vq_type *current_obs
+
+    for obs_index in range(nobs):
+        codebook_pos = 0
+        for code_index in range(ncodes):
+            dist_sqr = 0
+
+            # Distance between code_book[code_index] and obs[obs_index]
+            for feature in range(nfeat):
+                # Use current_obs pointer and codebook_pos to minimize
+                # pointer arithmetic necessary (i.e. no multiplications)
+                current_obs = &(obs[offset])
+                diff = code_book[codebook_pos] - current_obs[feature]
+                dist_sqr += diff * diff
+                codebook_pos += 1
+
+            # Replace the code assignment and record distance if necessary
+            if dist_sqr < low_dist[obs_index]:
+                codes[obs_index] = code_index
+                low_dist[obs_index] = dist_sqr
+
+        low_dist[obs_index] = _sqrt(low_dist[obs_index])
+
+        # Update the offset of the current observation
+        offset += nfeat
+
+
+def vq(np.ndarray obs, np.ndarray codes):
+    """
+    Vector quantization ndarray wrapper. Only support float32 and float64.
+
+    Parameters
+    ----------
+    obs : ndarray
+        The observation matrix. Each row is an observation.
+    codes : ndarray
+        The code book matrix.
+
+    Notes
+    -----
+    The observation matrix and code book matrix should have same rank and
+    same number of columns (features). Only rank 1 and rank 2 are supported.
+    """
+    cdef int nobs, ncodes, nfeat
+    cdef np.ndarray obs_a, codes_a
+    cdef np.ndarray outcodes, outdists
+    cdef int flags = np.NPY_CONTIGUOUS | np.NPY_NOTSWAPPED | np.NPY_ALIGNED
+
+    # Ensure the arrays are contiguous - done in C to minimize overhead.
+    obs_a = np.PyArray_FROM_OF(obs, flags)
+    codes_a = np.PyArray_FROM_OF(codes, flags)
+
+    if obs_a.ndim != codes_a.ndim:
+        raise ValueError('observation and code should have same rank')
+
+    if obs_a.ndim == 1:
+        nfeat = 1
+        nobs = obs_a.shape[0]
+        ncodes = codes_a.shape[0]
+    elif obs_a.ndim == 2:
+        nfeat = obs_a.shape[1]
+        nobs = obs_a.shape[0]
+        ncodes = codes_a.shape[0]
+        if nfeat != codes_a.shape[1]:
+            raise ValueError('obs and code should have same number of '
+                             'features (columns)')
+    else:
+        raise ValueError('rank different than 1 or 2 are not supported')
+
+    # Initialize outdists and outcodes array.
+    # Outdists should be initialized as INF.
+    outdists = np.empty((nobs,), dtype=obs.dtype)
+    outdists.fill(NPY_INFINITY)
+    outcodes = np.empty((nobs,), dtype=np.int32)
+
+    if obs.dtype == np.float32:
+        _vq(<float32_t *>obs_a.data, <float32_t *>codes_a.data,
+            ncodes, nfeat, nobs, <int32_t *>outcodes.data,
+            <float32_t *>outdists.data)
+    elif obs.dtype == np.float64:
+        _vq(<float64_t *>obs_a.data, <float64_t *>codes_a.data,
+            ncodes, nfeat, nobs, <int32_t *>outcodes.data,
+            <float64_t *>outdists.data)
+    else:
+        raise ValueError('type other than float or double not supported')
+
+    return outcodes, outdists
+
diff --git a/scipy/cluster/_vq_rewrite.pyx b/scipy/cluster/_vq_rewrite.pyx
diff --git a/scipy/cluster/bento.info b/scipy/cluster/bento.info
@@ -1,7 +1,9 @@
+HookFile: bscript
+
 Library:
     Extension: _hierarchy_wrap
         Sources:
              src/hierarchy_wrap.c, src/hierarchy.c
     Extension: _vq
         Sources:
-             src/vq_module.c, src/vq.c
+             _vq.c
diff --git a/scipy/cluster/bscript b/scipy/cluster/bscript
@@ -0,0 +1,6 @@
+from bento.commands import hooks
+
+@hooks.pre_build
+def pre_build(context):
+    context.tweak_extension("_vq",
+                            use="LAPACK CLIB")