Skip to content

Commit

Permalink
Merge pull request #3683 from richardtsai/vq_rewrite
Browse files Browse the repository at this point in the history
ENH: cluster: rewrite and optimize `vq` in Cython
  • Loading branch information
rgommers committed Jun 1, 2014
2 parents 93656a8 + dd8a93f commit 99aff1e
Show file tree
Hide file tree
Showing 15 changed files with 305 additions and 581 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Expand Up @@ -104,7 +104,7 @@ doc/source/generated
# Things specific to this project #
###################################
scipy/__config__.py
scipy/cluster/_vq_rewrite.c
scipy/cluster/_vq.c
scipy/fftpack/_fftpackmodule.c
scipy/fftpack/convolvemodule.c
scipy/fftpack/src/dct.c
Expand Down
229 changes: 229 additions & 0 deletions scipy/cluster/_vq.pyx
@@ -0,0 +1,229 @@
"""
Cython rewrite of the vector quantization module, originally written
in C at src/vq.c and the wrapper at src/vq_module.c. This should be
easier to maintain than old SWIG output.
Original C version by Damian Eads.
Translated to Cython by David Warde-Farley, October 2009.
"""

import numpy as np
cimport numpy as np
from cluster_blas cimport *

cdef extern from "math.h":
float sqrtf(float num)
double sqrt(double num)

cdef extern from "numpy/npy_math.h":
cdef enum:
NPY_INFINITY

ctypedef np.float64_t float64_t
ctypedef np.float32_t float32_t
ctypedef np.int32_t int32_t

# Use Cython fused types for templating
# Define supported data types as vq_type
ctypedef fused vq_type:
float32_t
float64_t

# When the number of features is less than this number,
# switch back to the naive algorithm to avoid high overhead.
DEF NFEATURES_CUTOFF=5

# Initialize the NumPy C API
np.import_array()


cdef inline vq_type _sqrt(vq_type x):
if vq_type is float32_t:
return sqrtf(x)
else:
return sqrt(x)


cdef inline vq_type vec_sqr(int n, vq_type *p):
cdef vq_type result = 0.0
cdef int i
for i in range(n):
result += p[i] * p[i]
return result


cdef inline void cal_M(int nobs, int ncodes, int nfeat, vq_type *obs,
vq_type *code_book, vq_type *M):
"""
Calculate M = obs * code_book.T
"""
cdef vq_type alpha = -2.0, beta = 0.0

# Call BLAS functions with Fortran ABI
# Note that BLAS Fortran ABI uses column-major order
if vq_type is float32_t:
f_sgemm("T", "N", &ncodes, &nobs, &nfeat,
&alpha, code_book, &nfeat, obs, &nfeat, &beta, M, &ncodes)
else:
f_dgemm("T", "N", &ncodes, &nobs, &nfeat,
&alpha, code_book, &nfeat, obs, &nfeat, &beta, M, &ncodes)


cdef void _vq(vq_type *obs, vq_type *code_book,
int ncodes, int nfeat, int nobs,
int32_t *codes, vq_type *low_dist):
# Naive algorithm is prefered when nfeat is small
if nfeat < NFEATURES_CUTOFF:
_vq_small_nf(obs, code_book, ncodes, nfeat, nobs, codes, low_dist)
return

cdef int obs_index, code_index
cdef vq_type *p_obs
cdef vq_type *p_codes
cdef vq_type dist_sqr
cdef np.ndarray[vq_type, ndim=1] obs_sqr, codes_sqr
cdef np.ndarray[vq_type, ndim=2] M

if vq_type is float32_t:
obs_sqr = np.ndarray(nobs, np.float32)
codes_sqr = np.ndarray(ncodes, np.float32)
M = np.ndarray((nobs, ncodes), np.float32)
else:
obs_sqr = np.ndarray(nobs, np.float64)
codes_sqr = np.ndarray(ncodes, np.float64)
M = np.ndarray((nobs, ncodes), np.float64)

p_obs = obs
for obs_index in range(nobs):
# obs_sqr[i] is the inner product of the i-th observation with itself
obs_sqr[obs_index] = vec_sqr(nfeat, p_obs)
p_obs += nfeat

p_codes = code_book
for code_index in range(ncodes):
# codes_sqr[i] is the inner product of the i-th code with itself
codes_sqr[code_index] = vec_sqr(nfeat, p_codes)
p_codes += nfeat

# M[i][j] is the inner product of the i-th obs and j-th code
# M = obs * codes.T
cal_M(nobs, ncodes, nfeat, obs, code_book, <vq_type *>M.data)

for obs_index in range(nobs):
for code_index in range(ncodes):
dist_sqr = (M[obs_index, code_index] +
obs_sqr[obs_index] + codes_sqr[code_index])
if dist_sqr < low_dist[obs_index]:
codes[obs_index] = code_index
low_dist[obs_index] = dist_sqr

# dist_sqr may be negative due to float point errors
if low_dist[obs_index] > 0:
low_dist[obs_index] = _sqrt(low_dist[obs_index])
else:
low_dist[obs_index] = 0


cdef void _vq_small_nf(vq_type *obs, vq_type *code_book,
int ncodes, int nfeat, int nobs,
int32_t *codes, vq_type *low_dist):
"""
Vector quantization for float32 using naive algorithm.
This is prefered when nfeat is small.
"""
# Temporary variables
cdef vq_type dist_sqr, diff
cdef int32_t obs_index, code_index, feature
cdef int32_t offset = 0

# Index and pointer to keep track of the current position in
# both arrays so that we don't have to always do index * nfeat.
cdef int codebook_pos
cdef vq_type *current_obs

for obs_index in range(nobs):
codebook_pos = 0
for code_index in range(ncodes):
dist_sqr = 0

# Distance between code_book[code_index] and obs[obs_index]
for feature in range(nfeat):
# Use current_obs pointer and codebook_pos to minimize
# pointer arithmetic necessary (i.e. no multiplications)
current_obs = &(obs[offset])
diff = code_book[codebook_pos] - current_obs[feature]
dist_sqr += diff * diff
codebook_pos += 1

# Replace the code assignment and record distance if necessary
if dist_sqr < low_dist[obs_index]:
codes[obs_index] = code_index
low_dist[obs_index] = dist_sqr

low_dist[obs_index] = _sqrt(low_dist[obs_index])

# Update the offset of the current observation
offset += nfeat


def vq(np.ndarray obs, np.ndarray codes):
"""
Vector quantization ndarray wrapper. Only support float32 and float64.
Parameters
----------
obs : ndarray
The observation matrix. Each row is an observation.
codes : ndarray
The code book matrix.
Notes
-----
The observation matrix and code book matrix should have same rank and
same number of columns (features). Only rank 1 and rank 2 are supported.
"""
cdef int nobs, ncodes, nfeat
cdef np.ndarray obs_a, codes_a
cdef np.ndarray outcodes, outdists
cdef int flags = np.NPY_CONTIGUOUS | np.NPY_NOTSWAPPED | np.NPY_ALIGNED

# Ensure the arrays are contiguous - done in C to minimize overhead.
obs_a = np.PyArray_FROM_OF(obs, flags)
codes_a = np.PyArray_FROM_OF(codes, flags)

if obs_a.ndim != codes_a.ndim:
raise ValueError('observation and code should have same rank')

if obs_a.ndim == 1:
nfeat = 1
nobs = obs_a.shape[0]
ncodes = codes_a.shape[0]
elif obs_a.ndim == 2:
nfeat = obs_a.shape[1]
nobs = obs_a.shape[0]
ncodes = codes_a.shape[0]
if nfeat != codes_a.shape[1]:
raise ValueError('obs and code should have same number of '
'features (columns)')
else:
raise ValueError('rank different than 1 or 2 are not supported')

# Initialize outdists and outcodes array.
# Outdists should be initialized as INF.
outdists = np.empty((nobs,), dtype=obs.dtype)
outdists.fill(NPY_INFINITY)
outcodes = np.empty((nobs,), dtype=np.int32)

if obs.dtype == np.float32:
_vq(<float32_t *>obs_a.data, <float32_t *>codes_a.data,
ncodes, nfeat, nobs, <int32_t *>outcodes.data,
<float32_t *>outdists.data)
elif obs.dtype == np.float64:
_vq(<float64_t *>obs_a.data, <float64_t *>codes_a.data,
ncodes, nfeat, nobs, <int32_t *>outcodes.data,
<float64_t *>outdists.data)
else:
raise ValueError('type other than float or double not supported')

return outcodes, outdists

130 changes: 0 additions & 130 deletions scipy/cluster/_vq_rewrite.pyx

This file was deleted.

4 changes: 3 additions & 1 deletion scipy/cluster/bento.info
@@ -1,7 +1,9 @@
HookFile: bscript

Library:
Extension: _hierarchy_wrap
Sources:
src/hierarchy_wrap.c, src/hierarchy.c
Extension: _vq
Sources:
src/vq_module.c, src/vq.c
_vq.c
6 changes: 6 additions & 0 deletions scipy/cluster/bscript
@@ -0,0 +1,6 @@
from bento.commands import hooks

@hooks.pre_build
def pre_build(context):
context.tweak_extension("_vq",
use="LAPACK CLIB")

0 comments on commit 99aff1e

Please sign in to comment.