Skip to content

Commit

Permalink
Merge pull request #12375 from peterbell10/minkowski-fast-path
Browse files Browse the repository at this point in the history
ENH: Add fast path for minkowski distance with p=1,2 and support p=inf
  • Loading branch information
rgommers committed Aug 9, 2020
2 parents 5309179 + b3b1672 commit 28d4244
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 101 deletions.
20 changes: 12 additions & 8 deletions benchmarks/benchmarks/spatial.py
Expand Up @@ -346,30 +346,34 @@ class Xdist(Benchmark):
'seuclidean', 'sqeuclidean', 'cosine', 'correlation', 'hamming', 'jaccard',
'jensenshannon', 'chebyshev', 'canberra', 'braycurtis', 'mahalanobis',
'yule', 'dice', 'kulsinski', 'rogerstanimoto', 'russellrao',
'sokalmichener', 'sokalsneath', 'wminkowski'])
'sokalmichener', 'sokalsneath', 'wminkowski', 'minkowski-P3'])
param_names = ['num_points', 'metric']

def setup(self, num_points, metric):
np.random.seed(123)
self.points = np.random.random_sample((num_points, 3))
# use an equal weight vector to satisfy those metrics
# that require weights
if metric == 'wminkowski':
self.w = np.ones(3)
self.metric = metric
if metric == 'minkowski-P3':
# p=2 is just the euclidean metric, try another p value as well
self.kwargs = {'p': 3.0}
self.metric = 'minkowski'
elif metric == 'wminkowski':
# use an equal weight vector since weights are required
self.kwargs = {'w': np.ones(3)}
else:
self.w = None
self.kwargs = {}

def time_cdist(self, num_points, metric):
"""Time scipy.spatial.distance.cdist over a range of input data
sizes and metrics.
"""
distance.cdist(self.points, self.points, metric, w=self.w)
distance.cdist(self.points, self.points, self.metric, **self.kwargs)

def time_pdist(self, num_points, metric):
"""Time scipy.spatial.distance.pdist over a range of input data
sizes and metrics.
"""
distance.pdist(self.points, metric, w=self.w)
distance.pdist(self.points, self.metric, **self.kwargs)


class ConvexHullBench(Benchmark):
Expand Down
8 changes: 7 additions & 1 deletion scipy/spatial/distance.py
Expand Up @@ -285,6 +285,10 @@ def _validate_mahalanobis_kwargs(X, m, n, **kwargs):
def _validate_minkowski_kwargs(X, m, n, **kwargs):
if 'p' not in kwargs:
kwargs['p'] = 2.
else:
if kwargs['p'] < 1:
raise ValueError("p must be at least 1")

return kwargs


Expand Down Expand Up @@ -508,9 +512,11 @@ def minkowski(u, v, p=2, w=None):
w = _validate_weights(w)
if p == 1:
root_w = w
if p == 2:
elif p == 2:
# better precision and speed
root_w = np.sqrt(w)
elif p == np.inf:
root_w = (w != 0)
else:
root_w = np.power(w, 1/p)
u_v = root_w * u_v
Expand Down
4 changes: 3 additions & 1 deletion scipy/spatial/setup.py
Expand Up @@ -70,7 +70,9 @@ def configuration(parent_package='', top_path=None):
config.add_extension('_distance_wrap',
sources=[join('src', 'distance_wrap.c')],
depends=[join('src', 'distance_impl.h')],
include_dirs=[get_numpy_include_dirs()],
include_dirs=[
get_numpy_include_dirs(),
join(dirname(dirname(__file__)), '_lib')],
extra_info=get_misc_info("npymath"))

config.add_extension('_voronoi',
Expand Down
163 changes: 91 additions & 72 deletions scipy/spatial/src/distance_impl.h
Expand Up @@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "_c99compat.h"


static NPY_INLINE void
Expand Down Expand Up @@ -425,6 +426,78 @@ compute_mean_vector(double *res, const double *X, npy_intp num_rows, const npy_i
}
#endif

#define DEFINE_CDIST(name, type) \
static int cdist_ ## name ## _ ## type(const type *XA, const type *XB, \
double *dm, \
const npy_intp num_rowsA, \
const npy_intp num_rowsB, \
const npy_intp num_cols) \
{ \
Py_ssize_t i, j; \
for (i = 0; i < num_rowsA; ++i) { \
const type *u = XA + num_cols * i; \
for (j = 0; j < num_rowsB; ++j, ++dm) { \
const type *v = XB + num_cols * j; \
*dm = name ## _distance_ ## type(u, v, num_cols); \
} \
} \
return 0;\
}

DEFINE_CDIST(bray_curtis, double)
DEFINE_CDIST(canberra, double)
DEFINE_CDIST(chebyshev, double)
DEFINE_CDIST(city_block, double)
DEFINE_CDIST(euclidean, double)
DEFINE_CDIST(jaccard, double)
DEFINE_CDIST(jensenshannon, double)
DEFINE_CDIST(sqeuclidean, double)

DEFINE_CDIST(dice, char)
DEFINE_CDIST(jaccard, char)
DEFINE_CDIST(kulsinski, char)
DEFINE_CDIST(rogerstanimoto, char)
DEFINE_CDIST(russellrao, char)
DEFINE_CDIST(sokalmichener, char)
DEFINE_CDIST(sokalsneath, char)
DEFINE_CDIST(yule, char)


#define DEFINE_PDIST(name, type) \
static int pdist_ ## name ## _ ## type(const type *X, double *dm, \
const npy_intp num_rows, \
const npy_intp num_cols) \
{ \
Py_ssize_t i, j; \
double *it = dm; \
for (i = 0; i < num_rows; ++i) { \
const type *u = X + num_cols * i; \
for (j = i + 1; j < num_rows; ++j, it++) { \
const type *v = X + num_cols * j; \
*it = name ## _distance_ ## type(u, v, num_cols); \
} \
} \
return 0; \
}

DEFINE_PDIST(bray_curtis, double)
DEFINE_PDIST(canberra, double)
DEFINE_PDIST(chebyshev, double)
DEFINE_PDIST(city_block, double)
DEFINE_PDIST(euclidean, double)
DEFINE_PDIST(jaccard, double)
DEFINE_PDIST(jensenshannon, double)
DEFINE_PDIST(sqeuclidean, double)

DEFINE_PDIST(dice, char)
DEFINE_PDIST(jaccard, char)
DEFINE_PDIST(kulsinski, char)
DEFINE_PDIST(rogerstanimoto, char)
DEFINE_PDIST(russellrao, char)
DEFINE_PDIST(sokalmichener, char)
DEFINE_PDIST(sokalsneath, char)
DEFINE_PDIST(yule, char)

static NPY_INLINE int
pdist_mahalanobis(const double *X, double *dm, const npy_intp num_rows,
const npy_intp num_cols, const double *covinv)
Expand Down Expand Up @@ -499,6 +572,15 @@ pdist_minkowski(const double *X, double *dm, npy_intp num_rows,
const npy_intp num_cols, const double p)
{
npy_intp i, j;
if (p == 1.0) {
return pdist_city_block_double(X, dm, num_rows, num_cols);
}
if (p == 2.0) {
return pdist_euclidean_double(X, dm, num_rows, num_cols);
}
if (sc_isinf(p)) {
return pdist_chebyshev_double(X, dm, num_rows, num_cols);
}

for (i = 0; i < num_rows; ++i) {
const double *u = X + (num_cols * i);
Expand Down Expand Up @@ -693,6 +775,15 @@ cdist_minkowski(const double *XA, const double *XB, double *dm,
const npy_intp num_cols, const double p)
{
npy_intp i, j;
if (p == 1.0) {
return cdist_city_block_double(XA, XB, dm, num_rowsA, num_rowsB, num_cols);
}
if (p == 2.0) {
return cdist_euclidean_double(XA, XB, dm, num_rowsA, num_rowsB, num_cols);
}
if (sc_isinf(p)) {
return cdist_chebyshev_double(XA, XB, dm, num_rowsA, num_rowsB, num_cols);
}

for (i = 0; i < num_rowsA; ++i) {
const double *u = XA + (num_cols * i);
Expand Down Expand Up @@ -757,75 +848,3 @@ cdist_hamming_char(const char *XA, const char *XB, double *dm,
}
return 0;
}

#define DEFINE_CDIST(name, type) \
static int cdist_ ## name ## _ ## type(const type *XA, const type *XB, \
double *dm, \
const npy_intp num_rowsA, \
const npy_intp num_rowsB, \
const npy_intp num_cols) \
{ \
Py_ssize_t i, j; \
for (i = 0; i < num_rowsA; ++i) { \
const type *u = XA + num_cols * i; \
for (j = 0; j < num_rowsB; ++j, ++dm) { \
const type *v = XB + num_cols * j; \
*dm = name ## _distance_ ## type(u, v, num_cols); \
} \
} \
return 0;\
}

DEFINE_CDIST(bray_curtis, double)
DEFINE_CDIST(canberra, double)
DEFINE_CDIST(chebyshev, double)
DEFINE_CDIST(city_block, double)
DEFINE_CDIST(euclidean, double)
DEFINE_CDIST(jaccard, double)
DEFINE_CDIST(jensenshannon, double)
DEFINE_CDIST(sqeuclidean, double)

DEFINE_CDIST(dice, char)
DEFINE_CDIST(jaccard, char)
DEFINE_CDIST(kulsinski, char)
DEFINE_CDIST(rogerstanimoto, char)
DEFINE_CDIST(russellrao, char)
DEFINE_CDIST(sokalmichener, char)
DEFINE_CDIST(sokalsneath, char)
DEFINE_CDIST(yule, char)


#define DEFINE_PDIST(name, type) \
static int pdist_ ## name ## _ ## type(const type *X, double *dm, \
const npy_intp num_rows, \
const npy_intp num_cols) \
{ \
Py_ssize_t i, j; \
double *it = dm; \
for (i = 0; i < num_rows; ++i) { \
const type *u = X + num_cols * i; \
for (j = i + 1; j < num_rows; ++j, it++) { \
const type *v = X + num_cols * j; \
*it = name ## _distance_ ## type(u, v, num_cols); \
} \
} \
return 0; \
}

DEFINE_PDIST(bray_curtis, double)
DEFINE_PDIST(canberra, double)
DEFINE_PDIST(chebyshev, double)
DEFINE_PDIST(city_block, double)
DEFINE_PDIST(euclidean, double)
DEFINE_PDIST(jaccard, double)
DEFINE_PDIST(jensenshannon, double)
DEFINE_PDIST(sqeuclidean, double)

DEFINE_PDIST(dice, char)
DEFINE_PDIST(jaccard, char)
DEFINE_PDIST(kulsinski, char)
DEFINE_PDIST(rogerstanimoto, char)
DEFINE_PDIST(russellrao, char)
DEFINE_PDIST(sokalmichener, char)
DEFINE_PDIST(sokalsneath, char)
DEFINE_PDIST(yule, char)
31 changes: 12 additions & 19 deletions scipy/spatial/tests/test_distance.py
Expand Up @@ -442,28 +442,13 @@ def test_cdist_euclidean_random_unicode(self):
Y2 = wcdist_no_const(X1, X2, 'test_euclidean')
_assert_within_tol(Y1, Y2, eps, verbose > 2)

def test_cdist_minkowski_random_p3d8(self):
@pytest.mark.parametrize("p", [1.0, 1.23, 2.0, 3.8, 4.6, np.inf])
def test_cdist_minkowski_random(self, p):
eps = 1e-07
X1 = eo['cdist-X1']
X2 = eo['cdist-X2']
Y1 = wcdist_no_const(X1, X2, 'minkowski', p=3.8)
Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=3.8)
_assert_within_tol(Y1, Y2, eps, verbose > 2)

def test_cdist_minkowski_random_p4d6(self):
eps = 1e-07
X1 = eo['cdist-X1']
X2 = eo['cdist-X2']
Y1 = wcdist_no_const(X1, X2, 'minkowski', p=4.6)
Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=4.6)
_assert_within_tol(Y1, Y2, eps, verbose > 2)

def test_cdist_minkowski_random_p1d23(self):
eps = 1e-07
X1 = eo['cdist-X1']
X2 = eo['cdist-X2']
Y1 = wcdist_no_const(X1, X2, 'minkowski', p=1.23)
Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=1.23)
Y1 = wcdist_no_const(X1, X2, 'minkowski', p=p)
Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=p)
_assert_within_tol(Y1, Y2, eps, verbose > 2)

def test_cdist_cosine_random(self):
Expand Down Expand Up @@ -949,6 +934,14 @@ def test_pdist_correlation_iris_nonC(self):
Y_test2 = wpdist(X, 'test_correlation')
_assert_within_tol(Y_test2, Y_right, eps)

@pytest.mark.parametrize("p", [1.0, 2.0, 3.2, np.inf])
def test_pdist_minkowski_random_p(self, p):
eps = 1e-05
X = eo['pdist-double-inp']
Y1 = wpdist_no_const(X, 'minkowski', p=p)
Y2 = wpdist_no_const(X, 'test_minkowski', p=p)
_assert_within_tol(Y1, Y2, eps)

def test_pdist_minkowski_random(self):
eps = 1e-05
X = eo['pdist-double-inp']
Expand Down

0 comments on commit 28d4244

Please sign in to comment.