Permalink
Browse files

BUG: spatial: fix Canberra distance. Closes #1430.

Also fix potential problems with integer division in braycurtis and kulsinski.
  • Loading branch information...
1 parent fba8f78 commit 32f9e3d8e0154da1b9413a9d2c6ebec578ee29af @rgommers rgommers committed Jun 12, 2011
Showing with 46 additions and 20 deletions.
  1. +22 −9 scipy/spatial/distance.py
  2. +7 −5 scipy/spatial/src/distance.c
  3. +17 −6 scipy/spatial/tests/test_distance.py
View
31 scipy/spatial/distance.py
@@ -415,7 +415,7 @@ def kulsinski(u, v):
"""
u = np.asarray(u, order='c')
v = np.asarray(v, order='c')
- n = len(u)
+ n = float(len(u))
(nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
return (ntf + nft - ntt + n) / (ntf + nft + n)
@@ -441,7 +441,7 @@ def seuclidean(u, v, V):
"""
u = np.asarray(u, order='c')
v = np.asarray(v, order='c')
- V = np.asarray(V, order='c')
+ V = np.asarray(V, order='c', dtype=np.float64)
if len(V.shape) != 1 or V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
raise TypeError('V must be a 1-D array of the same dimension as u and v.')
return np.sqrt(((u-v)**2 / V).sum())
@@ -533,6 +533,9 @@ def braycurtis(u, v):
\sum{|u_i-v_i|} / \sum{|u_i+v_i|}.
+ The Bray-Curtis distance is in the range [0, 1] if all coordinates are
+ positive, and is undefined if the inputs are of length zero.
+
Parameters
----------
u : ndarray
@@ -546,8 +549,8 @@ def braycurtis(u, v):
The Bray-Curtis distance between vectors ``u`` and ``v``.
"""
u = np.asarray(u, order='c')
- v = np.asarray(v, order='c')
- return abs(u-v).sum() / abs(u+v).sum()
+ v = np.asarray(v, order='c', dtype=np.float64)
+ return abs(u - v).sum() / abs(u + v).sum()
def canberra(u, v):
r"""
@@ -556,9 +559,8 @@ def canberra(u, v):
.. math::
- \frac{\sum_i {|u_i-v_i|}}
- {\sum_i {|u_i|+|v_i|}}.
-
+ \sum_u \frac{|u_i-v_i|}
+ {(|u_i|+|v_i|)}.
Parameters
----------
@@ -571,10 +573,21 @@ def canberra(u, v):
-------
d : double
The Canberra distance between vectors ``u`` and ``v``.
+
+ Notes
+ -----
+ Whe u[i] and v[i] are 0 for given i, then the fraction 0/0 = 0 is used in
+ the calculation.
+
"""
u = np.asarray(u, order='c')
- v = np.asarray(v, order='c')
- return abs(u-v).sum() / (abs(u).sum() + abs(v).sum())
+ v = np.asarray(v, order='c', dtype=np.float64)
+ olderr = np.seterr(invalid='ignore')
+ try:
+ d = np.nansum(abs(u - v) / (abs(u) + abs(v)))
+ finally:
+ np.seterr(**olderr)
+ return d
def _nbool_correspond_all(u, v):
if u.dtype != v.dtype:
View
12 scipy/spatial/src/distance.c
@@ -75,13 +75,15 @@ static NPY_INLINE double chebyshev_distance(const double *u, const double *v, in
static NPY_INLINE double canberra_distance(const double *u, const double *v, int n) {
int i;
- double snum = 0.0, sdenom_u = 0.0, sdenom_v = 0.0;
+ double snum = 0.0, sdenom = 0.0, tot = 0.0;
for (i = 0; i < n; i++) {
- snum += fabs(u[i] - v[i]);
- sdenom_u += fabs(u[i]);
- sdenom_v += fabs(v[i]);
+ snum = fabs(u[i] - v[i]);
+ sdenom = fabs(u[i]) + fabs(v[i]);
+ if (sdenom > 0.0) {
+ tot += snum / sdenom;
+ }
}
- return snum / (sdenom_u + sdenom_v);
+ return tot;
}
static NPY_INLINE double bray_curtis_distance(const double *u, const double *v, int n) {
View
23 scipy/spatial/tests/test_distance.py
@@ -38,11 +38,12 @@
import numpy as np
from numpy.testing import verbose, TestCase, run_module_suite, \
- assert_raises, assert_array_equal
+ assert_raises, assert_array_equal, assert_equal, assert_almost_equal
from scipy.spatial.distance import squareform, pdist, cdist, matching, \
jaccard, dice, sokalsneath, rogerstanimoto, \
russellrao, yule, num_obs_y, num_obs_dm, \
- is_valid_dm, is_valid_y, wminkowski
+ is_valid_dm, is_valid_y, wminkowski, \
+ canberra, braycurtis
_filenames = ["iris.txt",
"cdist-X1.txt",
@@ -118,7 +119,7 @@ def test_cdist_euclidean_random(self):
if verbose > 2:
print (Y1-Y2).max()
self.assertTrue(within_tol(Y1, Y2, eps))
-
+
def test_cdist_euclidean_random_unicode(self):
"Tests cdist(X, u'euclidean') using unicode metric string"
eps = 1e-07
@@ -130,7 +131,7 @@ def test_cdist_euclidean_random_unicode(self):
if verbose > 2:
print (Y1-Y2).max()
self.assertTrue(within_tol(Y1, Y2, eps))
-
+
def test_cdist_sqeuclidean_random(self):
"Tests cdist(X, 'sqeuclidean') on random data."
eps = 1e-07
@@ -477,7 +478,7 @@ def test_pdist_euclidean_random(self):
Y_test1 = pdist(X, 'euclidean')
self.assertTrue(within_tol(Y_test1, Y_right, eps))
-
+
def test_pdist_euclidean_random_u(self):
"Tests pdist(X, 'euclidean') with unicode metric string"
eps = 1e-07
@@ -487,7 +488,7 @@ def test_pdist_euclidean_random_u(self):
Y_test1 = pdist(X, u'euclidean')
self.assertTrue(within_tol(Y_test1, Y_right, eps))
-
+
def test_pdist_euclidean_random_float32(self):
"Tests pdist(X, 'euclidean') on random data (float32)."
eps = 1e-07
@@ -1734,6 +1735,16 @@ def test_sokalsneath_all_false():
"""Regression test for ticket #876"""
assert_raises(ValueError, sokalsneath, [False, False, False], [False, False, False])
+def test_canberra():
+ """Regression test for ticket #1430."""
+ assert_equal(canberra([1,2,3], [2,4,6]), 1)
+ assert_equal(canberra([1,1,0,0], [1,0,1,0]), 2)
+
+def test_braycurtis():
+ """Regression test for ticket #1430."""
+ assert_almost_equal(braycurtis([1,2,3], [2,4,6]), 1./3, decimal=15)
+ assert_almost_equal(braycurtis([1,1,0,0], [1,0,1,0]), 0.5, decimal=15)
+
if __name__=="__main__":
run_module_suite()

0 comments on commit 32f9e3d

Please sign in to comment.