Skip to content

Commit

Permalink
BUG: spatial: fix Canberra distance. Closes #1430.
Browse files Browse the repository at this point in the history
Also fix potential problems with integer division in braycurtis and kulsinski.
  • Loading branch information
rgommers committed Jun 12, 2011
1 parent fba8f78 commit 32f9e3d
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 20 deletions.
31 changes: 22 additions & 9 deletions scipy/spatial/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ def kulsinski(u, v):
"""
u = np.asarray(u, order='c')
v = np.asarray(v, order='c')
n = len(u)
n = float(len(u))
(nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)

return (ntf + nft - ntt + n) / (ntf + nft + n)
Expand All @@ -441,7 +441,7 @@ def seuclidean(u, v, V):
"""
u = np.asarray(u, order='c')
v = np.asarray(v, order='c')
V = np.asarray(V, order='c')
V = np.asarray(V, order='c', dtype=np.float64)
if len(V.shape) != 1 or V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
raise TypeError('V must be a 1-D array of the same dimension as u and v.')
return np.sqrt(((u-v)**2 / V).sum())
Expand Down Expand Up @@ -533,6 +533,9 @@ def braycurtis(u, v):
\sum{|u_i-v_i|} / \sum{|u_i+v_i|}.
The Bray-Curtis distance is in the range [0, 1] if all coordinates are
positive, and is undefined if the inputs are of length zero.
Parameters
----------
u : ndarray
Expand All @@ -546,8 +549,8 @@ def braycurtis(u, v):
The Bray-Curtis distance between vectors ``u`` and ``v``.
"""
u = np.asarray(u, order='c')
v = np.asarray(v, order='c')
return abs(u-v).sum() / abs(u+v).sum()
v = np.asarray(v, order='c', dtype=np.float64)
return abs(u - v).sum() / abs(u + v).sum()

def canberra(u, v):
r"""
Expand All @@ -556,9 +559,8 @@ def canberra(u, v):
.. math::
\frac{\sum_i {|u_i-v_i|}}
{\sum_i {|u_i|+|v_i|}}.
\sum_u \frac{|u_i-v_i|}
{(|u_i|+|v_i|)}.
Parameters
----------
Expand All @@ -571,10 +573,21 @@ def canberra(u, v):
-------
d : double
The Canberra distance between vectors ``u`` and ``v``.
Notes
-----
Whe u[i] and v[i] are 0 for given i, then the fraction 0/0 = 0 is used in
the calculation.
"""
u = np.asarray(u, order='c')
v = np.asarray(v, order='c')
return abs(u-v).sum() / (abs(u).sum() + abs(v).sum())
v = np.asarray(v, order='c', dtype=np.float64)
olderr = np.seterr(invalid='ignore')
try:
d = np.nansum(abs(u - v) / (abs(u) + abs(v)))
finally:
np.seterr(**olderr)
return d

def _nbool_correspond_all(u, v):
if u.dtype != v.dtype:
Expand Down
12 changes: 7 additions & 5 deletions scipy/spatial/src/distance.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,15 @@ static NPY_INLINE double chebyshev_distance(const double *u, const double *v, in

static NPY_INLINE double canberra_distance(const double *u, const double *v, int n) {
int i;
double snum = 0.0, sdenom_u = 0.0, sdenom_v = 0.0;
double snum = 0.0, sdenom = 0.0, tot = 0.0;
for (i = 0; i < n; i++) {
snum += fabs(u[i] - v[i]);
sdenom_u += fabs(u[i]);
sdenom_v += fabs(v[i]);
snum = fabs(u[i] - v[i]);
sdenom = fabs(u[i]) + fabs(v[i]);
if (sdenom > 0.0) {
tot += snum / sdenom;
}
}
return snum / (sdenom_u + sdenom_v);
return tot;
}

static NPY_INLINE double bray_curtis_distance(const double *u, const double *v, int n) {
Expand Down
23 changes: 17 additions & 6 deletions scipy/spatial/tests/test_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@

import numpy as np
from numpy.testing import verbose, TestCase, run_module_suite, \
assert_raises, assert_array_equal
assert_raises, assert_array_equal, assert_equal, assert_almost_equal
from scipy.spatial.distance import squareform, pdist, cdist, matching, \
jaccard, dice, sokalsneath, rogerstanimoto, \
russellrao, yule, num_obs_y, num_obs_dm, \
is_valid_dm, is_valid_y, wminkowski
is_valid_dm, is_valid_y, wminkowski, \
canberra, braycurtis

_filenames = ["iris.txt",
"cdist-X1.txt",
Expand Down Expand Up @@ -118,7 +119,7 @@ def test_cdist_euclidean_random(self):
if verbose > 2:
print (Y1-Y2).max()
self.assertTrue(within_tol(Y1, Y2, eps))

def test_cdist_euclidean_random_unicode(self):
"Tests cdist(X, u'euclidean') using unicode metric string"
eps = 1e-07
Expand All @@ -130,7 +131,7 @@ def test_cdist_euclidean_random_unicode(self):
if verbose > 2:
print (Y1-Y2).max()
self.assertTrue(within_tol(Y1, Y2, eps))

def test_cdist_sqeuclidean_random(self):
"Tests cdist(X, 'sqeuclidean') on random data."
eps = 1e-07
Expand Down Expand Up @@ -477,7 +478,7 @@ def test_pdist_euclidean_random(self):

Y_test1 = pdist(X, 'euclidean')
self.assertTrue(within_tol(Y_test1, Y_right, eps))

def test_pdist_euclidean_random_u(self):
"Tests pdist(X, 'euclidean') with unicode metric string"
eps = 1e-07
Expand All @@ -487,7 +488,7 @@ def test_pdist_euclidean_random_u(self):

Y_test1 = pdist(X, u'euclidean')
self.assertTrue(within_tol(Y_test1, Y_right, eps))

def test_pdist_euclidean_random_float32(self):
"Tests pdist(X, 'euclidean') on random data (float32)."
eps = 1e-07
Expand Down Expand Up @@ -1734,6 +1735,16 @@ def test_sokalsneath_all_false():
"""Regression test for ticket #876"""
assert_raises(ValueError, sokalsneath, [False, False, False], [False, False, False])

def test_canberra():
"""Regression test for ticket #1430."""
assert_equal(canberra([1,2,3], [2,4,6]), 1)
assert_equal(canberra([1,1,0,0], [1,0,1,0]), 2)

def test_braycurtis():
"""Regression test for ticket #1430."""
assert_almost_equal(braycurtis([1,2,3], [2,4,6]), 1./3, decimal=15)
assert_almost_equal(braycurtis([1,1,0,0], [1,0,1,0]), 0.5, decimal=15)


if __name__=="__main__":
run_module_suite()

0 comments on commit 32f9e3d

Please sign in to comment.