Permalink
Browse files

FIX: second argument in euclidean_distances.

This method failed when the second argument was not given. Slightly
changed the API to always take a second argument, without a speed
penalty, and added an optional argument axis.

Added test.
  • Loading branch information...
1 parent c8f536e commit dc956b4d0250a67459af145934a6583c71526820 @fabianp fabianp committed Dec 16, 2010
Showing with 29 additions and 15 deletions.
  1. +28 −15 scikits/learn/metrics/pairwise.py
  2. +1 −0 scikits/learn/metrics/tests/test_pairwise.py
View
43 scikits/learn/metrics/pairwise.py
@@ -9,36 +9,49 @@
import numpy as np
-def euclidian_distances(X, Y=None):
+def euclidian_distances(X, Y):
"""
Considering the rows of X (and Y=X) as vectors, compute the
- distance matrix between each pair of vector
+ distance matrix between each pair of vectors.
Parameters
----------
- X, array of shape (n_samples_1, n_features)
+ X: array of shape (n_samples_1, n_features)
- Y, array of shape (n_samples_2, n_features), default None
- if Y is None, then Y=X is used instead
+ Y: array of shape (n_samples_2, n_features)
Returns
-------
- distances, array of shape (n_samples_1, n_samples_2)
- """
+ distances: array of shape (n_samples_1, n_samples_2)
+
+ Examples
+ --------
+ >>> X = [[0, 1], [1, 1]]
+ >>> # distance between rows of X
+ >>> euclidian_distances(X, X)
+ array([[ 0., 1.],
+ [ 1., 0.]])
+ >>> # get distance to origin
+ >>> euclidian_distances(X, [[0, 0]])
+ array([[ 1. ],
+ [ 1.41421356]])
+ """
+ # shortcut in the common case euclidean_distances(X, X)
+ compute_Y = X is not Y
+
X = np.asanyarray(X)
Y = np.asanyarray(Y)
- if Y is None:
- Y = X
+
if X.shape[1] != Y.shape[1]:
- raise ValueError, "incompatible dimension for X and Y matrices"
+ raise ValueError("Incompatible dimension for X and Y matrices")
XX = np.sum(X * X, axis=1)[:, np.newaxis]
- if Y is None:
- YY = XX.T
- else:
+ if compute_Y:
YY = np.sum(Y * Y, axis=1)[np.newaxis, :]
+ else:
+ YY = XX.T
+
distances = XX + YY # Using broadcasting
distances -= 2 * np.dot(X, Y.T)
distances = np.maximum(distances, 0)
- distances = np.sqrt(distances)
- return distances
+ return np.sqrt(distances)
View
1 scikits/learn/metrics/tests/test_pairwise.py
@@ -9,3 +9,4 @@ def test_euclidian_distances():
Y = [[1], [2]]
D = euclidian_distances(X, Y)
assert_array_almost_equal(D, [[1., 2.]])
+

0 comments on commit dc956b4

Please sign in to comment.