Skip to content
This repository
Browse code

BUG: spatial:

* Improved validation of the arguments to the distance functions, so that only "1-D-like" arrays are accepted.
* Simplified some of the distance computations.
* Cleaned up the code a bit.
* Added tests.
This commit should fix ticket #1328.
  • Loading branch information...
commit 6542a8c034b5211ed3f426d2f2b562c9f92786a6 1 parent 5698591
Warren Weckesser authored June 12, 2011
370  scipy/spatial/distance.py
@@ -112,6 +112,7 @@
112 112
 
113 113
 import warnings
114 114
 import numpy as np
  115
+from numpy.linalg import norm
115 116
 
116 117
 import _distance_wrap
117 118
 
@@ -127,6 +128,7 @@ def _copy_array_if_base_present(a):
127 128
     else:
128 129
         return a
129 130
 
  131
+
130 132
 def _copy_arrays_if_base_present(T):
131 133
     """
132 134
     Accepts a tuple of arrays T. Copies the array T[i] if its base array
@@ -137,6 +139,7 @@ def _copy_arrays_if_base_present(T):
137 139
     l = [_copy_array_if_base_present(a) for a in T]
138 140
     return l
139 141
 
  142
+
140 143
 def _convert_to_bool(X):
141 144
     if X.dtype != np.bool:
142 145
         X = np.bool_(X)
@@ -144,6 +147,7 @@ def _convert_to_bool(X):
144 147
         X = X.copy()
145 148
     return X
146 149
 
  150
+
147 151
 def _convert_to_double(X):
148 152
     if X.dtype != np.double:
149 153
         X = np.double(X)
@@ -151,6 +155,17 @@ def _convert_to_double(X):
151 155
         X = X.copy()
152 156
     return X
153 157
 
  158
+
  159
+def _validate_vector(u, dtype=None):
  160
+    # XXX Is order='c' really necessary?
  161
+    u = np.asarray(u, dtype=dtype, order='c').squeeze()
  162
+    # Ensure values such as u=1 and u=[1] still return 1-D arrays.
  163
+    u = np.atleast_1d(u)
  164
+    if u.ndim > 1:
  165
+        raise ValueError("Input vector should be 1-D.")
  166
+    return u
  167
+
  168
+
154 169
 def minkowski(u, v, p):
155 170
     r"""
156 171
     Computes the Minkowski distance between two vectors ``u`` and ``v``,
@@ -166,19 +181,21 @@ def minkowski(u, v, p):
166 181
         An n-dimensional vector.
167 182
     v : ndarray
168 183
         An n-dimensional vector.
169  
-    p : ndarray
170  
-        The norm of the difference :math:`{||u-v||}_p`.
  184
+    p : int
  185
+        The order of the norm of the difference :math:`{||u-v||}_p`.
171 186
 
172 187
     Returns
173 188
     -------
174 189
     d : double
175 190
         The Minkowski distance between vectors ``u`` and ``v``.
176 191
     """
177  
-    u = np.asarray(u, order='c')
178  
-    v = np.asarray(v, order='c')
  192
+    u = _validate_vector(u)
  193
+    v = _validate_vector(v)
179 194
     if p < 1:
180 195
         raise ValueError("p must be at least 1")
181  
-    return (abs(u-v)**p).sum() ** (1.0 / p)
  196
+    dist = norm(u - v, ord=p)
  197
+    return dist
  198
+
182 199
 
183 200
 def wminkowski(u, v, p, w):
184 201
     r"""
@@ -195,8 +212,8 @@ def wminkowski(u, v, p, w):
195 212
         An :math:`n`-dimensional vector.
196 213
     v : ndarray
197 214
         An :math:`n`-dimensional vector.
198  
-    p : ndarray
199  
-        The norm of the difference :math:`{||u-v||}_p`.
  215
+    p : int
  216
+        The order of the norm of the difference :math:`{||u-v||}_p`.
200 217
     w : ndarray
201 218
         The weight vector.
202 219
 
@@ -205,12 +222,14 @@ def wminkowski(u, v, p, w):
205 222
     d : double
206 223
         The weighted Minkowski distance between vectors ``u`` and ``v``.
207 224
     """
208  
-    u = np.asarray(u, order='c')
209  
-    v = np.asarray(v, order='c')
210  
-    w = np.asarray(w)
  225
+    u = _validate_vector(u)
  226
+    v = _validate_vector(v)
  227
+    w = _validate_vector(w)
211 228
     if p < 1:
212 229
         raise ValueError("p must be at least 1")
213  
-    return ((w * abs(u-v))**p).sum() ** (1.0 / p)
  230
+    dist = norm(w * (u - v), ord=p)
  231
+    return dist
  232
+
214 233
 
215 234
 def euclidean(u, v):
216 235
     """
@@ -233,10 +252,11 @@ def euclidean(u, v):
233 252
     d : double
234 253
         The Euclidean distance between vectors ``u`` and ``v``.
235 254
     """
236  
-    u = np.asarray(u, order='c')
237  
-    v = np.asarray(v, order='c')
238  
-    q=np.matrix(u-v)
239  
-    return np.sqrt((q*q.T).sum())
  255
+    u = _validate_vector(u)
  256
+    v = _validate_vector(v)
  257
+    dist = norm(u - v)
  258
+    return dist
  259
+
240 260
 
241 261
 def sqeuclidean(u, v):
242 262
     """
@@ -260,9 +280,11 @@ def sqeuclidean(u, v):
260 280
     d : double
261 281
         The squared Euclidean distance between vectors ``u`` and ``v``.
262 282
     """
263  
-    u = np.asarray(u, order='c')
264  
-    v = np.asarray(v, order='c')
265  
-    return ((u-v)*(u-v).T).sum()
  283
+    u = _validate_vector(u)
  284
+    v = _validate_vector(v)
  285
+    dist = ((u - v) ** 2).sum()
  286
+    return dist
  287
+
266 288
 
267 289
 def cosine(u, v):
268 290
     r"""
@@ -286,10 +308,11 @@ def cosine(u, v):
286 308
     d : double
287 309
         The Cosine distance between vectors ``u`` and ``v``.
288 310
     """
289  
-    u = np.asarray(u, order='c')
290  
-    v = np.asarray(v, order='c')
291  
-    return (1.0 - (np.dot(u, v.T) / \
292  
-                   (np.sqrt(np.dot(u, u.T)) * np.sqrt(np.dot(v, v.T)))))
  311
+    u = _validate_vector(u)
  312
+    v = _validate_vector(v)
  313
+    dist = 1.0 - np.dot(u, v) / (norm(u) * norm(v))
  314
+    return dist
  315
+
293 316
 
294 317
 def correlation(u, v):
295 318
     r"""
@@ -316,13 +339,15 @@ def correlation(u, v):
316 339
     d : double
317 340
         The correlation distance between vectors ``u`` and ``v``.
318 341
     """
  342
+    u = _validate_vector(u)
  343
+    v = _validate_vector(v)
319 344
     umu = u.mean()
320 345
     vmu = v.mean()
321 346
     um = u - umu
322 347
     vm = v - vmu
323  
-    return 1.0 - (np.dot(um, vm) /
324  
-                  (np.sqrt(np.dot(um, um)) \
325  
-                   * np.sqrt(np.dot(vm, vm))))
  348
+    dist = 1.0 - np.dot(um, vm) / (norm(um) * norm(vm))
  349
+    return dist
  350
+
326 351
 
327 352
 def hamming(u, v):
328 353
     r"""
@@ -351,10 +376,11 @@ def hamming(u, v):
351 376
     d : double
352 377
         The Hamming distance between vectors ``u`` and ``v``.
353 378
     """
354  
-    u = np.asarray(u, order='c')
355  
-    v = np.asarray(v, order='c')
  379
+    u = _validate_vector(u)
  380
+    v = _validate_vector(v)
356 381
     return (u != v).mean()
357 382
 
  383
+
358 384
 def jaccard(u, v):
359 385
     """
360 386
     Computes the Jaccard-Needham dissimilarity between two boolean
@@ -381,11 +407,13 @@ def jaccard(u, v):
381 407
     d : double
382 408
         The Jaccard distance between vectors ``u`` and ``v``.
383 409
     """
384  
-    u = np.asarray(u, order='c')
385  
-    v = np.asarray(v, order='c')
386  
-    return (np.double(np.bitwise_and((u != v),
387  
-                     np.bitwise_or(u != 0, v != 0)).sum())
388  
-            /  np.double(np.bitwise_or(u != 0, v != 0).sum()))
  410
+    u = _validate_vector(u)
  411
+    v = _validate_vector(v)
  412
+    dist = (np.double(np.bitwise_and((u != v),
  413
+                                     np.bitwise_or(u != 0, v != 0)).sum())
  414
+            / np.double(np.bitwise_or(u != 0, v != 0).sum()))
  415
+    return dist
  416
+
389 417
 
390 418
 def kulsinski(u, v):
391 419
     """
@@ -413,17 +441,18 @@ def kulsinski(u, v):
413 441
     d : double
414 442
         The Kulsinski distance between vectors ``u`` and ``v``.
415 443
     """
416  
-    u = np.asarray(u, order='c')
417  
-    v = np.asarray(v, order='c')
  444
+    u = _validate_vector(u)
  445
+    v = _validate_vector(v)
418 446
     n = float(len(u))
419 447
     (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
420 448
 
421 449
     return (ntf + nft - ntt + n) / (ntf + nft + n)
422 450
 
  451
+
423 452
 def seuclidean(u, v, V):
424 453
     """
425 454
     Returns the standardized Euclidean distance between two n-vectors
426  
-    ``u`` and ``v``. ``V`` is an m-dimensional vector of component
  455
+    ``u`` and ``v``. ``V`` is an n-dimensional vector of component
427 456
     variances. It is usually computed among a larger collection of
428 457
     vectors.
429 458
 
@@ -433,18 +462,22 @@ def seuclidean(u, v, V):
433 462
         An :math:`n`-dimensional vector.
434 463
     v : ndarray
435 464
         An :math:`n`-dimensional vector.
  465
+    V : ndarray
  466
+        An :math:`n`-dimensional vector.
436 467
 
437 468
     Returns
438 469
     -------
439 470
     d : double
440 471
         The standardized Euclidean distance between vectors ``u`` and ``v``.
441 472
     """
442  
-    u = np.asarray(u, order='c')
443  
-    v = np.asarray(v, order='c')
444  
-    V = np.asarray(V, order='c', dtype=np.float64)
445  
-    if len(V.shape) != 1 or V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
446  
-        raise TypeError('V must be a 1-D array of the same dimension as u and v.')
447  
-    return np.sqrt(((u-v)**2 / V).sum())
  473
+    u = _validate_vector(u)
  474
+    v = _validate_vector(v)
  475
+    V = _validate_vector(V, dtype=np.float64)
  476
+    if V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
  477
+        raise TypeError('V must be a 1-D array of the same dimension '
  478
+                        'as u and v.')
  479
+    return np.sqrt(((u - v) ** 2 / V).sum())
  480
+
448 481
 
449 482
 def cityblock(u, v):
450 483
     r"""
@@ -467,9 +500,10 @@ def cityblock(u, v):
467 500
     d : double
468 501
         The City Block distance between vectors ``u`` and ``v``.
469 502
     """
470  
-    u = np.asarray(u, order='c')
471  
-    v = np.asarray(v, order='c')
472  
-    return abs(u-v).sum()
  503
+    u = _validate_vector(u)
  504
+    v = _validate_vector(v)
  505
+    return abs(u - v).sum()
  506
+
473 507
 
474 508
 def mahalanobis(u, v, VI):
475 509
     r"""
@@ -494,10 +528,13 @@ def mahalanobis(u, v, VI):
494 528
     d : double
495 529
         The Mahalanobis distance between vectors ``u`` and ``v``.
496 530
     """
497  
-    u = np.asarray(u, order='c')
498  
-    v = np.asarray(v, order='c')
499  
-    VI = np.asarray(VI, order='c')
500  
-    return np.sqrt(np.dot(np.dot((u-v),VI),(u-v).T).sum())
  531
+    u = _validate_vector(u)
  532
+    v = _validate_vector(v)
  533
+    VI = np.atleast_2d(VI)
  534
+    delta = u - v
  535
+    m = np.dot(np.dot(delta, VI), delta)
  536
+    return np.sqrt(m)
  537
+
501 538
 
502 539
 def chebyshev(u, v):
503 540
     r"""
@@ -520,9 +557,10 @@ def chebyshev(u, v):
520 557
     d : double
521 558
         The Chebyshev distance between vectors ``u`` and ``v``.
522 559
     """
523  
-    u = np.asarray(u, order='c')
524  
-    v = np.asarray(v, order='c')
525  
-    return max(abs(u-v))
  560
+    u = _validate_vector(u)
  561
+    v = _validate_vector(v)
  562
+    return max(abs(u - v))
  563
+
526 564
 
527 565
 def braycurtis(u, v):
528 566
     r"""
@@ -548,10 +586,11 @@ def braycurtis(u, v):
548 586
     d : double
549 587
         The Bray-Curtis distance between vectors ``u`` and ``v``.
550 588
     """
551  
-    u = np.asarray(u, order='c')
552  
-    v = np.asarray(v, order='c', dtype=np.float64)
  589
+    u = _validate_vector(u)
  590
+    v = _validate_vector(v, dtype=np.float64)
553 591
     return abs(u - v).sum() / abs(u + v).sum()
554 592
 
  593
+
555 594
 def canberra(u, v):
556 595
     r"""
557 596
     Computes the Canberra distance between two n-vectors u and v,
@@ -580,8 +619,8 @@ def canberra(u, v):
580 619
     the calculation.
581 620
 
582 621
     """
583  
-    u = np.asarray(u, order='c')
584  
-    v = np.asarray(v, order='c', dtype=np.float64)
  622
+    u = _validate_vector(u)
  623
+    v = _validate_vector(v, dtype=np.float64)
585 624
     olderr = np.seterr(invalid='ignore')
586 625
     try:
587 626
         d = np.nansum(abs(u - v) / (abs(u) + abs(v)))
@@ -589,6 +628,7 @@ def canberra(u, v):
589 628
         np.seterr(**olderr)
590 629
     return d
591 630
 
  631
+
592 632
 def _nbool_correspond_all(u, v):
593 633
     if u.dtype != v.dtype:
594 634
         raise TypeError("Arrays being compared must be of the same data type.")
@@ -612,6 +652,7 @@ def _nbool_correspond_all(u, v):
612 652
 
613 653
     return (nff, nft, ntf, ntt)
614 654
 
  655
+
615 656
 def _nbool_correspond_ft_tf(u, v):
616 657
     if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
617 658
         not_u = 1.0 - u
@@ -625,6 +666,7 @@ def _nbool_correspond_ft_tf(u, v):
625 666
         ntf = (u & not_v).sum()
626 667
     return (nft, ntf)
627 668
 
  669
+
628 670
 def yule(u, v):
629 671
     r"""
630 672
     Computes the Yule dissimilarity between two boolean n-vectors u and v,
@@ -651,11 +693,12 @@ def yule(u, v):
651 693
     d : double
652 694
         The Yule dissimilarity between vectors ``u`` and ``v``.
653 695
     """
654  
-    u = np.asarray(u, order='c')
655  
-    v = np.asarray(v, order='c')
  696
+    u = _validate_vector(u)
  697
+    v = _validate_vector(v)
656 698
     (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
657 699
     return float(2.0 * ntf * nft) / float(ntt * nff + ntf * nft)
658 700
 
  701
+
659 702
 def matching(u, v):
660 703
     r"""
661 704
     Computes the Matching dissimilarity between two boolean n-vectors
@@ -681,11 +724,12 @@ def matching(u, v):
681 724
     d : double
682 725
         The Matching dissimilarity between vectors ``u`` and ``v``.
683 726
     """
684  
-    u = np.asarray(u, order='c')
685  
-    v = np.asarray(v, order='c')
  727
+    u = _validate_vector(u)
  728
+    v = _validate_vector(v)
686 729
     (nft, ntf) = _nbool_correspond_ft_tf(u, v)
687 730
     return float(nft + ntf) / float(len(u))
688 731
 
  732
+
689 733
 def dice(u, v):
690 734
     r"""
691 735
     Computes the Dice dissimilarity between two boolean n-vectors
@@ -712,8 +756,8 @@ def dice(u, v):
712 756
     d : double
713 757
         The Dice dissimilarity between vectors ``u`` and ``v``.
714 758
     """
715  
-    u = np.asarray(u, order='c')
716  
-    v = np.asarray(v, order='c')
  759
+    u = _validate_vector(u)
  760
+    v = _validate_vector(v)
717 761
     if u.dtype == np.bool:
718 762
         ntt = (u & v).sum()
719 763
     else:
@@ -721,6 +765,7 @@ def dice(u, v):
721 765
     (nft, ntf) = _nbool_correspond_ft_tf(u, v)
722 766
     return float(ntf + nft) / float(2.0 * ntt + ntf + nft)
723 767
 
  768
+
724 769
 def rogerstanimoto(u, v):
725 770
     r"""
726 771
     Computes the Rogers-Tanimoto dissimilarity between two boolean
@@ -747,11 +792,12 @@ def rogerstanimoto(u, v):
747 792
         The Rogers-Tanimoto dissimilarity between vectors
748 793
         `u` and `v`.
749 794
     """
750  
-    u = np.asarray(u, order='c')
751  
-    v = np.asarray(v, order='c')
  795
+    u = _validate_vector(u)
  796
+    v = _validate_vector(v)
752 797
     (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
753 798
     return float(2.0 * (ntf + nft)) / float(ntt + nff + (2.0 * (ntf + nft)))
754 799
 
  800
+
755 801
 def russellrao(u, v):
756 802
     r"""
757 803
     Computes the Russell-Rao dissimilarity between two boolean n-vectors
@@ -778,14 +824,15 @@ def russellrao(u, v):
778 824
     d : double
779 825
         The Russell-Rao dissimilarity between vectors ``u`` and ``v``.
780 826
     """
781  
-    u = np.asarray(u, order='c')
782  
-    v = np.asarray(v, order='c')
  827
+    u = _validate_vector(u)
  828
+    v = _validate_vector(v)
783 829
     if u.dtype == np.bool:
784 830
         ntt = (u & v).sum()
785 831
     else:
786 832
         ntt = (u * v).sum()
787 833
     return float(len(u) - ntt) / float(len(u))
788 834
 
  835
+
789 836
 def sokalmichener(u, v):
790 837
     r"""
791 838
     Computes the Sokal-Michener dissimilarity between two boolean vectors
@@ -813,8 +860,8 @@ def sokalmichener(u, v):
813 860
     d : double
814 861
         The Sokal-Michener dissimilarity between vectors ``u`` and ``v``.
815 862
     """
816  
-    u = np.asarray(u, order='c')
817  
-    v = np.asarray(v, order='c')
  863
+    u = _validate_vector(u)
  864
+    v = _validate_vector(v)
818 865
     if u.dtype == np.bool:
819 866
         ntt = (u & v).sum()
820 867
         nff = (~u & ~v).sum()
@@ -822,7 +869,8 @@ def sokalmichener(u, v):
822 869
         ntt = (u * v).sum()
823 870
         nff = ((1.0 - u) * (1.0 - v)).sum()
824 871
     (nft, ntf) = _nbool_correspond_ft_tf(u, v)
825  
-    return float(2.0 * (ntf + nft))/float(ntt + nff + 2.0 * (ntf + nft))
  872
+    return float(2.0 * (ntf + nft)) / float(ntt + nff + 2.0 * (ntf + nft))
  873
+
826 874
 
827 875
 def sokalsneath(u, v):
828 876
     r"""
@@ -850,8 +898,8 @@ def sokalsneath(u, v):
850 898
     d : double
851 899
         The Sokal-Sneath dissimilarity between vectors ``u`` and ``v``.
852 900
     """
853  
-    u = np.asarray(u, order='c')
854  
-    v = np.asarray(v, order='c')
  901
+    u = _validate_vector(u)
  902
+    v = _validate_vector(v)
855 903
     if u.dtype == np.bool:
856 904
         ntt = (u & v).sum()
857 905
     else:
@@ -903,6 +951,7 @@ def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None):
903 951
 
904 952
           \sqrt{\sum {(u_i-v_i)^2 / V[x_i]}}.
905 953
 
  954
+
906 955
        V is the variance vector; V[i] is the variance computed over all
907 956
           the i'th components of the points. If not passed, it is
908 957
           automatically computed.
@@ -1088,14 +1137,12 @@ def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None):
1088 1137
                  square distance matrices.
1089 1138
     """
1090 1139
 
1091  
-
1092 1140
 #         21. Y = pdist(X, 'test_Y')
1093 1141
 #
1094 1142
 #           Computes the distance between all pairs of vectors in X
1095 1143
 #           using the distance metric Y but with a more succint,
1096 1144
 #           verifiable, but less efficient implementation.
1097 1145
 
1098  
-
1099 1146
     X = np.asarray(X, order='c')
1100 1147
 
1101 1148
     # The C code doesn't do striding.
@@ -1103,7 +1150,7 @@ def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None):
1103 1150
 
1104 1151
     s = X.shape
1105 1152
     if len(s) != 2:
1106  
-        raise ValueError('A 2-dimensional array must be passed.');
  1153
+        raise ValueError('A 2-dimensional array must be passed.')
1107 1154
 
1108 1155
     m, n = s
1109 1156
     dm = np.zeros((m * (m - 1) / 2,), dtype=np.double)
@@ -1115,23 +1162,27 @@ def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None):
1115 1162
 
1116 1163
     if callable(metric):
1117 1164
         if metric == minkowski:
1118  
-            def dfun(u,v): return minkowski(u, v, p)
  1165
+            def dfun(u, v):
  1166
+                return minkowski(u, v, p)
1119 1167
         elif metric == wminkowski:
1120  
-            def dfun(u,v): return wminkowski(u, v, p, w)
  1168
+            def dfun(u, v):
  1169
+                return wminkowski(u, v, p, w)
1121 1170
         elif metric == seuclidean:
1122  
-            def dfun(u,v): return seuclidean(u, v, V)
  1171
+            def dfun(u, v):
  1172
+                return seuclidean(u, v, V)
1123 1173
         elif metric == mahalanobis:
1124  
-            def dfun(u,v): return mahalanobis(u, v, V)
  1174
+            def dfun(u, v):
  1175
+                return mahalanobis(u, v, V)
1125 1176
         else:
1126 1177
             dfun = metric
1127 1178
 
1128 1179
         k = 0
1129 1180
         for i in xrange(0, m - 1):
1130  
-            for j in xrange(i+1, m):
  1181
+            for j in xrange(i + 1, m):
1131 1182
                 dm[k] = dfun(X[i], X[j])
1132 1183
                 k = k + 1
1133 1184
 
1134  
-    elif isinstance(metric,basestring):
  1185
+    elif isinstance(metric, basestring):
1135 1186
         mstr = metric.lower()
1136 1187
 
1137 1188
         #if X.dtype != np.double and \
@@ -1169,7 +1220,8 @@ def dfun(u,v): return mahalanobis(u, v, V)
1169 1220
                 if V.dtype != np.double:
1170 1221
                     raise TypeError('Variance vector V must contain doubles.')
1171 1222
                 if len(V.shape) != 1:
1172  
-                    raise ValueError('Variance vector V must be one-dimensional.')
  1223
+                    raise ValueError('Variance vector V must '
  1224
+                                     'be one-dimensional.')
1173 1225
                 if V.shape[0] != n:
1174 1226
                     raise ValueError('Variance vector V must be of the same '
1175 1227
                             'dimension as the vectors on which the distances '
@@ -1192,15 +1244,17 @@ def dfun(u,v): return mahalanobis(u, v, V)
1192 1244
             # The numerator u * v
1193 1245
             nm = np.dot(X, X.T)
1194 1246
             # The denom. ||u||*||v||
1195  
-            de = np.dot(nV, nV.T);
  1247
+            de = np.dot(nV, nV.T)
1196 1248
             dm = 1.0 - (nm / de)
1197  
-            dm[xrange(0,m),xrange(0,m)] = 0.0
  1249
+            dm[xrange(0, m), xrange(0, m)] = 0.0
1198 1250
             dm = squareform(dm)
1199 1251
         elif mstr in set(['correlation', 'co']):
1200  
-            X2 = X - X.mean(1)[:,np.newaxis]
  1252
+            X2 = X - X.mean(1)[:, np.newaxis]
1201 1253
             #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n)
1202 1254
             norms = np.sqrt(np.sum(X2 * X2, axis=1))
1203  
-            _distance_wrap.pdist_cosine_wrap(_convert_to_double(X2), _convert_to_double(dm), _convert_to_double(norms))
  1255
+            _distance_wrap.pdist_cosine_wrap(_convert_to_double(X2),
  1256
+                                             _convert_to_double(dm),
  1257
+                                             _convert_to_double(norms))
1204 1258
         elif mstr in set(['mahalanobis', 'mahal', 'mah']):
1205 1259
             if VI is not None:
1206 1260
                 VI = _convert_to_double(np.asarray(VI, order='c'))
@@ -1213,7 +1267,8 @@ def dfun(u,v): return mahalanobis(u, v, V)
1213 1267
                 V = np.cov(X.T)
1214 1268
                 VI = _convert_to_double(np.linalg.inv(V).T.copy())
1215 1269
             # (u-v)V^(-1)(u-v)^T
1216  
-            _distance_wrap.pdist_mahalanobis_wrap(_convert_to_double(X), VI, dm)
  1270
+            _distance_wrap.pdist_mahalanobis_wrap(_convert_to_double(X),
  1271
+                                                  VI, dm)
1217 1272
         elif mstr == 'canberra':
1218 1273
             _distance_wrap.pdist_canberra_wrap(_convert_to_double(X), dm)
1219 1274
         elif mstr == 'braycurtis':
@@ -1227,11 +1282,13 @@ def dfun(u,v): return mahalanobis(u, v, V)
1227 1282
         elif mstr == 'dice':
1228 1283
             _distance_wrap.pdist_dice_bool_wrap(_convert_to_bool(X), dm)
1229 1284
         elif mstr == 'rogerstanimoto':
1230  
-            _distance_wrap.pdist_rogerstanimoto_bool_wrap(_convert_to_bool(X), dm)
  1285
+            _distance_wrap.pdist_rogerstanimoto_bool_wrap(_convert_to_bool(X),
  1286
+                                                          dm)
1231 1287
         elif mstr == 'russellrao':
1232 1288
             _distance_wrap.pdist_russellrao_bool_wrap(_convert_to_bool(X), dm)
1233 1289
         elif mstr == 'sokalmichener':
1234  
-            _distance_wrap.pdist_sokalmichener_bool_wrap(_convert_to_bool(X), dm)
  1290
+            _distance_wrap.pdist_sokalmichener_bool_wrap(_convert_to_bool(X),
  1291
+                                                         dm)
1235 1292
         elif mstr == 'sokalsneath':
1236 1293
             _distance_wrap.pdist_sokalsneath_bool_wrap(_convert_to_bool(X), dm)
1237 1294
         elif metric == 'test_euclidean':
@@ -1290,9 +1347,11 @@ def dfun(u,v): return mahalanobis(u, v, V)
1290 1347
         else:
1291 1348
             raise ValueError('Unknown Distance Metric: %s' % mstr)
1292 1349
     else:
1293  
-        raise TypeError('2nd argument metric must be a string identifier or a function.')
  1350
+        raise TypeError('2nd argument metric must be a string identifier '
  1351
+                        'or a function.')
1294 1352
     return dm
1295 1353
 
  1354
+
1296 1355
 def squareform(X, force="no", checks=True):
1297 1356
     r"""
1298 1357
     Converts a vector-form distance vector to a square-form distance
@@ -1355,16 +1414,17 @@ def squareform(X, force="no", checks=True):
1355 1414
 
1356 1415
     if force.lower() == 'tomatrix':
1357 1416
         if len(s) != 1:
1358  
-            raise ValueError("Forcing 'tomatrix' but input X is not a distance vector.")
  1417
+            raise ValueError("Forcing 'tomatrix' but input X is not a "
  1418
+                             "distance vector.")
1359 1419
     elif force.lower() == 'tovector':
1360 1420
         if len(s) != 2:
1361  
-            raise ValueError("Forcing 'tovector' but input X is not a distance matrix.")
1362  
-
  1421
+            raise ValueError("Forcing 'tovector' but input X is not a "
  1422
+                             "distance matrix.")
1363 1423
 
1364 1424
     # X = squareform(v)
1365 1425
     if len(s) == 1:
1366 1426
         if X.shape[0] == 0:
1367  
-            return np.zeros((1,1), dtype=np.double)
  1427
+            return np.zeros((1, 1), dtype=np.double)
1368 1428
 
1369 1429
         # Grab the closest value to the square root of the number
1370 1430
         # of elements times 2 to see if the number of elements
@@ -1373,7 +1433,8 @@ def squareform(X, force="no", checks=True):
1373 1433
 
1374 1434
         # Check that v is of valid dimensions.
1375 1435
         if d * (d - 1) / 2 != int(s[0]):
1376  
-            raise ValueError('Incompatible vector size. It must be a binomial coefficient n choose 2 for some integer n >= 2.')
  1436
+            raise ValueError('Incompatible vector size. It must be a binomial '
  1437
+                             'coefficient n choose 2 for some integer n >= 2.')
1377 1438
 
1378 1439
         # Allocate memory for the distance matrix.
1379 1440
         M = np.zeros((d, d), dtype=np.double)
@@ -1411,7 +1472,10 @@ def squareform(X, force="no", checks=True):
1411 1472
         _distance_wrap.to_vector_from_squareform_wrap(X, v)
1412 1473
         return v
1413 1474
     else:
1414  
-        raise ValueError('The first argument must be one or two dimensional array. A %d-dimensional array is not permitted' % len(s))
  1475
+        raise ValueError(('The first argument must be one or two dimensional '
  1476
+                         'array. A %d-dimensional array is not '
  1477
+                         'permitted') % len(s))
  1478
+
1415 1479
 
1416 1480
 def is_valid_dm(D, tol=0.0, throw=False, name="D", warning=False):
1417 1481
     """
@@ -1453,36 +1517,49 @@ def is_valid_dm(D, tol=0.0, throw=False, name="D", warning=False):
1453 1517
         s = D.shape
1454 1518
         if D.dtype != np.double:
1455 1519
             if name:
1456  
-                raise TypeError('Distance matrix \'%s\' must contain doubles (double).' % name)
  1520
+                raise TypeError(('Distance matrix \'%s\' must contain doubles '
  1521
+                                 '(double).') % name)
1457 1522
             else:
1458  
-                raise TypeError('Distance matrix must contain doubles (double).')
  1523
+                raise TypeError('Distance matrix must contain doubles '
  1524
+                                '(double).')
1459 1525
         if len(D.shape) != 2:
1460 1526
             if name:
1461  
-                raise ValueError('Distance matrix \'%s\' must have shape=2 (i.e. be two-dimensional).' % name)
  1527
+                raise ValueError(('Distance matrix \'%s\' must have shape=2 '
  1528
+                                 '(i.e. be two-dimensional).') % name)
1462 1529
             else:
1463  
-                raise ValueError('Distance matrix must have shape=2 (i.e. be two-dimensional).')
  1530
+                raise ValueError('Distance matrix must have shape=2 (i.e. '
  1531
+                                 'be two-dimensional).')
1464 1532
         if tol == 0.0:
1465 1533
             if not (D == D.T).all():
1466 1534
                 if name:
1467  
-                    raise ValueError('Distance matrix \'%s\' must be symmetric.' % name)
  1535
+                    raise ValueError(('Distance matrix \'%s\' must be '
  1536
+                                     'symmetric.') % name)
1468 1537
                 else:
1469 1538
                     raise ValueError('Distance matrix must be symmetric.')
1470 1539
             if not (D[xrange(0, s[0]), xrange(0, s[0])] == 0).all():
1471 1540
                 if name:
1472  
-                    raise ValueError('Distance matrix \'%s\' diagonal must be zero.' % name)
  1541
+                    raise ValueError(('Distance matrix \'%s\' diagonal must '
  1542
+                                     'be zero.') % name)
1473 1543
                 else:
1474 1544
                     raise ValueError('Distance matrix diagonal must be zero.')
1475 1545
         else:
1476 1546
             if not (D - D.T <= tol).all():
1477 1547
                 if name:
1478  
-                    raise ValueError('Distance matrix \'%s\' must be symmetric within tolerance %d.' % (name, tol))
  1548
+                    raise ValueError(('Distance matrix \'%s\' must be '
  1549
+                                      'symmetric within tolerance %d.')
  1550
+                                     % (name, tol))
1479 1551
                 else:
1480  
-                    raise ValueError('Distance matrix must be symmetric within tolerance %5.5f.' % tol)
  1552
+                    raise ValueError('Distance matrix must be symmetric within'
  1553
+                                     ' tolerance %5.5f.' % tol)
1481 1554
             if not (D[xrange(0, s[0]), xrange(0, s[0])] <= tol).all():
1482 1555
                 if name:
1483  
-                    raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %5.5f.' % (name, tol))
  1556
+                    raise ValueError(('Distance matrix \'%s\' diagonal must be'
  1557
+                                      ' close to zero within tolerance %5.5f.')
  1558
+                                     % (name, tol))
1484 1559
                 else:
1485  
-                    raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %5.5f.' % tol)
  1560
+                    raise ValueError(('Distance matrix \'%s\' diagonal must be'
  1561
+                                      ' close to zero within tolerance %5.5f.')
  1562
+                                     % tol)
1486 1563
     except Exception, e:
1487 1564
         if throw:
1488 1565
             raise
@@ -1491,6 +1568,7 @@ def is_valid_dm(D, tol=0.0, throw=False, name="D", warning=False):
1491 1568
         valid = False
1492 1569
     return valid
1493 1570
 
  1571
+
1494 1572
 def is_valid_y(y, warning=False, throw=False, name=None):
1495 1573
     r"""
1496 1574
     Returns ``True`` if the variable ``y`` passed is a valid condensed
@@ -1521,26 +1599,37 @@ def is_valid_y(y, warning=False, throw=False, name=None):
1521 1599
     try:
1522 1600
         if type(y) != np.ndarray:
1523 1601
             if name:
1524  
-                raise TypeError('\'%s\' passed as a condensed distance matrix is not a numpy array.' % name)
  1602
+                raise TypeError(('\'%s\' passed as a condensed distance '
  1603
+                                 'matrix is not a numpy array.') % name)
1525 1604
             else:
1526 1605
                 raise TypeError('Variable is not a numpy array.')
1527 1606
         if y.dtype != np.double:
1528 1607
             if name:
1529  
-                raise TypeError('Condensed distance matrix \'%s\' must contain doubles (double).' % name)
  1608
+                raise TypeError(('Condensed distance matrix \'%s\' must '
  1609
+                                 'contain doubles (double).') % name)
1530 1610
             else:
1531  
-                raise TypeError('Condensed distance matrix must contain doubles (double).')
  1611
+                raise TypeError('Condensed distance matrix must contain '
  1612
+                                'doubles (double).')
1532 1613
         if len(y.shape) != 1:
1533 1614
             if name:
1534  
-                raise ValueError('Condensed distance matrix \'%s\' must have shape=1 (i.e. be one-dimensional).' % name)
  1615
+                raise ValueError(('Condensed distance matrix \'%s\' must '
  1616
+                                  'have shape=1 (i.e. be one-dimensional).')
  1617
+                                 % name)
1535 1618
             else:
1536  
-                raise ValueError('Condensed distance matrix must have shape=1 (i.e. be one-dimensional).')
  1619
+                raise ValueError('Condensed distance matrix must have shape=1 '
  1620
+                                 '(i.e. be one-dimensional).')
1537 1621
         n = y.shape[0]
1538 1622
         d = int(np.ceil(np.sqrt(n * 2)))
1539  
-        if (d*(d-1)/2) != n:
  1623
+        if (d * (d - 1) / 2) != n:
1540 1624
             if name:
1541  
-                raise ValueError('Length n of condensed distance matrix \'%s\' must be a binomial coefficient, i.e. there must be a k such that (k \choose 2)=n)!' % name)
  1625
+                raise ValueError(('Length n of condensed distance matrix '
  1626
+                                  '\'%s\' must be a binomial coefficient, i.e.'
  1627
+                                  ' there must be a k such that '
  1628
+                                  '(k \choose 2)=n)!') % name)
1542 1629
             else:
1543  
-                raise ValueError('Length n of condensed distance matrix must be a binomial coefficient, i.e. there must be a k such that (k \choose 2)=n)!')
  1630
+                raise ValueError('Length n of condensed distance matrix must '
  1631
+                                 'be a binomial coefficient, i.e. there must '
  1632
+                                 'be a k such that (k \choose 2)=n)!')
1544 1633
     except Exception, e:
1545 1634
         if throw:
1546 1635
             raise
@@ -1549,6 +1638,7 @@ def is_valid_y(y, warning=False, throw=False, name=None):
1549 1638
         valid = False
1550 1639
     return valid
1551 1640
 
  1641
+
1552 1642
 def num_obs_dm(d):
1553 1643
     """
1554 1644
     Returns the number of original observations that correspond to a
@@ -1568,6 +1658,7 @@ def num_obs_dm(d):
1568 1658
     is_valid_dm(d, tol=np.inf, throw=True, name='d')
1569 1659
     return d.shape[0]
1570 1660
 
  1661
+
1571 1662
 def num_obs_y(Y):
1572 1663
     """
1573 1664
     Returns the number of original observations that correspond to a
@@ -1589,10 +1680,12 @@ def num_obs_y(Y):
1589 1680
     is_valid_y(Y, throw=True, name='Y')
1590 1681
     k = Y.shape[0]
1591 1682
     if k == 0:
1592  
-        raise ValueError("The number of observations cannot be determined on an empty distance matrix.")
  1683
+        raise ValueError("The number of observations cannot be determined on "
  1684
+                         "an empty distance matrix.")
1593 1685
     d = int(np.ceil(np.sqrt(k * 2)))
1594  
-    if (d*(d-1)/2) != k:
1595  
-        raise ValueError("Invalid condensed distance matrix passed. Must be some k where k=(n choose 2) for some n >= 2.")
  1686
+    if (d * (d - 1) / 2) != k:
  1687
+        raise ValueError("Invalid condensed distance matrix passed. Must be "
  1688
+                         "some k where k=(n choose 2) for some n >= 2.")
1596 1689
     return d
1597 1690
 
1598 1691
 
@@ -1825,14 +1918,12 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
1825 1918
         A :math:`m_A` by :math:`m_B` distance matrix.
1826 1919
     """
1827 1920
 
1828  
-
1829 1921
 #         21. Y = cdist(XA, XB, 'test_Y')
1830 1922
 #
1831 1923
 #           Computes the distance between all pairs of vectors in X
1832 1924
 #           using the distance metric Y but with a more succinct,
1833 1925
 #           verifiable, but less efficient implementation.
1834 1926
 
1835  
-
1836 1927
     XA = np.asarray(XA, order='c')
1837 1928
     XB = np.asarray(XB, order='c')
1838 1929
 
@@ -1848,11 +1939,12 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
1848 1939
     sB = XB.shape
1849 1940
 
1850 1941
     if len(s) != 2:
1851  
-        raise ValueError('XA must be a 2-dimensional array.');
  1942
+        raise ValueError('XA must be a 2-dimensional array.')
1852 1943
     if len(sB) != 2:
1853  
-        raise ValueError('XB must be a 2-dimensional array.');
  1944
+        raise ValueError('XB must be a 2-dimensional array.')
1854 1945
     if s[1] != sB[1]:
1855  
-        raise ValueError('XA and XB must have the same number of columns (i.e. feature dimension.)')
  1946
+        raise ValueError('XA and XB must have the same number of columns '
  1947
+                         '(i.e. feature dimension.)')
1856 1948
 
1857 1949
     mA = s[0]
1858 1950
     mB = sB[0]
@@ -1880,7 +1972,7 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
1880 1972
             for i in xrange(0, mA):
1881 1973
                 for j in xrange(0, mB):
1882 1974
                     dm[i, j] = metric(XA[i, :], XB[j, :])
1883  
-    elif isinstance(metric,basestring):
  1975
+    elif isinstance(metric, basestring):
1884 1976
         mstr = metric.lower()
1885 1977
 
1886 1978
         #if XA.dtype != np.double and \
@@ -1899,14 +1991,16 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
1899 1991
         elif mstr in set(['hamming', 'hamm', 'ha', 'h']):
1900 1992
             if XA.dtype == np.bool:
1901 1993
                 _distance_wrap.cdist_hamming_bool_wrap(_convert_to_bool(XA),
1902  
-                                                       _convert_to_bool(XB), dm)
  1994
+                                                       _convert_to_bool(XB),
  1995
+                                                       dm)
1903 1996
             else:
1904 1997
                 _distance_wrap.cdist_hamming_wrap(_convert_to_double(XA),
1905 1998
                                                   _convert_to_double(XB), dm)
1906 1999
         elif mstr in set(['jaccard', 'jacc', 'ja', 'j']):
1907 2000
             if XA.dtype == np.bool:
1908 2001
                 _distance_wrap.cdist_jaccard_bool_wrap(_convert_to_bool(XA),
1909  
-                                                       _convert_to_bool(XB), dm)
  2002
+                                                       _convert_to_bool(XB),
  2003
+                                                       dm)
1910 2004
             else:
1911 2005
                 _distance_wrap.cdist_jaccard_wrap(_convert_to_double(XA),
1912 2006
                                                   _convert_to_double(XB), dm)
@@ -1918,7 +2012,9 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
1918 2012
                                                 _convert_to_double(XB), dm, p)
1919 2013
         elif mstr in set(['wminkowski', 'wmi', 'wm', 'wpnorm']):
1920 2014
             _distance_wrap.cdist_weighted_minkowski_wrap(_convert_to_double(XA),
1921  
-                                                         _convert_to_double(XB), dm, p, _convert_to_double(w))
  2015
+                                                         _convert_to_double(XB),
  2016
+                                                         dm, p,
  2017
+                                                         _convert_to_double(w))
1922 2018
         elif mstr in set(['seuclidean', 'se', 's']):
1923 2019
             if V is not None:
1924 2020
                 V = np.asarray(V, order='c')
@@ -1927,9 +2023,12 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
1927 2023
                 if V.dtype != np.double:
1928 2024
                     raise TypeError('Variance vector V must contain doubles.')
1929 2025
                 if len(V.shape) != 1:
1930  
-                    raise ValueError('Variance vector V must be one-dimensional.')
  2026
+                    raise ValueError('Variance vector V must be '
  2027
+                                     'one-dimensional.')
1931 2028
                 if V.shape[0] != n:
1932  
-                    raise ValueError('Variance vector V must be of the same dimension as the vectors on which the distances are computed.')
  2029
+                    raise ValueError('Variance vector V must be of the same '
  2030
+                                     'dimension as the vectors on which the '
  2031
+                                     'distances are computed.')
1933 2032
                 # The C code doesn't do striding.
1934 2033
                 [VV] = _copy_arrays_if_base_present([_convert_to_double(V)])
1935 2034
             else:
@@ -1951,8 +2050,8 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
1951 2050
                                              normsA,
1952 2051
                                              normsB)
1953 2052
         elif mstr in set(['correlation', 'co']):
1954  
-            XA2 = XA - XA.mean(1)[:,np.newaxis]
1955  
-            XB2 = XB - XB.mean(1)[:,np.newaxis]
  2053
+            XA2 = XA - XA.mean(1)[:, np.newaxis]
  2054
+            XB2 = XB - XB.mean(1)[:, np.newaxis]
1956 2055
             #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n)
1957 2056
             normsA = np.sqrt(np.sum(XA2 * XA2, axis=1))
1958 2057
             normsB = np.sqrt(np.sum(XB2 * XB2, axis=1))
@@ -1977,7 +2076,8 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
1977 2076
                 VI = _convert_to_double(np.linalg.inv(V).T.copy())
1978 2077
             # (u-v)V^(-1)(u-v)^T
1979 2078
             _distance_wrap.cdist_mahalanobis_wrap(_convert_to_double(XA),
1980  
-                                                  _convert_to_double(XB), VI, dm)
  2079
+                                                  _convert_to_double(XB),
  2080
+                                                  VI, dm)
1981 2081
         elif mstr == 'canberra':
1982 2082
             _distance_wrap.cdist_canberra_wrap(_convert_to_double(XA),
1983 2083
                                                _convert_to_double(XB), dm)
@@ -1998,16 +2098,19 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
1998 2098
                                                 _convert_to_bool(XB), dm)
1999 2099
         elif mstr == 'rogerstanimoto':
2000 2100
             _distance_wrap.cdist_rogerstanimoto_bool_wrap(_convert_to_bool(XA),
2001  
-                                                          _convert_to_bool(XB), dm)
  2101
+                                                          _convert_to_bool(XB),
  2102
+                                                          dm)
2002 2103
         elif mstr == 'russellrao':
2003 2104
             _distance_wrap.cdist_russellrao_bool_wrap(_convert_to_bool(XA),
2004 2105
                                                       _convert_to_bool(XB), dm)
2005 2106
         elif mstr == 'sokalmichener':
2006 2107
             _distance_wrap.cdist_sokalmichener_bool_wrap(_convert_to_bool(XA),
2007  
-                                                         _convert_to_bool(XB), dm)
  2108
+                                                         _convert_to_bool(XB),
  2109
+                                                         dm)
2008 2110
         elif mstr == 'sokalsneath':
2009 2111
             _distance_wrap.cdist_sokalsneath_bool_wrap(_convert_to_bool(XA),
2010  
-                                                       _convert_to_bool(XB), dm)
  2112
+                                                       _convert_to_bool(XB),
  2113
+                                                       dm)
2011 2114
         elif metric == 'test_euclidean':
2012 2115
             dm = cdist(XA, XB, euclidean)
2013 2116
         elif metric == 'test_seuclidean':
@@ -2069,5 +2172,6 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
2069 2172
         else:
2070 2173
             raise ValueError('Unknown Distance Metric: %s' % mstr)
2071 2174
     else:
2072  
-        raise TypeError('2nd argument metric must be a string identifier or a function.')
  2175
+        raise TypeError('2nd argument metric must be a string identifier '
  2176
+                        'or a function.')
2073 2177
     return dm
170  scipy/spatial/tests/test_distance.py
@@ -37,13 +37,16 @@
37 37
 import os.path
38 38
 
39 39
 import numpy as np
  40
+from numpy.linalg import norm
40 41
 from numpy.testing import verbose, TestCase, run_module_suite, \
41 42
         assert_raises, assert_array_equal, assert_equal, assert_almost_equal
  43
+
42 44
 from scipy.spatial.distance import squareform, pdist, cdist, matching, \
43  
-                                   jaccard, dice, sokalsneath, rogerstanimoto, \
44  
-                                   russellrao, yule, num_obs_y, num_obs_dm, \
45  
-                                   is_valid_dm, is_valid_y, wminkowski, \
46  
-                                   canberra, braycurtis
  45
+        jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule, \
  46
+        num_obs_y, num_obs_dm, is_valid_dm, is_valid_y, minkowski, wminkowski, \
  47
+        euclidean, sqeuclidean, cosine, correlation, mahalanobis, \
  48
+        canberra, braycurtis, sokalmichener, _validate_vector                                    
  49
+
47 50
 
48 51
 _filenames = ["iris.txt",
49 52
               "cdist-X1.txt",
@@ -97,11 +100,6 @@ def load_testing_files():
97 100
 
98 101
 load_testing_files()
99 102
 
100  
-#print eo.keys()
101  
-
102  
-
103  
-#print np.abs(Y_test2 - Y_right).max()
104  
-#print np.abs(Y_test1 - Y_right).max()
105 103
 
106 104
 class TestCdist(TestCase):
107 105
     """
@@ -463,6 +461,7 @@ def test_cdist_sokalsneath_random(self):
463 461
             print (Y1-Y2).max()
464 462
         self.assertTrue(within_tol(Y1, Y2, eps))
465 463
 
  464
+
466 465
 class TestPdist(TestCase):
467 466
     """
468 467
     Test suite for the pdist function.
@@ -1402,10 +1401,76 @@ def test_pdist_canberra_ticket_711(self):
1402 1401
             print np.abs(pdist_y-right_y).max()
1403 1402
         self.assertTrue(within_tol(pdist_y, right_y, eps))
1404 1403
 
  1404
+
1405 1405
 def within_tol(a, b, tol):
1406 1406
     return np.abs(a - b).max() < tol
1407 1407
 
1408 1408
 
  1409
+class TestSomeDistanceFunctions(TestCase):
  1410
+
  1411
+    def setUp(self):
  1412
+        # 1D arrays
  1413
+        x = np.array([1.0, 2.0, 3.0])
  1414
+        y = np.array([1.0, 1.0, 5.0])
  1415
+        # 3x1 arrays
  1416
+        x31 = x[:,np.newaxis]
  1417
+        y31 = y[:,np.newaxis]
  1418
+        # 1x3 arrays
  1419
+        x13 = x31.T
  1420
+        y13 = y31.T
  1421
+
  1422
+        self.cases = [(x,y), (x31, y31), (x13, y13)]
  1423
+
  1424
+    def test_minkowski(self):
  1425
+        for x, y in self.cases:
  1426
+            dist1 = minkowski(x, y, p=1)
  1427
+            assert_almost_equal(dist1, 3.0)
  1428
+            dist1p5 = minkowski(x, y, p=1.5)
  1429
+            assert_almost_equal(dist1p5, (1.0+2.0**1.5)**(2./3))
  1430
+            dist2 = minkowski(x, y, p=2)
  1431
+            assert_almost_equal(dist2, np.sqrt(5))
  1432
+
  1433
+    def test_wminkowski(self):
  1434
+        w = np.array([1.0, 2.0, 0.5])
  1435
+        for x, y in self.cases:
  1436
+            dist1 = wminkowski(x, y, p=1, w=w)
  1437
+            assert_almost_equal(dist1, 3.0)
  1438
+            dist1p5 = wminkowski(x, y, p=1.5, w=w)
  1439
+            assert_almost_equal(dist1p5, (2.0**1.5+1.0)**(2./3))
  1440
+            dist2 = wminkowski(x, y, p=2, w=w)
  1441
+            assert_almost_equal(dist2, np.sqrt(5))
  1442
+
  1443
+    def test_euclidean(self):
  1444
+        for x, y in self.cases:
  1445
+            dist = euclidean(x, y)
  1446
+            assert_almost_equal(dist, np.sqrt(5))
  1447
+
  1448
+    def test_sqeuclidean(self):
  1449
+        for x, y in self.cases:
  1450
+            dist = sqeuclidean(x, y)
  1451
+            assert_almost_equal(dist, 5.0)
  1452
+
  1453
+    def test_cosine(self):
  1454
+        for x, y in self.cases:
  1455
+            dist = cosine(x, y)
  1456
+            assert_almost_equal(dist, 1.0 - 18.0/(np.sqrt(14)*np.sqrt(27)))
  1457
+
  1458
+    def test_correlation(self):
  1459
+        xm = np.array([-1.0, 0, 1.0])
  1460
+        ym = np.array([-4.0/3, -4.0/3, 5.0-7.0/3])
  1461
+        for x, y in self.cases:
  1462
+            dist = correlation(x, y)
  1463
+            assert_almost_equal(dist, 1.0 - np.dot(xm, ym)/(norm(xm)*norm(ym)))
  1464
+
  1465
+    def test_mahalanobis(self):
  1466
+        x = np.array([1.0, 2.0, 3.0])
  1467
+        y = np.array([1.0, 1.0, 5.0])
  1468
+        vi = np.array([[2.0, 1.0, 0.0],[1.0, 2.0, 1.0], [0.0, 1.0, 2.0]])
  1469
+        for x, y in self.cases:
  1470
+            dist = mahalanobis(x, y, vi)
  1471
+            assert_almost_equal(dist, np.sqrt(6.0))
  1472
+
  1473
+
1409 1474
 class TestSquareForm(TestCase):
1410 1475
 
1411 1476
     ################### squareform
@@ -1472,6 +1537,7 @@ def check_squareform_multi_matrix(self, n):
1472 1537
                 else:
1473 1538
                     self.assertTrue(A[i, j] == 0)
1474 1539
 
  1540
+
1475 1541
 class TestNumObsY(TestCase):
1476 1542
 
1477 1543
     def test_num_obs_y_multi_matrix(self):
@@ -1525,6 +1591,7 @@ def check_y(self, n):
1525 1591
     def make_y(self, n):
1526 1592
         return np.random.rand((n*(n-1)/2))
1527 1593
 
  1594
+
1528 1595
 class TestNumObsDM(TestCase):
1529 1596
 
1530 1597
     ############## num_obs_dm
@@ -1564,9 +1631,11 @@ def check_D(self, n):
1564 1631
     def make_D(self, n):
1565 1632
         return np.random.rand(n, n)
1566 1633
 
  1634
+
1567 1635
 def is_valid_dm_throw(D):
1568 1636
     return is_valid_dm(D, throw=True)
1569 1637
 
  1638
+
1570 1639
 class TestIsValidDM(TestCase):
1571 1640
 
1572 1641
     def test_is_valid_dm_int16_array_E(self):
@@ -1658,9 +1727,11 @@ def test_is_valid_dm_correct_5_by_5(self):
1658 1727
         D = squareform(y)
1659 1728
         self.assertTrue(is_valid_dm(D) == True)
1660 1729
 
  1730
+
1661 1731
 def is_valid_y_throw(y):
1662 1732
     return is_valid_y(y, throw=True)
1663 1733
 
  1734
+
1664 1735
 class TestIsValidY(TestCase):
1665 1736
 
1666 1737
     def test_is_valid_y_int16_array_E(self):
@@ -1731,20 +1802,101 @@ def correct_n_by_n(self, n):
1731 1802
         return y
1732 1803
 
1733 1804
 
  1805
+def test_bad_p():
  1806
+    """Raise ValueError if p < 1."""
  1807
+    p = 0.5
  1808
+    assert_raises(ValueError, minkowski, [1, 2], [3, 4], p)
  1809
+    assert_raises(ValueError, wminkowski, [1, 2], [3, 4], p, [1, 1])
  1810
+
  1811
+
1734 1812
 def test_sokalsneath_all_false():
1735 1813
     """Regression test for ticket #876"""
1736 1814
     assert_raises(ValueError, sokalsneath, [False, False, False], [False, False, False])
1737 1815
 
  1816
+
1738 1817
 def test_canberra():
1739 1818
     """Regression test for ticket #1430."""
1740 1819
     assert_equal(canberra([1,2,3], [2,4,6]), 1)
1741 1820
     assert_equal(canberra([1,1,0,0], [1,0,1,0]), 2)
1742 1821
 
  1822
+
1743 1823
 def test_braycurtis():
1744 1824
     """Regression test for ticket #1430."""
1745 1825
     assert_almost_equal(braycurtis([1,2,3], [2,4,6]), 1./3, decimal=15)
1746 1826
     assert_almost_equal(braycurtis([1,1,0,0], [1,0,1,0]), 0.5, decimal=15)
1747 1827
 
1748 1828
 
  1829
+def test_euclideans():
  1830
+    """Regression test for ticket #1328."""
  1831
+    x1 = np.array([1, 1, 1])
  1832
+    x2 = np.array([0, 0, 0])
  1833
+    
  1834
+    # Basic test of the calculation.
  1835
+    assert_almost_equal(sqeuclidean(x1, x2), 3.0, decimal=14)
  1836
+    assert_almost_equal(euclidean(x1, x2), np.sqrt(3), decimal=14)
  1837
+
  1838
+    # Check flattening for (1, N) or (N, 1) inputs
  1839
+    assert_almost_equal(euclidean(x1[np.newaxis, :], x2[np.newaxis, :]),
  1840
+                        np.sqrt(3), decimal=14)
  1841
+    assert_almost_equal(sqeuclidean(x1[np.newaxis, :], x2[np.newaxis, :]),
  1842
+                        3.0, decimal=14)
  1843
+    assert_almost_equal(sqeuclidean(x1[:, np.newaxis], x2[:, np.newaxis]),
  1844
+                        3.0, decimal=14)
  1845
+
  1846
+    # Distance metrics only defined for vectors (= 1-D)
  1847
+    x = np.arange(4).reshape(2, 2)
  1848
+    assert_raises(ValueError, euclidean, x, x)