# scipy/scipy (public)

### Subversion checkout URL

You can clone with HTTPS or Subversion.

BUG: spatial:

* Improved validation of the arguments to the distance functions, so that only "1-D-like" arrays are accepted.
* Simplified some of the distance computations.
* Cleaned up the code a bit.

This commit should fix ticket #1328.
commit 6542a8c034b5211ed3f426d2f2b562c9f92786a6 (1 parent: 5698591)
authored June 12, 2011

Showing 2 changed files with 398 additions and 142 deletions.

1. scipy/spatial/distance.py
2. scipy/spatial/tests/test_distance.py
370  scipy/spatial/distance.py
 @@ -112,6 +112,7 @@ 112 112   113 113  import warnings 114 114  import numpy as np 115 +from numpy.linalg import norm 115 116   116 117  import _distance_wrap 117 118   @@ -127,6 +128,7 @@ def _copy_array_if_base_present(a): 127 128  else: 128 129  return a 129 130   131 + 130 132  def _copy_arrays_if_base_present(T): 131 133  """ 132 134  Accepts a tuple of arrays T. Copies the array T[i] if its base array @@ -137,6 +139,7 @@ def _copy_arrays_if_base_present(T): 137 139  l = [_copy_array_if_base_present(a) for a in T] 138 140  return l 139 141   142 + 140 143  def _convert_to_bool(X): 141 144  if X.dtype != np.bool: 142 145  X = np.bool_(X) @@ -144,6 +147,7 @@ def _convert_to_bool(X): 144 147  X = X.copy() 145 148  return X 146 149   150 + 147 151  def _convert_to_double(X): 148 152  if X.dtype != np.double: 149 153  X = np.double(X) @@ -151,6 +155,17 @@ def _convert_to_double(X): 151 155  X = X.copy() 152 156  return X 153 157   158 + 159 +def _validate_vector(u, dtype=None): 160 + # XXX Is order='c' really necessary? 161 + u = np.asarray(u, dtype=dtype, order='c').squeeze() 162 + # Ensure values such as u=1 and u=[1] still return 1-D arrays. 163 + u = np.atleast_1d(u) 164 + if u.ndim > 1: 165 + raise ValueError("Input vector should be 1-D.") 166 + return u 167 + 168 + 154 169  def minkowski(u, v, p): 155 170  r""" 156 171  Computes the Minkowski distance between two vectors u and v, @@ -166,19 +181,21 @@ def minkowski(u, v, p): 166 181  An n-dimensional vector. 167 182  v : ndarray 168 183  An n-dimensional vector. 169 - p : ndarray 170 - The norm of the difference :math:{||u-v||}_p. 184 + p : int 185 + The order of the norm of the difference :math:{||u-v||}_p. 171 186   172 187  Returns 173 188  ------- 174 189  d : double 175 190  The Minkowski distance between vectors u and v. 
176 191  """ 177 - u = np.asarray(u, order='c') 178 - v = np.asarray(v, order='c') 192 + u = _validate_vector(u) 193 + v = _validate_vector(v) 179 194  if p < 1: 180 195  raise ValueError("p must be at least 1") 181 - return (abs(u-v)**p).sum() ** (1.0 / p) 196 + dist = norm(u - v, ord=p) 197 + return dist 198 + 182 199   183 200  def wminkowski(u, v, p, w): 184 201  r""" @@ -195,8 +212,8 @@ def wminkowski(u, v, p, w): 195 212  An :math:n-dimensional vector. 196 213  v : ndarray 197 214  An :math:n-dimensional vector. 198 - p : ndarray 199 - The norm of the difference :math:{||u-v||}_p. 215 + p : int 216 + The order of the norm of the difference :math:{||u-v||}_p. 200 217  w : ndarray 201 218  The weight vector. 202 219   @@ -205,12 +222,14 @@ def wminkowski(u, v, p, w): 205 222  d : double 206 223  The Minkowski distance between vectors u and v. 207 224  """ 208 - u = np.asarray(u, order='c') 209 - v = np.asarray(v, order='c') 210 - w = np.asarray(w) 225 + u = _validate_vector(u) 226 + v = _validate_vector(v) 227 + w = _validate_vector(w) 211 228  if p < 1: 212 229  raise ValueError("p must be at least 1") 213 - return ((w * abs(u-v))**p).sum() ** (1.0 / p) 230 + dist = norm(w * (u - v), ord=p) 231 + return dist 232 + 214 233   215 234  def euclidean(u, v): 216 235  """ @@ -233,10 +252,11 @@ def euclidean(u, v): 233 252  d : double 234 253  The Euclidean distance between vectors u and v. 235 254  """ 236 - u = np.asarray(u, order='c') 237 - v = np.asarray(v, order='c') 238 - q=np.matrix(u-v) 239 - return np.sqrt((q*q.T).sum()) 255 + u = _validate_vector(u) 256 + v = _validate_vector(v) 257 + dist = norm(u - v) 258 + return dist 259 + 240 260   241 261  def sqeuclidean(u, v): 242 262  """ @@ -260,9 +280,11 @@ def sqeuclidean(u, v): 260 280  d : double 261 281  The squared Euclidean distance between vectors u and v. 
262 282  """ 263 - u = np.asarray(u, order='c') 264 - v = np.asarray(v, order='c') 265 - return ((u-v)*(u-v).T).sum() 283 + u = _validate_vector(u) 284 + v = _validate_vector(v) 285 + dist = ((u - v) ** 2).sum() 286 + return dist 287 + 266 288   267 289  def cosine(u, v): 268 290  r""" @@ -286,10 +308,11 @@ def cosine(u, v): 286 308  d : double 287 309  The Cosine distance between vectors u and v. 288 310  """ 289 - u = np.asarray(u, order='c') 290 - v = np.asarray(v, order='c') 291 - return (1.0 - (np.dot(u, v.T) / \ 292 - (np.sqrt(np.dot(u, u.T)) * np.sqrt(np.dot(v, v.T))))) 311 + u = _validate_vector(u) 312 + v = _validate_vector(v) 313 + dist = 1.0 - np.dot(u, v) / (norm(u) * norm(v)) 314 + return dist 315 + 293 316   294 317  def correlation(u, v): 295 318  r""" @@ -316,13 +339,15 @@ def correlation(u, v): 316 339  d : double 317 340  The correlation distance between vectors u and v. 318 341  """ 342 + u = _validate_vector(u) 343 + v = _validate_vector(v) 319 344  umu = u.mean() 320 345  vmu = v.mean() 321 346  um = u - umu 322 347  vm = v - vmu 323 - return 1.0 - (np.dot(um, vm) / 324 - (np.sqrt(np.dot(um, um)) \ 325 - * np.sqrt(np.dot(vm, vm)))) 348 + dist = 1.0 - np.dot(um, vm) / (norm(um) * norm(vm)) 349 + return dist 350 + 326 351   327 352  def hamming(u, v): 328 353  r""" @@ -351,10 +376,11 @@ def hamming(u, v): 351 376  d : double 352 377  The Hamming distance between vectors u and v. 353 378  """ 354 - u = np.asarray(u, order='c') 355 - v = np.asarray(v, order='c') 379 + u = _validate_vector(u) 380 + v = _validate_vector(v) 356 381  return (u != v).mean() 357 382   383 + 358 384  def jaccard(u, v): 359 385  """ 360 386  Computes the Jaccard-Needham dissimilarity between two boolean @@ -381,11 +407,13 @@ def jaccard(u, v): 381 407  d : double 382 408  The Jaccard distance between vectors u and v. 
383 409  """ 384 - u = np.asarray(u, order='c') 385 - v = np.asarray(v, order='c') 386 - return (np.double(np.bitwise_and((u != v), 387 - np.bitwise_or(u != 0, v != 0)).sum()) 388 - / np.double(np.bitwise_or(u != 0, v != 0).sum())) 410 + u = _validate_vector(u) 411 + v = _validate_vector(v) 412 + dist = (np.double(np.bitwise_and((u != v), 413 + np.bitwise_or(u != 0, v != 0)).sum()) 414 + / np.double(np.bitwise_or(u != 0, v != 0).sum())) 415 + return dist 416 + 389 417   390 418  def kulsinski(u, v): 391 419  """ @@ -413,17 +441,18 @@ def kulsinski(u, v): 413 441  d : double 414 442  The Kulsinski distance between vectors u and v. 415 443  """ 416 - u = np.asarray(u, order='c') 417 - v = np.asarray(v, order='c') 444 + u = _validate_vector(u) 445 + v = _validate_vector(v) 418 446  n = float(len(u)) 419 447  (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v) 420 448   421 449  return (ntf + nft - ntt + n) / (ntf + nft + n) 422 450   451 + 423 452  def seuclidean(u, v, V): 424 453  """ 425 454  Returns the standardized Euclidean distance between two n-vectors 426 - u and v. V is an m-dimensional vector of component 455 + u and v. V is an n-dimensional vector of component 427 456  variances. It is usually computed among a larger collection 428 457  vectors. 429 458   @@ -433,18 +462,22 @@ def seuclidean(u, v, V): 433 462  An :math:n-dimensional vector. 434 463  v : ndarray 435 464  An :math:n-dimensional vector. 465 + V : ndarray 466 + An :math:n-dimensional vector. 436 467   437 468  Returns 438 469  ------- 439 470  d : double 440 471  The standardized Euclidean distance between vectors u and v. 
441 472  """ 442 - u = np.asarray(u, order='c') 443 - v = np.asarray(v, order='c') 444 - V = np.asarray(V, order='c', dtype=np.float64) 445 - if len(V.shape) != 1 or V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]: 446 - raise TypeError('V must be a 1-D array of the same dimension as u and v.') 447 - return np.sqrt(((u-v)**2 / V).sum()) 473 + u = _validate_vector(u) 474 + v = _validate_vector(v) 475 + V = _validate_vector(V, dtype=np.float64) 476 + if V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]: 477 + raise TypeError('V must be a 1-D array of the same dimension ' 478 + 'as u and v.') 479 + return np.sqrt(((u - v) ** 2 / V).sum()) 480 + 448 481   449 482  def cityblock(u, v): 450 483  r""" @@ -467,9 +500,10 @@ def cityblock(u, v): 467 500  d : double 468 501  The City Block distance between vectors u and v. 469 502  """ 470 - u = np.asarray(u, order='c') 471 - v = np.asarray(v, order='c') 472 - return abs(u-v).sum() 503 + u = _validate_vector(u) 504 + v = _validate_vector(v) 505 + return abs(u - v).sum() 506 + 473 507   474 508  def mahalanobis(u, v, VI): 475 509  r""" @@ -494,10 +528,13 @@ def mahalanobis(u, v, VI): 494 528  d : double 495 529  The Mahalanobis distance between vectors u and v. 496 530  """ 497 - u = np.asarray(u, order='c') 498 - v = np.asarray(v, order='c') 499 - VI = np.asarray(VI, order='c') 500 - return np.sqrt(np.dot(np.dot((u-v),VI),(u-v).T).sum()) 531 + u = _validate_vector(u) 532 + v = _validate_vector(v) 533 + VI = np.atleast_2d(VI) 534 + delta = u - v 535 + m = np.dot(np.dot(delta, VI), delta) 536 + return np.sqrt(m) 537 + 501 538   502 539  def chebyshev(u, v): 503 540  r""" @@ -520,9 +557,10 @@ def chebyshev(u, v): 520 557  d : double 521 558  The Chebyshev distance between vectors u and v. 
522 559  """ 523 - u = np.asarray(u, order='c') 524 - v = np.asarray(v, order='c') 525 - return max(abs(u-v)) 560 + u = _validate_vector(u) 561 + v = _validate_vector(v) 562 + return max(abs(u - v)) 563 + 526 564   527 565  def braycurtis(u, v): 528 566  r""" @@ -548,10 +586,11 @@ def braycurtis(u, v): 548 586  d : double 549 587  The Bray-Curtis distance between vectors u and v. 550 588  """ 551 - u = np.asarray(u, order='c') 552 - v = np.asarray(v, order='c', dtype=np.float64) 589 + u = _validate_vector(u) 590 + v = _validate_vector(v, dtype=np.float64) 553 591  return abs(u - v).sum() / abs(u + v).sum() 554 592   593 + 555 594  def canberra(u, v): 556 595  r""" 557 596  Computes the Canberra distance between two n-vectors u and v, @@ -580,8 +619,8 @@ def canberra(u, v): 580 619  the calculation. 581 620   582 621  """ 583 - u = np.asarray(u, order='c') 584 - v = np.asarray(v, order='c', dtype=np.float64) 622 + u = _validate_vector(u) 623 + v = _validate_vector(v, dtype=np.float64) 585 624  olderr = np.seterr(invalid='ignore') 586 625  try: 587 626  d = np.nansum(abs(u - v) / (abs(u) + abs(v))) @@ -589,6 +628,7 @@ def canberra(u, v): 589 628  np.seterr(**olderr) 590 629  return d 591 630   631 + 592 632  def _nbool_correspond_all(u, v): 593 633  if u.dtype != v.dtype: 594 634  raise TypeError("Arrays being compared must be of the same data type.") @@ -612,6 +652,7 @@ def _nbool_correspond_all(u, v): 612 652   613 653  return (nff, nft, ntf, ntt) 614 654   655 + 615 656  def _nbool_correspond_ft_tf(u, v): 616 657  if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double: 617 658  not_u = 1.0 - u @@ -625,6 +666,7 @@ def _nbool_correspond_ft_tf(u, v): 625 666  ntf = (u & not_v).sum() 626 667  return (nft, ntf) 627 668   669 + 628 670  def yule(u, v): 629 671  r""" 630 672  Computes the Yule dissimilarity between two boolean n-vectors u and v, @@ -651,11 +693,12 @@ def yule(u, v): 651 693  d : double 652 694  The Yule dissimilarity between vectors u and 
v. 653 695  """ 654 - u = np.asarray(u, order='c') 655 - v = np.asarray(v, order='c') 696 + u = _validate_vector(u) 697 + v = _validate_vector(v) 656 698  (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v) 657 699  return float(2.0 * ntf * nft) / float(ntt * nff + ntf * nft) 658 700   701 + 659 702  def matching(u, v): 660 703  r""" 661 704  Computes the Matching dissimilarity between two boolean n-vectors @@ -681,11 +724,12 @@ def matching(u, v): 681 724  d : double 682 725  The Matching dissimilarity between vectors u and v. 683 726  """ 684 - u = np.asarray(u, order='c') 685 - v = np.asarray(v, order='c') 727 + u = _validate_vector(u) 728 + v = _validate_vector(v) 686 729  (nft, ntf) = _nbool_correspond_ft_tf(u, v) 687 730  return float(nft + ntf) / float(len(u)) 688 731   732 + 689 733  def dice(u, v): 690 734  r""" 691 735  Computes the Dice dissimilarity between two boolean n-vectors @@ -712,8 +756,8 @@ def dice(u, v): 712 756  d : double 713 757  The Dice dissimilarity between vectors u and v. 714 758  """ 715 - u = np.asarray(u, order='c') 716 - v = np.asarray(v, order='c') 759 + u = _validate_vector(u) 760 + v = _validate_vector(v) 717 761  if u.dtype == np.bool: 718 762  ntt = (u & v).sum() 719 763  else: @@ -721,6 +765,7 @@ def dice(u, v): 721 765  (nft, ntf) = _nbool_correspond_ft_tf(u, v) 722 766  return float(ntf + nft) / float(2.0 * ntt + ntf + nft) 723 767   768 + 724 769  def rogerstanimoto(u, v): 725 770  r""" 726 771  Computes the Rogers-Tanimoto dissimilarity between two boolean @@ -747,11 +792,12 @@ def rogerstanimoto(u, v): 747 792  The Rogers-Tanimoto dissimilarity between vectors 748 793  u and v. 
749 794  """ 750 - u = np.asarray(u, order='c') 751 - v = np.asarray(v, order='c') 795 + u = _validate_vector(u) 796 + v = _validate_vector(v) 752 797  (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v) 753 798  return float(2.0 * (ntf + nft)) / float(ntt + nff + (2.0 * (ntf + nft))) 754 799   800 + 755 801  def russellrao(u, v): 756 802  r""" 757 803  Computes the Russell-Rao dissimilarity between two boolean n-vectors @@ -778,14 +824,15 @@ def russellrao(u, v): 778 824  d : double 779 825  The Russell-Rao dissimilarity between vectors u and v. 780 826  """ 781 - u = np.asarray(u, order='c') 782 - v = np.asarray(v, order='c') 827 + u = _validate_vector(u) 828 + v = _validate_vector(v) 783 829  if u.dtype == np.bool: 784 830  ntt = (u & v).sum() 785 831  else: 786 832  ntt = (u * v).sum() 787 833  return float(len(u) - ntt) / float(len(u)) 788 834   835 + 789 836  def sokalmichener(u, v): 790 837  r""" 791 838  Computes the Sokal-Michener dissimilarity between two boolean vectors @@ -813,8 +860,8 @@ def sokalmichener(u, v): 813 860  d : double 814 861  The Sokal-Michener dissimilarity between vectors u and v. 815 862  """ 816 - u = np.asarray(u, order='c') 817 - v = np.asarray(v, order='c') 863 + u = _validate_vector(u) 864 + v = _validate_vector(v) 818 865  if u.dtype == np.bool: 819 866  ntt = (u & v).sum() 820 867  nff = (~u & ~v).sum() @@ -822,7 +869,8 @@ def sokalmichener(u, v): 822 869  ntt = (u * v).sum() 823 870  nff = ((1.0 - u) * (1.0 - v)).sum() 824 871  (nft, ntf) = _nbool_correspond_ft_tf(u, v) 825 - return float(2.0 * (ntf + nft))/float(ntt + nff + 2.0 * (ntf + nft)) 872 + return float(2.0 * (ntf + nft)) / float(ntt + nff + 2.0 * (ntf + nft)) 873 + 826 874   827 875  def sokalsneath(u, v): 828 876  r""" @@ -850,8 +898,8 @@ def sokalsneath(u, v): 850 898  d : double 851 899  The Sokal-Sneath dissimilarity between vectors u and v. 
852 900  """ 853 - u = np.asarray(u, order='c') 854 - v = np.asarray(v, order='c') 901 + u = _validate_vector(u) 902 + v = _validate_vector(v) 855 903  if u.dtype == np.bool: 856 904  ntt = (u & v).sum() 857 905  else: @@ -903,6 +951,7 @@ def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None): 903 951   904 952  \sqrt{\sum {(u_i-v_i)^2 / V[x_i]}}. 905 953   954 + 906 955  V is the variance vector; V[i] is the variance computed over all 907 956  the i'th components of the points. If not passed, it is 908 957  automatically computed. @@ -1088,14 +1137,12 @@ def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None): 1088 1137  square distance matrices. 1089 1138  """ 1090 1139   1091 - 1092 1140  # 21. Y = pdist(X, 'test_Y') 1093 1141  # 1094 1142  # Computes the distance between all pairs of vectors in X 1095 1143  # using the distance metric Y but with a more succint, 1096 1144  # verifiable, but less efficient implementation. 1097 1145   1098 - 1099 1146  X = np.asarray(X, order='c') 1100 1147   1101 1148  # The C code doesn't do striding. 
@@ -1103,7 +1150,7 @@ def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None): 1103 1150   1104 1151  s = X.shape 1105 1152  if len(s) != 2: 1106 - raise ValueError('A 2-dimensional array must be passed.'); 1153 + raise ValueError('A 2-dimensional array must be passed.') 1107 1154   1108 1155  m, n = s 1109 1156  dm = np.zeros((m * (m - 1) / 2,), dtype=np.double) @@ -1115,23 +1162,27 @@ def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None): 1115 1162   1116 1163  if callable(metric): 1117 1164  if metric == minkowski: 1118 - def dfun(u,v): return minkowski(u, v, p) 1165 + def dfun(u, v): 1166 + return minkowski(u, v, p) 1119 1167  elif metric == wminkowski: 1120 - def dfun(u,v): return wminkowski(u, v, p, w) 1168 + def dfun(u, v): 1169 + return wminkowski(u, v, p, w) 1121 1170  elif metric == seuclidean: 1122 - def dfun(u,v): return seuclidean(u, v, V) 1171 + def dfun(u, v): 1172 + return seuclidean(u, v, V) 1123 1173  elif metric == mahalanobis: 1124 - def dfun(u,v): return mahalanobis(u, v, V) 1174 + def dfun(u, v): 1175 + return mahalanobis(u, v, V) 1125 1176  else: 1126 1177  dfun = metric 1127 1178   1128 1179  k = 0 1129 1180  for i in xrange(0, m - 1): 1130 - for j in xrange(i+1, m): 1181 + for j in xrange(i + 1, m): 1131 1182  dm[k] = dfun(X[i], X[j]) 1132 1183  k = k + 1 1133 1184   1134 - elif isinstance(metric,basestring): 1185 + elif isinstance(metric, basestring): 1135 1186  mstr = metric.lower() 1136 1187   1137 1188  #if X.dtype != np.double and \ @@ -1169,7 +1220,8 @@ def dfun(u,v): return mahalanobis(u, v, V) 1169 1220  if V.dtype != np.double: 1170 1221  raise TypeError('Variance vector V must contain doubles.') 1171 1222  if len(V.shape) != 1: 1172 - raise ValueError('Variance vector V must be one-dimensional.') 1223 + raise ValueError('Variance vector V must ' 1224 + 'be one-dimensional.') 1173 1225  if V.shape[0] != n: 1174 1226  raise ValueError('Variance vector V must be of the same ' 1175 1227  'dimension as the vectors on 
which the distances ' @@ -1192,15 +1244,17 @@ def dfun(u,v): return mahalanobis(u, v, V) 1192 1244  # The numerator u * v 1193 1245  nm = np.dot(X, X.T) 1194 1246  # The denom. ||u||*||v|| 1195 - de = np.dot(nV, nV.T); 1247 + de = np.dot(nV, nV.T) 1196 1248  dm = 1.0 - (nm / de) 1197 - dm[xrange(0,m),xrange(0,m)] = 0.0 1249 + dm[xrange(0, m), xrange(0, m)] = 0.0 1198 1250  dm = squareform(dm) 1199 1251  elif mstr in set(['correlation', 'co']): 1200 - X2 = X - X.mean(1)[:,np.newaxis] 1252 + X2 = X - X.mean(1)[:, np.newaxis] 1201 1253  #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n) 1202 1254  norms = np.sqrt(np.sum(X2 * X2, axis=1)) 1203 - _distance_wrap.pdist_cosine_wrap(_convert_to_double(X2), _convert_to_double(dm), _convert_to_double(norms)) 1255 + _distance_wrap.pdist_cosine_wrap(_convert_to_double(X2), 1256 + _convert_to_double(dm), 1257 + _convert_to_double(norms)) 1204 1258  elif mstr in set(['mahalanobis', 'mahal', 'mah']): 1205 1259  if VI is not None: 1206 1260  VI = _convert_to_double(np.asarray(VI, order='c')) @@ -1213,7 +1267,8 @@ def dfun(u,v): return mahalanobis(u, v, V) 1213 1267  V = np.cov(X.T) 1214 1268  VI = _convert_to_double(np.linalg.inv(V).T.copy()) 1215 1269  # (u-v)V^(-1)(u-v)^T 1216 - _distance_wrap.pdist_mahalanobis_wrap(_convert_to_double(X), VI, dm) 1270 + _distance_wrap.pdist_mahalanobis_wrap(_convert_to_double(X), 1271 + VI, dm) 1217 1272  elif mstr == 'canberra': 1218 1273  _distance_wrap.pdist_canberra_wrap(_convert_to_double(X), dm) 1219 1274  elif mstr == 'braycurtis': @@ -1227,11 +1282,13 @@ def dfun(u,v): return mahalanobis(u, v, V) 1227 1282  elif mstr == 'dice': 1228 1283  _distance_wrap.pdist_dice_bool_wrap(_convert_to_bool(X), dm) 1229 1284  elif mstr == 'rogerstanimoto': 1230 - _distance_wrap.pdist_rogerstanimoto_bool_wrap(_convert_to_bool(X), dm) 1285 + _distance_wrap.pdist_rogerstanimoto_bool_wrap(_convert_to_bool(X), 1286 + dm) 1231 1287  elif mstr == 'russellrao': 1232 1288  
_distance_wrap.pdist_russellrao_bool_wrap(_convert_to_bool(X), dm) 1233 1289  elif mstr == 'sokalmichener': 1234 - _distance_wrap.pdist_sokalmichener_bool_wrap(_convert_to_bool(X), dm) 1290 + _distance_wrap.pdist_sokalmichener_bool_wrap(_convert_to_bool(X), 1291 + dm) 1235 1292  elif mstr == 'sokalsneath': 1236 1293  _distance_wrap.pdist_sokalsneath_bool_wrap(_convert_to_bool(X), dm) 1237 1294  elif metric == 'test_euclidean': @@ -1290,9 +1347,11 @@ def dfun(u,v): return mahalanobis(u, v, V) 1290 1347  else: 1291 1348  raise ValueError('Unknown Distance Metric: %s' % mstr) 1292 1349  else: 1293 - raise TypeError('2nd argument metric must be a string identifier or a function.') 1350 + raise TypeError('2nd argument metric must be a string identifier ' 1351 + 'or a function.') 1294 1352  return dm 1295 1353   1354 + 1296 1355  def squareform(X, force="no", checks=True): 1297 1356  r""" 1298 1357  Converts a vector-form distance vector to a square-form distance @@ -1355,16 +1414,17 @@ def squareform(X, force="no", checks=True): 1355 1414   1356 1415  if force.lower() == 'tomatrix': 1357 1416  if len(s) != 1: 1358 - raise ValueError("Forcing 'tomatrix' but input X is not a distance vector.") 1417 + raise ValueError("Forcing 'tomatrix' but input X is not a " 1418 + "distance vector.") 1359 1419  elif force.lower() == 'tovector': 1360 1420  if len(s) != 2: 1361 - raise ValueError("Forcing 'tovector' but input X is not a distance matrix.") 1362 - 1421 + raise ValueError("Forcing 'tovector' but input X is not a " 1422 + "distance matrix.") 1363 1423   1364 1424  # X = squareform(v) 1365 1425  if len(s) == 1: 1366 1426  if X.shape[0] == 0: 1367 - return np.zeros((1,1), dtype=np.double) 1427 + return np.zeros((1, 1), dtype=np.double) 1368 1428   1369 1429  # Grab the closest value to the square root of the number 1370 1430  # of elements times 2 to see if the number of elements @@ -1373,7 +1433,8 @@ def squareform(X, force="no", checks=True): 1373 1433   1374 1434  # Check 
that v is of valid dimensions. 1375 1435  if d * (d - 1) / 2 != int(s[0]): 1376 - raise ValueError('Incompatible vector size. It must be a binomial coefficient n choose 2 for some integer n >= 2.') 1436 + raise ValueError('Incompatible vector size. It must be a binomial ' 1437 + 'coefficient n choose 2 for some integer n >= 2.') 1377 1438   1378 1439  # Allocate memory for the distance matrix. 1379 1440  M = np.zeros((d, d), dtype=np.double) @@ -1411,7 +1472,10 @@ def squareform(X, force="no", checks=True): 1411 1472  _distance_wrap.to_vector_from_squareform_wrap(X, v) 1412 1473  return v 1413 1474  else: 1414 - raise ValueError('The first argument must be one or two dimensional array. A %d-dimensional array is not permitted' % len(s)) 1475 + raise ValueError(('The first argument must be one or two dimensional ' 1476 + 'array. A %d-dimensional array is not ' 1477 + 'permitted') % len(s)) 1478 + 1415 1479   1416 1480  def is_valid_dm(D, tol=0.0, throw=False, name="D", warning=False): 1417 1481  """ @@ -1453,36 +1517,49 @@ def is_valid_dm(D, tol=0.0, throw=False, name="D", warning=False): 1453 1517  s = D.shape 1454 1518  if D.dtype != np.double: 1455 1519  if name: 1456 - raise TypeError('Distance matrix \'%s\' must contain doubles (double).' % name) 1520 + raise TypeError(('Distance matrix \'%s\' must contain doubles ' 1521 + '(double).') % name) 1457 1522  else: 1458 - raise TypeError('Distance matrix must contain doubles (double).') 1523 + raise TypeError('Distance matrix must contain doubles ' 1524 + '(double).') 1459 1525  if len(D.shape) != 2: 1460 1526  if name: 1461 - raise ValueError('Distance matrix \'%s\' must have shape=2 (i.e. be two-dimensional).' % name) 1527 + raise ValueError(('Distance matrix \'%s\' must have shape=2 ' 1528 + '(i.e. be two-dimensional).') % name) 1462 1529  else: 1463 - raise ValueError('Distance matrix must have shape=2 (i.e. be two-dimensional).') 1530 + raise ValueError('Distance matrix must have shape=2 (i.e. 
' 1531 + 'be two-dimensional).') 1464 1532  if tol == 0.0: 1465 1533  if not (D == D.T).all(): 1466 1534  if name: 1467 - raise ValueError('Distance matrix \'%s\' must be symmetric.' % name) 1535 + raise ValueError(('Distance matrix \'%s\' must be ' 1536 + 'symmetric.') % name) 1468 1537  else: 1469 1538  raise ValueError('Distance matrix must be symmetric.') 1470 1539  if not (D[xrange(0, s[0]), xrange(0, s[0])] == 0).all(): 1471 1540  if name: 1472 - raise ValueError('Distance matrix \'%s\' diagonal must be zero.' % name) 1541 + raise ValueError(('Distance matrix \'%s\' diagonal must ' 1542 + 'be zero.') % name) 1473 1543  else: 1474 1544  raise ValueError('Distance matrix diagonal must be zero.') 1475 1545  else: 1476 1546  if not (D - D.T <= tol).all(): 1477 1547  if name: 1478 - raise ValueError('Distance matrix \'%s\' must be symmetric within tolerance %d.' % (name, tol)) 1548 + raise ValueError(('Distance matrix \'%s\' must be ' 1549 + 'symmetric within tolerance %d.') 1550 + % (name, tol)) 1479 1551  else: 1480 - raise ValueError('Distance matrix must be symmetric within tolerance %5.5f.' % tol) 1552 + raise ValueError('Distance matrix must be symmetric within' 1553 + ' tolerance %5.5f.' % tol) 1481 1554  if not (D[xrange(0, s[0]), xrange(0, s[0])] <= tol).all(): 1482 1555  if name: 1483 - raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %5.5f.' % (name, tol)) 1556 + raise ValueError(('Distance matrix \'%s\' diagonal must be' 1557 + ' close to zero within tolerance %5.5f.') 1558 + % (name, tol)) 1484 1559  else: 1485 - raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %5.5f.' 
% tol) 1560 + raise ValueError(('Distance matrix \'%s\' diagonal must be' 1561 + ' close to zero within tolerance %5.5f.') 1562 + % tol) 1486 1563  except Exception, e: 1487 1564  if throw: 1488 1565  raise @@ -1491,6 +1568,7 @@ def is_valid_dm(D, tol=0.0, throw=False, name="D", warning=False): 1491 1568  valid = False 1492 1569  return valid 1493 1570   1571 + 1494 1572  def is_valid_y(y, warning=False, throw=False, name=None): 1495 1573  r""" 1496 1574  Returns True if the variable y passed is a valid condensed @@ -1521,26 +1599,37 @@ def is_valid_y(y, warning=False, throw=False, name=None): 1521 1599  try: 1522 1600  if type(y) != np.ndarray: 1523 1601  if name: 1524 - raise TypeError('\'%s\' passed as a condensed distance matrix is not a numpy array.' % name) 1602 + raise TypeError(('\'%s\' passed as a condensed distance ' 1603 + 'matrix is not a numpy array.') % name) 1525 1604  else: 1526 1605  raise TypeError('Variable is not a numpy array.') 1527 1606  if y.dtype != np.double: 1528 1607  if name: 1529 - raise TypeError('Condensed distance matrix \'%s\' must contain doubles (double).' % name) 1608 + raise TypeError(('Condensed distance matrix \'%s\' must ' 1609 + 'contain doubles (double).') % name) 1530 1610  else: 1531 - raise TypeError('Condensed distance matrix must contain doubles (double).') 1611 + raise TypeError('Condensed distance matrix must contain ' 1612 + 'doubles (double).') 1532 1613  if len(y.shape) != 1: 1533 1614  if name: 1534 - raise ValueError('Condensed distance matrix \'%s\' must have shape=1 (i.e. be one-dimensional).' % name) 1615 + raise ValueError(('Condensed distance matrix \'%s\' must ' 1616 + 'have shape=1 (i.e. be one-dimensional).') 1617 + % name) 1535 1618  else: 1536 - raise ValueError('Condensed distance matrix must have shape=1 (i.e. be one-dimensional).') 1619 + raise ValueError('Condensed distance matrix must have shape=1 ' 1620 + '(i.e. 
be one-dimensional).') 1537 1621  n = y.shape[0] 1538 1622  d = int(np.ceil(np.sqrt(n * 2))) 1539 - if (d*(d-1)/2) != n: 1623 + if (d * (d - 1) / 2) != n: 1540 1624  if name: 1541 - raise ValueError('Length n of condensed distance matrix \'%s\' must be a binomial coefficient, i.e. there must be a k such that (k \choose 2)=n)!' % name) 1625 + raise ValueError(('Length n of condensed distance matrix ' 1626 + '\'%s\' must be a binomial coefficient, i.e.' 1627 + 'there must be a k such that ' 1628 + '(k \choose 2)=n)!') % name) 1542 1629  else: 1543 - raise ValueError('Length n of condensed distance matrix must be a binomial coefficient, i.e. there must be a k such that (k \choose 2)=n)!') 1630 + raise ValueError('Length n of condensed distance matrix must ' 1631 + 'be a binomial coefficient, i.e. there must ' 1632 + 'be a k such that (k \choose 2)=n)!') 1544 1633  except Exception, e: 1545 1634  if throw: 1546 1635  raise @@ -1549,6 +1638,7 @@ def is_valid_y(y, warning=False, throw=False, name=None): 1549 1638  valid = False 1550 1639  return valid 1551 1640   1641 + 1552 1642  def num_obs_dm(d): 1553 1643  """ 1554 1644  Returns the number of original observations that correspond to a @@ -1568,6 +1658,7 @@ def num_obs_dm(d): 1568 1658  is_valid_dm(d, tol=np.inf, throw=True, name='d') 1569 1659  return d.shape[0] 1570 1660   1661 + 1571 1662  def num_obs_y(Y): 1572 1663  """ 1573 1664  Returns the number of original observations that correspond to a @@ -1589,10 +1680,12 @@ def num_obs_y(Y): 1589 1680  is_valid_y(Y, throw=True, name='Y') 1590 1681  k = Y.shape[0] 1591 1682  if k == 0: 1592 - raise ValueError("The number of observations cannot be determined on an empty distance matrix.") 1683 + raise ValueError("The number of observations cannot be determined on " 1684 + "an empty distance matrix.") 1593 1685  d = int(np.ceil(np.sqrt(k * 2))) 1594 - if (d*(d-1)/2) != k: 1595 - raise ValueError("Invalid condensed distance matrix passed. 
Must be some k where k=(n choose 2) for some n >= 2.") 1686 + if (d * (d - 1) / 2) != k: 1687 + raise ValueError("Invalid condensed distance matrix passed. Must be " 1688 + "some k where k=(n choose 2) for some n >= 2.") 1596 1689  return d 1597 1690   1598 1691   @@ -1825,14 +1918,12 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None): 1825 1918  A :math:m_A by :math:m_B distance matrix. 1826 1919  """ 1827 1920   1828 - 1829 1921  # 21. Y = cdist(XA, XB, 'test_Y') 1830 1922  # 1831 1923  # Computes the distance between all pairs of vectors in X 1832 1924  # using the distance metric Y but with a more succint, 1833 1925  # verifiable, but less efficient implementation. 1834 1926   1835 - 1836 1927  XA = np.asarray(XA, order='c') 1837 1928  XB = np.asarray(XB, order='c') 1838 1929   @@ -1848,11 +1939,12 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None): 1848 1939  sB = XB.shape 1849 1940   1850 1941  if len(s) != 2: 1851 - raise ValueError('XA must be a 2-dimensional array.'); 1942 + raise ValueError('XA must be a 2-dimensional array.') 1852 1943  if len(sB) != 2: 1853 - raise ValueError('XB must be a 2-dimensional array.'); 1944 + raise ValueError('XB must be a 2-dimensional array.') 1854 1945  if s[1] != sB[1]: 1855 - raise ValueError('XA and XB must have the same number of columns (i.e. feature dimension.)') 1946 + raise ValueError('XA and XB must have the same number of columns ' 1947 + '(i.e. 
feature dimension.)') 1856 1948   1857 1949  mA = s[0] 1858 1950  mB = sB[0] @@ -1880,7 +1972,7 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None): 1880 1972  for i in xrange(0, mA): 1881 1973  for j in xrange(0, mB): 1882 1974  dm[i, j] = metric(XA[i, :], XB[j, :]) 1883 - elif isinstance(metric,basestring): 1975 + elif isinstance(metric, basestring): 1884 1976  mstr = metric.lower() 1885 1977   1886 1978  #if XA.dtype != np.double and \ @@ -1899,14 +1991,16 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None): 1899 1991  elif mstr in set(['hamming', 'hamm', 'ha', 'h']): 1900 1992  if XA.dtype == np.bool: 1901 1993  _distance_wrap.cdist_hamming_bool_wrap(_convert_to_bool(XA), 1902 - _convert_to_bool(XB), dm) 1994 + _convert_to_bool(XB), 1995 + dm) 1903 1996  else: 1904 1997  _distance_wrap.cdist_hamming_wrap(_convert_to_double(XA), 1905 1998  _convert_to_double(XB), dm) 1906 1999  elif mstr in set(['jaccard', 'jacc', 'ja', 'j']): 1907 2000  if XA.dtype == np.bool: 1908 2001  _distance_wrap.cdist_jaccard_bool_wrap(_convert_to_bool(XA), 1909 - _convert_to_bool(XB), dm) 2002 + _convert_to_bool(XB), 2003 + dm) 1910 2004  else: 1911 2005  _distance_wrap.cdist_jaccard_wrap(_convert_to_double(XA), 1912 2006  _convert_to_double(XB), dm) @@ -1918,7 +2012,9 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None): 1918 2012  _convert_to_double(XB), dm, p) 1919 2013  elif mstr in set(['wminkowski', 'wmi', 'wm', 'wpnorm']): 1920 2014  _distance_wrap.cdist_weighted_minkowski_wrap(_convert_to_double(XA), 1921 - _convert_to_double(XB), dm, p, _convert_to_double(w)) 2015 + _convert_to_double(XB), 2016 + dm, p, 2017 + _convert_to_double(w)) 1922 2018  elif mstr in set(['seuclidean', 'se', 's']): 1923 2019  if V is not None: 1924 2020  V = np.asarray(V, order='c') @@ -1927,9 +2023,12 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None): 1927 2023  if V.dtype != np.double: 1928 2024  raise TypeError('Variance 
vector V must contain doubles.') 1929 2025  if len(V.shape) != 1: 1930 - raise ValueError('Variance vector V must be one-dimensional.') 2026 + raise ValueError('Variance vector V must be ' 2027 + 'one-dimensional.') 1931 2028  if V.shape[0] != n: 1932 - raise ValueError('Variance vector V must be of the same dimension as the vectors on which the distances are computed.') 2029 + raise ValueError('Variance vector V must be of the same ' 2030 + 'dimension as the vectors on which the ' 2031 + 'distances are computed.') 1933 2032  # The C code doesn't do striding. 1934 2033  [VV] = _copy_arrays_if_base_present([_convert_to_double(V)]) 1935 2034  else: @@ -1951,8 +2050,8 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None): 1951 2050  normsA, 1952 2051  normsB) 1953 2052  elif mstr in set(['correlation', 'co']): 1954 - XA2 = XA - XA.mean(1)[:,np.newaxis] 1955 - XB2 = XB - XB.mean(1)[:,np.newaxis] 2053 + XA2 = XA - XA.mean(1)[:, np.newaxis] 2054 + XB2 = XB - XB.mean(1)[:, np.newaxis] 1956 2055  #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n) 1957 2056  normsA = np.sqrt(np.sum(XA2 * XA2, axis=1)) 1958 2057  normsB = np.sqrt(np.sum(XB2 * XB2, axis=1)) @@ -1977,7 +2076,8 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None): 1977 2076  VI = _convert_to_double(np.linalg.inv(V).T.copy()) 1978 2077  # (u-v)V^(-1)(u-v)^T 1979 2078  _distance_wrap.cdist_mahalanobis_wrap(_convert_to_double(XA), 1980 - _convert_to_double(XB), VI, dm) 2079 + _convert_to_double(XB), 2080 + VI, dm) 1981 2081  elif mstr == 'canberra': 1982 2082  _distance_wrap.cdist_canberra_wrap(_convert_to_double(XA), 1983 2083  _convert_to_double(XB), dm) @@ -1998,16 +2098,19 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None): 1998 2098  _convert_to_bool(XB), dm) 1999 2099  elif mstr == 'rogerstanimoto': 2000 2100  _distance_wrap.cdist_rogerstanimoto_bool_wrap(_convert_to_bool(XA), 2001 - _convert_to_bool(XB), dm) 2101 + _convert_to_bool(XB), 
2102 + dm) 2002 2103  elif mstr == 'russellrao': 2003 2104  _distance_wrap.cdist_russellrao_bool_wrap(_convert_to_bool(XA), 2004 2105  _convert_to_bool(XB), dm) 2005 2106  elif mstr == 'sokalmichener': 2006 2107  _distance_wrap.cdist_sokalmichener_bool_wrap(_convert_to_bool(XA), 2007 - _convert_to_bool(XB), dm) 2108 + _convert_to_bool(XB), 2109 + dm) 2008 2110  elif mstr == 'sokalsneath': 2009 2111  _distance_wrap.cdist_sokalsneath_bool_wrap(_convert_to_bool(XA), 2010 - _convert_to_bool(XB), dm) 2112 + _convert_to_bool(XB), 2113 + dm) 2011 2114  elif metric == 'test_euclidean': 2012 2115  dm = cdist(XA, XB, euclidean) 2013 2116  elif metric == 'test_seuclidean': @@ -2069,5 +2172,6 @@ def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None): 2069 2172  else: 2070 2173  raise ValueError('Unknown Distance Metric: %s' % mstr) 2071 2174  else: 2072 - raise TypeError('2nd argument metric must be a string identifier or a function.') 2175 + raise TypeError('2nd argument metric must be a string identifier ' 2176 + 'or a function.') 2073 2177  return dm
170  scipy/spatial/tests/test_distance.py
 @@ -37,13 +37,16 @@ 37 37  import os.path 38 38   39 39  import numpy as np 40 +from numpy.linalg import norm 40 41  from numpy.testing import verbose, TestCase, run_module_suite, \ 41 42  assert_raises, assert_array_equal, assert_equal, assert_almost_equal 43 + 42 44  from scipy.spatial.distance import squareform, pdist, cdist, matching, \ 43 - jaccard, dice, sokalsneath, rogerstanimoto, \ 44 - russellrao, yule, num_obs_y, num_obs_dm, \ 45 - is_valid_dm, is_valid_y, wminkowski, \ 46 - canberra, braycurtis 45 + jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule, \ 46 + num_obs_y, num_obs_dm, is_valid_dm, is_valid_y, minkowski, wminkowski, \ 47 + euclidean, sqeuclidean, cosine, correlation, mahalanobis, \ 48 + canberra, braycurtis, sokalmichener, _validate_vector  49 + 47 50   48 51  _filenames = ["iris.txt", 49 52  "cdist-X1.txt", @@ -97,11 +100,6 @@ def load_testing_files(): 97 100   98 101  load_testing_files() 99 102   100 -#print eo.keys() 101 - 102 - 103 -#print np.abs(Y_test2 - Y_right).max() 104 -#print np.abs(Y_test1 - Y_right).max() 105 103   106 104  class TestCdist(TestCase): 107 105  """ @@ -463,6 +461,7 @@ def test_cdist_sokalsneath_random(self): 463 461  print (Y1-Y2).max() 464 462  self.assertTrue(within_tol(Y1, Y2, eps)) 465 463   464 + 466 465  class TestPdist(TestCase): 467 466  """ 468 467  Test suite for the pdist function. 
@@ -1402,10 +1401,76 @@ def test_pdist_canberra_ticket_711(self): 1402 1401  print np.abs(pdist_y-right_y).max() 1403 1402  self.assertTrue(within_tol(pdist_y, right_y, eps)) 1404 1403   1404 + 1405 1405  def within_tol(a, b, tol): 1406 1406  return np.abs(a - b).max() < tol 1407 1407   1408 1408   1409 +class TestSomeDistanceFunctions(TestCase): 1410 + 1411 + def setUp(self): 1412 + # 1D arrays 1413 + x = np.array([1.0, 2.0, 3.0]) 1414 + y = np.array([1.0, 1.0, 5.0]) 1415 + # 3x1 arrays 1416 + x31 = x[:,np.newaxis] 1417 + y31 = y[:,np.newaxis] 1418 + # 1x3 arrays 1419 + x13 = x31.T 1420 + y13 = y31.T 1421 + 1422 + self.cases = [(x,y), (x31, y31), (x13, y13)] 1423 + 1424 + def test_minkowski(self): 1425 + for x, y in self.cases: 1426 + dist1 = minkowski(x, y, p=1) 1427 + assert_almost_equal(dist1, 3.0) 1428 + dist1p5 = minkowski(x, y, p=1.5) 1429 + assert_almost_equal(dist1p5, (1.0+2.0**1.5)**(2./3)) 1430 + dist2 = minkowski(x, y, p=2) 1431 + assert_almost_equal(dist2, np.sqrt(5)) 1432 + 1433 + def test_wminkowski(self): 1434 + w = np.array([1.0, 2.0, 0.5]) 1435 + for x, y in self.cases: 1436 + dist1 = wminkowski(x, y, p=1, w=w) 1437 + assert_almost_equal(dist1, 3.0) 1438 + dist1p5 = wminkowski(x, y, p=1.5, w=w) 1439 + assert_almost_equal(dist1p5, (2.0**1.5+1.0)**(2./3)) 1440 + dist2 = wminkowski(x, y, p=2, w=w) 1441 + assert_almost_equal(dist2, np.sqrt(5)) 1442 + 1443 + def test_euclidean(self): 1444 + for x, y in self.cases: 1445 + dist = euclidean(x, y) 1446 + assert_almost_equal(dist, np.sqrt(5)) 1447 + 1448 + def test_sqeuclidean(self): 1449 + for x, y in self.cases: 1450 + dist = sqeuclidean(x, y) 1451 + assert_almost_equal(dist, 5.0) 1452 + 1453 + def test_cosine(self): 1454 + for x, y in self.cases: 1455 + dist = cosine(x, y) 1456 + assert_almost_equal(dist, 1.0 - 18.0/(np.sqrt(14)*np.sqrt(27))) 1457 + 1458 + def test_correlation(self): 1459 + xm = np.array([-1.0, 0, 1.0]) 1460 + ym = np.array([-4.0/3, -4.0/3, 5.0-7.0/3]) 1461 + for x, y in self.cases: 1462 
+ dist = correlation(x, y) 1463 + assert_almost_equal(dist, 1.0 - np.dot(xm, ym)/(norm(xm)*norm(ym))) 1464 + 1465 + def test_mahalanobis(self): 1466 + x = np.array([1.0, 2.0, 3.0]) 1467 + y = np.array([1.0, 1.0, 5.0]) 1468 + vi = np.array([[2.0, 1.0, 0.0],[1.0, 2.0, 1.0], [0.0, 1.0, 2.0]]) 1469 + for x, y in self.cases: 1470 + dist = mahalanobis(x, y, vi) 1471 + assert_almost_equal(dist, np.sqrt(6.0)) 1472 + 1473 + 1409 1474  class TestSquareForm(TestCase): 1410 1475   1411 1476  ################### squareform @@ -1472,6 +1537,7 @@ def check_squareform_multi_matrix(self, n): 1472 1537  else: 1473 1538  self.assertTrue(A[i, j] == 0) 1474 1539   1540 + 1475 1541  class TestNumObsY(TestCase): 1476 1542   1477 1543  def test_num_obs_y_multi_matrix(self): @@ -1525,6 +1591,7 @@ def check_y(self, n): 1525 1591  def make_y(self, n): 1526 1592  return np.random.rand((n*(n-1)/2)) 1527 1593   1594 + 1528 1595  class TestNumObsDM(TestCase): 1529 1596   1530 1597  ############## num_obs_dm @@ -1564,9 +1631,11 @@ def check_D(self, n): 1564 1631  def make_D(self, n): 1565 1632  return np.random.rand(n, n) 1566 1633   1634 + 1567 1635  def is_valid_dm_throw(D): 1568 1636  return is_valid_dm(D, throw=True) 1569 1637   1638 + 1570 1639  class TestIsValidDM(TestCase): 1571 1640   1572 1641  def test_is_valid_dm_int16_array_E(self): @@ -1658,9 +1727,11 @@ def test_is_valid_dm_correct_5_by_5(self): 1658 1727  D = squareform(y) 1659 1728  self.assertTrue(is_valid_dm(D) == True) 1660 1729   1730 + 1661 1731  def is_valid_y_throw(y): 1662 1732  return is_valid_y(y, throw=True) 1663 1733   1734 + 1664 1735  class TestIsValidY(TestCase): 1665 1736   1666 1737  def test_is_valid_y_int16_array_E(self): @@ -1731,20 +1802,101 @@ def correct_n_by_n(self, n): 1731 1802  return y 1732 1803   1733 1804   1805 +def test_bad_p(): 1806 + """Raise ValueError if p < 1.""" 1807 + p = 0.5 1808 + assert_raises(ValueError, minkowski, [1, 2], [3, 4], p) 1809 + assert_raises(ValueError, wminkowski, [1, 2], 
[3, 4], p, [1, 1]) 1810 + 1811 + 1734 1812  def test_sokalsneath_all_false(): 1735 1813  """Regression test for ticket #876""" 1736 1814  assert_raises(ValueError, sokalsneath, [False, False, False], [False, False, False]) 1737 1815   1816 + 1738 1817  def test_canberra(): 1739 1818  """Regression test for ticket #1430.""" 1740 1819  assert_equal(canberra([1,2,3], [2,4,6]), 1) 1741 1820  assert_equal(canberra([1,1,0,0], [1,0,1,0]), 2) 1742 1821   1822 + 1743 1823  def test_braycurtis(): 1744 1824  """Regression test for ticket #1430.""" 1745 1825  assert_almost_equal(braycurtis([1,2,3], [2,4,6]), 1./3, decimal=15) 1746 1826  assert_almost_equal(braycurtis([1,1,0,0], [1,0,1,0]), 0.5, decimal=15) 1747 1827   1748 1828   1829 +def test_euclideans(): 1830 + """Regression test for ticket #1328.""" 1831 + x1 = np.array([1, 1, 1]) 1832 + x2 = np.array([0, 0, 0]) 1833 +  1834 + # Basic test of the calculation. 1835 + assert_almost_equal(sqeuclidean(x1, x2), 3.0, decimal=14) 1836 + assert_almost_equal(euclidean(x1, x2), np.sqrt(3), decimal=14) 1837 + 1838 + # Check flattening for (1, N) or (N, 1) inputs 1839 + assert_almost_equal(euclidean(x1[np.newaxis, :], x2[np.newaxis, :]), 1840 + np.sqrt(3), decimal=14) 1841 + assert_almost_equal(sqeuclidean(x1[np.newaxis, :], x2[np.newaxis, :]), 1842 + 3.0, decimal=14) 1843 + assert_almost_equal(sqeuclidean(x1[:, np.newaxis], x2[:, np.newaxis]), 1844 + 3.0, decimal=14) 1845 + 1846 + # Distance metrics only defined for vectors (= 1-D) 1847 + x = np.arange(4).reshape(2, 2) 1848 + assert_raises(ValueError, euclidean, x, x)