From 98e88d75fb1bf252acdee5812409fda36770b00e Mon Sep 17 00:00:00 2001 From: Celelibi Date: Fri, 15 Jun 2018 10:12:19 +0200 Subject: [PATCH] ENH: euclidean_distances: swap X and Y sometimes with float32 Signed-off-by: Celelibi --- sklearn/metrics/pairwise.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 7862fa848ea78..739bc8ebf8ac1 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -202,6 +202,15 @@ def _euclidean_distances_cast(X, Y, outdtype, Y_norm_squared=None, The computation is done by blocks to limit additional memory usage. """ + # For performance reasons, swap X and Y if I got X_norm_squared but not + # Y_norm_squared + if X_norm_squared is not None and Y_norm_squared is None: + swap = True + X, Y = Y, X + X_norm_squared, Y_norm_squared = None, X_norm_squared.T + else: + swap = False + # No more than 10MB of additional memory will be used to cast X and Y to # float64 and to get the float64 result. maxmem = 10*1024*1024 @@ -262,6 +271,9 @@ def _euclidean_distances_cast(X, Y, outdtype, Y_norm_squared=None, if X is Y and j > i: distances[j:jpbs, i:ipbs] = d.T + if swap: + distances = distances.T + return distances if squared else np.sqrt(distances, out=distances)