Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for AttributeError thrown when calling metrics.pairwise_distances with a binary metric when Y is None #13864

Merged
merged 6 commits into from May 13, 2019
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 7 additions & 7 deletions sklearn/metrics/pairwise.py
Expand Up @@ -306,7 +306,7 @@ def _euclidean_distances_upcast(X, XX=None, Y=None, YY=None):
maxmem = max(
((x_density * n_samples_X + y_density * n_samples_Y) * n_features
+ (x_density * n_samples_X * y_density * n_samples_Y)) / 10,
10 * 2**17)
10 * 2 ** 17)

# The amount of additional memory, in 8-byte blocks, is:
# - x_density * batch_size * n_features (copy of chunk of X)
Expand All @@ -315,7 +315,7 @@ def _euclidean_distances_upcast(X, XX=None, Y=None, YY=None):
# Hence x² + (xd+yd)kx = M, where x=batch_size, k=n_features, M=maxmem
# xd=x_density and yd=y_density
tmp = (x_density + y_density) * n_features
batch_size = (-tmp + np.sqrt(tmp**2 + 4 * maxmem)) / 2
batch_size = (-tmp + np.sqrt(tmp ** 2 + 4 * maxmem)) / 2
batch_size = max(int(batch_size), 1)

x_batches = gen_batches(X.shape[0], batch_size)
Expand Down Expand Up @@ -900,7 +900,7 @@ def sigmoid_kernel(X, Y=None, gamma=None, coef0=1):
K = safe_sparse_dot(X, Y.T, dense_output=True)
K *= gamma
K += coef0
np.tanh(K, K) # compute tanh in-place
np.tanh(K, K) # compute tanh in-place
return K


Expand Down Expand Up @@ -933,7 +933,7 @@ def rbf_kernel(X, Y=None, gamma=None):

K = euclidean_distances(X, Y, squared=True)
K *= -gamma
np.exp(K, K) # exponentiate K in-place
np.exp(K, K) # exponentiate K in-place
return K


Expand Down Expand Up @@ -967,7 +967,7 @@ def laplacian_kernel(X, Y=None, gamma=None):
gamma = 1.0 / X.shape[1]

K = -gamma * manhattan_distances(X, Y)
np.exp(K, K) # exponentiate K in-place
np.exp(K, K) # exponentiate K in-place
return K


Expand Down Expand Up @@ -1545,7 +1545,8 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=None, **kwds):

dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None

if dtype == bool and (X.dtype != bool or Y.dtype != bool):
if dtype == bool and \
(X.dtype != bool or not ((Y is None) or Y.dtype == bool)):
rick2047 marked this conversation as resolved.
Show resolved Hide resolved
msg = "Data was converted to boolean for metric %s" % metric
warnings.warn(msg, DataConversionWarning)

Expand Down Expand Up @@ -1576,7 +1577,6 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=None, **kwds):
'yule',
]


# Helper functions - distance
PAIRWISE_KERNEL_FUNCTIONS = {
# If updating this dictionary, update the doc in both distance_metrics()
Expand Down