Merged
Merged

# [MRG+2] Fix trustworthiness custom metric#9775

Commits
Show all changes
11 commits
Select commit Hold shift + click to select a range
Filter file types
Failed to load files and symbols.
+26 −4

#### Just for now

@@ -8,6 +8,7 @@
# * Fast Optimization for t-SNE:
# http://cseweb.ucsd.edu/~lvdmaaten/workshops/nips2010/papers/vandermaaten.pdf

import warnings
from time import time
import numpy as np
from scipy import linalg
@@ -377,7 +378,8 @@ def _gradient_descent(objective, p0, it, n_iter,
return p, error, i

def trustworthiness(X, X_embedded, n_neighbors=5, precomputed=False):
def trustworthiness(X, X_embedded, n_neighbors=5,
precomputed=False, metric='euclidean'):
"""Expresses to what extent the local structure is retained.
The trustworthiness is within [0, 1]. It is defined as
@@ -413,15 +415,36 @@ def trustworthiness(X, X_embedded, n_neighbors=5, precomputed=False):
precomputed : bool, optional (default: False)
Set this flag if X is a precomputed square distance matrix.
.. deprecated:: 0.20
``precomputed`` has been deprecated in version 0.20 and will be
removed in version 0.22. Use ``metric`` instead.
metric : string, or callable, optional, default 'euclidean'
Which metric to use for computing pairwise distances between samples
from the original input space. If metric is 'precomputed', X must be a
matrix of pairwise distances. Otherwise, see the documentation of

#### jnothman Sep 18, 2017

Member

Can you say "pairwise distances or squared distances" (either will work as this function only uses their rank, not their value).

argument metric in sklearn.pairwise.pairwise_distances for a list of
available metrics. However, using a metric different from 'euclidean'
and 'precomputed' seems not standard for this task.
Returns
-------
trustworthiness : float
Trustworthiness of the low-dimensional embedding.
"""
if precomputed:
warnings.warn("The flag 'precomputed' has been deprecated in version"

#### massich Sep 15, 2017 • edited

Contributor

Check the scikit-learn contributing guide to see how to deprecate the `precomputed` attribute using a decorator.

#### massich Sep 15, 2017

Contributor

Also, `precomputed` is not an attribute but a parameter (my bad, sorry).

"0.20 and will be removed in 0.22. See 'metric'"
metric = 'precomputed'
if metric == 'precomputed':

#### jnothman Sep 19, 2017

Member

I think pairwise_distances already handles precomputed, so we don't need a special case here.

#### wdevazelhes Sep 20, 2017

Author Contributor

Thanks, I will change this

dist_X = X
elif metric == 'euclidean':
dist_X = pairwise_distances(X, metric='euclidean', squared=True)

#### jnothman Sep 19, 2017

Member

I similarly don't think it's essential to specially handle Euclidean, but at least it saves some computation

#### wdevazelhes Sep 20, 2017

Author Contributor

I will change this too

else:
dist_X = pairwise_distances(X, squared=True)
warnings.warn("The metric '{}' seems not standard for computing"
"trustworthiness.".format(metric))
dist_X = pairwise_distances(X, metric=metric)
dist_X_embedded = pairwise_distances(X_embedded, squared=True)
ind_X = np.argsort(dist_X, axis=1)
ind_X_embedded = np.argsort(dist_X_embedded, axis=1)[:, 1:n_neighbors + 1]
 @@ -286,8 +286,7 @@ def test_preserve_trustworthiness_approximately_with_precomputed_distances(): early_exaggeration=2.0, metric="precomputed", random_state=i, verbose=0) X_embedded = tsne.fit_transform(D) t = trustworthiness(D, X_embedded, n_neighbors=1, precomputed=True) t = trustworthiness(D, X_embedded, n_neighbors=1, metric="precomputed") assert t > .95
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.