[MRG] Add early exaggeration iterations as argument to t-SNE #12476

Open · wants to merge 3 commits into main
48 changes: 31 additions & 17 deletions sklearn/manifold/t_sne.py
@@ -518,15 +518,27 @@ class TSNE(BaseEstimator):
learning rate is too low, most points may look compressed in a dense
cloud with few outliers. If the cost function gets stuck in a bad local
minimum increasing the learning rate may help.
+Some discussion on how to set learning rate optimally can be found
+at https://doi.org/10.1101/451690. Effective use of this parameter has
Member: Use the References section and ReST citation format instead.

+the benefit of decreasing the number of iterations required
+for a good embedding.

n_iter : int, optional (default: 1000)
-Maximum number of iterations for the optimization. Should be at
-least 250.
+Maximum number of iterations for the optimization. Must be greater than
+or equal to n_iter_early_exag. If embedding quality is suffering as a
+consequence of an increasing number of samples being embedded, increasing
+this value and n_iter_early_exag proportionately can help.

+n_iter_early_exag : int, optional (default: 250)
+Number of iterations out of the total n_iter that t-SNE should spend
+in the early exaggeration phase. If embedding quality is suffering as a
+consequence of an increasing number of samples being embedded, increasing
+this value and n_iter proportionately can help.
Member: would it often be useful to specify this as a fraction of n_iter?

Author: That is one way to do it, but I think it is better to specify it as an absolute number of iterations, for two main reasons:

  1. It aligns with other existing implementations, which take the equivalent of this argument as an iteration count (see my note about the LvdM implementation above).
  2. It ensures that existing client code gets the same results as before, regardless of the value set for the n_iter argument. If we suddenly started using a fraction as the default, code with n_iter set to anything other than 1000 would silently get a different early exaggeration phase length.
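For illustration, a minimal usage sketch assuming this branch is installed (the dataset is a stand-in; n_iter_early_exag is the parameter added by this PR):

    import numpy as np
    from sklearn.manifold import TSNE

    X = np.random.RandomState(0).randn(5000, 50)  # stand-in data

    # Default schedule: 250 of the 1000 iterations use early exaggeration.
    tsne = TSNE(n_iter=1000, n_iter_early_exag=250, random_state=0)
    X_embedded = tsne.fit_transform(X)

    # With many more samples, scale both values proportionately,
    # as the docstring above suggests.
    tsne_large = TSNE(n_iter=2000, n_iter_early_exag=500, random_state=0)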


n_iter_without_progress : int, optional (default: 300)
Maximum number of iterations without progress before we abort the
-optimization, used after 250 initial iterations with early
-exaggeration. Note that progress is only checked every 50 iterations so
+optimization, used after early exaggeration.
+Note that progress is only checked every 50 iterations so
this value is rounded to the next multiple of 50.

.. versionadded:: 0.17
@@ -619,22 +631,22 @@ class TSNE(BaseEstimator):
Journal of Machine Learning Research 15(Oct):3221-3245, 2014.
https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf
"""
-# Control the number of exploration iterations with early_exaggeration on
-_EXPLORATION_N_ITER = 250

# Control the number of iterations between progress checks
_N_ITER_CHECK = 50

def __init__(self, n_components=2, perplexity=30.0,
early_exaggeration=12.0, learning_rate=200.0, n_iter=1000,
-n_iter_without_progress=300, min_grad_norm=1e-7,
-metric="euclidean", init="random", verbose=0,
-random_state=None, method='barnes_hut', angle=0.5):
+n_iter_early_exag=250, n_iter_without_progress=300,
Member: For backward compatibility, please place n_iter_early_exag at the end of the function signature.
+min_grad_norm=1e-7, metric="euclidean",
+init="random", verbose=0, random_state=None,
+method='barnes_hut', angle=0.5):
self.n_components = n_components
self.perplexity = perplexity
self.early_exaggeration = early_exaggeration
self.learning_rate = learning_rate
self.n_iter = n_iter
+self.n_iter_early_exag = n_iter_early_exag
self.n_iter_without_progress = n_iter_without_progress
self.min_grad_norm = min_grad_norm
self.metric = metric
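The reviewer's point about placement matters for positional callers: inserting the new parameter mid-signature shifts every argument after it. A hypothetical caller illustrating the breakage:

    # Written against the old signature, where the sixth positional
    # argument was n_iter_without_progress:
    tsne = TSNE(2, 30.0, 12.0, 200.0, 1000, 300)
    # With n_iter_early_exag inserted before it, the 300 above would
    # now bind to n_iter_early_exag instead.

    # Keyword arguments are unaffected by any reordering:
    tsne = TSNE(n_components=2, n_iter=1000, n_iter_without_progress=300)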
@@ -703,8 +715,9 @@ def _fit(self, X, skip_num_points=0):
raise ValueError("early_exaggeration must be at least 1, but is {}"
.format(self.early_exaggeration))

-if self.n_iter < 250:
-    raise ValueError("n_iter should be at least 250")
+if self.n_iter < self.n_iter_early_exag:
+    raise ValueError("n_iter should be greater than or equal "
+                     "to n_iter_early_exag.")

n_samples = X.shape[0]
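The guard now ties n_iter to n_iter_early_exag instead of the hard-coded 250. A quick sketch of the new failure mode (assuming this branch):

    import numpy as np
    from sklearn.manifold import TSNE

    X = np.random.RandomState(0).randn(100, 10)
    try:
        # 100 total iterations cannot contain a 250-iteration
        # early exaggeration phase.
        TSNE(n_iter=100, n_iter_early_exag=250).fit_transform(X)
    except ValueError as exc:
        print(exc)  # "n_iter should be greater than or equal to n_iter_early_exag."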

@@ -822,8 +835,8 @@ def _tsne(self, P, degrees_of_freedom, n_samples, X_embedded,
"verbose": self.verbose,
"kwargs": dict(skip_num_points=skip_num_points),
"args": [P, degrees_of_freedom, n_samples, self.n_components],
-"n_iter_without_progress": self._EXPLORATION_N_ITER,
-"n_iter": self._EXPLORATION_N_ITER,
+"n_iter_without_progress": self.n_iter_early_exag,
+"n_iter": self.n_iter_early_exag,
"momentum": 0.5,
}
if self.method == 'barnes_hut':
@@ -834,8 +847,9 @@ def _tsne(self, P, degrees_of_freedom, n_samples, X_embedded,
else:
obj_func = _kl_divergence

-# Learning schedule (part 1): do 250 iteration with lower momentum but
-# higher learning rate controlled via the early exageration parameter
+# Learning schedule (part 1): do n_iter_early_exag iterations with
+# lower momentum but higher learning rate controlled via
+# the early exaggeration parameter
P *= self.early_exaggeration
params, kl_divergence, it = _gradient_descent(obj_func, params,
**opt_args)
@@ -846,8 +860,8 @@ def _tsne(self, P, degrees_of_freedom, n_samples, X_embedded,
# Learning schedule (part 2): disable early exaggeration and finish
# optimization with a higher momentum at 0.8
P /= self.early_exaggeration
-remaining = self.n_iter - self._EXPLORATION_N_ITER
-if it < self._EXPLORATION_N_ITER or remaining > 0:
+remaining = self.n_iter - self.n_iter_early_exag
+if it < self.n_iter_early_exag or remaining > 0:
opt_args['n_iter'] = self.n_iter
opt_args['it'] = it + 1
opt_args['momentum'] = 0.8
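Stripped of the scikit-learn plumbing, the two-part schedule in these hunks amounts to the following sketch (the function name and signature are illustrative, not the module's actual private API):

    def tsne_schedule(P, params, obj_func, gradient_descent,
                      n_iter, n_iter_early_exag, early_exaggeration):
        """Illustrative two-phase t-SNE optimization schedule."""
        # Part 1: exaggerate the pairwise similarities and explore
        # with low momentum for n_iter_early_exag iterations.
        P *= early_exaggeration
        params, kl, it = gradient_descent(obj_func, params,
                                          n_iter=n_iter_early_exag,
                                          momentum=0.5)

        # Part 2: undo the exaggeration and spend the remaining
        # iterations with higher momentum.
        P /= early_exaggeration
        if it < n_iter_early_exag or n_iter - n_iter_early_exag > 0:
            params, kl, it = gradient_descent(obj_func, params,
                                              n_iter=n_iter, it=it + 1,
                                              momentum=0.8)
        return params, kl, it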
4 changes: 2 additions & 2 deletions sklearn/manifold/tests/test_t_sne.py
@@ -678,7 +678,7 @@ def test_n_iter_without_progress():
tsne = TSNE(n_iter_without_progress=-1, verbose=2, learning_rate=1e8,
random_state=0, method=method, n_iter=351, init="random")
tsne._N_ITER_CHECK = 1
-tsne._EXPLORATION_N_ITER = 0
+tsne.n_iter_early_exag = 0

old_stdout = sys.stdout
sys.stdout = StringIO()
@@ -827,7 +827,7 @@ def test_bh_match_exact():
init="random", random_state=0, n_iter=251,
perplexity=30.0, angle=0)
# Kill the early_exaggeration
-tsne._EXPLORATION_N_ITER = 0
+tsne.n_iter_early_exag = 0
X_embeddeds[method] = tsne.fit_transform(X)
n_iter[method] = tsne.n_iter_
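Since n_iter_early_exag is now a public constructor argument, the tests could also disable the exploration phase without overwriting an attribute after construction; an equivalent sketch (assuming this branch):

    # Kill early exaggeration at construction time instead of
    # setting the attribute on the estimator afterwards.
    tsne = TSNE(init="random", random_state=0, n_iter=251,
                perplexity=30.0, angle=0, n_iter_early_exag=0)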
