Skip to content

Commit

Permalink
Fix errors due to previous refactoring on MDS
Browse files Browse the repository at this point in the history
  • Loading branch information
NelleV authored and GaelVaroquaux committed Jun 1, 2012
1 parent ebaee5a commit 81b75a8
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 12 deletions.
2 changes: 1 addition & 1 deletion examples/manifold/plot_mds.py
Expand Up @@ -23,7 +23,7 @@
cities_dataset = load_cities()
similarities = cities_dataset.data

mds = manifold.MDS(p=2, max_iter=3000, eps=1e-9)
mds = manifold.MDS(out_dim=2, max_iter=3000, eps=1e-9)
pos = mds.fit(similarities).positions_

fig = plt.figure(1)
Expand Down
32 changes: 22 additions & 10 deletions sklearn/manifold/mds.py
Expand Up @@ -35,7 +35,7 @@ def pool_adjacent_violators(distances, similarities):
"""
# First approach for ties: ignore them. The multidimensional scaling won't
# enforce that points with equal similarity be at equal distance.
indxs = np.lexsort((similarities, distances))
indxs = np.lexsort((distances, similarities))

new_blocks = range(len(indxs))

Expand Down Expand Up @@ -71,7 +71,7 @@ def pool_adjacent_violators(distances, similarities):


def _smacof_single(similarities, metric=True, out_dim=2, init=None,
max_iter=300, verbose=0, eps=1e-3):
max_iter=300, verbose=0, eps=1e-3, random_state=None):
"""
Computes multidimensional scaling using the SMACOF algorithm
Expand Down Expand Up @@ -100,6 +100,11 @@ def _smacof_single(similarities, metric=True, out_dim=2, init=None,
eps: float, optional, default: 1e-3
relative tolerance w.r.t. stress to declare convergence
random_state: integer or numpy.random.RandomState, optional
The generator used to initialize the configuration. If an integer is
given, it fixes the seed. Defaults to the global numpy random
number generator.
Returns
-------
X: ndarray (n_samples, out_dim), float
Expand All @@ -109,9 +114,9 @@ def _smacof_single(similarities, metric=True, out_dim=2, init=None,
The final value of the stress (sum of squared distance of the
disparities and the distances for all constrained points)
"""
n_samples = similarities.shape[0]
random_state = check_random_state(random_state)

if similarities.shape[0] != similarities.shape[1]:
raise ValueError("similarities must be a square array (shape=%d)" % \
Expand All @@ -124,7 +129,8 @@ def _smacof_single(similarities, metric=True, out_dim=2, init=None,
sim_flat_w = sim_flat[sim_flat != 0]
if init is None:
# Randomly choose initial configuration
X = np.random.random(size=(n_samples, out_dim))
X = random_state.rand(n_samples * out_dim)
X = X.reshape((n_samples, out_dim))
else:
# overrides the parameter out_dim
out_dim = init.shape[1]
Expand Down Expand Up @@ -211,6 +217,11 @@ def smacof(similarities, metric=True, out_dim=2, init=None, n_init=8, n_jobs=1,
eps: float, optional, default: 1e-6
relative tolerance w.r.t. stress to declare convergence
random_state: integer or numpy.random.RandomState, optional
The generator used to initialize the configuration. If an integer is
given, it fixes the seed. Defaults to the global numpy random
number generator.
Returns
-------
X: ndarray (n_samples,out_dim)
Expand Down Expand Up @@ -239,7 +250,8 @@ def smacof(similarities, metric=True, out_dim=2, init=None, n_init=8, n_jobs=1,
pos, stress = _smacof_single(similarities, metric=metric,
out_dim=out_dim,
init=init, max_iter=max_iter,
verbose=verbose, eps=eps)
verbose=verbose, eps=eps,
random_state=random_state)
if best_stress is None or stress < best_stress:
best_stress = stress
best_pos = pos.copy()
Expand All @@ -250,7 +262,7 @@ def smacof(similarities, metric=True, out_dim=2, init=None, n_init=8, n_jobs=1,
similarities, metric=metric, out_dim=out_dim,
init=init, max_iter=max_iter,
verbose=verbose, eps=eps,
rendom_state=seed)
random_state=seed)
for seed in seeds)
positions, stress = zip(results)
best = np.argmin(stress)
Expand Down Expand Up @@ -298,14 +310,14 @@ class MDS(BaseEstimator):
Notes
-----
Modern Multidimensional Scaling - Theory and Applications Borg, I.;
"Modern Multidimensional Scaling - Theory and Applications" Borg, I.;
Groenen P. Springer Series in Statistics (1997)
Nonmetric multidimensional scaling: a numerical method Kruskal, J.
"Nonmetric multidimensional scaling: a numerical method" Kruskal, J.
Psychometrika, 29 (1964)
Multidimensional scaling by optimizing goodness of fit to a nonmetric
hypothesis Kruskal, J. Psychometrika, 29, (1964)
"Multidimensional scaling by optimizing goodness of fit to a nonmetric
hypothesis" Kruskal, J. Psychometrika, 29, (1964)
"""
def __init__(self, out_dim=2, metric=True, n_init=8,
Expand Down
2 changes: 1 addition & 1 deletion sklearn/manifold/tests/test_mds.py
Expand Up @@ -26,7 +26,7 @@ def test_smacof():
[.451, .252],
[.016, -.238],
[-.200, .524]])
X, _ = mds.smacof(sim, init=Z, p=2, max_iter=1)
X, _ = mds.smacof(sim, init=Z, out_dim=2, max_iter=1)
X_true = np.array([[-1.415, -2.471],
[1.633, 1.107],
[.249, -.067],
Expand Down

0 comments on commit 81b75a8

Please sign in to comment.