Skip to content
Browse files

MDS now computes the SMACOF algorithm several times, and returns the …

…results with the lowest stress
  • Loading branch information...
1 parent f568e36 commit 6366a8e0bad9d970311ae6e8fa13bfdf08a952e2 @NelleV NelleV committed with GaelVaroquaux Apr 25, 2012
Showing with 77 additions and 8 deletions.
  1. +1 −0 sklearn/manifold/__init__.py
  2. +76 −8 sklearn/manifold/mds.py
View
1 sklearn/manifold/__init__.py
@@ -4,3 +4,4 @@
from .locally_linear import locally_linear_embedding, LocallyLinearEmbedding
from .isomap import Isomap
+from .mds import MDS
View
84 sklearn/manifold/mds.py
@@ -78,7 +78,7 @@ def guttman_image_ranking(distances, similarities, verbose=False):
return disparities
-def smacof(similarities, metric=True, p=2, init=None,
+def _smacof_single(similarities, metric=True, p=2, init=None,
max_iter=300, verbose=0, eps=1e-3):
"""
Computes multidimensional scaling using SMACOF algorithm
@@ -110,8 +110,8 @@ def smacof(similarities, metric=True, p=2, init=None,
Returns
-------
- X: ndarray (n, p)
- coordinates of the n points in a p-space
+ X, stress: ndarray (n, p), float
+ coordinates of the n points in a p-space
"""
n = similarities.shape[0]
@@ -175,7 +175,67 @@ def smacof(similarities, metric=True, p=2, init=None,
break
old_stress = stress
- return X
+ return X, stress
+
+
+def smacof(similarities, metric=True, p=2, init=None, n_init=8, n_jobs=1,
+ max_iter=300, verbose=0, eps=1e-3, random_state=None):
+ """
+ Computes multidimensional scaling using SMACOF algorithm
+
+ Parameters
+ ----------
+ similarities: symmetric ndarray, shape [n * n]
+ similarities between the points
+
+ metric: boolean, optional, default: True
+ compute metric or nonmetric SMACOF algorithm
+
+ p: int, optional, default: 2
+ number of dimension in which to immerse the similarities
+ overridden if initial array is provided.
+
+ init: {None or ndarray}
+ if None, randomly chooses the initial configuration
+ if ndarray, initialize the SMACOF algorithm with this array
+
+ n_init: int, optional, default: 8
+ Number of time the smacof algorithm will be run with different
+ initialisation. The final results will be the best output of the
+ n_init consecutive runs in terms of stress.
+
+ max_iter: int, optional, default: 300
+ Maximum number of iterations of the SMACOF algorithm for a single run
+
+ verbose: int, optional, default: 0
+ level of verbosity
+
+ eps: float, optional, default: 1e-6
+ relative tolerance w.r.t stress to declare converge
+
+ Returns
+ -------
+ X: ndarray (n, p)
+ coordinates of the n points in a p-space
+ """
+ best_pos, best_stress = None, None
+
+ if hasattr(init, '__array__'):
+ # TODO
+ init = np.asarray(init).copy()
+
+ if n_jobs == 1:
+ for it in range(n_init):
+ pos, stress = _smacof_single(similarities, metric=metric, p=p,
+ init=init, max_iter=max_iter,
+ verbose=verbose, eps=eps)
+ if best_stress is None or stress < best_stress:
+ best_stress = stress
+ best_pos = pos.copy()
+ else:
+ # TODD
+ seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)
+ return best_pos, best_stress
class MDS(BaseEstimator):
@@ -188,18 +248,26 @@ class MDS(BaseEstimator):
-----
"""
# TODO
- def __init__(self, p=2, metric=True, init=None, max_iter=300, eps=1e-3):
+ def __init__(self, p=2, metric=True, init=None, n_init=8,
+ max_iter=300, verbose=0, eps=1e-3, n_jobs=1):
self.p = p
+ self.metric = metric
self.init = init
+ self.n_init = n_init
self.max_iter = max_iter
self.eps = eps
+ self.verbose = verbose
+ self.n_jobs = n_jobs
def fit(self, X, y=None):
"""
"""
- self.X = smacof(X, metric=self.metric, p=self.p, init=self.init,
- max_iter=self.max_iter, verbose=self.verbose,
- eps=self.eps)
+ self.X, self.stress = smacof(X, metric=self.metric, p=self.p,
+ init=self.init,
+ n_init=self.n_init,
+ max_iter=self.max_iter,
+ verbose=self.verbose,
+ eps=self.eps)
return self
def predict(self, X):

0 comments on commit 6366a8e

Please sign in to comment.
Something went wrong with that request. Please try again.