Browse files

MDS: out_dim → n_components

  • Loading branch information...
1 parent 096e792 commit 5b53e8dd77e7411ed0ffd41503411d63c22cc440 @NelleV NelleV committed with GaelVaroquaux May 3, 2012
Showing with 53 additions and 26 deletions.
  1. +3 −3 examples/manifold/plot_mds.py
  2. +49 −22 sklearn/manifold/mds.py
  3. +1 −1 sklearn/manifold/tests/test_mds.py
View
6 examples/manifold/plot_mds.py
@@ -32,17 +32,17 @@
noise[np.arange(noise.shape[0]), np.arange(noise.shape[0])] = 0
similarities += noise
-mds = manifold.MDS(out_dim=2, max_iter=3000, n_jobs=2,
+mds = manifold.MDS(n_components=2, max_iter=3000, n_jobs=2,
eps=1e-9)
pos = mds.fit(similarities).positions_
-nmds = manifold.MDS(out_dim=2, metric=False,
+nmds = manifold.MDS(n_components=2, metric=False,
max_iter=3000, n_jobs=2,
eps=1e-9)
npos = mds.fit(similarities).positions_
# Rotate the data
-clf = PCA(n_components=3)
+clf = PCA(n_components=2)
X_true = clf.fit_transform(X_true)
pos = clf.fit_transform(pos)
View
71 sklearn/manifold/mds.py
@@ -70,7 +70,7 @@ def pool_adjacent_violators(distances, similarities):
return distances
-def _smacof_single(similarities, metric=True, out_dim=2, init=None,
+def _smacof_single(similarities, metric=True, n_components=2, init=None,
max_iter=300, verbose=0, eps=1e-3, random_state=None):
"""
Computes multidimensional scaling using SMACOF algorithm
@@ -83,9 +83,9 @@ def _smacof_single(similarities, metric=True, out_dim=2, init=None,
metric: boolean, optional, default: True
compute metric or nonmetric SMACOF algorithm
- out_dim: int, optional, default: 2
+ n_components: int, optional, default: 2
number of dimension in which to immerse the similarities
- overridden if initial array is provided.
+ overridden if initial array is provided.
init: {None or ndarray}
if None, randomly chooses the initial configuration
@@ -107,8 +107,8 @@ def _smacof_single(similarities, metric=True, out_dim=2, init=None,
Returns
-------
- X: ndarray (n_samples, out_dim), float
- coordinates of the n_samples points in a out_dim-space
+ X: ndarray (n_samples, n_components), float
+ coordinates of the n_samples points in a n_components-space
stress_: float
The final value of the stress (sum of squared distance of the
@@ -129,14 +129,14 @@ def _smacof_single(similarities, metric=True, out_dim=2, init=None,
sim_flat_w = sim_flat[sim_flat != 0]
if init is None:
# Randomly choose initial configuration
- X = random_state.rand(n_samples * out_dim)
- X = X.reshape((n_samples, out_dim))
+ X = random_state.rand(n_samples * n_components)
+ X = X.reshape((n_samples, n_components))
else:
# overrides the parameter n_components
- out_dim = init.shape[1]
+ n_components = init.shape[1]
if n_samples != init.shape[0]:
raise ValueError("init matrix should be of shape (%d, %d)" % \
- (n_samples, out_dim))
+ (n_samples, n_components))
X = init
old_stress = None
@@ -182,11 +182,27 @@ def _smacof_single(similarities, metric=True, out_dim=2, init=None,
return X, stress
-def smacof(similarities, metric=True, out_dim=2, init=None, n_init=8, n_jobs=1,
- max_iter=300, verbose=0, eps=1e-3, random_state=None):
+def smacof(similarities, metric=True, n_components=2, init=None, n_init=8,
+ n_jobs=1, max_iter=300, verbose=0, eps=1e-3, random_state=None):
"""
Computes multidimensional scaling using SMACOF algorithm
+ The SMACOF algorithm is a multidimensional scaling algorithm: it minimizes
+ an objective function, the *stress*, using a majorization technique. The
+ Stress Majorization, also known as the Guttman Transform, guarantees a
+ monotone convergence of Stress, and is more powerful than traditional
+ techniques such as gradient descent.
+
+ The SMACOF algorithm for metric MDS can be summarized by the following steps:
+
+ 1. Set an initial start configuration, randomly or not.
+ 2. Compute the stress
+ 3. Compute the Guttman Transform
+ 4. Iterate 2 and 3 until convergence.
+
+ The nonmetric algorithm adds a monotonic regression step before computing
+ the stress.
+
Parameters
----------
similarities: symmetric ndarray, shape (n_samples, n_samples)
@@ -195,11 +211,11 @@ def smacof(similarities, metric=True, out_dim=2, init=None, n_init=8, n_jobs=1,
metric: boolean, optional, default: True
compute metric or nonmetric SMACOF algorithm
- out_dim: int, optional, default: 2
+ n_components: int, optional, default: 2
number of dimension in which to immerse the similarities
overridden if initial array is provided.
- init: {None or ndarray of shape (n_samples, out_dim)}
+ init: {None or ndarray of shape (n_samples, n_components)}
if None, randomly chooses the initial configuration
if ndarray, initialize the SMACOF algorithm with this array
@@ -208,6 +224,17 @@ def smacof(similarities, metric=True, out_dim=2, init=None, n_init=8, n_jobs=1,
initialisation. The final results will be the best output of the
n_init consecutive runs in terms of stress.
+ n_jobs: int, optional, default: 1
+
+ The number of jobs to use for the computation. When several
+ initializations are requested (n_init), the independent SMACOF runs
+ are computed in parallel.
+
+ If -1 all CPUs are used. If 1 is given, no parallel computing code is
+ used at all, which is useful for debugging. For n_jobs below -1,
+ (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one
+ are used.
+
max_iter: int, optional, default: 300
Maximum number of iterations of the SMACOF algorithm for a single run
@@ -224,8 +251,8 @@ def smacof(similarities, metric=True, out_dim=2, init=None, n_init=8, n_jobs=1,
Returns
-------
- X: ndarray (n_samples,out_dim)
- Coordinates of the n_samples points in a out_dim-space
+ X: ndarray (n_samples,n_components)
+ Coordinates of the n_samples points in a n_components-space
stress: float
The final value of the stress (sum of squared distance of the
@@ -248,7 +275,7 @@ def smacof(similarities, metric=True, out_dim=2, init=None, n_init=8, n_jobs=1,
if n_jobs == 1:
for it in range(n_init):
pos, stress = _smacof_single(similarities, metric=metric,
- out_dim=out_dim,
+ n_components=n_components,
init=init, max_iter=max_iter,
verbose=verbose, eps=eps,
random_state=random_state)
@@ -259,7 +286,7 @@ def smacof(similarities, metric=True, out_dim=2, init=None, n_init=8, n_jobs=1,
seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)
results = Parallel(n_jobs=n_jobs, verbose=max(verbose - 1, 0))(
delayed(_smacof_single)(
- similarities, metric=metric, out_dim=out_dim,
+ similarities, metric=metric, n_components=n_components,
init=init, max_iter=max_iter,
verbose=verbose, eps=eps,
random_state=seed)
@@ -280,7 +307,7 @@ class MDS(BaseEstimator):
metric: boolean, optional, default: True
compute metric or nonmetric SMACOF algorithm
- out_dim: int, optional, default: 2
+ n_components: int, optional, default: 2
number of dimension in which to immerse the similarities
overridden if initial array is provided.
@@ -300,7 +327,7 @@ class MDS(BaseEstimator):
Attributes
----------
- positions_: array-like, shape [out_dim, n_samples]
+ positions_: array-like, shape [n_samples, n_components]
Stores the position of the dataset in the embedding space
stress_: float
@@ -320,9 +347,9 @@ class MDS(BaseEstimator):
hypothesis" Kruskal, J. Psychometrika, 29, (1964)
"""
- def __init__(self, out_dim=2, metric=True, n_init=8,
+ def __init__(self, n_components=2, metric=True, n_init=8,
max_iter=300, verbose=0, eps=1e-3, n_jobs=1):
- self.out_dim = out_dim
+ self.n_components = n_components
self.metric = metric
self.n_init = n_init
self.max_iter = max_iter
@@ -344,7 +371,7 @@ def fit(self, X, init=None, y=None):
if ndarray, initialize the SMACOF algorithm with this array
"""
self.positions_, self.stress_ = smacof(X, metric=self.metric,
- out_dim=self.out_dim,
+ n_components=self.n_components,
init=init,
n_init=self.n_init,
max_iter=self.max_iter,
View
2 sklearn/manifold/tests/test_mds.py
@@ -26,7 +26,7 @@ def test_smacof():
[.451, .252],
[.016, -.238],
[-.200, .524]])
- X, _ = mds.smacof(sim, init=Z, out_dim=2, max_iter=1)
+ X, _ = mds.smacof(sim, init=Z, n_components=2, max_iter=1)
X_true = np.array([[-1.415, -2.471],
[1.633, 1.107],
[.249, -.067],

0 comments on commit 5b53e8d

Please sign in to comment.