Merge branch 'master' into l1_logreg_minC

commit a426dbfbd381e83a1a3cd8f9a4dc100363091e02 (2 parents: 27206e0 + cd5acaf)
@paolo-losi authored
51 doc/modules/clustering.rst
@@ -1,8 +1,8 @@
.. _clustering:
-===================================================
+==========
Clustering
-===================================================
+==========
`Clustering <http://en.wikipedia.org/wiki/Cluster_analysis>`__ of
unlabeled data can be performed with the module :mod:`scikits.learn.cluster`.
@@ -15,7 +15,7 @@ data can be found in the `labels_` attribute.
.. currentmodule:: scikits.learn.cluster
-.. topic:: Input data
+.. topic:: Input data
One important thing to note is that the algorithms implemented in
this module take different kinds of matrices as input. On one hand,
@@ -41,7 +41,6 @@ be specified. It scales well to large number of samples, however its
results may be dependent on an initialisation.
-
Affinity propagation
====================
@@ -84,7 +83,7 @@ of cluster. It will have difficulties scaling to thousands of samples.
Spectral clustering
-====================
+===================
:class:`SpectralClustering` does a low-dimension embedding of the
affinity matrix between samples, followed by a KMeans in the low
@@ -121,6 +120,24 @@ function of the gradient of the image.
* :ref:`example_cluster_plot_lena_segmentation.py`: Spectral clustering
to split the image of lena in regions.
+.. topic:: References:
+
+ * `"A Tutorial on Spectral Clustering"
+ <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323>`_
+ Ulrike von Luxburg, 2007
+
+ * `"Normalized cuts and image segmentation"
+ <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324>`_
+ Jianbo Shi, Jitendra Malik, 2000
+
+ * `"A Random Walks View of Spectral Segmentation"
+ <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.33.1501>`_
+ Marina Meila, Jianbo Shi, 2001
+
+ * `"On Spectral Clustering: Analysis and an algorithm"
+ <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100>`_
+ Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001
+
.. _hierarchical_clustering:
@@ -132,27 +149,27 @@ build nested clusters by merging them successively. This hierarchy of
clusters is represented as a tree (or dendrogram). The root of the tree is
the unique cluster that gathers all the samples, the leaves being the
clusters with only one sample. See the `Wikipedia page
-<http://en.wikipedia.org/wiki/Hierarchical_clustering for more
-details>`_.
+<http://en.wikipedia.org/wiki/Hierarchical_clustering>`_ for more
+details.
-
-The :class:`Ward` object performs a hierarchical clustering based on Ward
-algorithm, that is a variance-minimizing approach. At each step, it
-minimizes the sum of squared differences within all clusters (inertia
-criterion).
+The :class:`Ward` object performs a hierarchical clustering based on
+the Ward algorithm, that is a variance-minimizing approach. At each
+step, it minimizes the sum of squared differences within all clusters
+(inertia criterion).
This algorithm can scale to a large number of samples when it is used jointly
with a connectivity matrix, but can be computationally expensive when no
connectivity constraints are added between samples: it considers at each step
all the possible merges.
-Adding connectivity constraints
-----------------------------------
+
+Adding connectivity constraints
+-------------------------------
An interesting aspect of the :class:`Ward` object is that connectivity
constraints can be added to this algorithm (only adjacent clusters can be
merged together), through a connectivity matrix that defines for each
-sample the neighboring samples following a given structure of the data. For
+sample the neighboring samples following a given structure of the data. For
instance, in the swiss-roll example below, the connectivity constraints
forbid the merging of points that are not adjacent on the swiss roll, and
thus avoid forming clusters that extend across overlapping folds of the
@@ -184,10 +201,10 @@ enable only merging of neighboring pixels on an image, as in the
.. topic:: Examples:
- * :ref:`example_cluster_plot_lena_ward_segmentation.py`: Ward clustering
+ * :ref:`example_cluster_plot_lena_ward_segmentation.py`: Ward clustering
to split the image of lena in regions.
- * :ref:`example_cluster_plot_ward_structured_vs_unstructured.py`: Example of
+ * :ref:`example_cluster_plot_ward_structured_vs_unstructured.py`: Example of
Ward algorithm on a swiss-roll, comparison of structured approaches
versus unstructured approaches.
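
A minimal usage sketch of the estimator API touched by this commit, mirroring the pattern used in the updated tests further down (the block-structured affinity matrix is illustrative, not taken from the docs)::

    import numpy as np
    from scikits.learn.cluster import SpectralClustering

    # Two blocks of strongly similar samples joined by a weak link
    # (the weak link keeps the graph in a single connected component).
    S = np.array([[1., 1., 1., .1, 0., 0.],
                  [1., 1., 1., .1, 0., 0.],
                  [1., 1., 1., .1, 0., 0.],
                  [.1, .1, .1, 1., 1., 1.],
                  [0., 0., 0., 1., 1., 1.],
                  [0., 0., 0., 1., 1., 1.]])

    # rng=0 seeds both the lobpcg initialization and the k-means init,
    # which is what makes the clustering reproducible after this commit.
    model = SpectralClustering(rng=0).fit(S, k=2)
    print model.labels_  # two clusters of three samples each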
11 scikits/learn/cluster/k_means_.py
@@ -12,6 +12,7 @@
from ..base import BaseEstimator
from ..metrics.pairwise import euclidean_distances
+from ..utils import make_rng
###############################################################################
@@ -52,8 +53,7 @@ def k_init(X, k, n_local_trials=None, rng=None, x_squared_norms=None):
which is the implementation used in the aforementioned paper.
"""
n_samples, n_features = X.shape
- if rng is None:
- rng = np.random
+ rng = make_rng(rng)
centers = np.empty((k, n_features))
@@ -80,8 +80,8 @@ def k_init(X, k, n_local_trials=None, rng=None, x_squared_norms=None):
for c in xrange(1, k):
# Choose center candidates by sampling with probability proportional
# to the squared distance to the closest existing center
- rand_vals = rng.random(n_local_trials) * current_pot
- candidate_ids = np.searchsorted(closest_dist_sq.cumsum(), rand_vals)
+ rand_vals = rng.random_sample(n_local_trials) * current_pot
+ candidate_ids = np.searchsorted(closest_dist_sq.cumsum(), rand_vals)
# Compute distances to center candidates
distance_to_candidates = euclidean_distances(
@@ -181,8 +181,7 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
The final value of the inertia criterion
"""
- if rng is None:
- rng = np.random
+ rng = make_rng(rng)
n_samples = X.shape[0]
vdata = np.mean(np.var(X, 0))
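
The two changed lines in `k_init` implement the k-means++ candidate draw by inverse-transform sampling: each sample is selected with probability proportional to its squared distance to the closest existing center. A standalone sketch of just that step, with toy distance values::

    import numpy as np

    rng = np.random.RandomState(42)

    # Squared distance of each sample to its nearest existing center
    # (toy values): samples 1 and 3 are far away and should be favored.
    closest_dist_sq = np.array([0.1, 4.0, 0.5, 9.0, 2.4])
    current_pot = closest_dist_sq.sum()

    # Uniform draws in [0, current_pot) mapped through the cumulative sum:
    # each sample owns a slice of the cumsum proportional to its squared
    # distance, so searchsorted picks far-away samples more often.
    n_local_trials = 3
    rand_vals = rng.random_sample(n_local_trials) * current_pot
    candidate_ids = np.searchsorted(closest_dist_sq.cumsum(), rand_vals)
    print candidate_ids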
305 scikits/learn/cluster/spectral.py
@@ -1,5 +1,4 @@
-""" Algorithms for spectral clustering.
-"""
+"""Algorithms for spectral clustering"""
# Author: Gael Varoquaux gael.varoquaux@normalesup.org
# License: BSD
@@ -9,34 +8,54 @@
from ..base import BaseEstimator
+from ..utils import make_rng
from ..utils.graph import graph_laplacian
from .k_means_ import k_means
-def spectral_embedding(adjacency, k=8, mode=None):
- """ Spectral embedding: project the sample on the k first
- eigen vectors of the normalized graph Laplacian.
+def spectral_embedding(adjacency, n_components=8, mode=None, rng=None):
+ """Project the sample on the first eigen vectors of the graph Laplacian
- Parameters
- -----------
- adjacency: array-like or sparse matrix, shape: (p, p)
- The adjacency matrix of the graph to embed.
- k: integer, optional
- The dimension of the projection subspace.
- mode: {None, 'arpack' or 'amg'}
- The eigenvalue decomposition strategy to use. AMG (Algebraic
- MultiGrid) is much faster, but requires pyamg to be
- installed.
-
- Returns
- --------
- embedding: array, shape: (p, k)
- The reduced samples
+ The adjacency matrix is used to compute a normalized graph Laplacian
+ whose spectrum (especially the eigen vectors associated to the
+ smallest eigen values) has an interpretation in terms of minimal
+ number of cuts necessary to split the graph into comparably sized
+ components.
- Notes
- ------
- The graph should contain only one connect component,
- elsewhere the results make little sens.
+ This embedding can also 'work' even if the ``adjacency`` variable is
+ not strictly the adjacency matrix of a graph but more generally
+ an affinity or similarity matrix between samples (for instance the
+ heat kernel of a euclidean distance matrix or a k-NN matrix).
+
+ However care must be taken to always make the affinity matrix symmetric
+ so that the eigen vector decomposition works as expected.
+
+ Parameters
+ -----------
+ adjacency: array-like or sparse matrix, shape: (n_samples, n_samples)
+ The adjacency matrix of the graph to embed.
+
+ n_components: integer, optional
+ The dimension of the projection subspace.
+
+ mode: {None, 'arpack' or 'amg'}
+ The eigenvalue decomposition strategy to use. AMG (Algebraic
+ MultiGrid) is much faster, but requires pyamg to be
+ installed.
+
+ rng: int seed, RandomState instance, or None (default)
+ A pseudo random number generator used for the initialization of the
+ lobpcg eigen vectors decomposition when mode == 'amg'.
+
+ Returns
+ --------
+ embedding: array, shape: (n_samples, n_components)
+ The reduced samples
+
+ Notes
+ ------
+ The graph should contain only one connected component, otherwise the
+ results make little sense.
"""
from scipy import sparse
@@ -48,6 +67,8 @@ def spectral_embedding(adjacency, k=8, mode=None):
except ImportError:
amg_loaded = False
+ rng = make_rng(rng)
+
n_nodes = adjacency.shape[0]
# XXX: Should we check that the matrix given is symmetric
if not amg_loaded:
@@ -58,11 +79,12 @@ def spectral_embedding(adjacency, k=8, mode=None):
normed=True, return_diag=True)
if (mode == 'arpack'
or not sparse.isspmatrix(laplacian)
- or n_nodes < 5*k # This is the threshold under which lobpcg has bugs
- ):
+ or n_nodes < 5 * n_components):
+ # lobpcg used with mode='amg' has bugs for low number of nodes
+
# We need to put the diagonal at zero
if not sparse.isspmatrix(laplacian):
- laplacian[::n_nodes+1] = 0
+ laplacian[::n_nodes + 1] = 0
else:
laplacian = laplacian.tocoo()
diag_idx = (laplacian.row == laplacian.col)
@@ -78,123 +100,186 @@ def spectral_embedding(adjacency, k=8, mode=None):
# csr has the fastest matvec and is thus best suited to
# arpack
laplacian = laplacian.tocsr()
- lambdas, diffusion_map = arpack_eigsh(-laplacian, k=k, which='LA')
- embedding = diffusion_map.T[::-1]*dd
+ lambdas, diffusion_map = arpack_eigsh(-laplacian, k=n_components,
+ which='LA')
+ embedding = diffusion_map.T[::-1] * dd
elif mode == 'amg':
# Use AMG to get a preconditioner and speed up the eigenvalue
# problem.
- laplacian = laplacian.astype(np.float) # lobpcg needs the native float
+ laplacian = laplacian.astype(np.float) # lobpcg needs native floats
ml = smoothed_aggregation_solver(laplacian.tocsr())
- X = np.random.rand(laplacian.shape[0], k)
+ X = rng.rand(laplacian.shape[0], n_components)
X[:, 0] = 1. / dd.ravel()
M = ml.aspreconditioner()
lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12,
largest=False)
embedding = diffusion_map.T * dd
- if embedding.shape[0] == 1: raise ValueError
+ if embedding.shape[0] == 1:
+ raise ValueError
else:
- raise ValueError("Unknown value for mode: '%s'." % mode)
+ raise ValueError("Unknown value for mode: '%s'."
+ "Should be 'amg' or 'arpack'" % mode)
return embedding
-def spectral_clustering(adjacency, k=8, mode=None):
- """ Spectral clustering: apply k-means to a projection of the
- graph laplacian, finds normalized graph cuts.
+def spectral_clustering(affinity, k=8, n_components=None, mode=None,
+ rng=None):
+ """Apply k-means to a projection to the normalized laplacian
- Parameters
- -----------
- adjacency: array-like or sparse matrix, shape: (p, p)
- The adjacency matrix of the graph to embed.
- k: integer, optional
- The dimension of the projection subspace.
- mode: {None, 'arpack' or 'amg'}
- The eigenvalue decomposition strategy to use. AMG (Algebraic
- MultiGrid) is much faster, but requires pyamg to be
- installed.
-
- Returns
- --------
- labels: array of integers, shape: p
- The labels of the clusters.
- centers: array of integers, shape: k
- The indices of the cluster centers
+ In practice Spectral Clustering is very useful when the structure of
+ the individual clusters is highly non-convex or more generally when
+ a measure of the center and spread of the cluster is not a suitable
+ description of the complete cluster. For instance when clusters are
+ nested circles on the 2D plane.
- Notes
- ------
- The graph should contain only one connect component,
- elsewhere the results make little sens.
+ If affinity is the adjacency matrix of a graph, this method can be
+ used to find normalized graph cuts.
+
+ Parameters
+ -----------
+ affinity: array-like or sparse matrix, shape: (n_samples, n_samples)
+ The affinity matrix describing the relationship of the samples to
+ embed. **Must be symmetric**.
+
+ Possible examples:
+ - adjacency matrix of a graph,
+ - heat kernel of the pairwise distance matrix of the samples,
+ - symmetric k-nearest neighbours connectivity matrix of the samples.
+
+ k: integer, optional
+ Number of clusters to extract.
+
+ n_components: integer, optional, default is k
+ Number of eigen vectors to use for the spectral embedding
+
+ mode: {None, 'arpack' or 'amg'}
+ The eigenvalue decomposition strategy to use. AMG (Algebraic
+ MultiGrid) is much faster, but requires pyamg to be
+ installed.
+
+ rng: int seed, RandomState instance, or None (default)
+ A pseudo random number generator used for the initialization
+ of the lobpcg eigen vectors decomposition when mode == 'amg'
+ and by the K-Means initialization.
+
+ Returns
+ -------
+ labels: array of integers, shape: n_samples
+ The labels of the clusters.
- This algorithm solves the normalized cut for k=2: it is a
- normalized spectral clustering.
+ centers: array of integers, shape: k
+ The indices of the cluster centers
+
+ References
+ ----------
+ - Normalized cuts and image segmentation, 2000
+ Jianbo Shi, Jitendra Malik
+ http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324
+
+ - A Tutorial on Spectral Clustering, 2007
+ Ulrike von Luxburg
+ http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323
+
+ Notes
+ ------
+ The graph should contain only one connected component, otherwise
+ the results make little sense.
+
+ This algorithm solves the normalized cut for k=2: it is a
+ normalized spectral clustering.
"""
- maps = spectral_embedding(adjacency, k=k, mode=mode)
+ rng = make_rng(rng)
+ n_components = k if n_components is None else n_components
+ maps = spectral_embedding(affinity, n_components=n_components,
+ mode=mode, rng=rng)
maps = maps[1:]
- _, labels, _ = k_means(maps.T, k)
+ _, labels, _ = k_means(maps.T, k, rng=rng)
return labels
-################################################################################
class SpectralClustering(BaseEstimator):
- """ Spectral clustering: apply k-means to a projection of the
- graph laplacian, finds normalized graph cuts.
+ """Apply k-means to a projection to the normalized laplacian
- Parameters
- -----------
- k: integer, optional
- The dimension of the projection subspace.
- mode: {None, 'arpack' or 'amg'}
- The eigenvalue decomposition strategy to use. AMG (Algebraic
- MultiGrid) is much faster, but requires pyamg to be
- installed.
+ In practice Spectral Clustering is very useful when the structure of
+ the individual clusters is highly non-convex or more generally when
+ a measure of the center and spread of the cluster is not a suitable
+ description of the complete cluster. For instance when clusters are
+ nested circles on the 2D plane.
- Methods
- -------
+ If affinity is the adjacency matrix of a graph, this method can be
+ used to find normalized graph cuts.
- fit(X):
- Compute spectral clustering
+ Parameters
+ -----------
+ k: integer, optional
+ The dimension of the projection subspace.
- Attributes
- ----------
+ mode: {None, 'arpack' or 'amg'}
+ The eigenvalue decomposition strategy to use. AMG (Algebraic
+ MultiGrid) is much faster, but requires pyamg to be installed.
- labels_:
- Labels of each point
+ rng: int seed, RandomState instance, or None (default)
+ A pseudo random number generator used for the initialization
+ of the lobpcg eigen vectors decomposition when mode == 'amg'
+ and by the K-Means initialization.
- """
+ Methods
+ -------
+
+ fit(X):
+ Compute spectral clustering
+
+ Attributes
+ ----------
+ labels_:
+ Labels of each point
+
+ References
+ ----------
+ - Normalized cuts and image segmentation, 2000
+ Jianbo Shi, Jitendra Malik
+ http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324
+
+ - A Tutorial on Spectral Clustering, 2007
+ Ulrike von Luxburg
+ http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323
+ """
- def __init__(self, k=8, mode=None):
+ def __init__(self, k=8, mode=None, rng=None):
self.k = k
self.mode = mode
-
+ self.rng = make_rng(rng)
def fit(self, X, **params):
- """ Compute the spectral clustering from the adjacency matrix of
- the graph.
-
- Parameters
- -----------
- X: array-like or sparse matrix, shape: (p, p)
- The adjacency matrix of the graph to embed.
- X is an adjacency matrix of a similarity graph: its
- entries must be positive or zero. Zero means that
- elements have nothing in common, whereas high values mean
- that elements are strongly similar.
-
- Notes
- ------
- If you have an affinity matrix, such as a distance matrix,
- for which 0 means identical elements, and high values means
- very dissimilar elements, it can be transformed in a
- similarity matrix that is well suited for the algorithm by
- applying the gaussian (heat) kernel::
-
- np.exp(- X**2/2. * delta**2)
-
- If the pyamg package is installed, it is used. This
- greatly speeds up computation.
+ """Compute the spectral clustering from the affinity matrix
+
+ Parameters
+ -----------
+ X: array-like or sparse matrix, shape: (n_samples, n_samples)
+ An affinity matrix describing the pairwise similarity of the
+ data. It can also be an adjacency matrix of the graph to embed.
+ X must be symmetric and its entries must be positive or
+ zero. Zero means that elements have nothing in common,
+ whereas high values mean that elements are strongly similar.
+
+ Notes
+ ------
+ If you have an affinity matrix, such as a distance matrix,
+ for which 0 means identical elements, and high values mean
+ very dissimilar elements, it can be transformed into a
+ similarity matrix that is well suited for the algorithm by
+ applying the gaussian (heat) kernel::
+
+ np.exp(- X ** 2 / (2. * delta ** 2))
+
+ Another alternative is to take a symmetric version of the k
+ nearest neighbors connectivity matrix of the points.
+
+ If the pyamg package is installed, it is used: this greatly
+ speeds up computation.
"""
self._set_params(**params)
- self.labels_ = spectral_clustering(X,
- k=self.k, mode=self.mode)
+ self.labels_ = spectral_clustering(X, k=self.k, mode=self.mode,
+ rng=self.rng)
return self
-
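
The `fit` docstring above suggests turning a distance matrix into an affinity matrix with a gaussian (heat) kernel before clustering. A sketch of that transformation under illustrative assumptions (four points on a line, a hand-picked `delta`)::

    import numpy as np
    from scikits.learn.cluster import spectral_clustering

    # Toy pairwise distances: points 0-1 sit close together, so do 2-3.
    x = np.array([0., 0.5, 5.0, 5.5])
    D = np.abs(x[:, np.newaxis] - x[np.newaxis, :])

    # Heat kernel: identical points get affinity 1, distant points decay
    # towards 0; delta controls how fast the similarity falls off.
    delta = 1.0
    affinity = np.exp(-D ** 2 / (2. * delta ** 2))

    labels = spectral_clustering(affinity, k=2, rng=0)
    print labels  # expected grouping: {0, 1} vs {2, 3}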
19 scikits/learn/cluster/tests/test_spectral.py
@@ -1,12 +1,11 @@
-"""
-Testing for Clustering methods
-
-"""
+"""Testing for Spectral Clustering methods"""
import numpy as np
from numpy.testing import assert_equal
from scipy import sparse
import nose
+from cPickle import loads
+from cPickle import dumps
from .. import SpectralClustering
@@ -22,12 +21,19 @@ def test_spectral_clustering():
])
for mat in (S, sparse.csr_matrix(S)):
- labels = SpectralClustering().fit(mat, k=2).labels_
+ model = SpectralClustering(rng=0).fit(mat, k=2)
+ labels = model.labels_
if labels[0] == 0:
labels = 1 - labels
assert_equal(labels, [1, 1, 1, 0, 0, 0, 0])
+ model_copy = loads(dumps(model))
+ assert_equal(model_copy.k, model.k)
+ assert_equal(model_copy.mode, model.mode)
+ assert_equal(model_copy.rng.get_state(), model.rng.get_state())
+ assert_equal(model_copy.labels_, model.labels_)
+
def test_spectral_clustering_sparse():
# We need a large matrix, or the lobpcg solver will fall back to its
@@ -47,9 +53,8 @@ def test_spectral_clustering_sparse():
S = sparse.coo_matrix(S)
- labels = SpectralClustering().fit(S, k=2).labels_
+ labels = SpectralClustering(rng=0).fit(S, k=2).labels_
if labels[0] == 0:
labels = 1 - labels
assert np.mean(labels == [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]) > .9
-
16 scikits/learn/utils/__init__.py
@@ -9,3 +9,19 @@ def safe_asanyarray(X, dtype=None, order=None):
else:
return np.asanyarray(X, dtype, order)
+def make_rng(seed):
+ """Turn seed into a np.random.RandomState instance
+
+ If seed is None, return the np.random singleton.
+ If seed is an int, return a new RandomState instance seeded with seed.
+ If seed is already a RandomState instance, return it.
+ Otherwise raise ValueError.
+ """
+ if seed is None or seed is np.random:
+ return np.random
+ if isinstance(seed, int):
+ return np.random.RandomState(seed)
+ if isinstance(seed, np.random.RandomState):
+ return seed
+ raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
+ ' instance' % seed)
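
`make_rng` centralizes the seed-handling idiom that was previously duplicated in `k_means` and `fast_svd`. The three accepted inputs in short, mirrored by the new test at the bottom of this diff::

    import numpy as np
    from scikits.learn.utils import make_rng

    assert make_rng(None) is np.random    # None -> the global singleton
    rng = make_rng(42)                    # int -> fresh seeded RandomState
    state = np.random.RandomState(0)
    assert make_rng(state) is state       # instances pass through untouched

    try:
        make_rng("not a seed")            # anything else raises
    except ValueError:
        pass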
7 scikits/learn/utils/extmath.py
@@ -7,6 +7,7 @@
import sys
import math
+from . import make_rng
import numpy as np
#XXX: We should have a function with numpy's slogdet API
@@ -154,11 +155,7 @@ def fast_svd(M, k, p=None, q=0, transpose='auto', rng=0):
if p == None:
p = k
- if rng is None:
- rng = np.random.RandomState()
- elif isinstance(rng, int):
- rng = np.random.RandomState(rng)
-
+ rng = make_rng(rng)
n_samples, n_features = M.shape
if transpose == 'auto' and n_samples > n_features:
33 scikits/learn/utils/fixes.py
@@ -1,6 +1,4 @@
-"""
-Fixes for older version of numpy and scipy.
-"""
+"""Compatibility fixes for older version of numpy and scipy"""
# Authors: Emmanuelle Gouillart <emmanuelle.gouillart@normalesup.org>
# Gael Varoquaux <gael.varoquaux@normalesup.org>
# Fabian Pedregosa <fpedregosa@acm.org>
@@ -8,8 +6,9 @@
import numpy as np
+
def _unique(ar, return_index=False, return_inverse=False):
- """ A replacement for the np.unique that appeared in numpy 1.4.
+ """A replacement for the np.unique that appeared in numpy 1.4.
While np.unique existed long before, keyword return_inverse was
only added in 1.4.
@@ -57,10 +56,8 @@ def _unique(ar, return_index=False, return_inverse=False):
unique = np.unique
-def _copysign (x1, x2):
- """
- (slow) Replacement for np.copysign, which was introduced in numpy 1.4
- """
+def _copysign(x1, x2):
+ """Slow replacement for np.copysign, which was introduced in numpy 1.4"""
return np.abs(x1) * np.sign(x2)
if not hasattr(np, 'copysign'):
@@ -70,20 +67,19 @@ def _copysign (x1, x2):
def _in1d(ar1, ar2, assume_unique=False):
- """ Replacement for in1d that is provided for numpy >= 1.4
- """
+ """Replacement for in1d that is provided for numpy >= 1.4"""
if not assume_unique:
ar1, rev_idx = unique(ar1, return_inverse=True)
ar2 = np.unique(ar2)
- ar = np.concatenate( (ar1, ar2) )
+ ar = np.concatenate((ar1, ar2))
# We need this to be a stable sort, so always use 'mergesort'
# here. The values from the first array should always come before
# the values from the second array.
order = ar.argsort(kind='mergesort')
sar = ar[order]
equal_adj = (sar[1:] == sar[:-1])
- flag = np.concatenate( (equal_adj, [False] ) )
- indx = order.argsort(kind='mergesort')[:len( ar1 )]
+ flag = np.concatenate((equal_adj, [False]))
+ indx = order.argsort(kind='mergesort')[:len(ar1)]
if assume_unique:
return flag[indx]
@@ -97,7 +93,8 @@ def _in1d(ar1, ar2, assume_unique=False):
def qr_economic(A, **kwargs):
- """
+ """Compat function for the QR-decomposition in economic mode
+
Scipy 0.9 changed the keyword econ=True to mode='economic'
"""
import scipy.linalg
@@ -109,7 +106,8 @@ def qr_economic(A, **kwargs):
def arpack_eigsh(A, **kwargs):
- """
+ """Compat function for sparse symmetric eigen vectors decomposition
+
Scipy 0.9 renamed eigen_symmetric to eigsh in
scipy.sparse.linalg.eigen.arpack
"""
@@ -118,8 +116,3 @@ def arpack_eigsh(A, **kwargs):
return arpack.eigsh(A, **kwargs)
else:
return arpack.eigen_symmetric(A, **kwargs)
-
-
-
-
-
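
Both compat wrappers exist because scipy 0.9 renamed these entry points; callers can stay version-agnostic by importing from `fixes`. A hedged usage sketch (matrix contents illustrative)::

    import numpy as np
    from scipy import sparse
    from scikits.learn.utils.fixes import qr_economic, arpack_eigsh

    A = np.random.RandomState(0).rand(6, 3)
    Q, R = qr_economic(A)            # economy-size QR on old or new scipy
    assert Q.shape == (6, 3) and R.shape == (3, 3)

    # Two largest eigenpairs of a sparse symmetric matrix, whether scipy
    # exposes eigsh (>= 0.9) or eigen_symmetric (< 0.9).
    M = sparse.spdiags(np.arange(1., 11.), 0, 10, 10).tocsr()
    lambdas, vecs = arpack_eigsh(M, k=2, which='LA')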
21 scikits/learn/utils/tests/test_utils.py
@@ -0,0 +1,21 @@
+import numpy as np
+
+from scikits.learn.utils import make_rng
+from nose.tools import assert_raises
+
+
+def test_make_rng():
+ """Check the make_rng utility function behavior"""
+ assert make_rng(None) is np.random
+ assert make_rng(np.random) is np.random
+
+ rng_42 = np.random.RandomState(42)
+ assert make_rng(42).randint(100) == rng_42.randint(100)
+
+ rng_42 = np.random.RandomState(42)
+ assert make_rng(rng_42) is rng_42
+
+ rng_42 = np.random.RandomState(42)
+ assert make_rng(43).randint(100) != rng_42.randint(100)
+
+ assert_raises(ValueError, make_rng, "some invalid seed")