Permalink
Browse files

Merge branch 'master' of github.com:scikit-learn/scikit-learn

  • Loading branch information...
2 parents b56dcde + a91efb2 commit c1600741bab3c0dd24af33c9cbe7aecbed5c8a6f @duchesnay duchesnay committed Jan 21, 2011
View
@@ -53,7 +53,7 @@
# General information about the project.
project = u'scikits.learn'
-copyright = u'2010, scikits.learn developers (BSD Lincense)'
+copyright = u'2010, scikits.learn developers (BSD License)'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
View
@@ -118,6 +118,13 @@ by typing the following command::
sudo port install py26-scikits-learn
+NetBSD
+------
+
+scikits.learn is available via `pkgsrc-wip <http://pkgsrc-wip.sourceforge.net/>`_:
+
+ http://pkgsrc.se/wip/py-scikits_learn
+
.. _install_bleeding_edge:
Bleeding Edge
@@ -56,7 +56,7 @@ data as only one sample is removed from the learning set.
>>> loo = LeaveOneOut(len(Y))
>>> print loo
scikits.learn.cross_val.LeaveOneOut(n=4)
- >>> for train, test in loo: print train,test
+ >>> for train, test in loo: print train, test
[False True True True] [ True False False False]
[ True False True True] [False True False False]
[ True True False True] [False False True False]
@@ -69,10 +69,22 @@ Thus, one can create the training/test sets using:
>>> X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
+If X or Y are `scipy.sparse` matrices, train and test need to be
+integer indices. They can be obtained by setting the parameter indices to True
+when creating the cross-validation procedure.
-
-
-
+ >>> import numpy as np
+ >>> from scikits.learn.cross_val import LeaveOneOut
+ >>> X = np.array([[0., 0.], [1., 1.], [-1., -1.], [2., 2.]])
+ >>> Y = np.array([0, 1, 0, 1])
+ >>> loo = LeaveOneOut(len(Y), indices=True)
+ >>> print loo
+ scikits.learn.cross_val.LeaveOneOut(n=4)
+ >>> for train, test in loo: print train, test
+ [1 2 3] [0]
+ [0 2 3] [1]
+ [0 1 3] [2]
+ [0 1 2] [3]
Leave-P-Out - LPO
@@ -36,7 +36,10 @@ def test(self, label='fast', verbose=1, extra_argv=['--exe'],
__all__ = ['cross_val', 'ball_tree', 'cluster', 'covariance', 'datasets',
- 'gmm', 'linear_model', 'logistic', 'lda', 'metrics', 'svm',
- 'features', 'clone', 'metrics', 'test', 'gaussian_process']
+ 'fastica', 'feature_extraction', 'feature_selection',
+ 'gaussian_process', 'grid_search', 'hmm', 'lda', 'linear_model',
+ 'metrics', 'mixture', 'naive_bayes', 'neighbors',
+ 'pca', 'pipeline', 'preprocessing', 'qda', 'svm', 'test',
+ 'clone']
__version__ = '0.7.git'
@@ -51,7 +51,9 @@ def affinity_propagation(S, p=None, convit=30, max_iter=200, damping=0.5,
"""
if copy:
# Copy the affinity matrix to avoid modifying it inplace
- S = S.copy()
+ S = np.array(S, copy=True, dtype=np.float)
+ else:
+ S = np.asanyarray(S, dtype=np.float)
n_points = S.shape[0]
@@ -72,9 +74,8 @@ def affinity_propagation(S, p=None, convit=30, max_iter=200, damping=0.5,
R = np.zeros((n_points, n_points)) # Initialize messages
# Remove degeneracies
- S += ( np.finfo(np.double).eps*S
- + np.finfo(np.double).tiny*100
- )*random_state.randn(n_points, n_points)
+ S += (np.finfo(np.double).eps * S + np.finfo(np.double).tiny * 100) * \
+ random_state.randn(n_points, n_points)
# Execute parallel affinity propagation updates
e = np.zeros((n_points, convit))
@@ -118,7 +119,7 @@ def affinity_propagation(S, p=None, convit=30, max_iter=200, damping=0.5,
K = np.sum(E, axis=0)
if it >= convit:
- se = np.sum(e, axis=1);
+ se = np.sum(e, axis=1)
unconverged = np.sum((se == convit) + (se == 0)) != n_points
if (not unconverged and (K>0)) or (it==max_iter):
if verbose:
@@ -137,7 +138,7 @@ def affinity_propagation(S, p=None, convit=30, max_iter=200, damping=0.5,
# Refine the final set of exemplars and clusters and return results
for k in range(K):
ii = np.where(c==k)[0]
- j = np.argmax(np.sum(S[ii, ii], axis=0))
+ j = np.argmax(np.sum(S[ii[:,np.newaxis], ii], axis=0))
I[k] = ii[j]
c = np.argmax(S[:, I], axis=1)
@@ -153,7 +154,8 @@ def affinity_propagation(S, p=None, convit=30, max_iter=200, damping=0.5,
return cluster_centers_indices, labels
-################################################################################
+###############################################################################
+
class AffinityPropagation(BaseEstimator):
"""Perform Affinity Propagation Clustering of data
@@ -225,8 +227,8 @@ def fit(self, S, p=None, **params):
"""
self._set_params(**params)
- self.cluster_centers_indices_, self.labels_ = affinity_propagation(S, p,
- max_iter=self.max_iter, convit=self.convit, damping=self.damping,
+ self.cluster_centers_indices_, self.labels_ = affinity_propagation(S,
+ p, max_iter=self.max_iter, convit=self.convit,
+ damping=self.damping,
copy=self.copy)
return self
-
@@ -175,9 +175,9 @@ def fit(self, X, **params):
-----------
X: array-like or sparse matrix, shape: (p, p)
The adjacency matrix of the graph to embed.
- X is an adjacency matrix of a simimlarity graph: its
+ X is an adjacency matrix of a similarity graph: its
entries must be positive or zero. Zero means that
- elements have nothing in comon, wereas high values mean
+ elements have nothing in common, whereas high values mean
that elements are strongly similar.
Notes
@@ -4,12 +4,13 @@
"""
import numpy as np
-from numpy.testing import assert_equal
+from numpy.testing import assert_equal, assert_array_equal
from ..affinity_propagation_ import AffinityPropagation, \
affinity_propagation
from .common import generate_clustered_data
+
n_clusters = 3
X = generate_clustered_data(n_clusters=n_clusters)
@@ -21,7 +22,7 @@ def test_affinity_propagation():
"""
# Compute similarities
X_norms = np.sum(X*X, axis=1)
- S = - X_norms[:,np.newaxis] - X_norms[np.newaxis,:] + 2 * np.dot(X, X.T)
+ S = - X_norms[:, np.newaxis] - X_norms[np.newaxis, :] + 2 * np.dot(X, X.T)
p = 10*np.median(S)
# Compute Affinity Propagation
@@ -37,5 +38,8 @@ def test_affinity_propagation():
n_clusters_ = len(cluster_centers_indices)
assert_equal(np.unique(labels).size, n_clusters_)
-
assert_equal(n_clusters, n_clusters_)
+
+ # Test also with no copy
+ _, labels_no_copy = affinity_propagation(S, p, copy=False)
+ assert_array_equal(labels, labels_no_copy)
Oops, something went wrong.

0 comments on commit c160074

Please sign in to comment.