Skip to content

Commit

Permalink
ENH: KMeans tolerance parameter renamed tol (as in coordinate descent…
Browse files Browse the repository at this point in the history
…) and made public
  • Loading branch information
ogrisel committed Jan 22, 2011
1 parent 09c53ae commit 544e531
Showing 1 changed file with 14 additions and 10 deletions.
24 changes: 14 additions & 10 deletions scikits/learn/cluster/k_means_.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def k_init(X, k, n_samples_max=500, rng=None):
# K-means estimation by EM (expectation maximisation) # K-means estimation by EM (expectation maximisation)


def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0, def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
delta=1e-4, rng=None, copy_x=True): tol=1e-4, rng=None, copy_x=True):
""" K-means clustering algorithm. """ K-means clustering algorithm.
Parameters Parameters
Expand Down Expand Up @@ -118,7 +118,7 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
If an ndarray is passed, it should be of shape (k, p) and gives If an ndarray is passed, it should be of shape (k, p) and gives
the initial centers. the initial centers.
delta: float, optional tol: float, optional
The relative increment in the results before declaring convergence. The relative increment in the results before declaring convergence.
verbose: boolean, optional verbose: boolean, optional
Expand Down Expand Up @@ -189,8 +189,8 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
labels, inertia = _e_step(X, centers) labels, inertia = _e_step(X, centers)
centers = _m_step(X, labels, k) centers = _m_step(X, labels, k)
if verbose: if verbose:
print 'Iteration %i, intertia %s' % (i, inertia) print 'Iteration %i, inertia %s' % (i, inertia)
if np.sum((centers_old - centers) ** 2) < delta * vdata: if np.sum((centers_old - centers) ** 2) < tol * vdata:
if verbose: if verbose:
print 'Converged to similar centers at iteration', i print 'Converged to similar centers at iteration', i
break break
Expand Down Expand Up @@ -319,6 +319,9 @@ class KMeans(BaseEstimator):
'matrix': interpret the k parameter as a k by M (or length k 'matrix': interpret the k parameter as a k by M (or length k
array for one-dimensional data) array of initial centroids. array for one-dimensional data) array of initial centroids.
tol: float, optional default: 1e-4
Relative tolerance on the squared change in cluster centers between two consecutive iterations, used to declare convergence
Methods Methods
------- -------
Expand Down Expand Up @@ -355,23 +358,24 @@ class KMeans(BaseEstimator):
it can be useful to restart it several times. it can be useful to restart it several times.
""" """


def __init__(self, k=8, init='random', n_init=10, max_iter=300, def __init__(self, k=8, init='random', n_init=10, max_iter=300, tol=1e-4,
verbose=0, rng=None, copy_x=True): verbose=0, rng=None, copy_x=True):
self.k = k self.k = k
self.init = init self.init = init
self.max_iter = max_iter self.max_iter = max_iter
self.tol = tol
self.n_init = n_init self.n_init = n_init
self.verbose = verbose self.verbose = verbose
self.rng = rng self.rng = rng
self.copy_x = copy_x self.copy_x = copy_x


def fit(self, X, **params): def fit(self, X, **params):
""" Compute k-means""" """Compute k-means"""
X = np.asanyarray(X) X = np.asanyarray(X)
self._set_params(**params) self._set_params(**params)
self.cluster_centers_, self.labels_, self.inertia_ = k_means(X, self.cluster_centers_, self.labels_, self.inertia_ = k_means(
k=self.k, init=self.init, n_init=self.n_init, X, k=self.k, init=self.init, n_init=self.n_init,
max_iter=self.max_iter, verbose=self.verbose, max_iter=self.max_iter, verbose=self.verbose,
rng=self.rng, copy_x=self.copy_x) tol=self.tol, rng=self.rng, copy_x=self.copy_x)
return self return self


0 comments on commit 544e531

Please sign in to comment.