Skip to content

Commit

Permalink
More work on Power Iteration Clustering (PIC)
Browse files: browse the repository at this point in the history
  • Loading branch information
ogrisel committed Apr 23, 2011
1 parent 20a71cb commit 9a84598
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
12 changes: 12 additions & 0 deletions examples/cluster/plot_clustering_toy_2D_circles.py
Expand Up @@ -13,6 +13,7 @@
import pylab as pl

from scikits.learn.cluster import spectral_clustering
from scikits.learn.cluster import power_iteration_clustering
from scikits.learn.metrics.pairwise import euclidean_distances
from scikits.learn.neighbors import kneighbors_graph

Expand Down Expand Up @@ -64,4 +65,15 @@
X_l = X[labels == l, :]
pl.scatter(X_l[:, 0], X_l[:, 1], color=c)
pl.title("Data clustered by spectral clustering")


labels, inertia, vectors = power_iteration_clustering(
affinity, k=3, n_vectors=2, verbose=True)
print "Power Iteration Clustering inertia: %f" % inertia

pl.figure()
for l, c in zip(np.unique(labels), 'rgbcmyk'):
X_l = X[labels == l, :]
pl.scatter(X_l[:, 0], X_l[:, 1], color=c)
pl.title("Data clustered by Power Iteration Clustering")
pl.show()
25 changes: 16 additions & 9 deletions scikits/learn/cluster/power_iteration.py
Expand Up @@ -8,7 +8,7 @@

import numpy as np

from .k_means_ import k_means_
from .k_means_ import k_means
from ..utils.extmath import safe_sparse_dot


Expand Down Expand Up @@ -60,8 +60,10 @@ def power_iteration_clustering(affinity, k=8, n_vectors=1, tol=1e-5,
# row normalize the affinity matrix
sums = affinity.sum(axis=1)
volume = sums.sum()

scales = sums.copy()
nnzeros = np.where(scales > 0)
scales[nnzeros] = 1 / sums[nnzeros]
scales[nnzeros] = 1 / scales[nnzeros]

if hasattr(affinity, 'tocsr'):
# inplace row normalization for sparse matrices
Expand All @@ -84,7 +86,7 @@ def power_iteration_clustering(affinity, k=8, n_vectors=1, tol=1e-5,
normalized.data, scales)
else:
# inplace row normalization for ndarray
normalized = affinity / scales.reshape((s.shape[0], -1))
normalized = affinity / scales[:, np.newaxis]

n_samples = affinity.shape[0]

Expand All @@ -103,17 +105,22 @@ def power_iteration_clustering(affinity, k=8, n_vectors=1, tol=1e-5,
previous_vectors[:] = vectors
previous_delta = delta

vectors[:] = safe_sparse_dot(normalized, vectors.T)
vectors /= np.abs(vectors).sum()
vectors[:] = safe_sparse_dot(normalized, vectors.T).T
vectors /= np.abs(vectors).sum(axis=1)[:, np.newaxis]

delta = np.abs(previous_vectors - vectors).mean()

if verbose and i % 50 == 0:
print "Iteration %04d/%04d: delta=%f" % (i + 1, max_iter, delta)
if verbose and i % 10 == 0:
print "Power Iteration %04d/%04d: delta=%f" % (
i + 1, max_iter, delta)

if np.abs(previous_delta - delta) < tol:
break
if verbose:
print "Converged at iteration: %04d/%04d with delta=%f" % (
i + 1, max_iter, delta)

_, labels, inertia = k_means(vectors, k, rng=rng, verbose=verbose)
return labels, inertia
# TODO: forward the caller's rng to k_means again (the rng=rng argument is no longer passed in the call below)
_, labels, inertia = k_means(vectors.T, k, verbose=verbose)
return labels, inertia, vectors

0 comments on commit 9a84598

Please sign in to comment.