Permalink
Browse files

DOC correct / simplify dbscan example

  • Loading branch information...
1 parent df74a8a commit 307afc84414fa60733d9c210f91c192dcaf9e042 @amueller amueller committed Apr 4, 2013
Showing with 11 additions and 16 deletions.
  1. +10 −15 examples/cluster/plot_dbscan.py
  2. +1 −1 sklearn/cluster/dbscan_.py
@@ -10,25 +10,24 @@
print(__doc__)
import numpy as np
-from scipy.spatial import distance
+
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets.samples_generator import make_blobs
+from sklearn.preprocessing import StandardScaler
##############################################################################
# Generate sample data
centers = [[1, 1], [-1, -1], [1, -1]]
-X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4)
+X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,
+ random_state=0)
-##############################################################################
-# Compute similarities
-D = distance.squareform(distance.pdist(X))
-S = 1 - (D / np.max(D))
+X = StandardScaler().fit_transform(X)
##############################################################################
# Compute DBSCAN
-db = DBSCAN(eps=0.95, min_samples=10).fit(S)
+db = DBSCAN(eps=0.3, min_samples=10).fit(X)
core_samples = db.core_sample_indices_
labels = db.labels_
@@ -44,20 +43,16 @@
print("Adjusted Mutual Information: %0.3f"
% metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f"
- % metrics.silhouette_score(D, labels, metric='precomputed'))
+ % metrics.silhouette_score(X, labels))
##############################################################################
# Plot result
import pylab as pl
-from itertools import cycle
-
-pl.close('all')
-pl.figure(1)
-pl.clf()
# Black removed and is used for noise instead.
-colors = cycle('bgrcmybgrcmybgrcmybgrcmy')
-for k, col in zip(set(labels), colors):
+unique_labels = set(labels)
+colors = pl.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
+for k, col in zip(unique_labels, colors):
if k == -1:
# Black used for noise.
col = 'k'
@@ -165,7 +165,7 @@ def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
self.random_state = random_state
def fit(self, X):
- """Perform DBSCAN clustering from vector array or distance matrix.
+ """Perform DBSCAN clustering from features or distance matrix.
Parameters
----------

0 comments on commit 307afc8

Please sign in to comment.