# Beispiele für Clustering

In [None]:
import numpy as np

from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.cluster import MeanShift
from sklearn.cluster import OPTICS
from sklearn.cluster import SpectralClustering
from sklearn.datasets import make_circles

In [None]:
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

# Shared seed and sample count for the blob data set used by the cells below.
random_state = 170
n_samples = 1000

# Three blob centres drawn from the box [-7, 7]; y holds the generating labels.
X, y = make_blobs(
    n_samples=n_samples,
    centers=3,
    center_box=(-7.0, 7.0),
    random_state=random_state,
)


def plots(X, y, Title):
    """Draw up to four labelings of the same 2-D points in a 2x2 grid.

    Parameters
    ----------
    X : array indexable as ``X[:, 0]`` and ``X[:, 1]``
        Points to plot; only the first two columns are used.
    y : sequence of label arrays
        One colouring per subplot, in reading order.  Entries beyond the
        fourth are ignored; with fewer than four the remaining axes stay
        empty.
    Title : str
        Prefix of every subplot title; the 1-based panel number is appended
        after a space.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that holds the four axes, so that callers binding the
        result get something usable (e.g. for ``savefig``).
    """
    fig, axs = plt.subplots(2, 2)

    # zip stops at the shorter of (4 axes, len(y)), so a short label list no
    # longer raises IndexError.
    for number, (ax, labels) in enumerate(zip(axs.ravel(), y), start=1):
        ax.scatter(X[:, 0], X[:, 1], c=labels)
        ax.set_title(Title + ' ' + str(number))
    fig.tight_layout()
    return fig

In [None]:
# K-means on the blob data for 2, 3 and 4 clusters, next to the original
# labels.  plots() appends the panel number to the title; for panels 2-4 that
# number equals n_clusters.
titel = 'KMeans n_cluster ='
liste = [y] + [
    KMeans(n_clusters=k, random_state=random_state).fit_predict(X)
    for k in (2, 3, 4)
]

kmean = plots(X, liste, titel)

In [None]:
# DBSCAN on the blob data for min_samples = 2, 3, 4, next to the original
# labels.  For panels 2-4 the panel number in the title equals min_samples.
titel = 'DBSCAN min_samples= '
liste = [y] + [
    DBSCAN(min_samples=m).fit(X).labels_
    for m in (2, 3, 4)
]

dbsca = plots(X, liste, titel)

In [None]:
# Spectral clustering of the blob data for 2, 3 and 4 clusters, next to the
# original labels.  The estimator gets the same seed as the KMeans runs so
# that re-running the cell does not reshuffle the result.
titel = 'SpectClust n_cluster ='
liste = [y] + [
    SpectralClustering(n_clusters=k, random_state=random_state).fit(X).labels_
    for k in (2, 3, 4)
]

spect = plots(X, liste, titel)

In [None]:
# OPTICS on the blob data for three min_samples settings, next to the
# original labels.
# NOTE: plots() appends the panel number (1-4) to the title, so the number
# shown is the panel index, not the min_samples value (21, 41, 61).
titel = 'OPTICS min_samples ='
liste = [y] + [
    OPTICS(min_samples=m, xi=.05, min_cluster_size=.05).fit(X).labels_
    for m in (21, 41, 61)
]

spect = plots(X, liste, titel)

In [None]:
# MeanShift on the blob data for three bandwidths, next to the original
# labels.
# NOTE: plots() appends the panel number (1-4) to the title, so the number
# shown is the panel index, not the bandwidth (0.1, 1, 10).
titel = 'MeanShift bandwith'
liste = [y] + [
    MeanShift(bandwidth=b).fit(X).labels_
    for b in (0.1, 1, 10)
]

spect = plots(X, liste, titel)

In [None]:
# Agglomerative clustering of the blob data for 2, 3 and 4 clusters, next to
# the original labels.  For panels 2-4 the panel number in the title equals
# n_clusters.
titel = 'AgglClust n_clusters ='
liste = [y] + [
    AgglomerativeClustering(n_clusters=k).fit(X).labels_
    for k in (2, 3, 4)
]

spect = plots(X, liste, titel)

In [None]:
# Replace the blob data with a two-circles data set.  X and y are rebound
# here, so every following cell works on the circles.  The generator is
# seeded like make_blobs above so that the data is the same on every run.
X, y = make_circles(n_samples=1500, factor=.3, noise=.05,
                    random_state=random_state)

# Spectral clustering into two groups on the circles data.
clustering = SpectralClustering(n_clusters=2, assign_labels="discretize",
                                random_state=0).fit(X)

In [None]:
# Compare several clusterings of the current X in one column of five panels:
# the original labels first, then one panel per algorithm.
random_state = 170
n_samples = 1000


fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(5, 1, figsize=(3, 8))

# Panel 1: labels that came with the data.
ax1.scatter(X[:, 0], X[:, 1], s=20, c=y)
ax1.set_title('Original')
ax1.grid()

# Panel 2: k-means with two clusters.
ax2.scatter(
    X[:, 0],
    X[:, 1],
    c=KMeans(n_clusters=2, random_state=random_state).fit_predict(X),
)
ax2.set_title('KMeans')
ax2.grid()

# Panel 3: spectral clustering with two clusters.
clust = SpectralClustering(n_clusters=2).fit(X)
ax3.scatter(X[:, 0], X[:, 1], c=clust.labels_)
ax3.set_title('SpectClust')
ax3.grid()

# Panel 4: OPTICS; the estimator is fitted inside the scatter call.
clust = OPTICS(min_samples=50, xi=.05, min_cluster_size=.05)
ax4.scatter(X[:, 0], X[:, 1], c=clust.fit_predict(X))
ax4.set_title('Optics')
ax4.grid()

# Panel 5: mean shift with a small bandwidth.
ax5.scatter(X[:, 0], X[:, 1], c=MeanShift(bandwidth=0.1).fit(X).labels_)
ax5.set_title('MeanShift')
ax5.grid()


fig.tight_layout()


# Elbow method: fit k-means for k = 1..14 and record the inertia of each fit.
# The estimator is seeded like the other KMeans calls in this notebook so the
# curve is the same on every run.
Sum_of_squared_distances = []
K = range(1, 15)
for k in K:
    km = KMeans(n_clusters=k, random_state=random_state).fit(X)
    Sum_of_squared_distances.append(km.inertia_)

# Plot inertia against k; the bend of the curve suggests a suitable k.
fig, ax = plt.subplots()
ax.plot(K, Sum_of_squared_distances, 'bx-')
ax.set_title('Optimales k (elbow method)')
ax.grid()
fig.show()

In [None]:
from yellowbrick.cluster import KElbowVisualizer

# Same elbow search delegated to yellowbrick: the visualizer takes the KMeans
# estimator, the k bounds (2, 20) and the Calinski-Harabasz score as metric.
model = KMeans()
elbow_options = dict(k=(2, 20), metric='calinski_harabasz', timings=True)
visualizer = KElbowVisualizer(model, **elbow_options)

# Fit on the current X, then render the finished figure.
visualizer.fit(X)
visualizer.show()