In [4]:
import numpy as np
import matplotlib.pyplot as plt

In [5]:
from sklearn.datasets import make_blobs
# Synthetic 2-feature dataset: 250 samples requested around 5 centers, with a
# fixed random_state so every run produces the same points.
X, y = make_blobs(
    n_samples=250,
    n_features=2,
    centers=5,
    random_state=0,
)

In [None]:
# Plot the two feature columns against each other, coloured by the labels
# that make_blobs returned alongside the samples.
plt.scatter(x=X[:, 0], y=X[:, 1], c=y)
plt.show()

### KMeans

In [None]:
from sklearn.cluster import KMeans
# n_init is passed explicitly: the library default changed from 10 to 'auto'
# in scikit-learn 1.4 (with a FutureWarning in 1.2-1.3), so relying on the
# default makes the fitted model depend on the installed version.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=0)
kmeans.fit(X)

In [None]:
# Cluster index that the fitted k-means model assigned to each row of X.
kmeans.labels_

In [None]:
# Samples coloured by their assigned cluster, with the fitted cluster centers
# drawn on top as red crosses.
centers = kmeans.cluster_centers_
plt.scatter(x=X[:, 0], y=X[:, 1], c=kmeans.labels_)
plt.scatter(x=centers[:, 0], y=centers[:, 1], c='red', marker='x')
plt.show()

In [None]:
# Compare the k-means partition for several cluster counts, one figure each.
# Loop-local names (kmeans_k / centers_k) are used so that the notebook-level
# `kmeans` and `centers` from the 5-cluster fit are not silently replaced by
# the last (7-cluster) model when this cell runs.
for n_clusters in [2, 3, 4, 5, 6, 7]:
    # n_init is pinned because its default differs between scikit-learn
    # releases (10 before 1.4, 'auto' afterwards).
    kmeans_k = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    kmeans_k.fit(X)
    centers_k = kmeans_k.cluster_centers_
    plt.scatter(X[:, 0], X[:, 1], c=kmeans_k.labels_)
    plt.scatter(centers_k[:, 0], centers_k[:, 1], marker='x', c='red')
    plt.title(f'Number of clusters: {n_clusters}')
    plt.show()

### 层次聚类

In [None]:
from sklearn.cluster import AgglomerativeClustering
# Agglomerative clustering with Ward linkage, asked for 5 clusters.
agg = AgglomerativeClustering(linkage='ward', n_clusters=5)
agg.fit(X)

In [None]:
# Samples coloured by the label the agglomerative model gave each one.
plt.scatter(x=X[:, 0], y=X[:, 1], c=agg.labels_)
plt.show()

In [None]:
from sklearn.cluster import AgglomerativeClustering
# Refit without a fixed cluster count: n_clusters is None and a
# distance_threshold of 10 is supplied instead. The result is bound to `agg`
# again, replacing the previous model.
agg = AgglomerativeClustering(distance_threshold=10, n_clusters=None).fit(X)
plt.scatter(x=X[:, 0], y=X[:, 1], c=agg.labels_)
plt.show()

In [20]:
from scipy.cluster.hierarchy import dendrogram, linkage

def show_denrogram(model, **kwargs):
    """Plot the merge tree of a fitted agglomerative clustering model.

    Builds a linkage matrix with one row per merge (the two merged children,
    the merge distance, and the number of original samples under the new
    node) from the model's attributes, passes it to
    ``scipy.cluster.hierarchy.dendrogram`` and shows the figure.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``children_``, ``distances_`` and
        ``labels_``.
    **kwargs
        Forwarded unchanged to ``dendrogram`` (for example ``truncate_mode``
        or ``p``). With no extra arguments the call is the same as before.

    Raises
    ------
    AttributeError
        If ``model`` has no ``distances_`` attribute.
    """
    # Fail early with an actionable message instead of an opaque attribute
    # error halfway through building the matrix.
    if not hasattr(model, "distances_"):
        raise AttributeError(
            "model has no 'distances_' attribute; fit AgglomerativeClustering "
            "with distance_threshold set or compute_distances=True before "
            "drawing a dendrogram"
        )

    n_samples = len(model.labels_)
    counts = np.zeros(model.children_.shape[0])

    for i, merge in enumerate(model.children_):
        # A child index below n_samples is an original sample (counts as 1);
        # a larger index refers to the node created by an earlier merge, whose
        # sample count is already stored at position (index - n_samples).
        counts[i] = sum(
            1 if child_idx < n_samples else counts[child_idx - n_samples]
            for child_idx in merge
        )

    linkage_matrix = np.column_stack(
        [model.children_, model.distances_, counts]).astype(float)

    dendrogram(linkage_matrix, **kwargs)
    plt.show()
    

In [None]:
# Draw the dendrogram for the agglomerative model currently bound to `agg`.
show_denrogram(model=agg)