# 楼 + 机器学习实战

# 挑战：常用聚类算法对比评估

---

**以下内容仅保留挑战代码部分，挑战全文请到原课程查看。**

---

**挑战：** 使用 sklearn 生成 3 组测试数据，并绘制出散点图（横排拼接子图）。

In [None]:
import numpy as np
from sklearn import datasets
from matplotlib import pyplot as plt
%matplotlib inline

### 代码开始 ### (≈ 5-7 行代码)
# Build three synthetic 2-D datasets of 100 samples each with a fixed seed.
# The second value returned by each generator is discarded.
moons, _ = datasets.make_moons(n_samples=100, noise=0.1, random_state=10)
circles, _ = datasets.make_circles(
    n_samples=100, noise=0.1, factor=0.3, random_state=10
)
blobs, _ = datasets.make_blobs(
    n_samples=100, n_features=2, centers=2, cluster_std=1.5, random_state=10
)

# One row of three panels: moons, circles, blobs (left to right).
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(16, 4))
ax_moons, ax_circles, ax_blobs = axes
ax_moons.scatter(moons[:, 0], moons[:, 1])
ax_circles.scatter(circles[:, 0], circles[:, 1])
ax_blobs.scatter(blobs[:, 0], blobs[:, 1])
### 代码结束 ###

---

**挑战：** 采用上述 8 个聚类方法分别在 moons, circles 和 blobs 上进行测试，并将聚类结果绘制成图。

In [None]:
from sklearn import cluster

### 代码开始 ### (> 10 行代码)
# Display names, listed in the same order as the estimators below.
cluster_names = [
    'KMeans',
    'MiniBatchKMeans',
    'AffinityPropagation',
    'MeanShift',
    'SpectralClustering',
    'AgglomerativeClustering',
    'Birch',
    'DBSCAN',
]

# One estimator per name above.
cluster_estimators = [
    cluster.KMeans(n_clusters=2),
    cluster.MiniBatchKMeans(n_clusters=2),
    cluster.AffinityPropagation(),
    cluster.MeanShift(),
    cluster.SpectralClustering(n_clusters=2),
    cluster.AgglomerativeClustering(n_clusters=2),
    cluster.Birch(n_clusters=2),
    cluster.DBSCAN(eps=0.2, min_samples=5),
]

# The three datasets produced earlier, in plotting order (left to right).
point_sets = (moons, circles, blobs)

for algorithm_name, algorithm in zip(cluster_names, cluster_estimators):
    # The same estimator instance is fitted on each dataset in turn; all
    # label arrays are computed before the figure is created.
    label_sets = [algorithm.fit_predict(points) for points in point_sets]

    # One figure per algorithm; each panel is coloured by the predicted labels.
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(16, 4))
    for ax, points, labels in zip(axes, point_sets, label_sets):
        ax.scatter(points[:, 0], points[:, 1], c=labels)

    # The algorithm name is shown as the y-label of the leftmost panel.
    axes[0].set_ylabel('{}'.format(algorithm_name))
### 代码结束 ###

---

**挑战：** 使用 `create_data(n)` 生成不同规模的测试数据，并计算不同算法的聚类时间，且绘制对应的折线图。

In [None]:
import time

### 代码开始 ### (> 10 行代码)
# Display names, listed in the same order as the estimators below.
cluster_names = ['KMeans', 'MiniBatchKMeans', 'AffinityPropagation',
                 'MeanShift', 'SpectralClustering', 'AgglomerativeClustering', 'Birch', 'DBSCAN']

# One estimator per name above; DBSCAN uses its default parameters here.
cluster_estimators = [
    cluster.KMeans(n_clusters=2),
    cluster.MiniBatchKMeans(n_clusters=2),
    cluster.AffinityPropagation(),
    cluster.MeanShift(),
    cluster.SpectralClustering(n_clusters=2),
    cluster.AgglomerativeClustering(n_clusters=2),
    cluster.Birch(n_clusters=2),
    cluster.DBSCAN()
]

# Dataset sizes to benchmark: 100, 200, ..., 900.
sample_sizes = list(range(100, 1000, 100))

# cluster_t_list[i][j] is the fit time in seconds of algorithm i on
# sample_sizes[j].
cluster_t_list = []
for algorithm_name, algorithm in zip(cluster_names, cluster_estimators):
    t_list = []
    for num in sample_sizes:
        # create_data is expected to be defined in an earlier cell.
        data = create_data(num)
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring short intervals; only the fit call is timed.
        t0 = time.perf_counter()
        algorithm.fit(data)
        t1 = time.perf_counter()
        t_list.append(t1 - t0)
    # Report the mean fit time with four decimal places.
    print("{} fitted & average time:{:.4f}".format(algorithm_name, np.mean(t_list)))
    cluster_t_list.append(t_list)

# One line per algorithm, plotted against the actual sample sizes.
plt.figure(figsize=(12, 6))
for cluster_t, cluster_name in zip(cluster_t_list, cluster_names):
    plt.plot(sample_sizes, cluster_t, marker='.', label=cluster_name)
plt.xlabel('number of samples')
plt.ylabel('fit time (s)')
# Build the legend once, after every line has been added.
plt.legend()
### 代码结束 ###

---

<div style="color: #999;font-size: 12px;font-style: italic;">*本课程内容，由作者授权实验楼发布，未经允许，禁止转载、下载及非法传播。</div>