In [1]:
import numpy as np
import sklearn
from sklearn.cluster import KMeans
from sklearn.cluster import MiniBatchKMeans
from sklearn import metrics
from sklearn.datasets import make_blobs

In [6]:
# Centres of the three synthetic 2-D blobs; the cluster count handed to the
# estimators below is derived from this list rather than hard-coded.
centers = [[1, 1], [-1, -1], [1, -1]]
clusters = len(centers)

# Generate the data set. X is the sample matrix; Y is the second value returned
# by make_blobs and is used further down as the reference labelling when the
# clusterings are scored. random_state is fixed so every run draws the same data.
X, Y = make_blobs(
    n_samples=3000,
    centers=centers,
    cluster_std=0.7,
    random_state=9,
)

In [3]:
# Build and fit the standard K-Means model.
# n_init is spelled out instead of relying on the library default: the default
# used to be 10, but scikit-learn 1.4 switched it to 'auto', which performs a
# single initialisation when init='k-means++'. Pinning it keeps the number of
# restarts the same regardless of the installed scikit-learn version.
k_means = KMeans(
    n_clusters=clusters,
    init='k-means++',
    n_init=10,
    random_state=9,  # fixed seed so centroid initialisation is repeatable
)
# Last expression of the cell: fit() returns the estimator, so its repr is shown.
k_means.fit(X)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=3, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=9, tol=0.0001, verbose=0)

# Build and fit the mini-batch variant of K-Means on the same data, using
# batches of 200 samples per update step.
# n_init is pinned to 3 (the historical MiniBatchKMeans default) because
# scikit-learn 1.4 changed the default to 'auto', which tries only a single
# initialisation when init='k-means++'.
mbkmeans = MiniBatchKMeans(
    n_clusters=clusters,
    init='k-means++',
    batch_size=200,
    n_init=3,
    random_state=9,  # fixed seed so initialisation and batch sampling repeat
)
# Last expression of the cell: fit() returns the estimator, so its repr is shown.
mbkmeans.fit(X)

In [7]:
# Assign every sample to its nearest learned centroid.
# predict() takes only the feature matrix. It has no target argument: a second
# positional value is interpreted as sample_weight on older scikit-learn
# releases (deprecated in 1.3) and raises TypeError on releases where that
# parameter has been removed. The true labels Y are only needed for scoring.
km_y = k_means.predict(X)
mbk_y = mbkmeans.predict(X)

In [10]:
# Evaluation: score each clustering against the reference labels Y with
# several external cluster-validity metrics.
score_funcs = [
    metrics.adjusted_rand_score,         # adjusted Rand index (ARI)
    metrics.v_measure_score,             # V-measure (homogeneity / completeness trade-off)
    metrics.adjusted_mutual_info_score,  # adjusted mutual information (AMI)
    metrics.mutual_info_score,           # mutual information
]

for score_func in score_funcs:
    # The metric's function name doubles as its label in the report.
    metric_name = score_func.__name__

    # Same metric, same reference labels, one score per algorithm.
    km_scores = score_func(Y, km_y)
    mbk_scores = score_func(Y, mbk_y)

    print("K-Means算法:%s评估函数计算结果值:%.5f" % (metric_name, km_scores))
    print("MiniBatchMeans算法:%s评估函数计算结果值:%.5f" %
          (metric_name, mbk_scores))
    # Divider line between metrics.
    print('='*20 + '分割线' + '='*20)

K-Means算法:adjusted_rand_score评估函数计算结果值:0.70736
MiniBatchMeans算法:adjusted_rand_score评估函数计算结果值:0.70368
K-Means算法:v_measure_score评估函数计算结果值:0.63888
MiniBatchMeans算法:v_measure_score评估函数计算结果值:0.63675
K-Means算法:adjusted_mutual_info_score评估函数计算结果值:0.63853
MiniBatchMeans算法:adjusted_mutual_info_score评估函数计算结果值:0.63629
K-Means算法:mutual_info_score评估函数计算结果值:0.70174
MiniBatchMeans算法:mutual_info_score评估函数计算结果值:0.69928


