In [1]:
import numpy as np

Contingency Matrix

In [2]:
from sklearn.metrics.cluster import contingency_matrix

true_labels = [0, 0, 0, 1, 1, 1]
cluster_labels = [0, 0, 1, 1, 2, 2]

contingency_matrix(true_labels, cluster_labels)

array([[2, 1, 0],
       [0, 1, 2]], dtype=int64)

Rand Index

In [3]:
from sklearn.metrics import rand_score

true_labels = [0, 0, 0, 1, 1, 1]
cluster_labels = [0, 0, 1, 1, 2, 2]

print(f'RI = {rand_score(true_labels, cluster_labels):.3f}')

RI = 0.667


In [4]:
from sklearn.metrics import adjusted_rand_score

print(f'ARI = {adjusted_rand_score(true_labels, cluster_labels):.3f}')

ARI = 0.242


In [5]:
true_labels = [0, 0, 0, 1, 1, 1]
cluster_labels = [0, 1, 2, 0, 1, 2]

print(f'RI = {rand_score(true_labels, cluster_labels):.3f}')
print(f'ARI = {adjusted_rand_score(true_labels, cluster_labels):.3f}')

RI = 0.400
ARI = -0.364


In [6]:
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

X, y = make_blobs(n_samples=500, centers=3, cluster_std=0.6, random_state=0)
X = StandardScaler().fit_transform(X)

In [7]:
from sklearn.cluster import KMeans

labels_k2 = KMeans(n_clusters=2, random_state=0).fit_predict(X)
labels_k3 = KMeans(n_clusters=3, random_state=0).fit_predict(X)
labels_k4 = KMeans(n_clusters=4, random_state=0).fit_predict(X)

In [8]:
print(f'ARI(2 clusters): {adjusted_rand_score(y, labels_k2):.3f}')
print(f'ARI(3 clusters): {adjusted_rand_score(y, labels_k3):.3f}')
print(f'ARI(4 clusters): {adjusted_rand_score(y, labels_k4):.3f}')

ARI(2 clusters): 0.565
ARI(3 clusters): 1.000
ARI(4 clusters): 0.864


Homogeneity, Completeness and V-measure

In [9]:
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score

true_labels = [0, 0, 0, 1, 1, 1]
cluster_labels = [0, 0, 1, 1, 2, 2]

print(f'Homogeneity = {homogeneity_score(true_labels, cluster_labels):.3f}')
print(f'Completeness = {completeness_score(true_labels, cluster_labels):.3f}')
print(f'V-measure = {v_measure_score(true_labels, cluster_labels):.3f}')

Homogeneity = 0.667
Completeness = 0.421
V-measure = 0.516


In [10]:
true_labels = [0, 0, 0, 1, 1, 1]
cluster_labels = [0, 1, 2, 0, 1, 2]

print(f'Homogeneity = {homogeneity_score(true_labels, cluster_labels):.3f}')
print(f'Completeness = {completeness_score(true_labels, cluster_labels):.3f}')
print(f'V-measure = {v_measure_score(true_labels, cluster_labels):.3f}')

Homogeneity = 0.000
Completeness = 0.000
V-measure = 0.000


In [11]:
print(f'V-measure(2 clusters): {v_measure_score(y, labels_k2):.3f}')
print(f'V-measure(3 clusters): {v_measure_score(y, labels_k3):.3f}')
print(f'V-measure(4 clusters): {v_measure_score(y, labels_k4):.3f}')

V-measure(2 clusters): 0.711
V-measure(3 clusters): 1.000
V-measure(4 clusters): 0.895


Fowlkes-Mallows Index

In [12]:
from sklearn.metrics import fowlkes_mallows_score

true_labels = [0, 0, 0, 1, 1, 1]
cluster_labels = [0, 0, 1, 1, 2, 2]

print(f'FMI = {fowlkes_mallows_score(true_labels, cluster_labels):.3f}')

FMI = 0.471


In [13]:
true_labels = [0, 0, 0, 1, 1, 1]
cluster_labels = [0, 1, 2, 0, 1, 2]

print(f'FMI = {fowlkes_mallows_score(true_labels, cluster_labels):.3f}')

FMI = 0.000


In [14]:
print(f'FMI(2 clusters): {v_measure_score(y, labels_k2):.3f}')
print(f'FMI(3 clusters): {v_measure_score(y, labels_k3):.3f}')
print(f'FMI(4 clusters): {v_measure_score(y, labels_k4):.3f}')

FMI(2 clusters): 0.711
FMI(3 clusters): 1.000
FMI(4 clusters): 0.895
