In [1]:
from sklearn.datasets import load_iris

In [7]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.cluster import AgglomerativeClustering
from sklearn import metrics

In [21]:
def evaluation(X, y_true, y_label, n_clusters):
    print("Estimated number of clusters: %d" % n_clusters)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(y_true, y_label))
    print("Completeness: %0.3f" % metrics.completeness_score(y_true, y_label))
    print("V-measure: %0.3f" % metrics.v_measure_score(y_true, y_label))
    print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(y_true, y_label))
    print(
        "Adjusted Mutual Information: %0.3f"
        % metrics.adjusted_mutual_info_score(y_true, y_label))
    print(    "Silhouette Coefficient: %0.3f \n"
        % metrics.silhouette_score(X, y_label, metric="sqeuclidean"))


In [3]:
# Create a folder to fetch the dataset
iris = load_iris()

In [19]:
X = iris.data
y = iris.target

In [22]:
print("KMeans model: \n")

kmeans2 = KMeans(n_clusters=2).fit(X)
y_labels = kmeans2.labels_
evaluation(X, y, y_labels, 2)

kmeans3 = KMeans(n_clusters=3).fit(X)
y_labels = kmeans3.labels_
evaluation(X, y, y_labels, 3)

kmeans4 = KMeans(n_clusters=4).fit(X)
y_labels = kmeans4.labels_
evaluation(X, y, y_labels, 4)

kmeans5 = KMeans(n_clusters=5).fit(X)
y_labels = kmeans5.labels_
evaluation(X, y, y_labels, 5)

KMeans model: 

Estimated number of clusters: 2
Homogeneity: 0.522
Completeness: 0.884
V-measure: 0.657
Adjusted Rand Index: 0.540
Adjusted Mutual Information: 0.654
Silhouette Coefficient: 0.850 

Estimated number of clusters: 3
Homogeneity: 0.751
Completeness: 0.765
V-measure: 0.758
Adjusted Rand Index: 0.730
Adjusted Mutual Information: 0.755
Silhouette Coefficient: 0.736 

Estimated number of clusters: 4
Homogeneity: 0.808
Completeness: 0.652
V-measure: 0.722
Adjusted Rand Index: 0.650
Adjusted Mutual Information: 0.717
Silhouette Coefficient: 0.671 

Estimated number of clusters: 5
Homogeneity: 0.824
Completeness: 0.599
V-measure: 0.694
Adjusted Rand Index: 0.608
Adjusted Mutual Information: 0.687
Silhouette Coefficient: 0.670 



In [26]:
print("DBSCAN model: \n")

dbscan = DBSCAN().fit(X)
y_labels = dbscan.labels_
centers = np.unique(y_labels)
evaluation(X, y, y_labels, len(centers))

DBSCAN model: 

Estimated number of clusters: 3
Homogeneity: 0.560
Completeness: 0.657
V-measure: 0.604
Adjusted Rand Index: 0.521
Adjusted Mutual Information: 0.599
Silhouette Coefficient: 0.637 



In [27]:
print("Hierarchical model (AgglomerativeClustering): \n")

agg2 = AgglomerativeClustering(n_clusters=2).fit(X)
y_labels = agg2.labels_
evaluation(X, y, y_labels, 2)

agg3 = AgglomerativeClustering(n_clusters=3).fit(X)
y_labels = agg3.labels_
evaluation(X, y, y_labels, 3)

agg4 = AgglomerativeClustering(n_clusters=4).fit(X)
y_labels = agg4.labels_
evaluation(X, y, y_labels, 4)

agg5 = AgglomerativeClustering(n_clusters=5).fit(X)
y_labels = agg5.labels_
evaluation(X, y, y_labels, 5)

Hierarchical model (AgglomerativeClustering): 

Estimated number of clusters: 2
Homogeneity: 0.579
Completeness: 1.000
V-measure: 0.734
Adjusted Rand Index: 0.568
Adjusted Mutual Information: 0.732
Silhouette Coefficient: 0.847 

Estimated number of clusters: 3
Homogeneity: 0.761
Completeness: 0.780
V-measure: 0.770
Adjusted Rand Index: 0.731
Adjusted Mutual Information: 0.767
Silhouette Coefficient: 0.735 

Estimated number of clusters: 4
Homogeneity: 0.795
Completeness: 0.642
V-measure: 0.710
Adjusted Rand Index: 0.660
Adjusted Mutual Information: 0.705
Silhouette Coefficient: 0.653 

Estimated number of clusters: 5
Homogeneity: 0.797
Completeness: 0.579
V-measure: 0.671
Adjusted Rand Index: 0.595
Adjusted Mutual Information: 0.664
Silhouette Coefficient: 0.657 

