In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import davies_bouldin_score
from sklearn.metrics import silhouette_score
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import calinski_harabasz_score
import pandas as pd
import numpy as np

In [2]:
# Load the iris dataset and expose it in the two forms the rest of the
# notebook uses: a feature DataFrame (dfiris) and the target labels.
from sklearn.datasets import load_iris

iris = load_iris()

# Target labels shipped with the dataset; kept separate from the features
# so clustering is fitted on the measurements only.
true_labels = iris.target

# Feature matrix wrapped in a DataFrame, one column per entry of
# iris.feature_names.
dfiris = pd.DataFrame(iris.data, columns=iris.feature_names)

# Bare last expression so the notebook renders the frame.
dfiris

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [4]:
# KMeans clustering
#
# Fit k-means with 3 clusters on the iris feature frame, then score the
# resulting partition with three internal metrics (computed from the data and
# the predicted labels only) and one external metric (computed against
# true_labels).

from sklearn.cluster import KMeans

# random_state is fixed so the centroid initialisation, and therefore the
# scores reported below, are reproducible across runs.
kmeans = KMeans(n_clusters = 3, random_state = 20, n_init = "auto").fit(dfiris)


kmeans_labels = kmeans.labels_
silhouette_avg1 = silhouette_score(dfiris, kmeans_labels)        # higher is better
dbi_score1 = davies_bouldin_score(dfiris, kmeans_labels)         # lower is better
# adjusted_rand_score returns the chance-corrected (adjusted) Rand index,
# not the plain Rand index that scikit-learn exposes separately as rand_score.
rand_score1 = adjusted_rand_score(true_labels, kmeans_labels)
ch_score1 = calinski_harabasz_score(dfiris, kmeans_labels)       # higher is better


print("KMeans Clustering")
print("Silhouette Score: ", silhouette_avg1)
print("Davies Bouldin Index: ", dbi_score1)
# The label names the metric actually computed above (ARI, not the raw RI).
print("Adjusted Rand Score: ", rand_score1)
print("Calinski and Harabasz Score: ", ch_score1)


KMeans Clustering
Silhouette Score:  0.551191604619592
Davies Bouldin Index:  0.6660385791628494
Rand Score:  0.7163421126838476
Calinski and Harabasz Score:  561.5937320156642


In [5]:
# MeanShift clustering
#
# Fit MeanShift with its default settings on the iris feature frame and score
# the resulting partition with the same four metrics used for the other
# models in this notebook.

from sklearn.cluster import MeanShift

# No cluster count is passed here: MeanShift decides how many clusters to
# form, and with no bandwidth given scikit-learn estimates one from the data.
# The partition is therefore not constrained to 3 clusters the way the
# n_clusters = 3 models in this notebook are.
meanshift = MeanShift().fit(dfiris)

meanshift_labels = meanshift.labels_
silhouette_avg2 = silhouette_score(dfiris, meanshift_labels)     # higher is better
dbi_score2 = davies_bouldin_score(dfiris, meanshift_labels)      # lower is better
# adjusted_rand_score returns the chance-corrected (adjusted) Rand index,
# not the plain Rand index that scikit-learn exposes separately as rand_score.
rand_score2 = adjusted_rand_score(true_labels, meanshift_labels)
ch_score2 = calinski_harabasz_score(dfiris, meanshift_labels)    # higher is better


print("MeanShift Clustering")
print("Silhouette Score: ", silhouette_avg2)
print("Davies Bouldin Index: ", dbi_score2)
# The label names the metric actually computed above (ARI, not the raw RI).
print("Adjusted Rand Score: ", rand_score2)
print("Calinski and Harabasz Score: ", ch_score2)

MeanShift Clustering
Silhouette Score:  0.6857881712617192
Davies Bouldin Index:  0.3885516762710371
Rand Score:  0.5583714437541352
Calinski and Harabasz Score:  509.7034266772457


In [6]:
# Agglomerative clustering
#
# Fit hierarchical (agglomerative) clustering, cut at 3 clusters, on the iris
# feature frame and score the resulting partition with three internal metrics
# and one external metric (computed against true_labels).

from sklearn.cluster import AgglomerativeClustering

# Only the cluster count is set; every other option is left at the
# scikit-learn default.
AClustering = AgglomerativeClustering(n_clusters = 3).fit(dfiris)
agg_labels = AClustering.labels_
silhouette_avg3 = silhouette_score(dfiris, agg_labels)           # higher is better
dbi_score3 = davies_bouldin_score(dfiris, agg_labels)            # lower is better
# adjusted_rand_score returns the chance-corrected (adjusted) Rand index,
# not the plain Rand index that scikit-learn exposes separately as rand_score.
rand_score3 = adjusted_rand_score(true_labels, agg_labels)
ch_score3 = calinski_harabasz_score(dfiris, agg_labels)          # higher is better

print("Agglomerative Clustering")
print("Silhouette Score: ", silhouette_avg3)
print("Davies Bouldin Index: ", dbi_score3)
# The label names the metric actually computed above (ARI, not the raw RI).
print("Adjusted Rand Score: ", rand_score3)
print("Calinski and Harabasz Score: ", ch_score3)

Agglomerative Clustering
Silhouette Score:  0.5543236611296419
Davies Bouldin Index:  0.6562564540642021
Rand Score:  0.7311985567707746
Calinski and Harabasz Score:  558.0580408128307


In [7]:
# Spectral clustering
#
# Fit spectral clustering with 3 clusters on the iris feature frame and score
# the resulting partition with three internal metrics and one external metric
# (computed against true_labels).

from sklearn.cluster import SpectralClustering

# random_state is fixed so the randomised parts of the algorithm give the
# same labels, and therefore the same scores, on every run.
SClustering = SpectralClustering(n_clusters = 3, random_state = 20).fit(dfiris)
spec_labels = SClustering.labels_
silhouette_avg4 = silhouette_score(dfiris, spec_labels)          # higher is better
dbi_score4 = davies_bouldin_score(dfiris, spec_labels)           # lower is better
# adjusted_rand_score returns the chance-corrected (adjusted) Rand index,
# not the plain Rand index that scikit-learn exposes separately as rand_score.
rand_score4 = adjusted_rand_score(true_labels, spec_labels)
ch_score4 = calinski_harabasz_score(dfiris, spec_labels)         # higher is better

print("Spectral Clustering")
print("Silhouette Score: ", silhouette_avg4)
print("Davies Bouldin Index: ", dbi_score4)
# The label names the metric actually computed above (ARI, not the raw RI).
print("Adjusted Rand Score: ", rand_score4)
print("Calinski and Harabasz Score: ", ch_score4)

Spectral Clustering
Silhouette Score:  0.5553062646081594
Davies Bouldin Index:  0.6538599267578205
Rand Score:  0.7455038681804481
Calinski and Harabasz Score:  556.1176919037501
