| **Method** | **Feature**|
|--------|--------|
|*KMeans Cluster* | `Based on Distance` |
|*Agglomerative (Hierarchical) Cluster* | `Bottom Up Approach` |
|*Divisive (Hierarchical) Cluster* | `Top Down Approach` |

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering, KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score, accuracy_score
from scipy.cluster.hierarchy import dendrogram, linkage

In [None]:
# Dataset: load the iris bunch and split it into the feature matrix `x`
# and the class-label vector `y`.

iris = load_iris()
x, y = iris.data, iris.target

In [None]:
# K-nearest-neighbours classifier (supervised): fit on 80% of the samples
# and report accuracy on the held-out 20%.

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# `fit` returns the estimator itself, so construction and fitting can be chained.
knn = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
y_pred = knn.predict(x_test)

accuracy_pct = 100 * accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy_pct:.2f}%")

In [None]:
# Plot: scatter the test samples on their first two feature columns,
# coloured by the class the KNN model predicted for each one.

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    x_test[:, 0],
    x_test[:, 1],
    c=y_pred,
    cmap='viridis',
    edgecolor='k',
    s=100,
)
ax.set_title("KNN Predictions")
ax.set_xlabel("Sepal Length")
ax.set_ylabel("Sepal Width")
ax.grid()
plt.show()

In [None]:
# Agglomerative clustering on the full feature matrix, scored against the
# known classes.

model = AgglomerativeClustering(n_clusters=3)

# Raw cluster ids; kept unchanged because the following plot colours by them.
y_pred = model.fit_predict(x)

# Cluster ids are arbitrary (cluster 0 need not correspond to class 0), so
# comparing them directly with `y` yields a number that depends only on how
# the clusters happened to be numbered. Map every cluster to the true class
# that is most frequent inside it, then score the mapped labels.
cluster_to_class = pd.crosstab(y_pred, y).idxmax(axis=1)
y_aligned = pd.Series(y_pred).map(cluster_to_class).to_numpy()

# NOTE: after majority-vote mapping this is the clustering's purity, i.e. an
# upper bound on what a one-to-one cluster/class assignment could achieve.
print(f"Accuracy: {100*accuracy_score(y, y_aligned):.2f}%")

In [None]:
# Plot: all samples on their first two feature columns, coloured by the
# cluster id assigned by the agglomerative model.

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x[:, 0], x[:, 1], c=y_pred, marker='o')
ax.set_title("Hierarchical Clustering")
ax.set_xlabel("Sepal Length")
ax.set_ylabel("Sepal Width")
plt.show()

In [None]:
# Dendrogram: build the Ward linkage matrix over every sample and draw
# the resulting merge tree.

linked = linkage(x, method='ward')

plt.figure(figsize=(10, 7))
dendrogram(
    linked,
    orientation='top',
    distance_sort='ascending',
    show_leaf_counts=True,
)
plt.title('Dendrogram for Iris')
plt.xlabel('Sample Index')
plt.ylabel('Distance')
plt.show()

In [None]:
# KMeans cluster: reload iris as a DataFrame (this rebinds `iris`), keep
# only the numeric feature columns in `X`, and cluster them into 3 groups.

iris = sns.load_dataset('iris')
X = iris.drop(columns='species')

# `fit_predict` is `fit` followed by reading `labels_`; spelled out here.
model = KMeans(n_clusters=3, random_state=42).fit(X)
labels = model.labels_

In [None]:
# Agglomerative cluster: fit a 3-cluster model on the same feature table
# and keep the per-sample cluster ids.

agg_model = AgglomerativeClustering(n_clusters=3)
agg_model.fit(X)
agg_labels = agg_model.labels_

In [None]:
# Principal component analysis
# Reduce the feature table `X` to two components so that the clusterings
# can be drawn on a 2-D scatter plot; `X_red` supplies the plot coordinates.

# Keep the first two principal components only.
pca = PCA(n_components=2)
# Fit the projection on X and return X expressed in the component space.
X_red = pca.fit_transform(X)

In [None]:
# Plot: three side-by-side views of the PCA-reduced samples, coloured by
# agglomerative cluster, KMeans cluster, and the true species respectively.

fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 6))

# Integer colour code per species (same codes as positions in the
# original ['setosa', 'virginica', 'versicolor'] list).
species_codes = iris['species'].map(
    {'setosa': 0, 'virginica': 1, 'versicolor': 2}
)

panels = (
    (ax1, agg_labels, "Agglomerative clustering"),
    (ax2, labels, "Kmeans clustering"),
    (ax3, species_codes, "Actual classification"),
)
for axis, colours, heading in panels:
    axis.scatter(X_red[:, 0], X_red[:, 1], c=colours)
    axis.set_title(heading)

plt.show()

In [None]:
# Metrics: silhouette and Davies-Bouldin scores of the feature table under
# three labelings (true species, KMeans clusters, agglomerative clusters).

labelings = {
    'Actual': iris['species'],
    'KMeans': labels,
    'Agglomerative': agg_labels,
}

res = pd.DataFrame(
    {
        'Silhouette Score': [
            silhouette_score(X, assignment) for assignment in labelings.values()
        ],
        'Davies Bouldin Score': [
            davies_bouldin_score(X, assignment) for assignment in labelings.values()
        ],
    },
    index=list(labelings),
)
print(res)

In [None]:
# Dendrogram linkages: compute one linkage matrix per method on the first
# 20 rows of the feature table.

first_rows = X.head(20)
ward, average, single, complete = (
    linkage(first_rows, method=name)
    for name in ("ward", "average", "single", "complete")
)

In [None]:
# Plot: one dendrogram per linkage method, arranged on a 2x2 grid.

fig, ax = plt.subplots(2, 2, figsize=(12, 10))

# `ax.flat` walks the grid row by row, so the panels appear in the order
# ward, average (top row), single, complete (bottom row).
linkage_panels = (
    (ward, "Ward"),
    (average, "Average"),
    (single, "Single"),
    (complete, "Complete"),
)
for axis, (tree, method_name) in zip(ax.flat, linkage_panels):
    dendrogram(tree, ax=axis)
    # Titles previously misspelled "Dendrogram" and were phrased inconsistently.
    axis.set_title(f"Dendrogram using {method_name} linkage")

plt.show()