In [1]:
from sklearn.datasets import load_iris

# Fetch the Iris dataset bundled with scikit-learn.
data = load_iris()
# X is the feature matrix; y holds the target labels, which are kept for
# reference only and are not used for clustering.
X, y = data.data, data.target
from sklearn.preprocessing import StandardScaler

In [2]:
from sklearn.preprocessing import StandardScaler

# Standardise the features: the scaler is fitted on X first and then applied
# to that same matrix (zero mean and unit variance per column with the
# default StandardScaler settings).
scaler = StandardScaler().fit(X)
X_normalized = scaler.transform(X)

In [3]:
from sklearn.decomposition import PCA

# Apply PCA (reduce to 2 components for simplicity).
# NOTE(review): the projection is fitted on the raw feature matrix X, not on
# X_normalized -- confirm this is meant to be the "PCA only" variant.
pca = PCA(n_components=2)
# X_pca holds the samples projected onto the two fitted components.
X_pca = pca.fit_transform(X)

In [4]:
import numpy as np

# Log transformation followed by normalization.
# np.log1p computes log(1 + x) element-wise.
X_transformed = np.log1p(X)

# Standardise the log-transformed features with a dedicated scaler.
# Calling fit_transform on the shared `scaler` would overwrite the statistics
# it learned from the raw X, so it would no longer correspond to
# X_normalized (e.g. for inverse_transform or for transforming new samples).
scaler_tn = StandardScaler()
X_tn = scaler_tn.fit_transform(X_transformed)

In [5]:
# Apply transformation, normalization, and PCA.
# A separate PCA instance is fitted on X_tn so that `pca`, which was fitted on
# the raw X, keeps its own components and explained-variance attributes.
# The number of components is read from `pca` so both projections stay in sync.
pca_tn = PCA(n_components=pca.n_components)
X_tn_pca = pca_tn.fit_transform(X_tn)

In [6]:
from sklearn.cluster import KMeans

# Perform K-Means clustering with 3 clusters.
# random_state pins the centroid initialisation so the cluster labels, and
# every score computed from them, are reproducible after a kernel restart.
# n_init is set explicitly because its default differs between scikit-learn
# releases, which would otherwise change the result depending on the version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X_normalized)

In [9]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

# Agglomerative (hierarchical) clustering into 3 groups on the standardised data.
hierarchical = AgglomerativeClustering(n_clusters=3)
clusters_hierarchical = hierarchical.fit_predict(X_normalized)

# Score the labelling with the three internal validation metrics, in the
# order silhouette, Calinski-Harabasz, Davies-Bouldin.
silhouette_hierarchical, calinski_hierarchical, davies_hierarchical = [
    metric(X_normalized, clusters_hierarchical)
    for metric in (silhouette_score, calinski_harabasz_score, davies_bouldin_score)
]

# One line per metric.
print(
    f"Hierarchical Silhouette: {silhouette_hierarchical}",
    f"Hierarchical Calinski-Harabasz: {calinski_hierarchical}",
    f"Hierarchical Davies-Bouldin: {davies_hierarchical}",
    sep="\n",
)

Hierarchical Silhouette: 0.4466890410285909
Hierarchical Calinski-Harabasz: 222.71916382215363
Hierarchical Davies-Bouldin: 0.8034665302876753


In [11]:
# scikit-learn is already imported by the first cell of this notebook, so no
# installation step is needed here (an unpinned shell `pip install` in the
# middle of an analysis may also target a different environment than the kernel).
from sklearn.cluster import MeanShift # Import the MeanShift class from sklearn.cluster

# Perform Mean Shift Clustering.
# No cluster count is passed: MeanShift determines it from the data.
mean_shift = MeanShift()
clusters_mean_shift = mean_shift.fit_predict(X_normalized)

# Calculate evaluation metrics for Mean Shift Clustering on the same
# standardised features the model was fitted on.
silhouette_mean_shift = silhouette_score(X_normalized, clusters_mean_shift)
calinski_mean_shift = calinski_harabasz_score(X_normalized, clusters_mean_shift)
davies_mean_shift = davies_bouldin_score(X_normalized, clusters_mean_shift)

print(f"Mean Shift Silhouette: {silhouette_mean_shift}")
print(f"Mean Shift Calinski-Harabasz: {calinski_mean_shift}")
print(f"Mean Shift Davies-Bouldin: {davies_mean_shift}")

Mean Shift Silhouette: 0.5817500491982808
Mean Shift Calinski-Harabasz: 251.34933946458108
Mean Shift Davies-Bouldin: 0.5933126905762435


In [12]:
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

# Evaluate the K-Means labelling (`clusters`) on the standardised features
# with the three internal validation metrics, in the order
# silhouette, Calinski-Harabasz, Davies-Bouldin.
silhouette_kmeans, calinski_kmeans, davies_kmeans = [
    metric(X_normalized, clusters)
    for metric in (silhouette_score, calinski_harabasz_score, davies_bouldin_score)
]

# One line per metric.
print(
    f"K-Means Silhouette: {silhouette_kmeans}",
    f"K-Means Calinski-Harabasz: {calinski_kmeans}",
    f"K-Means Davies-Bouldin: {davies_kmeans}",
    sep="\n",
)

K-Means Silhouette: 0.45994823920518635
K-Means Calinski-Harabasz: 241.90440170183157
K-Means Davies-Bouldin: 0.8335949464754338


In [14]:
import pandas as pd

# Collect the scores into a comparison table: one row per algorithm,
# one column per evaluation metric.
results = pd.DataFrame(
    [
        ('K-Means', silhouette_kmeans, calinski_kmeans, davies_kmeans),
        ('Hierarchical', silhouette_hierarchical, calinski_hierarchical, davies_hierarchical),
        ('Mean Shift', silhouette_mean_shift, calinski_mean_shift, davies_mean_shift),
    ],
    columns=['Algorithm', 'Silhouette', 'Calinski-Harabasz', 'Davies-Bouldin'],
)

# Bare last expression so the notebook renders the table.
results

Unnamed: 0,Algorithm,Silhouette,Calinski-Harabasz,Davies-Bouldin
0,K-Means,0.459948,241.904402,0.833595
1,Hierarchical,0.446689,222.719164,0.803467
2,Mean Shift,0.58175,251.349339,0.593313
