In [23]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import davies_bouldin_score, silhouette_score, adjusted_rand_score, calinski_harabasz_score
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans, MeanShift, AgglomerativeClustering, SpectralClustering
import pandas as pd

# Load the Breast Cancer dataset
breast_cancer = load_breast_cancer()

# Create a DataFrame
breast_cancer_df = pd.DataFrame(data=breast_cancer.data, columns=breast_cancer.feature_names)

# Add the target variable (class) to the DataFrame
breast_cancer_df['target'] = breast_cancer.target

# True labels
true_labels_bc = breast_cancer.target

# Display the DataFrame
print(breast_cancer_df.head())


   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst texture  worst perimeter  worst area  \
0             

In [24]:
# KMeans Clustering
kmeans = KMeans(n_clusters=2, random_state=20, n_init="auto").fit(breast_cancer_df)
kmeans_labels = kmeans.labels_
silhouette_avg1 = silhouette_score(breast_cancer_df, kmeans_labels)
dbi_score1 = davies_bouldin_score(breast_cancer_df, kmeans_labels)
rand_score1 = adjusted_rand_score(true_labels_bc, kmeans_labels)
ch_score1 = calinski_harabasz_score(breast_cancer_df, kmeans_labels)

print("KMeans")
print("Silhouette Score:", silhouette_avg1)
print("Davies-Bouldin Index:", dbi_score1)
print("Rand Score:", rand_score1)
print("Calinski and Harabasz Score:", ch_score1)


KMeans
Silhouette Score: 0.6972643037728601
Davies-Bouldin Index: 0.5044037959937971
Rand Score: 0.49142453622455523
Calinski and Harabasz Score: 1300.2075740879543


In [25]:
# MeanShift Clustering
mean_shift = MeanShift().fit(breast_cancer_df)
mean_shift_labels = mean_shift.labels_
silhouette_avg2 = silhouette_score(breast_cancer_df, mean_shift_labels)
dbi_score2 = davies_bouldin_score(breast_cancer_df, mean_shift_labels)
rand_score2 = adjusted_rand_score(true_labels_bc, mean_shift_labels)
ch_score2 = calinski_harabasz_score(breast_cancer_df, mean_shift_labels)

print("\nMeanShift")
print("Silhouette Score:", silhouette_avg2)
print("Davies-Bouldin Index:", dbi_score2)
print("Rand Score:", rand_score2)
print("Calinski and Harabasz Score:", ch_score2)



MeanShift
Silhouette Score: 0.626981169540688
Davies-Bouldin Index: 0.5121912907710544
Rand Score: 0.5528955767952657
Calinski and Harabasz Score: 637.985938359916


In [26]:
# Agglomerative Clustering
agglomerative_clustering = AgglomerativeClustering(n_clusters=2, linkage='complete').fit(breast_cancer_df)
agglomerative_labels = agglomerative_clustering.labels_
silhouette_avg3 = silhouette_score(breast_cancer_df, agglomerative_labels)
dbi_score3 = davies_bouldin_score(breast_cancer_df, agglomerative_labels)
rand_score3 = adjusted_rand_score(true_labels_bc, agglomerative_labels)
ch_score3 = calinski_harabasz_score(breast_cancer_df, agglomerative_labels)

print("\nAgglomerative Clustering")
print("Silhouette Score:", silhouette_avg3)
print("Davies-Bouldin Index:", dbi_score3)
print("Rand Score:", rand_score3)
print("Calinski and Harabasz Score:", ch_score3)



Agglomerative Clustering
Silhouette Score: 0.6909349309609722
Davies-Bouldin Index: 0.42900130643083484
Rand Score: 0.05230450912720369
Calinski and Harabasz Score: 334.3354181116257


In [27]:
# Spectral Clustering
spectral_clustering = SpectralClustering(n_clusters=2, random_state=20, gamma=0.1).fit(breast_cancer_df)
spectral_labels = spectral_clustering.labels_
silhouette_avg4 = silhouette_score(breast_cancer_df, spectral_labels)
dbi_score4 = davies_bouldin_score(breast_cancer_df, spectral_labels)
rand_score4 = adjusted_rand_score(true_labels_bc, spectral_labels)
ch_score4 = calinski_harabasz_score(breast_cancer_df, spectral_labels)

print("\nSpectral Clustering")
print("Silhouette Score:", silhouette_avg4)
print("Davies-Bouldin Index:", dbi_score4)
print("Rand Score:", rand_score4)
print("Calinski and Harabasz Score:", ch_score4)





Spectral Clustering
Silhouette Score: 0.25524128777815663
Davies-Bouldin Index: 0.5967473205315075
Rand Score: 0.0024029438603797937
Calinski and Harabasz Score: 1.5882640394783059
