# Clustering Activity

In [15]:
# Import necessary libraries
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering
from sklearn.metrics import adjusted_rand_score
import skfuzzy.cluster as fuzz
import numpy as np
import numpy as np

# Build the synthetic benchmark: 300 samples scattered around 4 blob centres
# with unit standard deviation, seeded so the data is the same on every run.
# Y holds the generating-blob index of each sample and serves as the ground
# truth for the Adjusted Rand Index in the cells below.
X, Y = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=1.0,
    random_state=42,
)

In [16]:
from sklearn.metrics import adjusted_rand_score, silhouette_score

# Plot styling: colour palette and figure size.
colors = [
    'red', 'orange', 'lime', 'green',
    'blue', 'navy', 'violet',
]
fig_size = 6

# Data-generation / clustering parameters shared by the cells below.
# NOTE(review): the dataset is generated with n_samples=300, which make_blobs
# treats as the TOTAL number of points when given an int (not per class) --
# confirm which meaning is intended for `samples`.
samples = 300
cluster_count = 4   # number of blobs, and the cluster count requested from each algorithm
random_state = 42   # seed passed to the algorithms that accept one

Exercise: Evaluate four clustering algorithms (K-Means, Fuzzy C-Means, Spectral, and Agglomerative)
1. Apply each clustering algorithm to the dataset where:
- cluster_count = 4
- random_state = 42 (where applicable)
- do not specify affinity, metric, algo, init, linkage (if applicable)
2. Use the **Adjusted Rand index** and **Silhouette Score** to evaluate the clustering performance.
3. Compare the performance of the algorithms based on the aforementioned metrics and decide which algorithm is best suited for the data given.

### Fuzzy C-Means

In [17]:
from skfuzzy import cmeans, cmeans_predict

# Fuzzy C-Means: X is transposed before being handed to skfuzzy's cmeans.
# cmeans starts from a random fuzzy partition, so the seed is fixed to make
# the result reproducible (the exercise asks for random_state = 42 wherever
# an algorithm accepts one).
centers, labels, _, _, _, _, _ = cmeans(
    data=X.T,
    c=cluster_count,
    m=1.25,         # fuzzifier exponent
    error=0.075,    # stopping tolerance
    maxiter=1000,
    seed=random_state,
)
# NOTE: despite its name, `labels` here is the fuzzy membership matrix
# returned by cmeans, not hard labels. Taking the argmax over axis 0 assigns
# every sample to the cluster in which it has the highest membership.
target2 = np.argmax(labels, axis=0)
fcm_ari = adjusted_rand_score(Y, target2)
fcm_sil = silhouette_score(X, target2)

### Spectral

In [18]:
from sklearn.cluster import SpectralClustering

# Spectral clustering with library-default affinity, as the exercise requires.
# The seed comes from the shared `random_state` constant (same value, 42)
# rather than a repeated literal, so every seeded algorithm in the notebook
# is controlled from one place.
labels = SpectralClustering(
    n_clusters=cluster_count,
    random_state=random_state,
).fit_predict(X)
spectral_ari = adjusted_rand_score(Y, labels)   # agreement with the generating labels
spectral_sil = silhouette_score(X, labels)      # cohesion/separation of the found clusters

### K-Means

In [19]:
from sklearn.cluster import KMeans

# K-Means with the default initialisation scheme, as the exercise requires.
# n_init is pinned explicitly: leaving it unset triggers scikit-learn's
# FutureWarning about the default changing from 10 to 'auto', and would make
# the number of restarts (and potentially the result) depend on the installed
# scikit-learn version. 10 is the value the warning reports as the current default.
labels = KMeans(
    n_clusters=cluster_count,
    n_init=10,
    random_state=random_state,
).fit_predict(X)
kmeans_ari = adjusted_rand_score(Y, labels)   # agreement with the generating labels
kmeans_sil = silhouette_score(X, labels)      # cohesion/separation of the found clusters

  super()._check_params_vs_input(X, default_n_init=10)


### Agglomerative

In [20]:
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram

# Agglomerative clustering with library-default linkage and metric, as the
# exercise requires; no random_state is passed to this estimator here.
agglomerative_model = AgglomerativeClustering(n_clusters=cluster_count)
labels = agglomerative_model.fit_predict(X)

# Score the partition against the generating labels (ARI) and on its own
# geometry (silhouette).
agglomerative_ari = adjusted_rand_score(Y, labels)
agglomerative_sil = silhouette_score(X, labels)

In [26]:
# Report both metrics for every algorithm. The (name, ARI, silhouette) rows
# replace four copy-pasted print stanzas; the emitted text is unchanged.
results = [
    ("Fuzzy C-Means", fcm_ari, fcm_sil),
    ("Spectral Clustering", spectral_ari, spectral_sil),
    ("K-Means", kmeans_ari, kmeans_sil),
    ("Agglomerative Clustering", agglomerative_ari, agglomerative_sil),
]

for algo_name, ari, sil in results:
    print("Adjusted Rand Index:")
    print(f"{algo_name}: {ari}")
    print("Silhouette Score:")
    # The trailing " \n" leaves a blank line between algorithms.
    print(f"{algo_name}: {sil} \n")

Adjusted Rand Index:
Fuzzy C-Means: 0.9910811504997546
Silhouette Score:
Fuzzy C-Means: 0.7915830011443039 

Adjusted Rand Index:
Spectral Clustering: 0.9910811504997546
Silhouette Score:
Spectral Clustering: 0.7915830011443039 

Adjusted Rand Index:
K-Means: 0.9910811504997546
Silhouette Score:
K-Means: 0.7915830011443039 

Adjusted Rand Index:
Agglomerative Clustering: 0.9910811504997546
Silhouette Score:
Agglomerative Clustering: 0.7915830011443039 



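The Silhouette Score and Adjusted Rand Index are identical for all four algorithms. Both metrics are unaffected by how the clusters happen to be numbered, so matching values at full precision strongly suggest that every algorithm produced the same partition of the data. The most likely explanation is that the four generated blobs are well separated, so each method recovers essentially the same groups; it may also reflect the fact that algorithm-specific parameters such as affinity, metric, algo, init, and linkage were not explicitly specified and were left at their defaults. On this dataset, therefore, these two metrics alone do not single out one algorithm as best suited.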