# Clustering Activity

In [4]:
# Import necessary libraries
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering
from sklearn.metrics import adjusted_rand_score
import skfuzzy.cluster as fuzz
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Synthetic benchmark data: 1000 samples drawn around 4 blob centres with a
# per-blob standard deviation of 1.5. The second return value is kept as
# y_true so that clusterings can later be scored against the generating blobs.
X, y_true = make_blobs(
    n_samples=1000,
    centers=4,
    cluster_std=1.5,
    random_state=42,
)

Exercise: Evaluate four clustering algorithms (K-Means, Fuzzy C-Means, Spectral, and Agglomerative)
1. Apply each clustering algorithm to the dataset with the following settings:
   - `cluster_count = 4`
   - `random_state = 42` (where applicable)
   - leave `affinity`, `metric`, `algorithm`, `init`, and `linkage` at their defaults (where applicable)
2. Use the **Adjusted Rand Index** and the **Silhouette Score** to evaluate the clustering performance.
3. Compare the performance of the algorithms based on the aforementioned metrics and decide which algorithm is best suited for the data given.

In [5]:
from sklearn.metrics import silhouette_score

# Every algorithm is asked for the same number of clusters.
cluster_count = 4

# The scikit-learn estimators are only configured here; they are fitted below.
kmeans = KMeans(n_clusters=cluster_count, random_state=42)
# Unlike the estimators, fuzz.cmeans is a plain function call: it runs on the
# transposed data straight away and its result is stored as returned.
fuzzy_cmeans = fuzz.cmeans(
    X.T,
    c=cluster_count,
    m=2,
    error=0.005,
    maxiter=1000,
    seed=42,
)
spectral = SpectralClustering(n_clusters=cluster_count, random_state=42)
agglomerative = AgglomerativeClustering(n_clusters=cluster_count)

# Produce one hard cluster label per sample for each algorithm.
kmeans_labels = kmeans.fit_predict(X)
# Element 1 of the cmeans result is reduced with argmax along axis 0 to get a
# single label per sample (per the skfuzzy docs this element is the final
# membership matrix, laid out clusters x samples).
fuzzy_cmeans_labels = np.argmax(fuzzy_cmeans[1], axis=0)
spectral_labels = spectral.fit_predict(X)
agglomerative_labels = agglomerative.fit_predict(X)

# Display name -> predicted labels; insertion order fixes the report order.
labels_by_algorithm = {
    "K-Means": kmeans_labels,
    "Fuzzy C-Means": fuzzy_cmeans_labels,
    "Spectral": spectral_labels,
    "Agglomerative": agglomerative_labels,
}

# Adjusted Rand Index: agreement between predicted labels and y_true.
ari_scores = {
    algorithm: adjusted_rand_score(y_true, predicted)
    for algorithm, predicted in labels_by_algorithm.items()
}
# Silhouette Score: computed from the data and the predicted labels only.
silhouette_scores = {
    algorithm: silhouette_score(X, predicted)
    for algorithm, predicted in labels_by_algorithm.items()
}

# Keep the individual score variables available under their usual names.
ari_kmeans, ari_fuzzy_cmeans, ari_spectral, ari_agglomerative = ari_scores.values()
(
    silhouette_kmeans,
    silhouette_fuzzy_cmeans,
    silhouette_spectral,
    silhouette_agglomerative,
) = silhouette_scores.values()

# Report both metrics, one "<algorithm>: <score>" line per algorithm.
for heading, scores in (
    ("Adjusted Rand Index:", ari_scores),
    ("\nSilhouette Score:", silhouette_scores),
):
    print(heading)
    for algorithm, score in scores.items():
        print(f"{algorithm}: {score}")



Adjusted Rand Index:
K-Means: 0.9867602764824711
Fuzzy C-Means: 0.9867602764824711
Spectral: 0.9920240695900462
Agglomerative: 0.9763615827878084

Silhouette Score:
K-Means: 0.6876600436588094
Fuzzy C-Means: 0.6876600436588094
Spectral: 0.6874976652400637
Agglomerative: 0.685223260645522
