In [None]:
!pip install scikit-learn-extra


In [1]:
import numpy as np
import pandas as pd

from sklearn.cluster import (
    KMeans, MiniBatchKMeans, AgglomerativeClustering,
    DBSCAN, OPTICS, SpectralClustering, Birch
)
from sklearn_extra.cluster import KMedoids
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture


In [10]:
# Value handed to every clustering factory as the requested number of
# clusters / mixture components.  Change if needed.
NUM_CLASSES = 7

# Feature extractors to evaluate; each name is used to build the
# "<name>_features.npy" file name loaded by the benchmark loop.
MODELS = ["EfficientNetB0", "ResNet152", "VGG19", "MobileNetV3", "Xception"]


In [7]:
def compute_label_change(true_labels, cluster_labels, align=False):
    """Count how many samples received a label different from their true label.

    Parameters
    ----------
    true_labels : array-like
        Ground-truth label of every sample.  Flattened to 1-D before use.
    cluster_labels : array-like
        Label assigned to every sample by a clustering algorithm.  Flattened
        to 1-D before use; must contain as many entries as ``true_labels``.
    align : bool, default False
        If False (the original behaviour), labels are compared by raw value.
        If True, cluster ids are first matched one-to-one to true labels so
        that the total number of agreeing samples is maximal (Hungarian
        algorithm), which makes the count independent of the arbitrary ids a
        clusterer hands out.  Samples in clusters left unmatched (for example
        surplus clusters or a noise id) count as changed.

    Returns
    -------
    tuple
        ``(total, changed, percent)`` where ``total`` is the number of
        samples, ``changed`` the number whose label differs, and ``percent``
        is ``changed / total * 100`` (``0.0`` for empty input).

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of labels.
    """
    # Convert up front: with plain lists `a != b` is a single bool, and with
    # arrays of different shapes it broadcasts instead of failing loudly.
    true_arr = np.asarray(true_labels).ravel()
    cluster_arr = np.asarray(cluster_labels).ravel()

    if true_arr.shape != cluster_arr.shape:
        raise ValueError(
            f"Label count mismatch: {true_arr.size} true labels vs "
            f"{cluster_arr.size} cluster labels"
        )

    total = int(true_arr.size)
    if total == 0:
        # Nothing to compare; avoid a division by zero below.
        return 0, 0, 0.0

    if align:
        # Local import keeps scipy optional for callers that never align.
        from scipy.optimize import linear_sum_assignment

        _, true_idx = np.unique(true_arr, return_inverse=True)
        _, cluster_idx = np.unique(cluster_arr, return_inverse=True)

        # contingency[c, t] = number of samples in cluster c with true label t
        contingency = np.zeros(
            (cluster_idx.max() + 1, true_idx.max() + 1), dtype=np.int64
        )
        np.add.at(contingency, (cluster_idx, true_idx), 1)

        rows, cols = linear_sum_assignment(contingency, maximize=True)
        changed = total - int(contingency[rows, cols].sum())
    else:
        changed = int(np.count_nonzero(cluster_arr != true_arr))

    percent = changed / total * 100
    return total, changed, percent


In [8]:
# Registry of clustering algorithms: display name -> factory taking the
# requested cluster count `k` and returning a fresh, unfitted estimator.
# Insertion order is the order in which the algorithms are iterated.
CLUSTERING_ALGOS = {
    # Centroid / medoid partitioning (seeded for repeatability)
    "KMeans": lambda k: KMeans(random_state=42, n_clusters=k),
    "MiniBatchKMeans": lambda k: MiniBatchKMeans(random_state=42, n_clusters=k),
    "KMedoids": lambda k: KMedoids(random_state=42, method="pam", n_clusters=k),

    # Agglomerative clustering, one entry per linkage criterion.  `linkage`
    # is bound as a default argument so each lambda keeps its own value.
    **{
        f"Agglomerative_{linkage.capitalize()}": (
            lambda k, linkage=linkage: AgglomerativeClustering(
                n_clusters=k, linkage=linkage
            )
        )
        for linkage in ("single", "complete", "average", "ward")
    },

    # These two accept `k` only to share the factory signature; it is not
    # forwarded to the estimator.
    "DBSCAN": lambda k: DBSCAN(min_samples=5, eps=0.5),
    "OPTICS": lambda k: OPTICS(min_samples=5),

    # Mixture models: `k` is the number of components
    "GMM": lambda k: GaussianMixture(random_state=42, n_components=k),
    "BayesianGMM": lambda k: BayesianGaussianMixture(random_state=42, n_components=k),

    "SpectralClustering": lambda k: SpectralClustering(
        random_state=42, affinity="nearest_neighbors", n_clusters=k
    ),

    "BIRCH": lambda k: Birch(n_clusters=k),
}


In [11]:
# Run every clustering algorithm on every model's feature matrix and collect
# one row [algorithm, model, total, changed, percent] per successful run.
results = []

# Load true labels once; the same labels are compared against every model.
y_true = np.load("true_labels.npy")

for model_name in MODELS:
    print(f"\n================ {model_name} =================")

    X = np.load(f"{model_name}_features.npy")

    # A feature matrix whose row count differs from the label count cannot be
    # scored; skip it up front instead of running 13 clusterers for nothing.
    if len(X) != len(y_true):
        print(
            f"❌ Skipping {model_name}: {len(X)} feature rows "
            f"but {len(y_true)} true labels"
        )
        continue

    for algo_name, algo_fn in CLUSTERING_ALGOS.items():
        print(f"Running {algo_name}...")

        try:
            clusterer = algo_fn(NUM_CLASSES)
            cluster_labels = clusterer.fit_predict(X)

            # Use the shared helper rather than duplicating its arithmetic.
            # NOTE(review): this compares raw cluster ids with class ids.
            # Cluster ids are arbitrary (and DBSCAN/OPTICS label noise as -1),
            # so confirm this is the intended metric or align the ids first.
            total, changed, percent = compute_label_change(y_true, cluster_labels)

            results.append([algo_name, model_name, total, changed, percent])

        except Exception as e:
            # Deliberate best-effort: one failing algorithm must not abort the
            # whole sweep, so report it and move on to the next one.
            print(f"❌ {algo_name} failed on {model_name}: {e}")



Running KMeans...
Running MiniBatchKMeans...
Running KMedoids...
Running Agglomerative_Single...
Running Agglomerative_Complete...
Running Agglomerative_Average...
Running Agglomerative_Ward...
Running DBSCAN...
Running OPTICS...
Running GMM...
Running BayesianGMM...




Running SpectralClustering...
Running BIRCH...





Running KMeans...
Running MiniBatchKMeans...
Running KMedoids...
Running Agglomerative_Single...
Running Agglomerative_Complete...
Running Agglomerative_Average...
Running Agglomerative_Ward...
Running DBSCAN...
Running OPTICS...
Running GMM...
Running BayesianGMM...
Running SpectralClustering...
Running BIRCH...

Running KMeans...
Running MiniBatchKMeans...
Running KMedoids...
Running Agglomerative_Single...
Running Agglomerative_Complete...
Running Agglomerative_Average...
Running Agglomerative_Ward...
Running DBSCAN...
Running OPTICS...
Running GMM...
Running BayesianGMM...
Running SpectralClustering...
Running BIRCH...

Running KMeans...




Running MiniBatchKMeans...
Running KMedoids...




Running Agglomerative_Single...
Running Agglomerative_Complete...
Running Agglomerative_Average...
Running Agglomerative_Ward...
Running DBSCAN...
Running OPTICS...
Running GMM...
Running BayesianGMM...
Running SpectralClustering...
Running BIRCH...





Running KMeans...
Running MiniBatchKMeans...
Running KMedoids...
Running Agglomerative_Single...
Running Agglomerative_Complete...
Running Agglomerative_Average...
Running Agglomerative_Ward...
Running DBSCAN...
Running OPTICS...
Running GMM...
Running BayesianGMM...
Running SpectralClustering...
Running BIRCH...


In [12]:
# Column headers, in the same order as the values of each row in `results`.
column_names = [
    "clustering_algorithm",
    "model",
    "total_images",
    "labels_changed",
    "changed_percentage",
]

# Tabulate the collected rows.
df = pd.DataFrame(data=results, columns=column_names)

# Write the table without the positional index column.
df.to_csv("clustering_results.csv", index=False)
print("\n✅ clustering_results.csv saved successfully")



✅ clustering_results.csv saved successfully
