In [None]:
!pip install scikit-learn-extra


In [2]:
!pip install hdbscan


Defaulting to user installation because normal site-packages is not writeable
Collecting hdbscan
  Downloading hdbscan-0.8.41-cp313-cp313-win_amd64.whl.metadata (15 kB)
Downloading hdbscan-0.8.41-cp313-cp313-win_amd64.whl (671 kB)
   ---------------------------------------- 0.0/671.7 kB ? eta -:--:--
   ---------------------------------------- 671.7/671.7 kB 5.5 MB/s eta 0:00:00
Installing collected packages: hdbscan
Successfully installed hdbscan-0.8.41



[notice] A new release of pip is available: 24.3.1 -> 26.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
!pip install scikit-fuzzy


Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-fuzzy
  Downloading scikit_fuzzy-0.5.0-py2.py3-none-any.whl.metadata (2.6 kB)
Downloading scikit_fuzzy-0.5.0-py2.py3-none-any.whl (920 kB)
   ---------------------------------------- 0.0/920.8 kB ? eta -:--:--
   ----------- ---------------------------- 262.1/920.8 kB ? eta -:--:--
   ---------------------------------------- 920.8/920.8 kB 2.6 MB/s eta 0:00:00
Installing collected packages: scikit-fuzzy
Successfully installed scikit-fuzzy-0.5.0



[notice] A new release of pip is available: 24.3.1 -> 26.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import numpy as np

from sklearn.cluster import (
    KMeans,
    MiniBatchKMeans,
    AgglomerativeClustering,
    SpectralClustering,
    MeanShift,
    AffinityPropagation,
    DBSCAN,
    OPTICS,
    Birch,
    BisectingKMeans
)

from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from sklearn.preprocessing import normalize

from sklearn_extra.cluster import KMedoids
import hdbscan
import skfuzzy as fuzz


In [2]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd
original_df = pd.read_csv("ALZ_working/labels.csv")

image_names = original_df["image_name"].values
original_labels = original_df["label"].values

le = LabelEncoder()
original_labels_enc = le.fit_transform(original_labels)

In [3]:
def compute_label_change(original, clustered):
    return (np.sum(original != clustered) / len(original)) * 100


def save_cluster_labels_csv(model, algo, image_names, original_labels, labels):
    os.makedirs("clustering_labels", exist_ok=True)

    df = pd.DataFrame({
        "image_name": image_names,
        "original_label": original_labels,   # ← human-readable
        "cluster_label": labels               # ← cluster assignment
    })

    filename = f"clustering_labels/{model}_{algo}.csv"
    df.to_csv(filename, index=False)



In [4]:
def get_algorithms(X):
    """
    Returns clustering algorithms that do NOT require training images.
    """

    X_norm = normalize(X)

    return {

        # ================= PARTITION-BASED =================
        "KMeans": KMeans(n_clusters=NUM_CLASSES, random_state=42),

        "MiniBatchKMeans": MiniBatchKMeans(
            n_clusters=NUM_CLASSES, random_state=42
        ),

        "KMedoids_PAM": KMedoids(
            n_clusters=NUM_CLASSES, method="pam", random_state=42
        ),

        "BisectingKMeans": BisectingKMeans(
            n_clusters=NUM_CLASSES, random_state=42
        ),

        # Spherical K-Means (handled as callable)
        "SphericalKMeans": lambda X: KMeans(
            n_clusters=NUM_CLASSES,
            random_state=42
        ).fit_predict(X_norm),

        # Fuzzy C-Means (callable)
        "FuzzyCMeans": lambda X: np.argmax(
            fuzz.cluster.cmeans(
                X_norm.T,
                c=NUM_CLASSES,
                m=2.0,
                error=0.005,
                maxiter=1000
            )[1],
            axis=0
        ),

        # ================= HIERARCHICAL =================
        "Agglomerative_Single": AgglomerativeClustering(
            n_clusters=NUM_CLASSES, linkage="single"
        ),

        "Agglomerative_Complete": AgglomerativeClustering(
            n_clusters=NUM_CLASSES, linkage="complete"
        ),

        "Agglomerative_Average": AgglomerativeClustering(
            n_clusters=NUM_CLASSES, linkage="average"
        ),

        "Agglomerative_Ward": AgglomerativeClustering(
            n_clusters=NUM_CLASSES, linkage="ward"
        ),

        # ================= DENSITY-BASED =================
        "DBSCAN": DBSCAN(eps=0.5, min_samples=5),

        "OPTICS": OPTICS(min_samples=5),

        "HDBSCAN": hdbscan.HDBSCAN(min_cluster_size=10),

        # "MeanShift": MeanShift(),

        # ================= MODEL-BASED =================
        "GMM": GaussianMixture(
            n_components=NUM_CLASSES,
            reg_covar=1e-4,
            random_state=42
        ),

        "BayesianGMM": BayesianGaussianMixture(
            n_components=NUM_CLASSES,
            random_state=42
        ),

        # ================= GRAPH-BASED =================
        "SpectralClustering": SpectralClustering(
            n_clusters=NUM_CLASSES,
            assign_labels="kmeans",
            random_state=42
        ),

        # ================= LARGE-SCALE =================
        "BIRCH": Birch(n_clusters=NUM_CLASSES),

    }


In [5]:
import os


In [6]:
NUM_CLASSES = 4
MODEL_NAME = "EfficientNetB0"

print(f"\n===== Processing {MODEL_NAME} =====")

X = np.load(f"{MODEL_NAME}_features.npy").astype(np.float64)

algorithms = get_algorithms(X)
results = []

for algo_name, algo in algorithms.items():
    print(f"→ Implementing {algo_name} ...")

    try:
        # Case 1: callable algorithms (Fuzzy, Spherical)
        if callable(algo):
            labels = algo(X)

        # Case 2: GMM / BayesianGMM
        elif hasattr(algo, "predict") and not hasattr(algo, "fit_predict"):
            algo.fit(X)
            labels = algo.predict(X)

        # Case 3: sklearn clustering
        else:
            labels = algo.fit_predict(X)

        # Skip degenerate clustering
        if len(np.unique(labels)) < 2:
            raise ValueError("Only one cluster formed")

        save_cluster_labels_csv(
            MODEL_NAME,
            algo_name,
            image_names,
            original_labels,
            labels
        )

        change = compute_label_change(original_labels_enc, labels)
        results.append([MODEL_NAME, algo_name, change])

        print(f"   ✓ Done | Label change: {change:.2f}%")

    except Exception as e:
        print(f"   ✗ Failed: {str(e)}")
        continue


pd.DataFrame(
    results,
    columns=["model", "clustering", "label_percentage_change"]
).to_csv(
    "clustering_results.csv",
    mode="a",
    header=not os.path.exists("clustering_results.csv"),
    index=False
)



===== Processing EfficientNetB0 =====
→ Implementing KMeans ...
   ✓ Done | Label change: 84.08%
→ Implementing MiniBatchKMeans ...
   ✓ Done | Label change: 84.28%
→ Implementing KMedoids_PAM ...
   ✓ Done | Label change: 82.89%
→ Implementing BisectingKMeans ...
   ✓ Done | Label change: 67.86%
→ Implementing SphericalKMeans ...
   ✓ Done | Label change: 83.31%
→ Implementing FuzzyCMeans ...
   ✓ Done | Label change: 91.59%
→ Implementing Agglomerative_Single ...
   ✓ Done | Label change: 65.03%
→ Implementing Agglomerative_Complete ...
   ✓ Done | Label change: 83.11%
→ Implementing Agglomerative_Average ...
   ✓ Done | Label change: 93.23%
→ Implementing Agglomerative_Ward ...
   ✓ Done | Label change: 66.28%
→ Implementing DBSCAN ...
   ✓ Done | Label change: 86.02%
→ Implementing OPTICS ...
   ✓ Done | Label change: 99.95%
→ Implementing HDBSCAN ...
   ✓ Done | Label change: 99.77%
→ Implementing MeanShift ...
   ✓ Done | Label change: 85.84%
→ Implementing GMM ...
   ✓ Done | L



   ✓ Done | Label change: 86.56%
→ Implementing SpectralClustering ...
   ✓ Done | Label change: 81.39%
→ Implementing BIRCH ...
   ✓ Done | Label change: 55.44%


In [7]:
NUM_CLASSES = 4
MODEL_NAME = "MobileNetV3"

print(f"\n===== Processing {MODEL_NAME} =====")

X = np.load(f"{MODEL_NAME}_features.npy").astype(np.float64)

algorithms = get_algorithms(X)
results = []

for algo_name, algo in algorithms.items():
    print(f"→ Implementing {algo_name} ...")

    try:
        # Case 1: callable algorithms (Fuzzy, Spherical)
        if callable(algo):
            labels = algo(X)

        # Case 2: GMM / BayesianGMM
        elif hasattr(algo, "predict") and not hasattr(algo, "fit_predict"):
            algo.fit(X)
            labels = algo.predict(X)

        # Case 3: sklearn clustering
        else:
            labels = algo.fit_predict(X)

        # Skip degenerate clustering
        if len(np.unique(labels)) < 2:
            raise ValueError("Only one cluster formed")

        save_cluster_labels_csv(
            MODEL_NAME,
            algo_name,
            image_names,
            original_labels,
            labels
        )

        change = compute_label_change(original_labels_enc, labels)
        results.append([MODEL_NAME, algo_name, change])

        print(f"   ✓ Done | Label change: {change:.2f}%")

    except Exception as e:
        print(f"   ✗ Failed: {str(e)}")
        continue


pd.DataFrame(
    results,
    columns=["model", "clustering", "label_percentage_change"]
).to_csv(
    "clustering_results.csv",
    mode="a",
    header=not os.path.exists("clustering_results.csv"),
    index=False
)



===== Processing MobileNetV3 =====
→ Implementing KMeans ...
   ✓ Done | Label change: 76.88%
→ Implementing MiniBatchKMeans ...
   ✓ Done | Label change: 62.34%
→ Implementing KMedoids_PAM ...
   ✓ Done | Label change: 77.72%
→ Implementing BisectingKMeans ...
   ✓ Done | Label change: 77.52%
→ Implementing SphericalKMeans ...
   ✓ Done | Label change: 73.73%
→ Implementing FuzzyCMeans ...
   ✓ Done | Label change: 73.06%
→ Implementing Agglomerative_Single ...
   ✓ Done | Label change: 86.00%
→ Implementing Agglomerative_Complete ...
   ✓ Done | Label change: 81.95%
→ Implementing Agglomerative_Average ...
   ✓ Done | Label change: 85.97%
→ Implementing Agglomerative_Ward ...
   ✓ Done | Label change: 77.03%
→ Implementing DBSCAN ...
   ✗ Failed: Only one cluster formed
→ Implementing OPTICS ...
   ✗ Failed: Only one cluster formed
→ Implementing HDBSCAN ...
   ✗ Failed: Only one cluster formed
→ Implementing MeanShift ...
   ✗ Failed: Only one cluster formed
→ Implementing GMM ...




In [8]:
NUM_CLASSES = 4
MODEL_NAME = "Xception"

print(f"\n===== Processing {MODEL_NAME} =====")

X = np.load(f"{MODEL_NAME}_features.npy").astype(np.float64)

algorithms = get_algorithms(X)
results = []

for algo_name, algo in algorithms.items():
    print(f"→ Implementing {algo_name} ...")

    try:
        # Case 1: callable algorithms (Fuzzy, Spherical)
        if callable(algo):
            labels = algo(X)

        # Case 2: GMM / BayesianGMM
        elif hasattr(algo, "predict") and not hasattr(algo, "fit_predict"):
            algo.fit(X)
            labels = algo.predict(X)

        # Case 3: sklearn clustering
        else:
            labels = algo.fit_predict(X)

        # Skip degenerate clustering
        if len(np.unique(labels)) < 2:
            raise ValueError("Only one cluster formed")

        save_cluster_labels_csv(
            MODEL_NAME,
            algo_name,
            image_names,
            original_labels,
            labels
        )

        change = compute_label_change(original_labels_enc, labels)
        results.append([MODEL_NAME, algo_name, change])

        print(f"   ✓ Done | Label change: {change:.2f}%")

    except Exception as e:
        print(f"   ✗ Failed: {str(e)}")
        continue

pd.DataFrame(
    results,
    columns=["model", "clustering", "label_percentage_change"]
).to_csv(
    "clustering_results.csv",
    mode="a",
    header=not os.path.exists("clustering_results.csv"),
    index=False
)



===== Processing Xception =====
→ Implementing KMeans ...
   ✓ Done | Label change: 69.38%
→ Implementing MiniBatchKMeans ...
   ✓ Done | Label change: 75.36%
→ Implementing KMedoids_PAM ...
   ✓ Done | Label change: 75.47%
→ Implementing BisectingKMeans ...
   ✓ Done | Label change: 71.20%
→ Implementing SphericalKMeans ...
   ✓ Done | Label change: 83.00%
→ Implementing FuzzyCMeans ...
   ✓ Done | Label change: 68.72%
→ Implementing Agglomerative_Single ...
   ✓ Done | Label change: 98.98%
→ Implementing Agglomerative_Complete ...
   ✓ Done | Label change: 87.89%
→ Implementing Agglomerative_Average ...
   ✓ Done | Label change: 92.25%
→ Implementing Agglomerative_Ward ...
   ✓ Done | Label change: 77.17%
→ Implementing DBSCAN ...
   ✓ Done | Label change: 86.00%
→ Implementing OPTICS ...
   ✓ Done | Label change: 99.92%
→ Implementing HDBSCAN ...
   ✓ Done | Label change: 87.00%
→ Implementing GMM ...
   ✓ Done | Label change: 70.98%
→ Implementing BayesianGMM ...




   ✓ Done | Label change: 70.83%
→ Implementing SpectralClustering ...
   ✓ Done | Label change: 85.97%
→ Implementing BIRCH ...
   ✓ Done | Label change: 81.31%


In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import silhouette_score

# ============================
# LOAD FEATURES
# ============================

X_eff = np.load("EfficientNetB0_features.npy")
X_xcep = np.load("Xception_features.npy")

# ============================
# LOAD CLUSTER LABEL FILES
# ============================

eff_kmeans_df = pd.read_csv("clustering_labels/EfficientNetB0_KMeans.csv")
xcep_birch_df = pd.read_csv("clustering_labels/Xception_BIRCH.csv")

labels_eff = eff_kmeans_df["cluster_label"].values
labels_xcep = xcep_birch_df["cluster_label"].values

# ============================
# REMOVE NOISE IF PRESENT (-1)
# ============================

def compute_silhouette(X, labels, name):
    unique_labels = np.unique(labels)

    if len(unique_labels) < 2:
        print(f"{name}: Cannot compute silhouette (only one cluster)")
        return None

    # Remove noise label -1 if exists
    mask = labels != -1
    X_clean = X[mask]
    labels_clean = labels[mask]

    if len(np.unique(labels_clean)) < 2:
        print(f"{name}: Not enough valid clusters after removing noise")
        return None

    score = silhouette_score(X_clean, labels_clean)
    print(f"{name} Silhouette Score: {score:.4f}")
    return score


# ============================
# COMPUTE SCORES
# ============================

score_min = compute_silhouette(
    X_eff,
    labels_eff,
    "Minimum Change (EfficientNetB0 + KMeans)"
)

score_max = compute_silhouette(
    X_xcep,
    labels_xcep,
    "Maximum Change (Xception + Birch)"
)


Minimum Change (EfficientNetB0 + KMeans) Silhouette Score: 0.8922
Maximum Change (Xception + Birch) Silhouette Score: 0.3899
