In [None]:
import os
import random

import cv2
import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN, HDBSCAN
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score


In [None]:
from DL_Datasets import *
from DL_Models import *
from DL_ModelTrainer import ModelTrainer

# Build the dataset/model pair and hand both, together with a checkpoint path,
# to the trainer object.
dataset = ImageDataset("heredenene")
model = PowerOf2s256andAbove()
mt = ModelTrainer(
    num_of_epochs=100,
    lr=0.001,
    batch_size=16,
    loss_type="mse",
    dataset=dataset,
    model=model,
    ckpt_path="heredenene_PowerOf2s256andAbove_mse_05:16:19:01:53/min_loss:0.06544473022222519_epoch:19.pth",
)

# Calling the trainer returns a mapping; its keys and values are split into two
# parallel collections (same iteration order) for the clustering cells below.
# NOTE(review): presumably image path -> feature vector — confirm against
# ModelTrainer.__call__.
features = mt()
paths = [*features.keys()]
reps = np.array([*features.values()])

In [None]:
def get_labels(method="", number_of_clusters=None, max_iter=200, DBSCAN_eps=0.5, DBSCAN_min_samples=5, HDBSCAN_min_cluster_size=5, HDBSCAN_max_cluster_size=None, verbose=0, data=None):
    """Cluster feature vectors with the selected algorithm and return the labels.

    Parameters
    ----------
    method : str
        Which algorithm to run (case-insensitive, surrounding whitespace ignored):
        ``""``, ``"kmeans"`` or ``"k-means"`` -> KMeans (the empty string is kept
        as an alias so existing ``get_labels()`` calls behave as before);
        ``"agglomerative"`` or ``"agg"`` -> AgglomerativeClustering;
        ``"dbscan"`` -> DBSCAN;
        ``"gmm"`` or ``"gaussian_mixture"`` -> GaussianMixture;
        ``"hdbscan"`` -> HDBSCAN.
    number_of_clusters : int
        Cluster / component count. Required for KMeans, AgglomerativeClustering
        and GaussianMixture; not used by DBSCAN and HDBSCAN.
    max_iter : int
        Forwarded as ``max_iter`` to KMeans and GaussianMixture.
    DBSCAN_eps, DBSCAN_min_samples
        Forwarded to DBSCAN as ``eps`` and ``min_samples``.
    HDBSCAN_min_cluster_size, HDBSCAN_max_cluster_size
        Forwarded to HDBSCAN as ``min_cluster_size`` and ``max_cluster_size``.
    verbose : int
        Forwarded as ``verbose`` to KMeans and GaussianMixture.
    data : array-like, optional
        Samples to cluster. When omitted, the notebook-level ``reps`` is used.

    Returns
    -------
    The value returned by the chosen estimator's ``fit_predict``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the names above, or if ``number_of_clusters``
        is missing for an algorithm that needs it.
    """
    kmeans_names = ("", "kmeans", "k-means")
    agglomerative_names = ("agglomerative", "agg")
    dbscan_names = ("dbscan",)
    gmm_names = ("gmm", "gaussian_mixture")
    hdbscan_names = ("hdbscan",)
    needs_cluster_count = kmeans_names + agglomerative_names + gmm_names
    known_names = needs_cluster_count + dbscan_names + hdbscan_names

    key = str(method).strip().lower()

    # Validate before touching any estimator so mistakes surface as a clear
    # error instead of an unbound `labels` at the return statement.
    if key not in known_names:
        supported = ", ".join(repr(name) for name in known_names)
        raise ValueError(f"Unknown clustering method {method!r}; expected one of: {supported}")
    if key in needs_cluster_count and number_of_clusters is None:
        raise ValueError(f"number_of_clusters is required for method {method!r}")

    # Fall back to the notebook-level feature matrix when no data is supplied.
    points = reps if data is None else data

    if key in kmeans_names:
        estimator = KMeans(n_clusters=number_of_clusters, max_iter=max_iter, verbose=verbose)
    elif key in agglomerative_names:
        estimator = AgglomerativeClustering(n_clusters=number_of_clusters)
    elif key in dbscan_names:
        estimator = DBSCAN(eps=DBSCAN_eps, min_samples=DBSCAN_min_samples)
    elif key in gmm_names:
        estimator = GaussianMixture(n_components=number_of_clusters, max_iter=max_iter, verbose=verbose)
    else:  # only the HDBSCAN names remain after validation
        estimator = HDBSCAN(min_cluster_size=HDBSCAN_min_cluster_size, max_cluster_size=HDBSCAN_max_cluster_size)

    return estimator.fit_predict(points)

In [None]:
def evaluate_cluster_metrics(data, max_k, random_state=None):
    """Fit KMeans for k = 2..max_k and plot four model-selection curves.

    For every k the function records the KMeans inertia plus the silhouette,
    Davies-Bouldin and Calinski-Harabasz scores of the resulting labels, then
    shows them in a 2x2 grid of line charts (one chart per metric).

    Parameters
    ----------
    data : array-like
        Samples to cluster; passed unchanged to ``KMeans.fit`` and to the
        scoring functions.
    max_k : int
        Largest number of clusters to try (inclusive). Must be at least 2 and
        smaller than the number of samples, because the three scores are only
        defined for 2 to n_samples - 1 distinct labels.
    random_state : optional
        Forwarded to ``KMeans`` so a sweep can be repeated exactly. The default
        ``None`` leaves KMeans unseeded, as before.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.

    Raises
    ------
    ValueError
        If ``max_k`` is below 2 or not smaller than the number of samples.
    """
    # Array-likes expose `.shape`; plain sequences fall back to len().
    try:
        n_samples = data.shape[0]
    except AttributeError:
        n_samples = len(data)

    # Fail fast with a clear message instead of drawing empty charts (max_k < 2)
    # or erroring inside a metric after several wasted fits (max_k too large).
    if max_k < 2:
        raise ValueError(f"max_k must be at least 2, got {max_k}")
    if max_k >= n_samples:
        raise ValueError(
            f"max_k must be smaller than the number of samples ({n_samples}), got {max_k}"
        )

    k_values = range(2, max_k + 1)
    inertias = []
    silhouette_scores = []
    db_scores = []
    ch_scores = []

    for k in k_values:
        kmeans = KMeans(n_clusters=k, random_state=random_state)
        kmeans.fit(data)
        labels = kmeans.labels_

        inertias.append(kmeans.inertia_)
        silhouette_scores.append(silhouette_score(data, labels))
        db_scores.append(davies_bouldin_score(data, labels))
        ch_scores.append(calinski_harabasz_score(data, labels))

    # (values, y-axis label, title) for each panel, in row-major grid order.
    panels = (
        (inertias, 'Inertia', 'Elbow Method for Optimal k'),
        (silhouette_scores, 'Silhouette Score', 'Silhouette Score for Optimal k'),
        (db_scores, 'Davies-Bouldin Index', 'Davies-Bouldin Index for Optimal k'),
        (ch_scores, 'Calinski-Harabasz Index', 'Calinski-Harabasz Index for Optimal k'),
    )

    fig, axes = plt.subplots(2, 2, figsize=(12, 12))
    for ax, (values, ylabel, title) in zip(axes.flat, panels):
        ax.plot(k_values, values, marker='o')
        ax.set_xlabel('Number of clusters')
        ax.set_ylabel(ylabel)
        ax.set_title(title)

    fig.tight_layout()
    plt.show()

# Example usage: sweep k from 2 through 17 over the extracted representations
evaluate_cluster_metrics(data=reps, max_k=17)

In [None]:
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score

def evaluate_dbscan_metrics(data, eps, min_samples):
    """Grid-search DBSCAN settings and score each usable result by silhouette.

    Every ``(eps, min_samples)`` pair is fitted with DBSCAN. Points labelled
    ``-1`` (noise) are dropped, and the silhouette score is computed on the
    remaining points only. A pair is skipped when its result cannot be scored:
    fewer than two clusters remain after removing noise, or every remaining
    point sits in its own cluster.

    Parameters
    ----------
    data : array-like
        Samples to cluster. Objects without a ``shape`` attribute are converted
        with ``np.asarray`` so they can be indexed with a boolean mask.
    eps : iterable
        Values tried for DBSCAN's ``eps``.
    min_samples : iterable
        Values tried for DBSCAN's ``min_samples``.

    Returns
    -------
    list of tuple
        ``(eps, min_samples, silhouette)`` for each scoreable pair, in grid
        order (outer loop over ``eps``, inner loop over ``min_samples``).
    """
    # Boolean-mask row selection needs an array-like; leave objects that
    # already carry a shape (arrays, sparse matrices, frames) untouched.
    points = data if hasattr(data, "shape") else np.asarray(data)
    scores = []

    for e in eps:
        for m in min_samples:
            labels = np.asarray(DBSCAN(eps=e, min_samples=m).fit_predict(points))

            # Noise is not a cluster: leave it out of both the cluster count
            # and the silhouette computation.
            clustered = labels != -1
            n_clustered = int(clustered.sum())
            n_clusters = len(set(labels[clustered].tolist()))

            # Silhouette is only defined for 2 .. n_points - 1 distinct labels.
            if 2 <= n_clusters < n_clustered:
                score = silhouette_score(points[clustered], labels[clustered])
                scores.append((e, m, score))

    return scores

# Example usage: candidate grids for the two DBSCAN hyperparameters
eps_values = [0.5, 1.0, 1.5]  # epsilon values to try
min_samples_values = [5, 10, 15]  # min_samples values to try

scores = evaluate_dbscan_metrics(
    data=reps,
    eps=eps_values,
    min_samples=min_samples_values,
)

# Report one line per parameter pair that produced a score.
for e, m, score in scores:
    print(f"Parameters: eps={e}, min_samples={m}, Silhouette Score: {score}")