In [None]:
from sklearn.datasets import make_blobs
import numpy as np
from scipy.spatial.distance import euclidean

# ---------------- KMeans Clustering ---------------- #
class KMeans:
    """K-means clustering (Lloyd's algorithm) with random-sample initialisation.

    Parameters
    ----------
    k : int, default=3
        Number of clusters / centroids.
    max_iters : int, default=100
        Maximum number of assign-then-update iterations.
    tol : float, default=1e-4
        Iteration stops as soon as every centroid has moved by less than
        this Euclidean distance between two consecutive iterations.
    random_state : int or None, default=None
        Seed used to pick the initial centroids. ``None`` draws from NumPy's
        global random state, so ``np.random.seed`` still controls the result.

    Attributes
    ----------
    centroids : ndarray of shape (k, n_features), or None before ``fit``
        Current centroid positions.
    labels_ : ndarray of shape (n_samples,)
        Index of the nearest centroid for every sample passed to ``fit``.
    """

    def __init__(self, k=3, max_iters=100, tol=1e-4, random_state=None):
        self.k = k
        self.max_iters = max_iters
        self.tol = tol
        self.random_state = random_state
        self.centroids = None

    def fit(self, X):
        """Cluster the rows of ``X`` and store ``centroids`` and ``labels_``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        self : KMeans
            The fitted estimator, to allow call chaining.

        Raises
        ------
        ValueError
            Propagated from ``choice`` when ``k`` exceeds the number of samples.
        """
        X = np.asarray(X, dtype=float)
        n_samples, n_features = X.shape

        # Use the global RNG when no seed is given so existing callers that
        # rely on np.random.seed(...) keep getting the same initialisation.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        random_indices = rng.choice(n_samples, self.k, replace=False)
        self.centroids = X[random_indices]  # fancy indexing -> independent copy

        for _ in range(self.max_iters):
            clusters = self._create_clusters(X)
            old_centroids = self.centroids.copy()
            self.centroids = self._calculate_centroids(X, clusters)
            if self._is_converged(old_centroids, self.centroids):
                break

        self.labels_ = self._get_labels(X)
        return self

    def _create_clusters(self, X):
        """Return a list of ``k`` lists holding the sample indices of each cluster."""
        labels = self._get_labels(X)
        return [np.flatnonzero(labels == c).tolist() for c in range(self.k)]

    def _closest_centroid(self, x):
        """Return the index of the centroid nearest to the single sample ``x``."""
        distances = np.linalg.norm(x - self.centroids, axis=1)
        return np.argmin(distances)

    def _calculate_centroids(self, X, clusters):
        """Return new centroids as the mean of each cluster's members.

        A cluster with no members keeps its previous centroid. Resetting it to
        the origin (as a zero-initialised array would) teleports the centroid
        to an arbitrary location and distorts the following assignments.
        """
        X = np.asarray(X, dtype=float)
        if self.centroids is None:
            centroids = np.zeros((self.k, X.shape[1]))
        else:
            centroids = np.array(self.centroids, dtype=float)  # copy
        for idx, members in enumerate(clusters):
            if len(members) > 0:
                centroids[idx] = X[members].mean(axis=0)
        return centroids

    def _is_converged(self, old_centroids, new_centroids):
        """Return True when every centroid moved by less than ``tol``."""
        distances = np.linalg.norm(old_centroids - new_centroids, axis=1)
        return np.all(distances < self.tol)

    def _get_labels(self, X):
        """Return the nearest-centroid index for every row of ``X``."""
        X = np.asarray(X, dtype=float)
        # (n_samples, k) matrix of sample-to-centroid distances in one shot;
        # argmin keeps the lowest centroid index on ties, like the per-sample
        # helper does.
        deltas = X[:, None, :] - self.centroids[None, :, :]
        return np.argmin(np.linalg.norm(deltas, axis=2), axis=1)


# ---------------- Agglomerative Clustering ---------------- #
class AgglomerativeClustering:
    """Bottom-up hierarchical clustering with single (minimum) linkage.

    Every sample starts as its own cluster; the two clusters whose closest
    members are nearest to each other are merged repeatedly until only
    ``n_clusters`` clusters remain.

    Parameters
    ----------
    n_clusters : int, default=2
        Number of clusters to stop at. Must be at least 1.

    Attributes
    ----------
    labels_ : ndarray of shape (n_samples,), dtype int
        Cluster index of every sample, set by ``fit``.
    """

    def __init__(self, n_clusters=2):
        self.n_clusters = n_clusters

    def fit(self, X):
        """Cluster the rows of ``X`` and store the result in ``labels_``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            A 1-D input is treated as ``n_samples`` single-feature rows.

        Returns
        -------
        self : AgglomerativeClustering
            The fitted estimator, to allow call chaining.

        Raises
        ------
        ValueError
            If ``n_clusters`` is smaller than 1.
        """
        if self.n_clusters < 1:
            raise ValueError("n_clusters must be at least 1")

        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        clusters = [[i] for i in range(len(X))]

        # Pairwise point distances, computed once. Row/column p of `dist`
        # always corresponds to clusters[p]; the diagonal is +inf so a cluster
        # is never paired with itself.
        dist = np.linalg.norm(X[:, None, :] - X[None, :, :], axis=-1)
        np.fill_diagonal(dist, np.inf)

        while len(clusters) > self.n_clusters:
            # argmin scans row-major and returns the first minimum. Because
            # `dist` is symmetric, that first hit always has i < j, i.e. the
            # same pair a nested "for i / for j > i" scan with a strict `<`
            # comparison would select.
            i, j = divmod(int(np.argmin(dist)), dist.shape[0])

            # Single linkage: distance from the merged cluster to any other
            # cluster is the smaller of the two former distances.
            merged = np.minimum(dist[i], dist[j])
            merged[i] = np.inf
            dist[i, :] = merged
            dist[:, i] = merged
            dist = np.delete(np.delete(dist, j, axis=0), j, axis=1)

            clusters[i].extend(clusters[j])
            del clusters[j]

        self.labels_ = np.empty(len(X), dtype=int)
        for cluster_idx, cluster in enumerate(clusters):
            for sample_idx in cluster:
                self.labels_[sample_idx] = cluster_idx
        return self

    def _min_cluster_distance(self, X, cluster1, cluster2):
        """Return the smallest Euclidean distance between members of two clusters.

        ``cluster1`` and ``cluster2`` are iterables of row indices into ``X``.
        ``fit`` no longer calls this (it maintains a distance matrix instead);
        the helper is kept so existing callers continue to work.
        """
        return min(euclidean(X[i], X[j]) for i in cluster1 for j in cluster2)






In [8]:
# Create a small synthetic dataset; random_state pins the generated blobs.
X, y_true = make_blobs(n_samples=20, centers=3, random_state=42)

# KMeans draws its initial centroids from NumPy's global random state, so seed
# it here; otherwise the labels printed below change from run to run.
np.random.seed(42)

# K-Means
kmeans = KMeans(k=3)
kmeans.fit(X)

# Agglomerative (deterministic: no random initialisation involved)
agg = AgglomerativeClustering(n_clusters=3)
agg.fit(X)

# Output
# Cluster indices are arbitrary identifiers, so the two algorithms may number
# the same groups differently.
print("KMeans cluster labels:")
print(kmeans.labels_)

print("\nAgglomerative cluster labels:")
print(agg.labels_)

KMeans cluster labels:
[0 0 0 0 1 2 0 1 1 1 2 1 2 0 2 0 2 2 1 1]

Agglomerative cluster labels:
[0 0 0 0 1 2 0 1 1 1 2 1 2 0 2 0 2 2 1 1]
