# In [3]:
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans

# -----------------------------
# 1. Generate Synthetic Dataset
# -----------------------------
# 300 two-dimensional-by-default samples drawn around 4 blob centres; the fixed
# random_state keeps the dataset identical on every run.
X, y_true = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=1.2,
    random_state=42,
)

# -----------------------------
# 2. Distance Function (Euclidean)
# -----------------------------
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    The element-wise difference ``a - b`` is squared, summed over every
    element, and the square root of that total is returned.
    """
    offset = a - b
    squared_length = np.sum(offset ** 2)
    return np.sqrt(squared_length)

# -----------------------------
# 3. Initialize Centroids
# -----------------------------
def initialize_centroids(X, k):
    """Pick ``k`` distinct rows of ``X`` to serve as the starting centroids.

    Row positions are drawn without replacement via ``np.random.choice``, so
    the selection is driven by NumPy's global random state (seed it with
    ``np.random.seed`` for reproducible starts).
    """
    n_points = len(X)
    chosen_rows = np.random.choice(n_points, size=k, replace=False)
    return X[chosen_rows]

# -----------------------------
# 4. Assign Clusters
# -----------------------------
def assign_clusters(X, centroids):
    """Label every point in ``X`` with the index of its nearest centroid.

    Distances are measured with ``euclidean_distance``; when several
    centroids are equally close, ``np.argmin`` keeps the lowest index.
    Returns a NumPy array holding one label per point, in the order the
    points appear in ``X``.
    """
    def nearest_centroid(point):
        # Position (within ``centroids``) of the centroid closest to ``point``.
        return np.argmin([euclidean_distance(point, center) for center in centroids])

    return np.array([nearest_centroid(point) for point in X])

# -----------------------------
# 5. Update Centroids
# -----------------------------
def update_centroids(X, clusters, k):
    """Recompute the ``k`` centroids from the current cluster assignment.

    Each centroid becomes the mean of the points labelled with its index.
    A cluster that currently owns no points has no mean (``mean`` of an empty
    slice is NaN), so it is re-seeded at the data point lying farthest from
    the mean of its own cluster. This keeps every centroid finite and puts
    the idle centroid where the current fit is worst.

    Parameters
    ----------
    X : array-like
        Data points, one per row.
    clusters : array-like of int
        Cluster label of every row of ``X``.
    k : int
        Number of clusters; labels are matched against ``range(k)``.

    Returns
    -------
    numpy.ndarray
        Array with one centroid per cluster, indexed by cluster label.

    Raises
    ------
    ValueError
        If ``k > 0`` but no point carries a label in ``range(k)`` (this
        includes an empty ``X``), because no centroid can be computed.
    """
    X = np.asarray(X)
    clusters = np.asarray(clusters)

    new_centroids = [None] * k
    empty = []
    for i in range(k):
        points = X[clusters == i]
        if len(points) > 0:
            new_centroids[i] = points.mean(axis=0)
        else:
            empty.append(i)

    if empty:
        if len(empty) == k:
            raise ValueError(
                "cannot update centroids: no point is assigned to any of the "
                f"{k} clusters"
            )

        # Squared distance of every point to the mean of its own cluster;
        # points whose label matches no occupied cluster stay at -inf so they
        # are only picked as a last resort.
        spread = np.full(X.shape[0], -np.inf)
        for i in range(k):
            if new_centroids[i] is None:
                continue
            members = clusters == i
            offsets = X[members] - new_centroids[i]
            offsets = offsets.reshape(offsets.shape[0], -1)
            spread[members] = np.sum(offsets ** 2, axis=1)

        for i in empty:
            farthest = int(np.argmax(spread))
            new_centroids[i] = X[farthest]
            # Never hand the same point to two empty clusters. The donor
            # cluster's mean is not recomputed here; the next assignment /
            # update round corrects it.
            spread[farthest] = -np.inf

    return np.array(new_centroids)

# -----------------------------
# 6. WCSS Calculation
# -----------------------------
def calculate_wcss(X, clusters, centroids):
    """Return the within-cluster sum of squares (WCSS) of a clustering.

    WCSS is the sum, over all points, of the squared Euclidean distance
    between the point and the centroid of the cluster it is assigned to.

    Parameters
    ----------
    X : array-like
        Data points, one per row.
    clusters : array-like of int
        For every row of ``X``, the index into ``centroids`` of its cluster.
    centroids : array-like
        Cluster centres, one per row.

    Returns
    -------
    float
        The WCSS; ``0.0`` when ``X`` contains no points.

    Raises
    ------
    ValueError
        If ``clusters`` does not hold exactly one label per point.
    """
    X = np.asarray(X)
    centroids = np.asarray(centroids)
    labels = np.asarray(clusters)

    # Checked explicitly: a single label would otherwise broadcast silently
    # against every row of X and yield a meaningless total.
    if len(labels) != len(X):
        raise ValueError(
            f"expected one cluster label per point: got {len(labels)} labels "
            f"for {len(X)} points"
        )
    if len(X) == 0:
        return 0.0

    # centroids[labels] lines up each point with its own centroid, so the
    # whole sum is a single vectorized expression instead of a Python loop.
    residuals = X - centroids[labels]
    return np.sum(residuals ** 2)

# -----------------------------
# 7. K-Means Algorithm
# -----------------------------
def kmeans(X, k, max_iters=100, tol=1e-4):
    """Cluster ``X`` into ``k`` groups with Lloyd's k-means iteration.

    Starting from ``initialize_centroids(X, k)``, the function alternates
    between assigning every point to its nearest centroid and recomputing
    the centroids, until no centroid coordinate moves by ``tol`` or more, or
    until ``max_iters`` rounds have run.

    Parameters
    ----------
    X : numpy.ndarray
        Data points, one per row.
    k : int
        Number of clusters.
    max_iters : int, optional
        Upper bound on the number of assign/update rounds.
    tol : float, optional
        Convergence threshold on the absolute change of each centroid
        coordinate between two rounds.

    Returns
    -------
    clusters : numpy.ndarray
        Cluster label of every point, computed from the returned centroids.
    centroids : numpy.ndarray
        Final centroids, one per row. On convergence these are the centroids
        the labels were computed from; the last update moved them by less
        than ``tol``.
    """
    centroids = initialize_centroids(X, k)

    for _ in range(max_iters):
        clusters = assign_clusters(X, centroids)
        new_centroids = update_centroids(X, clusters, k)

        # A non-finite centroid (e.g. the mean of a cluster that lost all its
        # points) would turn every later distance into NaN and make the
        # convergence test below permanently False; keep such a centroid at
        # its previous position instead.
        finite = np.isfinite(new_centroids).reshape(len(new_centroids), -1)
        lost = ~finite.all(axis=1)
        if lost.any():
            new_centroids[lost] = centroids[lost]

        if np.all(np.abs(new_centroids - centroids) < tol):
            break

        centroids = new_centroids
    else:
        # The loop ended without converging (or never ran because
        # max_iters <= 0): the labels, if any, belong to the previous
        # centroids, so assign once more against the centroids being returned.
        clusters = assign_clusters(X, centroids)

    return clusters, centroids

# -----------------------------
# 8. Run Algorithm Multiple Times
# -----------------------------
k = 4
results = []

# One independent run per seed: re-seeding NumPy's global RNG changes which
# points are drawn as the initial centroids inside kmeans().
for seed in (0, 10, 20):
    np.random.seed(seed)
    clusters, centroids = kmeans(X, k)

    # Quality of this run: compactness (WCSS) and separation (silhouette).
    wcss = calculate_wcss(X, clusters, centroids)
    sil_score = silhouette_score(X, clusters)
    results.append((seed, wcss, sil_score))

# -----------------------------
# 9. Print Results
# -----------------------------
print("Custom K-Means Results:")
# Each entry of ``results`` is a (seed, WCSS, silhouette) triple.
for run_seed, run_wcss, run_silhouette in results:
    print(f"Seed={run_seed}, WCSS={run_wcss:.2f}, Silhouette={run_silhouette:.4f}")

# -----------------------------
# 10. Compare with sklearn
# -----------------------------
# Reference run with scikit-learn. n_clusters reuses ``k`` so the comparison
# cannot drift from the custom runs above. n_init is spelled out because its
# default differs between scikit-learn releases (and some releases warn when
# it is left implicit); the best of the 10 initialisations is kept.
sk_kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
sk_labels = sk_kmeans.fit_predict(X)

# inertia_ is scikit-learn's name for the within-cluster sum of squares.
sk_wcss = sk_kmeans.inertia_
sk_sil = silhouette_score(X, sk_labels)

print("\nSklearn K-Means Results:")
print(f"WCSS={sk_wcss:.2f}, Silhouette={sk_sil:.4f}")

# Output:
# Custom K-Means Results:
# Seed=0, WCSS=2272.97, Silhouette=0.6049
# Seed=10, WCSS=805.48, Silhouette=0.7518
# Seed=20, WCSS=2273.53, Silhouette=0.6052
#
# Sklearn K-Means Results:
# WCSS=805.48, Silhouette=0.7518
