## 📦 Q9. Implement K-Means clustering from scratch using only NumPy for a 2D dataset. No plotting or external libraries allowed.
Goal: Assign each data point to the nearest centroid and iteratively update centroids until convergence.
```python
np.random.seed(42)
cluster1 = np.random.randn(100, 2) + [5, 5]
cluster2 = np.random.randn(100, 2) + [0, 0]
data = np.vstack((cluster1, cluster2))
```

In [2]:
import numpy as np

In [3]:
# Seed NumPy's global RNG so the synthetic dataset is reproducible.
np.random.seed(42)
# Two blobs of 100 standard-normal 2D points: one shifted to (5, 5),
# the other left centred on the origin.
cluster1 = [5, 5] + np.random.randn(100, 2)
cluster2 = [0, 0] + np.random.randn(100, 2)
# Stack the blobs row-wise into a single (200, 2) array.
data = np.concatenate([cluster1, cluster2], axis=0)

In [4]:
def initialize_centroids(data, k):
    """Choose k distinct rows of ``data`` at random as starting centroids.

    Rows are drawn without replacement using NumPy's global random state,
    so seeding ``np.random`` beforehand makes the choice reproducible.
    """
    n_points = data.shape[0]
    chosen_rows = np.random.choice(n_points, size=k, replace=False)
    starting_centroids = data[chosen_rows]
    return starting_centroids

In [5]:
def assign_clusters(data, centroids):
    """Return, for every row of ``data``, the index of its nearest centroid.

    Distances are Euclidean. When a point is equally close to several
    centroids, the lowest centroid index wins (``argmin`` semantics).
    """
    # The extra axis lets every point broadcast against every centroid,
    # giving one offset vector per (point, centroid) pair.
    offsets = data[:, np.newaxis] - centroids
    dist_to_each = np.linalg.norm(offsets, axis=2)
    nearest = dist_to_each.argmin(axis=1)
    return nearest

In [6]:
def update_centroids(data, labels, k, prev_centroids=None):
    """Recompute each centroid as the mean of the points assigned to it.

    Parameters
    ----------
    data : np.ndarray
        Points, one per row.
    labels : np.ndarray
        Cluster index in ``range(k)`` for each row of ``data``.
    k : int
        Number of clusters.
    prev_centroids : array-like, optional
        Centroids from the previous iteration. When given, a cluster that
        received no points keeps its previous centroid.

    Returns
    -------
    np.ndarray
        Array with one centroid per cluster, in cluster-index order.

    Notes
    -----
    Taking the mean of an empty cluster would produce NaN (and a
    RuntimeWarning), which then poisons every later distance computation.
    Empty clusters are therefore handled explicitly: they keep their
    previous centroid when ``prev_centroids`` is supplied, and otherwise
    fall back to the mean of the whole dataset.
    """
    centroids = []
    for i in range(k):
        members = data[labels == i]
        if len(members) > 0:
            centroids.append(members.mean(axis=0))
        elif prev_centroids is not None:
            # Empty cluster: leave the centroid where it was.
            centroids.append(np.asarray(prev_centroids)[i])
        else:
            # Empty cluster and no history: use a finite, deterministic
            # fallback instead of NaN.
            centroids.append(data.mean(axis=0))
    return np.array(centroids)

In [7]:
def k_means_numpy(data, k=2, max_iters=100, tol=1e-4):
    """Cluster ``data`` into ``k`` groups with Lloyd's K-Means algorithm.

    Parameters
    ----------
    data : np.ndarray
        Points to cluster, one per row.
    k : int
        Number of clusters.
    max_iters : int
        Upper bound on assignment/update iterations. ``0`` skips the
        iterations and returns the initial centroids with their labels.
    tol : float
        Absolute tolerance passed to ``np.allclose`` when deciding whether
        the centroids have stopped moving (NumPy's default relative
        tolerance also applies).

    Returns
    -------
    (np.ndarray, np.ndarray)
        The final centroids and, for each point, the index of the nearest
        final centroid.
    """
    centroids = initialize_centroids(data, k)
    for _ in range(max_iters):
        old_centroids = centroids.copy()
        labels = assign_clusters(data, centroids)
        centroids = update_centroids(data, labels, k)

        # A cluster that received no points has no meaningful mean (a
        # plain mean would be NaN, and NaN never compares close, so the
        # loop could not converge). Keep its previous centroid instead.
        empty = np.bincount(labels, minlength=k) == 0
        if empty.any():
            centroids[empty] = old_centroids[empty]

        if np.allclose(old_centroids, centroids, atol=tol):
            break

    # Assign against the centroids actually returned so both outputs are
    # consistent; this also defines ``labels`` when max_iters is 0.
    labels = assign_clusters(data, centroids)
    return centroids, labels


In [8]:
# Run K-Means with two clusters on the dataset and unpack the returned
# (centroids, labels) pair.
final_centroids, cluster_labels = k_means_numpy(data, k=2)

In [9]:
# Show the learned centroids and the cluster index of the first ten points.
print("Final Centroids:\n", final_centroids)
print("\nCluster Labels (first 10):\n", cluster_labels[:10])

Final Centroids:
 [[0.12824872 0.04348765]
 [4.88443575 5.03402232]]

Cluster Labels (first 10):
 [1 1 1 1 1 1 1 1 1 1]
