In [None]:
import numpy as np

def initialize_centroids(X, k):
    """Pick ``k`` distinct rows of ``X`` to serve as the initial centroids.

    The selection is deterministic: it is driven by a private random
    generator seeded with 42, so repeated calls on the same data return the
    same rows.  The process-wide NumPy random state is left untouched.

    Args:
        X: Array-like whose first axis indexes the data points.
        k: Number of centroids to pick.

    Returns:
        Array holding the ``k`` selected rows of ``X``.

    Raises:
        ValueError: Raised by NumPy's ``choice`` when ``k`` exceeds the
            number of data points (sampling is without replacement).
    """
    X = np.asarray(X)
    # A local generator yields the same draws as seeding the global one with
    # 42, but does not reset the random stream used by the rest of the program.
    rng = np.random.RandomState(42)
    indices = rng.choice(X.shape[0], k, replace=False)
    return X[indices]

def assign_clusters(X, centroids):
    """Return, for every row of ``X``, the index of its nearest centroid.

    Distances are Euclidean norms taken over the last axis of the pairwise
    point-minus-centroid differences.  When two centroids are equally close
    the lower index is chosen, as ``argmin`` returns the first minimum.
    """
    # Inserting an axis after the first lets broadcasting pair every point
    # with every centroid.
    offsets = X[:, np.newaxis] - centroids
    point_to_centroid = np.linalg.norm(offsets, axis=2)
    return point_to_centroid.argmin(axis=1)

def update_centroids(X, labels, k):
    """Recompute each centroid as the mean of the points assigned to it.

    A cluster that received no points has no mean; taking one would yield a
    NaN centroid.  Such clusters are instead re-seeded on the data points
    lying farthest from the overall mean of ``X`` (one distinct point per
    empty cluster while enough points exist), so the result never contains
    NaN caused by an empty cluster.

    Args:
        X: Array-like whose first axis indexes the data points.
        labels: Cluster index of every data point.
        k: Number of clusters; centroids are produced for ``0 .. k-1``.

    Returns:
        Array with one centroid per cluster, ordered by cluster index.

    Raises:
        ValueError: If a cluster is empty and ``X`` holds no data points
            to re-seed it from.
    """
    X = np.asarray(X)
    labels = np.asarray(labels)

    members = [X[labels == i] for i in range(k)]
    empty = [i for i, pts in enumerate(members) if len(pts) == 0]
    if not empty:
        return np.array([pts.mean(axis=0) for pts in members])

    if len(X) == 0:
        raise ValueError("cannot compute centroids from an empty data set")

    # Rank the points from farthest to nearest relative to the global mean;
    # the stable sort keeps the choice deterministic when distances tie.
    spread = np.linalg.norm((X - X.mean(axis=0)).reshape(len(X), -1), axis=1)
    farthest_first = np.argsort(-spread, kind="stable")
    # Cycle through the points if there are more empty clusters than points.
    picks = farthest_first[np.arange(len(empty)) % len(X)]
    reseed = dict(zip(empty, picks))

    return np.array([
        pts.mean(axis=0) if len(pts) else X[reseed[i]].astype(float)
        for i, pts in enumerate(members)
    ])

def kmeans(X, k, max_iters=100):
    """Cluster the rows of ``X`` into ``k`` groups with Lloyd's algorithm.

    Starting from the centroids chosen by ``initialize_centroids``, the
    function alternates between recomputing centroids and reassigning points
    until the centroids stop changing or ``max_iters`` centroid updates have
    been performed.

    Args:
        X: Array-like whose first axis indexes the data points.
        k: Number of clusters.
        max_iters: Maximum number of centroid updates.  With a value of zero
            or less, the initial centroids and the labels they induce are
            returned.

    Returns:
        A ``(labels, centroids)`` tuple.  ``labels`` is always the
        assignment induced by the returned ``centroids``.
    """
    X = np.asarray(X)
    centroids = initialize_centroids(X, k)
    # Assign once up front so ``labels`` exists even when the loop body
    # never runs.
    labels = assign_clusters(X, centroids)

    for _ in range(max_iters):
        new_centroids = update_centroids(X, labels, k)
        converged = np.array_equal(centroids, new_centroids)
        centroids = new_centroids

        # Unchanged centroids mean the current labels are already final.
        if converged:
            break

        # Reassign after every real update so the labels never lag behind
        # the centroids, including when the iteration budget runs out.
        labels = assign_clusters(X, centroids)

    return labels, centroids

# Demo: six 2-D points laid out in two columns (x = 1 and x = 4),
# split into two clusters.
X = np.array([
    (1, 2),
    (1, 4),
    (1, 0),
    (4, 2),
    (4, 4),
    (4, 0),
])

k = 2
labels, centroids = kmeans(X, k)

# Report the cluster index of every point, then the final centroids.
print(f"Cluster Labels: {labels}")
print(f"Centroids: {centroids}")


Cluster Labels: [0 1 0 0 1 0]
Centroids: [[2.5 1. ]
 [2.5 4. ]]
