In [4]:
import numpy as np

def initialize_centroids(X, k, rng=None):
    """Pick ``k`` distinct rows of ``X`` to serve as the initial centroids.

    Parameters
    ----------
    X : ndarray
        Data set; rows (axis 0) are the candidate points.
    k : int
        Number of centroids to draw.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. Pass a seeded generator (for example
        ``np.random.default_rng(42)``) to make the initialization
        reproducible. When omitted, NumPy's global random state is used,
        which is the historical behaviour of this function.

    Returns
    -------
    ndarray
        The ``k`` selected rows of ``X``.

    Raises
    ------
    ValueError
        Raised by NumPy when ``k`` exceeds the number of rows, because the
        rows are sampled without replacement.
    """
    n_samples = X.shape[0]
    # Sampling without replacement guarantees k different row indices.
    if rng is None:
        indices = np.random.choice(n_samples, k, replace=False)
    else:
        indices = rng.choice(n_samples, k, replace=False)
    return X[indices]

def assign_clusters(X, centroids):
    """Label every point with the index of its closest centroid.

    ``X`` is expected to be 2-D (one point per row) and ``centroids`` 2-D
    (one centroid per row), since the norm below is taken over axis 2 of
    their broadcast difference. Returns a 1-D array with one centroid index
    per row of ``X``; on a tie the lowest index wins (``argmin`` behaviour).
    """
    # Inserting an axis after the first one lets broadcasting pair every
    # point with every centroid.
    offsets = X[:, np.newaxis] - centroids
    # Euclidean length of each point-to-centroid offset.
    dist_to_centroid = np.linalg.norm(offsets, axis=2)
    return dist_to_centroid.argmin(axis=1)

def update_centroids(X, labels, k):
    """Recompute each centroid as the mean of the points assigned to it.

    Parameters
    ----------
    X : ndarray
        Data set, one point per row.
    labels : ndarray
        Cluster index for each row of ``X``.
    k : int
        Number of clusters; centroids are produced for indices ``0..k-1``.

    Returns
    -------
    ndarray
        Array with one centroid per cluster index.

    Raises
    ------
    ValueError
        If a cluster is empty and ``X`` has no rows to reseed it from.

    Notes
    -----
    A cluster with no members has no mean: NumPy would return NaN, and a NaN
    centroid corrupts the next nearest-centroid search. Empty clusters are
    therefore reseeded at the data points lying farthest from the overall
    data mean (first empty cluster gets the farthest point, the next one the
    second farthest, and so on).
    """
    X = np.asarray(X)
    labels = np.asarray(labels)

    rows = [None] * k
    empty = []
    for i in range(k):
        members = X[labels == i]
        if members.shape[0]:
            rows[i] = members.mean(axis=0)
        else:
            empty.append(i)

    if empty:
        if X.shape[0] == 0:
            raise ValueError("cannot compute centroids of an empty data set")
        # Rank the points by distance from the global mean, farthest first.
        flat = X.reshape(X.shape[0], -1)
        spread = np.linalg.norm(flat - flat.mean(axis=0), axis=1)
        farthest_first = np.argsort(spread)[::-1]
        for slot, cluster in enumerate(empty):
            # Wrap around if there are more empty clusters than points.
            rows[cluster] = X[farthest_first[slot % farthest_first.size]]

    centroids = np.array(rows)
    # Shape trace printed on every call.
    print("centroids: ", centroids.shape)
    return centroids

def kmeans(X, k, max_iters=100, tol=1e-4):
    """Cluster the rows of ``X`` into ``k`` groups by iterative refinement.

    Starting from ``initialize_centroids(X, k)``, the loop alternates between
    assigning points to their nearest centroid and recomputing the centroids,
    until every centroid moves by less than ``tol`` or ``max_iters`` rounds
    have been performed.

    Parameters
    ----------
    X : ndarray
        Data set, one point per row.
    k : int
        Number of clusters.
    max_iters : int, optional
        Upper bound on the number of assign/update rounds.
    tol : float, optional
        Convergence threshold on the Euclidean shift of each centroid.

    Returns
    -------
    (centroids, labels)
        The final centroids and, for every row of ``X``, the index of the
        nearest of those centroids.
    """
    centroids = initialize_centroids(X, k)
    for _ in range(max_iters):
        old_centroids = centroids
        labels = assign_clusters(X, old_centroids)
        centroids = update_centroids(X, labels, k)
        # Converged once no centroid moved by tol or more.
        if np.all(np.linalg.norm(centroids - old_centroids, axis=1) < tol):
            break
    # The labels from inside the loop were computed against the centroids
    # *before* the last update (and do not exist at all when max_iters is 0),
    # so assign once more against the centroids that are actually returned.
    labels = assign_clusters(X, centroids)
    return centroids, labels

# Example usage
if __name__ == "__main__":
    # Six 2-D sample points
    X = np.array([
        [1.0, 2.0],
        [1.5, 1.8],
        [5.0, 8.0],
        [8.0, 8.0],
        [1.0, 0.6],
        [9.0, 11.0],
    ])

    # Split the sample into two clusters
    k = 2
    centroids, labels = kmeans(X, k)

    # Heading and value on separate lines
    print("Centroids:", centroids, sep="\n")
    print("Labels:", labels, sep="\n")


centroids:  (2, 2)
centroids:  (2, 2)
centroids:  (2, 2)
Centroids:
[[1.16666667 1.46666667]
 [7.33333333 9.        ]]
Labels:
[0 0 1 1 0 1]


In [11]:
# Integers 0..19 in descending order (reversed with a negative-step slice)
arr = np.arange(0, 20)[::-1]
arr

array([19, 18, 17, 16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,
        2,  1,  0])

In [15]:
# View the 20 values as a 4x5 grid, then take the maximum of each column
n = arr.reshape((4, 5))
n.max(axis=0)

array([19, 18, 17, 16, 15])

In [19]:
# Row index of the smallest entry in each column of n
n.argmin(axis=0)

array([3, 3, 3, 3, 3])

In [21]:
# squeeze only drops length-1 axes; n is 4x5, so it comes back unchanged
np.squeeze(n)

array([[19, 18, 17, 16, 15],
       [14, 13, 12, 11, 10],
       [ 9,  8,  7,  6,  5],
       [ 4,  3,  2,  1,  0]])