# In [1]:
import numpy as np

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``.

    The operands are subtracted element-wise, the differences are squared
    and summed over every element, and the square root of that total is
    returned.
    """
    delta = x1 - x2
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

def _assign_labels(points, centroids):
    """Return, for each row of ``points``, the index of its nearest centroid."""
    # Broadcast to an (n_points, n_centroids, n_features) array of differences.
    diffs = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    # Squared distances are enough: sqrt is monotonic, so it would not
    # change which centroid is the closest.
    return np.argmin(np.sum(diffs ** 2, axis=2), axis=1)


def kmeans(X, k, max_iters=100):
    """Cluster the samples in ``X`` into ``k`` groups with Lloyd's algorithm.

    Initial centroids are ``k`` distinct samples drawn at random through the
    global ``np.random`` state, so call ``np.random.seed`` beforehand for
    reproducible results.

    Args:
        X: Array-like whose first axis indexes the samples. Any remaining
            axes are treated as the features of one sample.
        k: Number of clusters; must satisfy ``1 <= k <= len(X)``.
        max_iters: Maximum number of centroid updates. With ``0`` the
            randomly chosen initial centroids are returned as they are.

    Returns:
        A ``(centroids, labels)`` tuple. ``centroids`` is a float array of
        shape ``(k,) + X.shape[1:]`` and ``labels`` holds, for every sample,
        the index of the nearest returned centroid.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            samples.
    """
    X = np.asarray(X, dtype=float)
    n_samples = len(X)
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )

    # Work on a 2-D (n_samples, n_features) view so that one code path serves
    # 1-D, 2-D and higher-dimensional input alike.
    sample_shape = X.shape[1:]
    points = X.reshape(n_samples, -1)

    # Randomly initialize centroids from distinct samples.
    seeds = np.random.choice(n_samples, k, replace=False)
    centroids = points[seeds]
    labels = _assign_labels(points, centroids)

    for _ in range(max_iters):
        # Start from the current centroids so that a cluster which lost all
        # of its members keeps its position instead of turning into NaN
        # (the mean of an empty selection).
        new_centroids = centroids.copy()
        for i in range(k):
            members = points[labels == i]
            if len(members):
                new_centroids[i] = members.mean(axis=0)

        # If centroids have converged, stop; labels already match centroids.
        if np.allclose(centroids, new_centroids):
            break

        centroids = new_centroids
        # Re-assign right away so the returned labels always correspond to
        # the returned centroids, even when the iteration budget runs out.
        labels = _assign_labels(points, centroids)

    return centroids.reshape((k,) + sample_shape), labels

# Demo: cluster a small 2-D data set when the file is run as a script.
if __name__ == "__main__":
    # Seven sample points in the plane, one per row.
    X = np.array(
        [
            [1.0, 1.0],
            [1.5, 2.0],
            [3.0, 4.0],
            [5.0, 7.0],
            [3.5, 5.0],
            [4.5, 5.0],
            [3.5, 4.5],
        ]
    )

    # How many clusters to look for.
    k = 3

    # Cluster the points, then report the centres and per-point assignments.
    centroids, labels = kmeans(X, k)
    print("Centroids:", centroids, sep="\n")
    print("Labels:", labels, sep="\n")

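# Example output from one run (the initial centroids are chosen at random,
# so the cluster numbering and values can differ between runs):
# Centroids:
# [[1.25       1.5       ]
#  [4.75       6.        ]
#  [3.33333333 4.5       ]]
# Labels:
# [0 0 2 1 2 1 2]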
