# In [2]:
import numpy as np

class KMeans:
    """K-means clustering (Lloyd's algorithm) with random initialisation.

    Parameters
    ----------
    n_clusters : int
        Number of clusters (and therefore centroids) to form.
    max_iter : int, default 100
        Upper bound on the number of assign/update rounds run by ``fit``.
    random_state : int, numpy.random.Generator or None, default None
        Seed used to pick the initial centroids. With ``None`` the draw comes
        from NumPy's global random state, so ``np.random.seed`` keeps
        working exactly as before this parameter existed.

    Attributes
    ----------
    centroids : ndarray of shape (n_clusters, n_features)
        Current cluster centres; available after ``fit``.
    n_samples, n_features : int
        Shape of the data passed to the most recent ``fit`` call.
    """

    def __init__(self, n_clusters, max_iter=100, random_state=None):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.random_state = random_state

    @staticmethod
    def _as_2d_array(X):
        """Return *X* as an ndarray, rejecting anything that is not 2-D."""
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(
                f"Expected a 2-D array of shape (n_samples, n_features), "
                f"got an array with {X.ndim} dimension(s)."
            )
        return X

    def fit(self, X):
        """Estimate the centroids from *X*.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        Returns
        -------
        KMeans
            The fitted estimator itself, so calls can be chained.

        Raises
        ------
        ValueError
            If *X* is not 2-D, or ``n_clusters`` is not between 1 and the
            number of samples.
        """
        X = self._as_2d_array(X)
        self.n_samples, self.n_features = X.shape

        self.centroids = self.initialize_centroids(X)

        # Alternate assignment and update until the centroids stop moving
        # (within np.allclose's default tolerance) or max_iter is reached.
        for _ in range(self.max_iter):
            labels = self.assign_clusters(X)
            new_centroids = self.update_centroids(X, labels)

            if np.allclose(self.centroids, new_centroids):
                break

            self.centroids = new_centroids

        return self

    def predict(self, X):
        """Return the index of the nearest centroid for every row of *X*.

        Raises
        ------
        AttributeError
            If the estimator has not been fitted yet.
        """
        return self.assign_clusters(X)

    def initialize_centroids(self, X):
        """Pick ``n_clusters`` distinct rows of *X* as the starting centroids.

        Returns
        -------
        ndarray of shape (n_clusters, n_features)
            A floating-point copy of the selected rows.

        Raises
        ------
        ValueError
            If ``n_clusters`` is smaller than 1 or larger than the number of
            rows in *X*.
        """
        X = self._as_2d_array(X)
        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                f"n_clusters={self.n_clusters} must be between 1 and "
                f"n_samples={n_samples}."
            )

        # Keep using the global NumPy state when no seed was requested so
        # existing callers relying on np.random.seed see no change.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.default_rng(self.random_state)
        random_indices = rng.choice(n_samples, self.n_clusters, replace=False)
        centroids = X[random_indices]

        # Integer centroids would make `X - centroids` integer arithmetic,
        # which silently wraps around for unsigned dtypes. np.mean yields
        # float64 for integer input, so use the same dtype from the start.
        if not np.issubdtype(centroids.dtype, np.inexact):
            centroids = centroids.astype(np.float64)
        return centroids

    def assign_clusters(self, X):
        """Label every row of *X* with the index of its closest centroid.

        Distances are Euclidean; ties go to the lowest centroid index.

        Returns
        -------
        ndarray of shape (n_samples,)
            Integer cluster indices.

        Raises
        ------
        AttributeError
            If no centroids exist yet (``fit`` has not been called).
        ValueError
            If *X* is not 2-D.
        """
        centroids = getattr(self, "centroids", None)
        if centroids is None:
            raise AttributeError(
                "KMeans is not fitted yet; call fit() before predict() "
                "or assign_clusters()."
            )
        X = self._as_2d_array(X)

        # Broadcast (n_samples, 1, n_features) against
        # (n_clusters, n_features) to get every sample/centroid difference
        # at once. This trades O(n_samples * n_clusters * n_features)
        # temporary memory for the removal of the per-sample Python loop.
        differences = X[:, np.newaxis, :] - centroids
        distances = np.linalg.norm(differences, axis=2)
        return np.argmin(distances, axis=1)

    def update_centroids(self, X, labels):
        """Recompute each centroid as the mean of the samples assigned to it.

        A cluster that received no samples keeps its current centroid.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        labels : array-like of shape (n_samples,)
            Cluster index of every row of *X*.

        Returns
        -------
        ndarray of shape (n_clusters, n_features)
        """
        X = np.asarray(X)
        labels = np.asarray(labels)

        new_centroids = []
        for cluster_index in range(self.n_clusters):
            members = X[labels == cluster_index]
            if len(members) > 0:
                new_centroids.append(members.mean(axis=0))
            else:
                new_centroids.append(self.centroids[cluster_index])
        return np.array(new_centroids)
