In [26]:
import numpy as np

class KMeans:
    """K-Means clustering (Lloyd's algorithm) on NumPy arrays.

    Parameters
    ----------
    n_clusters : int
        Number of clusters (and centroids) to form.
    max_iter : int, default 100
        Upper bound on the number of assign/update rounds performed by ``fit``.
    random_state : int or None, default None
        Seed used to choose the initial centroids. ``None`` draws from NumPy's
        global random state; any other value (including ``0``) makes the
        initialization reproducible.

    Attributes
    ----------
    centroids : ndarray of shape (n_clusters, n_features) or None
        Current cluster centers; ``None`` until centroids are initialized.
    """

    def __init__(self, n_clusters, max_iter=100, random_state=None):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.random_state = random_state
        self.centroids = None

    def initialize_centroids(self, X):
        """Pick ``n_clusters`` distinct rows of ``X`` as the starting centroids.

        NumPy raises ``ValueError`` if ``X`` has fewer rows than ``n_clusters``,
        because the rows are sampled without replacement.
        """
        X = np.asarray(X)
        if self.random_state is None:
            rng = np.random
        else:
            # Test against None (not truthiness) so that seed 0 is honored.
            # A private RandomState produces the same draw that seeding the
            # global generator would, without resetting the global stream
            # for every other cell in the session.
            rng = np.random.RandomState(self.random_state)
        chosen = rng.choice(len(X), self.n_clusters, replace=False)
        self.centroids = X[chosen]

    def compute_distances(self, X):
        """Return Euclidean distances between every row of ``X`` and every centroid.

        Returns
        -------
        ndarray of shape (len(X), n_clusters)
            Entry ``[i, j]`` is the distance from ``X[i]`` to centroid ``j``.
        """
        X = np.asarray(X)
        distances = np.zeros((len(X), self.n_clusters))
        # One vectorized pass per centroid instead of a Python loop per point.
        for j, centroid in enumerate(self.centroids):
            distances[:, j] = np.linalg.norm(X - centroid, axis=1)
        return distances

    def assign_clusters(self, distances):
        """Return, for each row of ``distances``, the index of the smallest entry."""
        return np.argmin(distances, axis=1)

    def compute_dist_and_assing_cluster(self, X):
        """Return the index of the nearest centroid for every row of ``X``.

        Convenience wrapper combining ``compute_distances`` and
        ``assign_clusters``; the (misspelled) name is kept for compatibility.
        """
        return self.assign_clusters(self.compute_distances(X))

    def update_centroids(self, X, labels):
        """Return new centroids as the mean of the points assigned to each cluster.

        A cluster that received no points keeps its previous centroid. Taking
        the mean of an empty selection would give NaN, and ``np.argmin`` would
        then send every point to that NaN centroid on the next round.
        """
        X = np.asarray(X)
        labels = np.asarray(labels)
        if self.centroids is None:
            centroids = np.zeros((self.n_clusters, X.shape[1]))
        else:
            # Float copy so the caller's/current centroids are never mutated.
            centroids = np.array(self.centroids, dtype=float)
        for k in range(self.n_clusters):
            members = X[labels == k]
            if len(members):
                centroids[k] = members.mean(axis=0)
        return centroids

    def fit(self, X):
        """Run Lloyd's iterations on ``X`` and return ``self``.

        Stops after ``max_iter`` rounds or as soon as an update leaves the
        centroids unchanged.
        """
        X = np.asarray(X)
        self.initialize_centroids(X)
        for _ in range(self.max_iter):
            labels = self.compute_dist_and_assing_cluster(X)
            new_centroids = self.update_centroids(X, labels)
            if np.array_equal(self.centroids, new_centroids):  # converged
                break
            self.centroids = new_centroids
        return self

    def _fit(self, X):
        """Alias of ``fit`` kept for backward compatibility; returns ``self``."""
        return self.fit(X)

    def predict(self, X):
        """Return the index of the nearest fitted centroid for every row of ``X``."""
        return self.compute_dist_and_assing_cluster(X)

    def _predict(self, X):
        """Alias of ``predict`` kept for backward compatibility."""
        return self.predict(X)

In [27]:
# Sample 100 two-dimensional points uniformly from [0, 1)
X = np.random.rand(100, 2)

# Build a 3-cluster model with a fixed seed and train it in one step
# (fit returns the model itself, so the result can be bound directly)
kmeans = KMeans(n_clusters=3, random_state=42).fit(X)

# Label the training points themselves with their nearest centroid
labels = kmeans.predict(X)

In [28]:
# Display the cluster index that predict() assigned to each row of X
labels

array([1, 0, 0, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2,
       1, 2, 1, 1, 2, 2, 1, 0, 2, 2, 2, 2, 1, 1, 1, 2, 1, 0, 1, 2, 2, 0,
       0, 1, 2, 1, 2, 1, 2, 0, 1, 1, 2, 0, 1, 2, 1, 1, 2, 1, 0, 1, 1, 1,
       1, 0, 2, 1, 2, 2, 1, 0, 1, 0, 1, 1, 2, 1, 0, 1, 0, 0, 1, 1, 1, 1,
       2, 2, 2, 1, 0, 1, 2, 2, 0, 0, 2, 1])

In [32]:
# Reuse the X sampled in the earlier cell rather than drawing new data
#X = np.random.rand(100, 2)

# Same seed as before, but this time with five clusters
kmeans = KMeans(
    n_clusters=5,
    random_state=42,
)

# Train through the combined distance-and-assignment code path
kmeans._fit(X)

# Label the training points through the matching private predict path
_labels = kmeans._predict(X)

In [33]:
# Display the cluster index that _predict() assigned to each row of X
_labels

array([1, 0, 0, 3, 2, 1, 0, 1, 1, 3, 2, 1, 2, 1, 1, 4, 0, 1, 3, 0, 2, 2,
       1, 2, 1, 1, 2, 2, 0, 4, 2, 1, 2, 2, 3, 1, 1, 2, 1, 0, 1, 4, 2, 0,
       0, 3, 2, 3, 2, 3, 2, 0, 1, 3, 1, 4, 1, 2, 3, 3, 2, 3, 0, 3, 3, 0,
       1, 4, 2, 1, 2, 2, 1, 4, 0, 4, 3, 1, 2, 0, 4, 1, 4, 0, 1, 3, 1, 3,
       2, 2, 2, 1, 4, 1, 2, 2, 4, 0, 2, 1])

In [31]:
# Sanity check: the public fit/predict path and the private _fit/_predict path
# must produce identical labels when configured identically. `labels` came from
# a 3-cluster model, so it cannot be compared with the 5-cluster `_labels`;
# build a reference model that mirrors the current `kmeans` settings instead.
kmeans_ref = KMeans(n_clusters=kmeans.n_clusters, random_state=kmeans.random_state)
kmeans_ref.fit(X)
assert np.array_equal(kmeans_ref.predict(X), _labels)