In [146]:
# Source: https://kenzotakahashi.github.io/k-means-clustering-from-scratch-in-python.html

In [147]:
import numpy as np

In [148]:
class KMeans(object):
    """K-means clustering with Lloyd's algorithm.

    Parameters
    ----------
    n_clusters : int, default=8
        Number of clusters (and therefore of centres) to find.
    max_iter : int, default=300
        Upper bound on the number of centre-update iterations. Fitting stops
        earlier as soon as an update leaves every centre unchanged.
    random_state : int or None, default=None
        Seed for choosing the initial centres. ``None`` draws from NumPy's
        global generator (so a prior ``np.random.seed`` call is honoured);
        any other value, including ``0``, seeds a private generator.

    Attributes
    ----------
    cluster_centers_ : ndarray of shape (n_clusters, n_features)
        Final centre coordinates. Set by ``fit``.
    labels_ : list of int
        Index of the nearest final centre for every training sample.
    inertia_ : float
        Sum of squared distances from the training samples to their nearest
        final centre.
    """

    def __init__(self, n_clusters=8, max_iter=300, random_state=None):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.random_state = random_state

    def fit(self, X):
        """Compute the cluster centres for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        self : KMeans
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, or ``n_clusters`` is not between 1 and the
            number of samples.
        """
        X = self._as_2d_float(X)
        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                "n_clusters=%r must be between 1 and the number of samples (%d)"
                % (self.n_clusters, n_samples)
            )

        # `is None` rather than truthiness, so that random_state=0 is a valid
        # seed. A private RandomState keeps fit() from reseeding the global
        # generator behind the caller's back.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Sample WITHOUT replacement: picking the same row twice would start
        # two clusters on the same point and leave one of them empty.
        seeds = rng.choice(n_samples, size=self.n_clusters, replace=False)
        centers = X[seeds].copy()

        # Assign once up front so labels_ exists even when max_iter == 0.
        labels = self._assign(X, centers)
        for _ in range(self.max_iter):
            updated = centers.copy()
            for k in range(self.n_clusters):
                members = X[labels == k]
                # An empty cluster keeps its previous centre; averaging zero
                # points would turn the centre into NaN.
                if len(members) > 0:
                    updated[k] = members.mean(axis=0)

            # Exact comparison: once an update is a no-op, every later
            # iteration would be a no-op too, so stopping changes nothing.
            converged = np.array_equal(updated, centers)
            centers = updated
            if converged:
                break
            # Re-assign after the update so labels always match the centres.
            labels = self._assign(X, centers)

        self.cluster_centers_ = centers
        self.labels_ = list(labels)
        self.inertia_ = float(np.sum((X - centers[labels]) ** 2))
        return self

    def score(self, X):
        """Return the negative sum of squared distances from ``X`` to the
        nearest fitted centre (higher is better).

        For the training data this equals ``-self.inertia_``.
        """
        X = self._as_2d_float(X)
        centers = np.asarray(self.cluster_centers_, dtype=float)
        nearest = self._assign(X, centers)
        return -float(np.sum((X - centers[nearest]) ** 2))

    def _as_2d_float(self, X):
        """Convert ``X`` to a float ndarray and check that it is 2-D."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "Expected a 2-D array of shape (n_samples, n_features), "
                "got a %d-D array" % X.ndim
            )
        return X

    def _pairwise_distances(self, X, centers):
        """Return the (n_samples, n_clusters) matrix of Euclidean distances."""
        return self._distance(X[:, None, :], centers[None, :, :])

    def _assign(self, X, centers):
        """Return, per row of ``X``, the index of the nearest centre.

        Ties go to the lowest centre index (``np.argmin`` semantics).
        """
        return np.argmin(self._pairwise_distances(X, centers), axis=1)

    def _nearest(self, clusters, x):
        """Return the index of the centre in ``clusters`` closest to point ``x``."""
        return np.argmin([self._distance(x, c) for c in clusters])

    def _distance(self, a, b):
        """Euclidean distance between ``a`` and ``b`` along the last axis.

        For two 1-D points this is a scalar; higher-dimensional inputs are
        broadcast against each other.
        """
        return np.sqrt(np.sum((np.asarray(a) - np.asarray(b)) ** 2, axis=-1))

    def predict(self, X):
        """Return, for each sample in ``X``, the index of its nearest centre.

        Returns
        -------
        labels : list of int, one entry per row of ``X``
        """
        X = self._as_2d_float(X)
        centers = np.asarray(self.cluster_centers_, dtype=float)
        return list(self._assign(X, centers))

    def transform(self, X):
        """Map ``X`` to cluster-distance space.

        Returns
        -------
        distances : list of lists
            ``distances[i][k]`` is the Euclidean distance from sample ``i``
            to centre ``k``.
        """
        X = self._as_2d_float(X)
        centers = np.asarray(self.cluster_centers_, dtype=float)
        return [list(row) for row in self._pairwise_distances(X, centers)]

    def fit_predict(self, X):
        """Fit on ``X`` and return the cluster index of each sample."""
        return self.fit(X).predict(X)

    def fit_transform(self, X):
        """Fit on ``X`` and return its distances to every fitted centre."""
        return self.fit(X).transform(X)

In [149]:
# Toy dataset: five 2-D points, three near (1-2, 1-2) and two near (4-5, 4-5).
X = np.array([[1, 1], [1, 2], [2, 2], [4, 5], [5, 4]])

# Fit two clusters with at most five iterations and a fixed seed.
kmeans = KMeans(n_clusters=2, max_iter=5, random_state=4).fit(X)