In [18]:
import numpy as np

class KMeans:
    def __init__(self, K, init):
        self.K = K
        self.centroids = np.array(init)

    def _euclidean_distance(self, a, b):
        return np.sqrt(np.sum((a - b) ** 2, axis=-1))

    def _update_centroids(self, X, labels):
        new_centroids = np.zeros_like(self.centroids)
        for k in range(self.K):
            new_centroids[k] = np.mean(X[labels == k], axis=0)
        return new_centroids

    def _assign_labels(self, X):
        distances = self._euclidean_distance(X[:, np.newaxis], self.centroids)
        return np.argmin(distances, axis=-1)

    def fit(self, X, tol=0.001, max_iter=300):
        for _ in range(max_iter):
            labels = self._assign_labels(X)
            new_centroids = self._update_centroids(X, labels)
            max_shift = np.max(self._euclidean_distance(self.centroids, new_centroids))
            self.centroids = new_centroids

            if max_shift <= tol:
                break

    def predict(self, X):
        return self._assign_labels(X)


In [29]:
import pandas as pd
from matplotlib import pyplot as plt

df = pd.read_csv("6.csv", index_col=None)
colors = ["red", "green", "blue", "black", "yellow"]
X = df[["a0", "a1"]].to_numpy()
y = df["class"].to_numpy()
n_clusters = np.unique(y).shape[0]
kmeans = KMeans(n_clusters, np.random.randn(n_clusters, 2)).fit(X)
print(KMeans.predict(n_clusters,X))
y_pred = kmeans.predict(X)

plt.figure(figsize=(8, 8))
for j in range(n_clusters):
  color = colors[j]
  res = X[y_pred == j, :]
  plt.scatter(res[:, 0], res[:, 1], c=color)

AttributeError: ignored