In [None]:
import numpy as np

def kmeans(X, k, max_iters=100, random_state=None):
    """Cluster the rows of ``X`` into ``k`` groups with Lloyd's k-means algorithm.

    Parameters
    ----------
    X : array-like, shape (n_points, ...)
        Data points, one per entry along the first axis.
    k : int
        Number of clusters; must satisfy ``1 <= k <= n_points``.
    max_iters : int, default 100
        Upper bound on the number of assign/update rounds.
    random_state : int or None, default None
        Seed for the centroid initialisation. When ``None`` the global
        ``np.random`` state is used, so ``np.random.seed(...)`` still
        controls the result.

    Returns
    -------
    centroids : ndarray of float, shape (k, ...)
        Final cluster centres.
    assignments : ndarray of int, shape (n_points,)
        Index of the nearest final centroid for each point.

    Raises
    ------
    ValueError
        If ``k`` is not between 1 and the number of points.
    """
    X = np.asarray(X)
    n_points = len(X)
    if not 1 <= k <= n_points:
        raise ValueError(
            f"k must be between 1 and the number of points ({n_points}), got {k}"
        )

    # Work on a flat (n_points, n_features) float view so that points of any
    # dimensionality are handled by the same vectorised code.
    points = X.reshape(n_points, int(np.prod(X.shape[1:]))).astype(float)

    # Step 1: initialise the centroids with k distinct data points.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    centroids = points[rng.choice(n_points, k, replace=False)]

    def nearest(current):
        # Builds an (n_points, k) distance matrix; ties go to the lowest index.
        gaps = points[:, None, :] - current[None, :, :]
        return np.argmin(np.linalg.norm(gaps, axis=2), axis=1)

    for _ in range(max_iters):
        # Step 2: assign every point to its closest centroid.
        labels = nearest(centroids)

        # Step 3: move each centroid to the mean of its members. A cluster
        # that received no points keeps its previous centroid; taking the
        # mean of an empty set would yield NaN and poison later iterations.
        new_centroids = centroids.copy()
        for j in range(k):
            members = points[labels == j]
            if len(members):
                new_centroids[j] = members.mean(axis=0)

        # Converged once an update leaves every centroid unchanged.
        if np.array_equal(centroids, new_centroids):
            break

        centroids = new_centroids

    # Recompute the assignments so they always match the returned centroids,
    # including when the loop stops because max_iters was exhausted.
    return centroids.reshape((k,) + X.shape[1:]), nearest(centroids)

# Example usage
# X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
# k = 2
# centroids, assignments = kmeans(X, k)


In [None]:
# Six 2-D sample points: three with x = 1 and three with x = 10.
X = np.array([
    [1, 2], [1, 4], [1, 0],
    [10, 2], [10, 4], [10, 0],
])
# Draw three distinct row indices into X (sampling without replacement).
y = np.random.choice(X.shape[0], size=3, replace=False)

In [None]:
# Index X with the integer index array y to display the randomly selected rows.
X[y]

array([[ 1,  2],
       [ 1,  4],
       [10,  4]])

In [None]:
# Toy dataset: eight 2-D points.
data = np.array([
    [1, 2], [3, 4], [5, 6], [7, 8],
    [8, 9], [10, 11], [12, 13], [14, 15],
])

# Initial centroids for 3 clusters: three distinct rows of `data`,
# picked at random without replacement.
randomselections = np.random.choice(data.shape[0], size=3, replace=False)
centroids = data[randomselections]
print(centroids)

[[5 6]
 [7 8]
 [8 9]]


In [None]:
# Manual k-means iterations over `data`, starting from the `centroids`
# chosen in the previous cell. Runs at most `max_iter` rounds and stops
# early once the centroids no longer move.
max_iter = 20
newcentroids = []

for _ in range(max_iter):
  # Assignment step: put every point in the bucket of its nearest centroid.
  clusters = [[] for _ in range(len(centroids))]
  for x in data:
    distances = [np.linalg.norm(x - centroid) for centroid in centroids]
    closest = np.argmin(distances)
    clusters[closest].append(x)

  # Update step: each centroid becomes the per-coordinate mean of its
  # cluster. axis=0 is essential: without it np.mean flattens the cluster
  # and collapses the centroid to a single scalar.
  newcentroids = []
  for old_centroid, cluster in zip(centroids, clusters):
    if cluster:
      newcentroids.append(np.mean(cluster, axis=0))
    else:
      # An empty cluster has no mean (np.mean would return NaN and emit a
      # RuntimeWarning), so keep the previous centroid instead.
      newcentroids.append(np.asarray(old_centroid, dtype=float))

  converged = np.array_equal(centroids, newcentroids)
  centroids = np.array(newcentroids)
  if converged:
    break


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [None]:
import numpy as np
from collections import Counter

def euclidean_distance(point1, point2):
    """Return the straight-line (L2) distance between two NumPy points."""
    offset = point1 - point2
    squared_total = np.sum(offset ** 2)
    return np.sqrt(squared_total)

class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    k : int, default 3
        Number of neighbours that vote on each prediction; must be >= 1.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=3):
        # k == 0 would leave no neighbours to vote and a negative k would
        # silently slice the wrong neighbours, so reject both up front.
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k

    def fit(self, X, y):
        """Store the training samples ``X`` and their labels ``y``.

        Both arguments may be any array-like (lists included). Features are
        stored as a float array, labels as an array of their own dtype.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        X_train = np.asarray(X, dtype=float)
        y_train = np.asarray(y)
        if len(X_train) != len(y_train):
            raise ValueError(
                f"X and y must have the same length, got {len(X_train)} and {len(y_train)}"
            )
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X):
        """Return an array with the predicted label for every sample in ``X``."""
        queries = np.asarray(X, dtype=float)
        return np.array([self._predict(x) for x in queries])

    def _predict(self, x):
        """Predict the label of one sample by majority vote of its k nearest neighbours."""
        # Euclidean distance from x to every training sample in one shot:
        # square the differences and sum over all per-sample axes.
        squared = (self.X_train - x) ** 2
        distances = np.sqrt(squared.sum(axis=tuple(range(1, squared.ndim))))

        # A stable sort keeps equidistant neighbours in training order, so
        # the selection of the k nearest is deterministic.
        k_indices = np.argsort(distances, kind="stable")[:self.k]

        # Labels of the k nearest training samples, nearest first.
        k_nearest_labels = self.y_train[k_indices]

        # Most frequent label wins; on a tie the label seen first (i.e. the
        # one belonging to the closer neighbour) is returned.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

# Example usage
if __name__ == "__main__":
    # Demo: classify the iris dataset with the KNN class defined above.
    from sklearn import datasets
    from sklearn.model_selection import train_test_split

    # Load the features and target labels.
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    # Hold out 20% of the samples for evaluation (fixed seed for repeatability).
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=123,
    )

    # Build the 3-nearest-neighbour classifier and give it the training split.
    clf = KNN(k=3)
    clf.fit(X_train, y_train)

    # Predict the held-out samples.
    predictions = clf.predict(X_test)

    # Accuracy is the fraction of predictions that match the true labels.
    accuracy = np.mean(predictions == y_test)
    print(f"Accuracy: {accuracy}")
