In [None]:
import math
import random
from typing import List

In [None]:
class KNN:
    def __init__(self, data:List[List[int]], k: int):
        self.data = data
        self.k = k
        self.clusters ={i: { 'points': []} for i in random.sample(range(len(self.data)), self.k)}
        self.initialize()
        self.train()

    def initialize(self):
        self.assign_points_to_centroids()

    def train(self):
        for i in range(10):
            self.find_new_centroids()
            self.assign_points_to_centroids()


    def assign_points_to_centroids(self):
        for i in range(len(self.data)):
            if i not in self.clusters:
                point = self.data[i]
                closest_centroid = None
                closed_centroid_distance = float('inf')
                for centroid_index in self.clusters:
                    centroid = self.data[centroid_index]
                    euclindean_distance = self._get_euclidean_distance(point, centroid)
                    if euclindean_distance < closed_centroid_distance:
                        closed_centroid_distance = euclindean_distance
                        closest_centroid = centroid_index

                self.clusters[closest_centroid]['points'].append(i)
    
                
    def _get_euclidean_distance(self, point: List[int], centroid: List[int] ) -> int:
        return math.sqrt(sum((point[i] - centroid[i]) ** 2 for i in range(len(point))))   


    def find_new_centroids(self):
        new_centroids = {}

        for i in self.clusters:
            points = [self.data[i] for i in self.clusters[i]['points']]
            centroid_index = self.find_centroid_index(points)
            new_centroids[centroid_index] = { 'points': []}

        self.clusters = new_centroids
        

    def find_centroid_index(self, points):
        dimension_count = len(points[0]) 
        mean_coords = [sum(point[dim] for point in points) / len(points) for dim in range(dimension_count)]
        
        def euclidean_distance(point):
            return math.sqrt(sum((point[dim] - mean_coords[dim]) ** 2 for dim in range(dimension_count)))
        
        closest_index = min(range(len(points)), key=lambda i: euclidean_distance(points[i]))
        return closest_index



In [None]:
def generate_data(outer_length: int = 100, inner_length: int = 5) -> List[List[int]]:
    """Return a random integer data set.

    Args:
        outer_length: Number of points (rows) to generate.
        inner_length: Number of coordinates per point.

    Returns:
        ``outer_length`` lists, each holding ``inner_length`` integers drawn
        with ``random.randint(0, 100)``.
    """
    return [
        [random.randint(0, 100) for _ in range(inner_length)]
        for _ in range(outer_length)
    ]


In [None]:
# Number of clusters to build, and a freshly generated random data set.
k = 5
data = generate_data()


In [None]:

# Constructing the model runs the whole clustering; see ``knn.clusters``.
knn = KNN(data=data, k=k)