In [13]:
import random
import math

random.seed(0)

def kmeans(data, k):
    # Initialize centroids randomly
    centroids = random.sample(data, k)

    # Initialize empty clusters
    clusters = [[] for _ in range(k)]

    # Loop until convergence
    while True:
        # Assign each data point to the closest centroid
        for point in data:
            distances = [euclidean_distance(point, centroid) for centroid in centroids]
            cluster_index = distances.index(min(distances))
            clusters[cluster_index].append(point)

        # Save the old centroids
        old_centroids = centroids

        # Update the centroids
        for i, cluster in enumerate(clusters):
            centroids[i] = find_centroid(cluster)

        # If the centroids haven't changed, we have converged
        if old_centroids == centroids:
            break

    return clusters, centroids


def euclidean_distance(p1, p2):
    # Calculate the Euclidean distance between two points
    return math.sqrt(sum([(a - b) ** 2 for a, b in zip(p1, p2)]))


def mean(points):
    # Calculate the mean of a list of points
    return sum(points) / len(points)

def find_centroid(points):
  # Initialize variables to keep track of the minimum distance and the centroid
  min_distance = float('inf')
  centroid = None

  # Iterate over each point and calculate the sum of distances to all other points
  for point in points:
    distance = 0
    for other_point in points:
      distance += euclidean_distance(point, other_point)
    # If the distance is smaller than the current minimum, update the minimum and the centroid
    if distance < min_distance:
      min_distance = distance
      centroid = point

  return centroid


In [14]:
data = ((0,1,1),(1,1,0),(2,2,0),(0,1,2),(2,2,2),(2,3,0))
k = 3

clusters, centroids = kmeans(data, k)
for cluster, centroid in zip(clusters, centroids):
    print("Cluster: {}, Centroid: {}".format(cluster, centroid))

Cluster: [(0, 1, 2), (2, 2, 2)], Centroid: (0, 1, 2)
Cluster: [(2, 2, 0), (2, 3, 0)], Centroid: (2, 2, 0)
Cluster: [(0, 1, 1), (1, 1, 0)], Centroid: (0, 1, 1)
