In [1]:
from pprint import pprint
from typing import *

In [2]:
from functools import partial
from collections import defaultdict
from math import fsum, sqrt
from random import sample

In [3]:
# A point is a tuple of coordinates of any dimension. Coordinates are typed as
# float because centroids are built from per-coordinate means (floats); integer
# coordinates remain acceptable wherever float is expected.
Point = Tuple[float, ...]
# A centroid is just a point that represents the centre of a group.
Centroid = Point

In [4]:
def mean(data: Iterable[float]) -> float:
    """Return the arithmetic mean of ``data``, summed accurately with ``fsum``.

    The iterable is materialised into a list first so that one-shot iterators
    can be both summed and counted.

    Raises:
        ZeroDivisionError: if ``data`` yields no values.
    """
    values = [*data]
    total = fsum(values)
    return total / len(values)

`fsum`, `sqrt`, and `zip` are bound as default arguments so that, inside the function, they are looked up as local variables, which is faster than looking them up as globals/builtins on every call.

Use `from dis import dis; dis(dist)` to see which names are loaded as locals (`LOAD_FAST`) and which as globals (`LOAD_GLOBAL`).

In [5]:
def dist(p: Point, q: Point, fsum=fsum, sqrt=sqrt, zip=zip) -> float:
    """Return the Euclidean distance between points ``p`` and ``q``.

    Works for any number of dimensions. Coordinates are paired with ``zip``,
    so if the points differ in length the extra coordinates are ignored.

    ``fsum``, ``sqrt`` and ``zip`` are default arguments only so that the body
    reads them as locals; they are not meant to be supplied by callers.
    """
    squared_diffs = []
    for a, b in zip(p, q):
        squared_diffs.append((a - b) ** 2)
    return sqrt(fsum(squared_diffs))

In [6]:
def assign_data(centroids: Sequence[Point], data: Iterable[Point]) -> Dict[Centroid, List[Point]]:
    """Map each centroid to the list of data points that lie nearest to it.

    Distances are measured with ``dist``. When two centroids are equally
    close, the one appearing first in ``centroids`` wins. A centroid that is
    not the nearest one for any point does not appear in the result.
    """
    groups = {}
    for point in data:
        nearest = min(centroids, key=lambda centroid: dist(point, centroid))
        groups.setdefault(nearest, []).append(point)
    return groups

In [7]:
def compute_centroids(groups: Iterable[Sequence[Point]]) -> List[Centroid]:
    """Return one centroid per group: the per-coordinate mean of its points."""
    centroids = []
    for members in groups:
        # Transpose the group so each item holds one coordinate of every point.
        columns = zip(*members)
        centroids.append(tuple(mean(column) for column in columns))
    return centroids

In [25]:
def k_means(data: Iterable[Point], k: int=2, iterations: int=50) -> List[Centroid]:
    """Cluster ``data`` with k-means and return the resulting centroids.

    ``k`` distinct positions of ``data`` are drawn at random as the starting
    centroids. Each round then assigns every point to its nearest centroid and
    replaces the centroids with the means of the resulting groups.

    Args:
        data: the points to cluster; consumed once and kept as a list.
        k: number of starting centroids to draw from ``data``.
        iterations: maximum number of assign/recompute rounds.

    Returns:
        The centroids after the last round. There may be fewer than ``k`` of
        them, because a centroid that ends up with no points assigned to it
        produces no group and is therefore dropped.

    Raises:
        ValueError: from ``random.sample`` if ``k`` exceeds the number of
            points (or is negative).
    """
    data = list(data)
    centroids = sample(data, k)
    for _ in range(iterations):
        labeled = assign_data(centroids, data)
        previous, centroids = centroids, compute_centroids(labeled.values())
        # Converged: unchanged centroids would yield the same assignment and
        # the same means again, so the remaining rounds are redundant.
        if centroids == previous:
            break
    return centroids

In [39]:
# Six sample 3-dimensional points to cluster.
points = [
    (10, 41, 23),
    (22, 30, 29),
    (11, 42, 5),
    (20, 32, 4),
    (12, 40, 12),
    (21, 36, 23),
]

# NOTE: k_means draws its starting centroids with random.sample and no seed is
# set in this cell, so the clusters found can differ from one run to the next.
centroids = k_means(points, k=3)

# Group the original points under the centroid each one is closest to.
d = assign_data(centroids, points)

In [44]:
# Show the centroid -> member-points mapping, wrapped at 60 columns.
pprint(d, width=60)

{(10.0, 41.0, 23.0): [(10, 41, 23)],
 (14.333333333333334, 38.0, 7.0): [(11, 42, 5),
                                   (20, 32, 4),
                                   (12, 40, 12)],
 (21.5, 33.0, 26.0): [(22, 30, 29), (21, 36, 23)]}
