In [18]:
# Kmeans 
from pprint import pprint 
from typing import *
from math import hypot, sqrt, fsum

In [32]:
# Function Needs for Kmeans 
# ------------------------------------

# mean(data)
# dist(point, point)
# assign_data(centroids, points)
# compute_centroids(groups)
# k_means(points)

In [19]:
points = [
    (10, 41, 23),
    (22, 30, 29),
    (11, 42, 5),
    (20, 32, 4),
    (12, 40, 12),
    (21, 36, 23),
]

pprint(points)

[(10, 41, 23),
 (22, 30, 29),
 (11, 42, 5),
 (20, 32, 4),
 (12, 40, 12),
 (21, 36, 23)]


In [20]:

def mean(data: Iterable[float]) -> float:
    from math import fsum
    "Accurate arithmetic mean"
    data = list(data)
    return fsum(data) / len(data)

mean([10, 20, 61])


30.333333333333332

In [31]:
# Make an alias of points
Point = Tuple[int, ...]


# Optimization go from global import to local imports
# by adding sqrt=sqrt, fsum=fsum, zip=zip 
def dist(p: Point, q: Point, sqrt=sqrt, fsum=fsum, zip=zip) -> float:
    "Ecuclidean distance function for multi-dimensional data"
    return sqrt(fsum([(x - y)**2 for x, y in zip(p, q)]))


p = (10,20,30)
q = (10,20,35)


dist(p,q)

5.0

In [30]:
for point in points:
    print(point, dist(point, (9, 39, 20)))

(10, 41, 23) 3.7416573867739413
(22, 30, 29) 18.193405398660254
(11, 42, 5) 15.427248620541512
(20, 32, 4) 20.639767440550294
(12, 40, 12) 8.602325267042627
(21, 36, 23) 12.727922061357855


In [26]:
dis(dist)

  5           0 LOAD_FAST                2 (sqrt)
              2 LOAD_FAST                3 (fsum)
              4 LOAD_CONST               1 (<code object <listcomp> at 0x00000133FBAF0660, file "C:\Users\muj\AppData\Local\Temp\ipykernel_25568\2937151640.py", line 5>)
              6 LOAD_CONST               2 ('dist.<locals>.<listcomp>')
              8 MAKE_FUNCTION            0
             10 LOAD_FAST                4 (zip)
             12 LOAD_FAST                0 (p)
             14 LOAD_FAST                1 (q)
             16 CALL_FUNCTION            2
             18 GET_ITER
             20 CALL_FUNCTION            1
             22 CALL_FUNCTION            1
             24 CALL_FUNCTION            1
             26 RETURN_VALUE

Disassembly of <code object <listcomp> at 0x00000133FBAF0660, file "C:\Users\muj\AppData\Local\Temp\ipykernel_25568\2937151640.py", line 5>:
  5           0 BUILD_LIST               0
              2 LOAD_FAST                0 (.0)
        >>   

In [24]:
from  dis import dis 

dis(dist)

  2           0 LOAD_GLOBAL              0 (sqrt)
              2 LOAD_GLOBAL              1 (fsum)
              4 LOAD_CONST               1 (<code object <listcomp> at 0x0000013380FEBB50, file "C:\Users\muj\AppData\Local\Temp\ipykernel_25568\2912106024.py", line 2>)
              6 LOAD_CONST               2 ('dist.<locals>.<listcomp>')
              8 MAKE_FUNCTION            0
             10 LOAD_GLOBAL              2 (zip)
             12 LOAD_FAST                0 (p)
             14 LOAD_FAST                1 (q)
             16 CALL_FUNCTION            2
             18 GET_ITER
             20 CALL_FUNCTION            1
             22 CALL_FUNCTION            1
             24 CALL_FUNCTION            1
             26 RETURN_VALUE

Disassembly of <code object <listcomp> at 0x0000013380FEBB50, file "C:\Users\muj\AppData\Local\Temp\ipykernel_25568\2912106024.py", line 2>:
  2           0 BUILD_LIST               0
              2 LOAD_FAST                0 (.0)
        >>   

In [None]:
from collections import defaultdict
from functools import partial

def assign_data(centroids, data):
    d = defaultdict(list)
    for point in data:
        closest_centroid = min(centroids, key=partial(dist, point))
        d[closest_centroid].append(point)
    return d


def compute_centroids(groups):
    "Compute the centroid of each group"
    return [tuple(map(mean, zip(*group))) for group in groups]


centroids = [(9, 39, 20), (12, 36, 25)]
pprint(assign_data(centroids, points), width=45)


defaultdict(<class 'list'>,
            {(9, 39, 20): [(10, 41, 23),
                           (11, 42, 5),
                           (20, 32, 4),
                           (12, 40, 12)],
             (12, 36, 25): [(22, 30, 29),
                            (21, 36, 23)]})


In [None]:
# partial means partial function evaluation
from functools import partial

# First way: min(centroids, key=lambda centroid: dist(point, centroid))
# Second way: min(centroids, key=partial(dist, point))

