In [7]:
import math

In [76]:
class Point(object):
    """A labelled point whose position is read from ``coords``.

    Attributes:
        name: Optional label for the point; stored exactly as given.
        coords: Indexable container; element 0 is exposed as ``x`` and
            element 1 as ``y``. Stored as given, with no validation.
    """

    def __init__(self, name=None, coords=None):
        self.name, self.coords = name, coords

    # Read-only views onto the first two entries of ``coords``.
    x = property(lambda self: self.coords[0], doc="First coordinate (``coords[0]``).")
    y = property(lambda self: self.coords[1], doc="Second coordinate (``coords[1]``).")

In [140]:
class Cluster(object):
    """A named group of points gathered around a center point.

    Attributes:
        name: Label used in the printed log lines.
        center: Current center; a point-like object exposing ``x`` and ``y``.
        members: List of points currently assigned to this cluster.
    """

    def __init__(self, name=None, center=None):
        self.name, self.center = name, center
        self.members = []

    def recalculate_center(self):
        """Replace ``center`` with the mean position of the current members.

        The new center is an unnamed ``Point``.

        Raises:
            ValueError: if the cluster has no members.
        """
        if not self.members:
            raise ValueError("No members; cannot recalculate center")
        count = len(self.members)
        # Start from 0.0 so the running totals are floats from the first add.
        mean_x = sum((m.x for m in self.members), 0.0) / count
        mean_y = sum((m.y for m in self.members), 0.0) / count
        self.center = Point(coords=(mean_x, mean_y))

    def calc_distance(self, p):
        """Return ``calc_dist`` between this cluster's center and ``p``."""
        return calc_dist(self.center, p)

    def add_point(self, p):
        """Append ``p`` to ``members`` and print a log line for the assignment."""
        self.members.append(p)
        details = (p.name, p.x, p.y, self.name)
        print("Putting %s (%.2f, %.2f) in cluster %s" % details)

    def print_members(self):
        """Print the cluster name followed by the list of member names."""
        member_names = [m.name for m in self.members]
        print("%s: %s" % (self.name, member_names))

In [72]:
def calc_dist(p1, p2):
    """Return the Euclidean distance between two points in the plane.

    Args:
        p1, p2: Objects exposing numeric ``x`` and ``y`` attributes.

    Returns:
        The straight-line distance between ``p1`` and ``p2`` as a float.
    """
    # math.hypot does not square the differences directly, so very large
    # coordinate gaps no longer raise OverflowError the way
    # ``sqrt(dx**2 + dy**2)`` does.
    return math.hypot(p1.x - p2.x, p1.y - p2.y)

In [125]:
def cluster_observations(clusters, observations):
    """Assign every observation to the cluster whose center is nearest.

    Each cluster's ``members`` list is reset first, then every observation is
    handed to its nearest cluster through ``add_point``. When two clusters
    are equally close, the one appearing first in ``clusters`` wins.

    Args:
        clusters: Sequence of cluster objects providing ``members``,
            ``calc_distance(point)`` and ``add_point(point)``.
        observations: Iterable of points to assign.

    Raises:
        ValueError: if there is an observation to assign but ``clusters``
            is empty.
    """
    for cluster in clusters:
        cluster.members = []
    for obs in observations:
        if not clusters:
            # Without this guard the lookup degenerated into
            # ``clusters[None]`` and failed with an unrelated-looking
            # TypeError.
            raise ValueError("No clusters; cannot assign observation")
        # min() keeps the first of several equally distant clusters, which
        # matches a left-to-right scan using a strict "<" comparison.
        nearest = min(clusters, key=lambda candidate: candidate.calc_distance(obs))
        nearest.add_point(obs)

In [99]:
def recalculate_centers(clusters):
    """Recompute the center of each cluster and print its new position.

    Calls ``recalculate_center()`` on every cluster in order; any exception
    raised by that call propagates to the caller.
    """
    report = "Updated center for cluster %s: (%.2f, %.2f)"
    for cluster in clusters:
        cluster.recalculate_center()
        center = cluster.center
        print(report % (cluster.name, center.x, center.y))

In [141]:
# Sample observations to cluster, listed as (label, (x, y)) pairs.
points = [
    Point(name=label, coords=xy)
    for label, xy in (
        ('P1', (-1.88, 2.05)),
        ('P2', (-0.71, 0.42)),
        ('P3', (2.41, -0.67)),
        ('P4', (1.85, -3.8)),
        ('P5', (-3.69, -1.33)),
    )
]

# Two clusters with hand-picked starting centers.
clusters = [
    Cluster(name='C1', center=Point(coords=(2, 2))),
    Cluster(name='C2', center=Point(coords=(-2, -2))),
]

In [142]:
# Run a fixed number of assign/update rounds, printing the membership of
# every cluster after each round.
NUM_ITERATIONS = 3
for i in range(1, NUM_ITERATIONS + 1):
    # print("") emits the blank separator line on both Python 2 and 3; a bare
    # `print` is only a no-op expression on Python 3.
    print("")
    print("ITERATION %d" % (i))
    cluster_observations(clusters, points)
    recalculate_centers(clusters)
    for c in clusters:
        c.print_members()


ITERATION 1
Putting P1 (-1.88, 2.05) in cluster C1
Putting P2 (-0.71, 0.42) in cluster C2
Putting P3 (2.41, -0.67) in cluster C1
Putting P4 (1.85, -3.80) in cluster C2
Putting P5 (-3.69, -1.33) in cluster C2
Updated center for cluster C1: (0.27, 0.69)
Updated center for cluster C2: (-0.85, -1.57)
C1: ['P1', 'P3']
C2: ['P2', 'P4', 'P5']

ITERATION 2
Putting P1 (-1.88, 2.05) in cluster C1
Putting P2 (-0.71, 0.42) in cluster C1
Putting P3 (2.41, -0.67) in cluster C1
Putting P4 (1.85, -3.80) in cluster C2
Putting P5 (-3.69, -1.33) in cluster C2
Updated center for cluster C1: (-0.06, 0.60)
Updated center for cluster C2: (-0.92, -2.56)
C1: ['P1', 'P2', 'P3']
C2: ['P4', 'P5']

ITERATION 3
Putting P1 (-1.88, 2.05) in cluster C1
Putting P2 (-0.71, 0.42) in cluster C1
Putting P3 (2.41, -0.67) in cluster C1
Putting P4 (1.85, -3.80) in cluster C2
Putting P5 (-3.69, -1.33) in cluster C2
Updated center for cluster C1: (-0.06, 0.60)
Updated center for cluster C2: (-0.92, -2.56)
C1: ['P1', 'P2', 'P3'