In [98]:
from math import hypot, pow, sqrt

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans

In [99]:
# Sample points to cluster: one record per point with its label and x/y position.
# Key order (label, x, y) is kept so a DataFrame built from this list gets the
# same column order.
coords = [
    {'label': name, 'x': px, 'y': py}
    for name, px, py in (
        ('A1', 2, 10),
        ('A2', 2, 5),
        ('A3', 8, 4),
        ('B1', 5, 8),
        ('B2', 7, 5),
        ('B3', 6, 4),
        ('C1', 1, 2),
        ('C2', 4, 9),
    )
]

In [100]:
# Starting centroids for the first iteration, one (x, y) tuple per cluster.
# They coincide with the points labelled A1, B1 and C1 in `coords`.
initailCenteriods = [(2, 10), (5, 8), (1, 2)]

In [101]:
def euclideanDistance(x1, y1, x2, y2):
    """Return the straight-line distance between points (x1, y1) and (x2, y2).

    Uses math.hypot rather than sqrt(pow(dx, 2) + pow(dy, 2)): squaring the
    differences explicitly raises OverflowError for very large (but still
    representable) distances and loses precision for very small ones, whereas
    hypot computes the same quantity without forming the intermediate squares.

    Args:
        x1, y1: coordinates of the first point.
        x2, y2: coordinates of the second point.

    Returns:
        The Euclidean distance as a float (always >= 0).
    """
    return hypot(x1 - x2, y1 - y2)

In [102]:
df = pd.DataFrame(coords)

# Distance from every point to each initial centroid. Each centroid gets its own
# column, named after the centroid's string form (e.g. "(2, 10)").
centroidLabels = [str(centroid) for centroid in initailCenteriods]
for label, (cx, cy) in zip(centroidLabels, initailCenteriods):
    # cx/cy are bound as lambda defaults so the callback does not depend on the
    # loop variables still holding this iteration's values when it is called.
    df[label] = df.apply(
        lambda row, cx=cx, cy=cy: euclideanDistance(row['x'], row['y'], cx, cy),
        axis=1,
    )

# Mark the closest centroid: idxmin(axis=1) yields, per row, the name of the
# distance column holding the smallest value.
df['closest'] = df[centroidLabels].idxmin(axis=1)

df

Unnamed: 0,label,x,y,"(2, 10)","(5, 8)","(1, 2)",closest
0,A1,2,10,0.0,3.605551,8.062258,"(2, 10)"
1,A2,2,5,5.0,4.242641,3.162278,"(1, 2)"
2,A3,8,4,8.485281,5.0,7.28011,"(5, 8)"
3,B1,5,8,3.605551,0.0,7.211103,"(5, 8)"
4,B2,7,5,7.071068,3.605551,6.708204,"(5, 8)"
5,B3,6,4,7.211103,4.123106,5.385165,"(5, 8)"
6,C1,1,2,8.062258,7.211103,0.0,"(1, 2)"
7,C2,4,9,2.236068,1.414214,7.615773,"(5, 8)"


In [103]:
# Group the points by the centroid they were assigned to; the mean x/y of each
# group is that cluster's updated centroid.
cluster = df.groupby('closest')

for centroidLabel, members in cluster:
    updatedCentroid = members[['x', 'y']].mean()
    # A single print with newline separators emits exactly the same text as
    # printing each item on its own followed by an empty print().
    print(centroidLabel, members, "New centeriod: ", updatedCentroid, "", sep="\n")

(1, 2)
  label  x  y   (2, 10)    (5, 8)    (1, 2) closest
1    A2  2  5  5.000000  4.242641  3.162278  (1, 2)
6    C1  1  2  8.062258  7.211103  0.000000  (1, 2)
New centeriod: 
x    1.5
y    3.5
dtype: float64

(2, 10)
  label  x   y  (2, 10)    (5, 8)    (1, 2)  closest
0    A1  2  10      0.0  3.605551  8.062258  (2, 10)
New centeriod: 
x     2.0
y    10.0
dtype: float64

(5, 8)
  label  x  y   (2, 10)    (5, 8)    (1, 2) closest
2    A3  8  4  8.485281  5.000000  7.280110  (5, 8)
3    B1  5  8  3.605551  0.000000  7.211103  (5, 8)
4    B2  7  5  7.071068  3.605551  6.708204  (5, 8)
5    B3  6  4  7.211103  4.123106  5.385165  (5, 8)
7    C2  4  9  2.236068  1.414214  7.615773  (5, 8)
New centeriod: 
x    6.0
y    6.0
dtype: float64



In [104]:
# Per-cluster mean of x and y, flattened into a list of [x, y] pairs: these are
# the updated centroids, in the order the groupby yields its groups.
clusterMeans = cluster[['x', 'y']].mean()
newCenteriods = clusterMeans.values.tolist()
print(newCenteriods)

[[1.5, 3.5], [2.0, 10.0], [6.0, 6.0]]


In [105]:
# Remove the distance columns of the old (initial) centroids. Unlike
# `del df[label]`, drop(..., errors='ignore') does not raise KeyError when the
# columns are already gone, so this cell can be re-run. The frame is still
# modified in place, as before.
df.drop(
    columns=[str(centroid) for centroid in initailCenteriods],
    inplace=True,
    errors='ignore',
)

# Add one distance column per new centroid, named after the centroid's string
# form (e.g. "[1.5, 3.5]").
centroidLabels = [str(centroid) for centroid in newCenteriods]
for label, (cx, cy) in zip(centroidLabels, newCenteriods):
    # Bind cx/cy as defaults so the callback carries this iteration's centroid.
    df[label] = df.apply(
        lambda row, cx=cx, cy=cy: euclideanDistance(row['x'], row['y'], cx, cy),
        axis=1,
    )

# Re-assign every point to whichever new centroid is now nearest.
df['closest'] = df[centroidLabels].idxmin(axis=1)
df

Unnamed: 0,label,x,y,closest,"[1.5, 3.5]","[2.0, 10.0]","[6.0, 6.0]"
0,A1,2,10,"[2.0, 10.0]",6.519202,0.0,5.656854
1,A2,2,5,"[1.5, 3.5]",1.581139,5.0,4.123106
2,A3,8,4,"[6.0, 6.0]",6.519202,8.485281,2.828427
3,B1,5,8,"[6.0, 6.0]",5.700877,3.605551,2.236068
4,B2,7,5,"[6.0, 6.0]",5.700877,7.071068,1.414214
5,B3,6,4,"[6.0, 6.0]",4.527693,7.211103,2.0
6,C1,1,2,"[1.5, 3.5]",1.581139,8.062258,6.403124
7,C2,4,9,"[2.0, 10.0]",6.041523,2.236068,3.605551


In [106]:
# Calculate new centroids by taking the average of the assigned points
cluster = df.groupby('closest')

for key, group in cluster:
    print(key)
    print(group)
    print("New centeriod: ")
    print(group[['x', 'y']].mean())
    print()

# Remove the distance columns of the centroids currently in `newCenteriods`.
# drop(..., errors='ignore') replaces `del df[label]`, which raised KeyError if
# this cell was run again after the columns had already been removed. The frame
# is still modified in place.
df.drop(
    columns=[str(centroid) for centroid in newCenteriods],
    inplace=True,
    errors='ignore',
)

[1.5, 3.5]
  label  x  y     closest  [1.5, 3.5]  [2.0, 10.0]  [6.0, 6.0]
1    A2  2  5  [1.5, 3.5]    1.581139     5.000000    4.123106
6    C1  1  2  [1.5, 3.5]    1.581139     8.062258    6.403124
New centeriod: 
x    1.5
y    3.5
dtype: float64

[2.0, 10.0]
  label  x   y      closest  [1.5, 3.5]  [2.0, 10.0]  [6.0, 6.0]
0    A1  2  10  [2.0, 10.0]    6.519202     0.000000    5.656854
7    C2  4   9  [2.0, 10.0]    6.041523     2.236068    3.605551
New centeriod: 
x    3.0
y    9.5
dtype: float64

[6.0, 6.0]
  label  x  y     closest  [1.5, 3.5]  [2.0, 10.0]  [6.0, 6.0]
2    A3  8  4  [6.0, 6.0]    6.519202     8.485281    2.828427
3    B1  5  8  [6.0, 6.0]    5.700877     3.605551    2.236068
4    B2  7  5  [6.0, 6.0]    5.700877     7.071068    1.414214
5    B3  6  4  [6.0, 6.0]    4.527693     7.211103    2.000000
New centeriod: 
x    6.50
y    5.25
dtype: float64



In [107]:
# Updated centroids: mean x/y of each group in `cluster`, as [x, y] lists.
newCenteriods = cluster[['x', 'y']].mean().values.tolist()

# One distance column per updated centroid, keyed by the centroid's string form.
distanceColumns = [str(centroid) for centroid in newCenteriods]
for columnName, (cx, cy) in zip(distanceColumns, newCenteriods):
    df[columnName] = df.apply(lambda point: euclideanDistance(point['x'], point['y'], cx, cy), axis=1)

# For each row, pick the distance column with the smallest value.
df['closest'] = df[distanceColumns].idxmin(axis=1)

df

Unnamed: 0,label,x,y,closest,"[1.5, 3.5]","[3.0, 9.5]","[6.5, 5.25]"
0,A1,2,10,"[3.0, 9.5]",6.519202,1.118034,6.543126
1,A2,2,5,"[1.5, 3.5]",1.581139,4.609772,4.506939
2,A3,8,4,"[6.5, 5.25]",6.519202,7.433034,1.952562
3,B1,5,8,"[3.0, 9.5]",5.700877,2.5,3.132491
4,B2,7,5,"[6.5, 5.25]",5.700877,6.020797,0.559017
5,B3,6,4,"[6.5, 5.25]",4.527693,6.264982,1.346291
6,C1,1,2,"[1.5, 3.5]",1.581139,7.762087,6.388466
7,C2,4,9,"[3.0, 9.5]",6.041523,1.118034,4.506939


In [108]:
# Calculate new centroids by taking the average of the assigned points
cluster = df.groupby('closest')

for key, group in cluster:
    print(key)
    print(group)
    print("New centeriod: ")
    print(group[['x', 'y']].mean())
    print()

# Remove the distance columns of the centroids currently in `newCenteriods`.
# drop(..., errors='ignore') replaces `del df[label]`, which raised KeyError if
# this cell was run again after the columns had already been removed. The frame
# is still modified in place.
df.drop(
    columns=[str(centroid) for centroid in newCenteriods],
    inplace=True,
    errors='ignore',
)

[1.5, 3.5]
  label  x  y     closest  [1.5, 3.5]  [3.0, 9.5]  [6.5, 5.25]
1    A2  2  5  [1.5, 3.5]    1.581139    4.609772     4.506939
6    C1  1  2  [1.5, 3.5]    1.581139    7.762087     6.388466
New centeriod: 
x    1.5
y    3.5
dtype: float64

[3.0, 9.5]
  label  x   y     closest  [1.5, 3.5]  [3.0, 9.5]  [6.5, 5.25]
0    A1  2  10  [3.0, 9.5]    6.519202    1.118034     6.543126
3    B1  5   8  [3.0, 9.5]    5.700877    2.500000     3.132491
7    C2  4   9  [3.0, 9.5]    6.041523    1.118034     4.506939
New centeriod: 
x    3.666667
y    9.000000
dtype: float64

[6.5, 5.25]
  label  x  y      closest  [1.5, 3.5]  [3.0, 9.5]  [6.5, 5.25]
2    A3  8  4  [6.5, 5.25]    6.519202    7.433034     1.952562
4    B2  7  5  [6.5, 5.25]    5.700877    6.020797     0.559017
5    B3  6  4  [6.5, 5.25]    4.527693    6.264982     1.346291
New centeriod: 
x    7.000000
y    4.333333
dtype: float64



In [109]:
# Updated centroids: mean x/y of each group in `cluster`, as [x, y] lists.
newCenteriods = cluster[['x', 'y']].mean().values.tolist()

# One distance column per updated centroid, keyed by the centroid's string form.
distanceColumns = [str(centroid) for centroid in newCenteriods]
for columnName, (cx, cy) in zip(distanceColumns, newCenteriods):
    df[columnName] = df.apply(lambda point: euclideanDistance(point['x'], point['y'], cx, cy), axis=1)

# For each row, pick the distance column with the smallest value.
df['closest'] = df[distanceColumns].idxmin(axis=1)

df

Unnamed: 0,label,x,y,closest,"[1.5, 3.5]","[3.6666666666666665, 9.0]","[7.0, 4.333333333333333]"
0,A1,2,10,"[3.6666666666666665, 9.0]",6.519202,1.943651,7.557189
1,A2,2,5,"[1.5, 3.5]",1.581139,4.333333,5.044249
2,A3,8,4,"[7.0, 4.333333333333333]",6.519202,6.616478,1.054093
3,B1,5,8,"[3.6666666666666665, 9.0]",5.700877,1.666667,4.176655
4,B2,7,5,"[7.0, 4.333333333333333]",5.700877,5.206833,0.666667
5,B3,6,4,"[7.0, 4.333333333333333]",4.527693,5.517648,1.054093
6,C1,1,2,"[1.5, 3.5]",1.581139,7.490735,6.437736
7,C2,4,9,"[3.6666666666666665, 9.0]",6.041523,0.333333,5.547772
