In [4]:
import numpy as np
import pandas as pd
import scipy.spatial.distance as dist


In [5]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [12]:
class KMeans:
    """Verbose, step-by-step K-Means that prints a table for every iteration.

    Each iteration assigns every point to its nearest centroid, recomputes the
    centroids as the per-coordinate mean of the assigned points, and stops
    when the centroids no longer change.

    Notes:
        * Euclidean distances and all recomputed centroids are rounded to two
          decimals; cluster assignment uses those rounded values.
        * Cluster labels are 1-based and follow the order of ``centroids``.

    Args:
        points: Iterable of coordinate sequences (e.g. ``(x, y)`` tuples).
        centroids: Iterable of initial centroid coordinate sequences.
        distance: ``'Euclidean'`` (default) or ``'Manhattan'``.

    Raises:
        ValueError: If ``distance`` is not supported, or if ``points`` or
            ``centroids`` is empty.
    """

    SUPPORTED_DISTANCES = ('Euclidean', 'Manhattan')

    def __init__(self,  points, centroids, distance='Euclidean'):
        if distance not in self.SUPPORTED_DISTANCES:
            raise ValueError(
                f'Unsupported distance {distance!r}; '
                f'expected one of {self.SUPPORTED_DISTANCES}'
            )
        # Normalise to lists of tuples so list/ndarray inputs behave like the
        # tuple inputs, and so the convergence check compares like with like.
        self.points = [tuple(p) for p in points]
        self.centroids = [tuple(c) for c in centroids]
        if not self.points:
            raise ValueError('points must not be empty')
        if not self.centroids:
            raise ValueError('centroids must not be empty')
        self.distance = distance
        self.n_clusters = len(self.centroids)
        # History of centroid sets; index 0 holds the initial centroids.
        self.centroid_list = [self.centroids]

    def cal_distance(self, x1, x2):
        """Return the distance between ``x1`` and ``x2`` under ``self.distance``.

        Euclidean results are rounded to two decimals; Manhattan results are
        returned unrounded.

        Raises:
            ValueError: If ``self.distance`` is not a supported metric.
        """
        if self.distance == 'Euclidean':
            return round(dist.euclidean(x1, x2), 2)
        if self.distance == 'Manhattan':
            return sum(abs(i - j) for i, j in zip(x1, x2))
        # Fail loudly instead of silently returning None.
        raise ValueError(
            f'Unsupported distance {self.distance!r}; '
            f'expected one of {self.SUPPORTED_DISTANCES}'
        )

    def cal_new_centroid(self, cluster_df, previous_centroids=None):
        """Recompute one centroid per cluster from the current assignment.

        Also stores a summary frame (columns ``Cluster``, ``point_list``,
        ``New Centroid``) on ``self.cluster_group_df``.

        Args:
            cluster_df: Frame with a ``Points`` column and a ``Cluster`` column.
            previous_centroids: Optional sequence of the centroids used for
                this assignment, ordered so that label ``k`` maps to index
                ``k - 1``. When given, a cluster that received no points keeps
                its previous centroid instead of disappearing. When omitted,
                only clusters present in ``cluster_df`` are returned.

        Returns:
            A 1-D object array of centroid tuples, ordered by cluster label.
        """
        members = {}
        for label, point in zip(cluster_df['Cluster'].tolist(), cluster_df['Points'].tolist()):
            members.setdefault(label, []).append(point)

        if previous_centroids is None:
            labels = sorted(members)
        else:
            labels = list(range(1, len(previous_centroids) + 1))

        point_lists = []
        centroids = []
        for label in labels:
            assigned = members.get(label, [])
            point_lists.append(assigned)
            if assigned:
                centroids.append(self.average_tuple(assigned))
            else:
                # Empty cluster: keep the old centroid so the number of
                # clusters and the label -> centroid mapping stay stable.
                centroids.append(tuple(previous_centroids[label - 1]))

        self.cluster_group_df = pd.DataFrame({
            'Cluster': labels,
            'point_list': pd.Series(point_lists, dtype=object),
            'New Centroid': pd.Series(centroids, dtype=object),
        })
        return self.cluster_group_df['New Centroid'].values

    def average_tuple(self, points):
        """Return the per-coordinate mean of ``points``, rounded to two decimals."""
        return tuple(map(lambda x: round(sum(x) / float(len(x)),2), zip(*points)))

    @staticmethod
    def _resolve_display():
        """Return the notebook ``display`` function, or ``print`` if it is not defined."""
        try:
            # This notebook calls ``display`` without importing it; outside
            # such an environment the name does not exist.
            return display  # noqa: F821
        except NameError:
            return print

    def cluster(self, max_iter=100):
        """Run the clustering loop, printing the working of every iteration.

        The centroid history in ``self.centroid_list`` is reset on every call,
        so the method can be re-run on the same instance.

        Args:
            max_iter: Upper bound on the number of iterations; guards against
                an assignment that keeps oscillating and never converges.
        """
        show = self._resolve_display()
        self.centroid_list = [list(self.centroids)]
        iteration = 0
        converged = False
        while iteration < max_iter and not converged:
            iteration += 1
            current = self.centroid_list[-1]
            print(f'\n\nIteration {iteration}:\n')

            cluster_df = pd.DataFrame({'Points': pd.Series(self.points, dtype=object)})
            dist_cols = []
            for i, centroid in enumerate(current):
                col = f'Dist From Centroid {i+1} {centroid}'
                cluster_df[col] = cluster_df['Points'].apply(
                    lambda x, c=centroid: self.cal_distance(x, c)
                )
                dist_cols.append(col)

            # Position of the smallest distance per row (first one wins on
            # ties), shifted to 1-based cluster labels.
            cluster_df['Cluster'] = cluster_df[dist_cols].values.argmin(axis=1) + 1

            new_centroid = self.cal_new_centroid(cluster_df, previous_centroids=current)
            self.centroid_list.append(list(new_centroid))

            print(f'Distance Matrix: \n')
            show(cluster_df)
            show(self.cluster_group_df)
            print(f'\nNew Centroid: {new_centroid}')
            print('---------------------------------------------------------------------------------')

            converged = self.centroid_list[-1] == current

        if converged:
            print(f'\nCentroids have not changed during iteration {iteration}.\nTerminating condition satisfied.')
        else:
            print(f'\nStopped after {iteration} iteration(s) (max_iter={max_iter}) without convergence.')

In [13]:
# Worked example: eight 2-D points, three seed centroids, Manhattan distance.
# To repeat the walkthrough with Euclidean distance, pass distance='Euclidean'.
KMeans(
    points=[(2, 10), (2, 5), (8, 4), (5, 8), (7, 5), (6, 4), (1, 2), (4, 9)],
    centroids=[(2, 10), (5, 8), (1, 2)],
    distance='Manhattan',
).cluster()



Iteration 1:

Distance Matrix: 



Unnamed: 0,Points,"Dist From Centroid 1 (2, 10)","Dist From Centroid 2 (5, 8)","Dist From Centroid 3 (1, 2)",Cluster
0,"(2, 10)",0,5,9,1
1,"(2, 5)",5,6,4,3
2,"(8, 4)",12,7,9,2
3,"(5, 8)",5,0,10,2
4,"(7, 5)",10,5,9,2
5,"(6, 4)",10,5,7,2
6,"(1, 2)",9,10,0,3
7,"(4, 9)",3,2,10,2


Unnamed: 0,Cluster,point_list,New Centroid
0,1,"[(2, 10)]","(2.0, 10.0)"
1,2,"[(8, 4), (5, 8), (7, 5), (6, 4), (4, 9)]","(6.0, 6.0)"
2,3,"[(2, 5), (1, 2)]","(1.5, 3.5)"



New Centroid: [(2.0, 10.0) (6.0, 6.0) (1.5, 3.5)]
---------------------------------------------------------------------------------


Iteration 2:

Distance Matrix: 



Unnamed: 0,Points,"Dist From Centroid 1 (2.0, 10.0)","Dist From Centroid 2 (6.0, 6.0)","Dist From Centroid 3 (1.5, 3.5)",Cluster
0,"(2, 10)",0.0,8.0,7.0,1
1,"(2, 5)",5.0,5.0,2.0,3
2,"(8, 4)",12.0,4.0,7.0,2
3,"(5, 8)",5.0,3.0,8.0,2
4,"(7, 5)",10.0,2.0,7.0,2
5,"(6, 4)",10.0,2.0,5.0,2
6,"(1, 2)",9.0,9.0,2.0,3
7,"(4, 9)",3.0,5.0,8.0,1


Unnamed: 0,Cluster,point_list,New Centroid
0,1,"[(2, 10), (4, 9)]","(3.0, 9.5)"
1,2,"[(8, 4), (5, 8), (7, 5), (6, 4)]","(6.5, 5.25)"
2,3,"[(2, 5), (1, 2)]","(1.5, 3.5)"



New Centroid: [(3.0, 9.5) (6.5, 5.25) (1.5, 3.5)]
---------------------------------------------------------------------------------


Iteration 3:

Distance Matrix: 



Unnamed: 0,Points,"Dist From Centroid 1 (3.0, 9.5)","Dist From Centroid 2 (6.5, 5.25)","Dist From Centroid 3 (1.5, 3.5)",Cluster
0,"(2, 10)",1.5,9.25,7.0,1
1,"(2, 5)",5.5,4.75,2.0,3
2,"(8, 4)",10.5,2.75,7.0,2
3,"(5, 8)",3.5,4.25,8.0,1
4,"(7, 5)",8.5,0.75,7.0,2
5,"(6, 4)",8.5,1.75,5.0,2
6,"(1, 2)",9.5,8.75,2.0,3
7,"(4, 9)",1.5,6.25,8.0,1


Unnamed: 0,Cluster,point_list,New Centroid
0,1,"[(2, 10), (5, 8), (4, 9)]","(3.67, 9.0)"
1,2,"[(8, 4), (7, 5), (6, 4)]","(7.0, 4.33)"
2,3,"[(2, 5), (1, 2)]","(1.5, 3.5)"



New Centroid: [(3.67, 9.0) (7.0, 4.33) (1.5, 3.5)]
---------------------------------------------------------------------------------


Iteration 4:

Distance Matrix: 



Unnamed: 0,Points,"Dist From Centroid 1 (3.67, 9.0)","Dist From Centroid 2 (7.0, 4.33)","Dist From Centroid 3 (1.5, 3.5)",Cluster
0,"(2, 10)",2.67,10.67,7.0,1
1,"(2, 5)",5.67,5.67,2.0,3
2,"(8, 4)",9.33,1.33,7.0,2
3,"(5, 8)",2.33,5.67,8.0,1
4,"(7, 5)",7.33,0.67,7.0,2
5,"(6, 4)",7.33,1.33,5.0,2
6,"(1, 2)",9.67,8.33,2.0,3
7,"(4, 9)",0.33,7.67,8.0,1


Unnamed: 0,Cluster,point_list,New Centroid
0,1,"[(2, 10), (5, 8), (4, 9)]","(3.67, 9.0)"
1,2,"[(8, 4), (7, 5), (6, 4)]","(7.0, 4.33)"
2,3,"[(2, 5), (1, 2)]","(1.5, 3.5)"



New Centroid: [(3.67, 9.0) (7.0, 4.33) (1.5, 3.5)]
---------------------------------------------------------------------------------

Centroids have not changed during iteration 4.
Terminating condition satisfied.
