In [88]:
import numpy as np
import matplotlib.pyplot as plt

In [89]:
def choose_initial_cluster_centers(points, clusters):
    """Pick ``clusters`` distinct data points to use as initial cluster centers.

    The first center is drawn uniformly at random from ``points``. Every
    following center is the not-yet-chosen point with the largest Euclidean
    distance from the center that was chosen immediately before it.

    Parameters
    ----------
    points : array-like of shape (n_points, n_features)
        The data set; each row is one point.
    clusters : int
        Number of centers to pick.

    Returns
    -------
    numpy.ndarray of shape (clusters, n_features)
        Float array whose rows are the chosen centers, in selection order.

    Raises
    ------
    ValueError
        If ``clusters`` is smaller than 1 or larger than the number of points.
    """
    points = np.asarray(points)
    n_points = points.shape[0]
    if not 1 <= clusters <= n_points:
        raise ValueError(
            "clusters must be between 1 and the number of points "
            "(got clusters=%d for %d points)" % (clusters, n_points)
        )

    # Size the centers from the data instead of assuming 784 features.
    us = np.zeros((clusters, points.shape[1]))
    # Boolean mask of points already used as a center. A mask (rather than an
    # index array pre-filled with zeros) avoids wrongly treating point 0 as taken.
    chosen = np.zeros(n_points, dtype=bool)

    index = np.random.choice(n_points, 1)[0]    # choose first point randomly
    chosen[index] = True
    us[0] = points[index]

    for i in range(1, clusters):
        # Euclidean distance of every point from the previously chosen center.
        distances = np.linalg.norm(points - us[i - 1], axis=1)
        # Already-chosen points get a distance no real point can have, so a
        # fresh point always wins, even when it duplicates the last center.
        distances[chosen] = -1.0
        index = int(np.argmax(distances))       # first point with the largest distance
        chosen[index] = True
        us[i] = points[index]

    return us

In [90]:
def compute_membership(centers, point, rnks):
    """Find the cluster whose center is nearest to ``point``.

    Parameters
    ----------
    centers : numpy.ndarray of shape (n_clusters, n_features)
        Current cluster centers, one per row.
    point : array-like of shape (n_features,)
        The point to assign.
    rnks : sequence of length n_clusters
        Number of members counted so far for each cluster; only used to break
        exact distance ties.

    Returns
    -------
    index : int
        Row index in ``centers`` of the selected cluster.
    min_distance : float
        Euclidean distance from ``point`` to that center.
    """
    index = 0
    min_distance = np.linalg.norm(point - centers[0])

    for i in range(1, centers.shape[0]):
        distance = np.linalg.norm(point - centers[i])     # Euclidean distance

        # A distance of exactly 0 (point sits on the center) is a valid and
        # best possible match, so it must not be excluded.
        if distance < min_distance:
            min_distance = distance
            index = i
        elif distance == min_distance and rnks[i] > rnks[index]:
            # Equidistant centers: prefer the cluster with more members.
            index = i

    return index, min_distance

In [102]:
def kmeans(points, clusters, max_iterations=1000, verbose=True):
    """Cluster ``points`` into ``clusters`` groups with the k-means iteration.

    Starting from the centers returned by ``choose_initial_cluster_centers``,
    each pass assigns every point to a cluster via ``compute_membership`` and
    then moves each center to the mean of its members. Iteration stops when a
    pass leaves the centers unchanged or after ``max_iterations`` passes.

    Parameters
    ----------
    points : array-like of shape (n_points, n_features)
        The data set; each row is one point.
    clusters : int
        Number of clusters.
    max_iterations : int, optional
        Upper bound on the number of passes (default 1000).
    verbose : bool, optional
        When True (default) the iteration number is printed on every pass.

    Returns
    -------
    centers : numpy.ndarray of shape (clusters, n_features)
        Final cluster centers.
    sum_distances : float
        Sum over all points of the distance to their assigned center, as
        measured during the last assignment pass.
    rnks : numpy.ndarray of shape (clusters,)
        Number of members of each cluster in the last assignment pass.
    """
    points = np.asarray(points)
    n_features = points.shape[1]

    centers = choose_initial_cluster_centers(points, clusters)
    # None marks "no previous pass yet", so the first pass always runs, even if
    # the initial centers happen to be all zeros.
    old_centers = None
    # Defined up front so the return statement is valid even with zero passes.
    rnks = np.zeros(clusters)
    distances = np.zeros(clusters)

    for limit in range(max_iterations):

        if old_centers is not None and np.array_equal(centers, old_centers):
            break       # stop if the algorithm converges

        if verbose:
            print("Iteration Number : ")
            print(limit)

        points_sum = np.zeros((clusters, n_features))   # per-cluster sum of member points
        rnks = np.zeros(clusters)       # number of members in each cluster
        distances = np.zeros(clusters)  # per-cluster sum of point-to-center distances
        old_centers = np.copy(centers)

        for point in points:
            # cluster number of the point and its distance from that cluster's center
            rnk, distance = compute_membership(centers, point, rnks)
            rnks[rnk] += 1
            distances[rnk] += distance
            points_sum[rnk] += point

        for i in range(clusters):
            # An empty cluster keeps its previous center; dividing by a zero
            # count would turn it into NaN and prevent convergence.
            if rnks[i] > 0:
                centers[i] = points_sum[i] / rnks[i]    # adjust the cluster center

    sum_distances = np.sum(distances)

    return centers, sum_distances, rnks

In [92]:
# Read Images/1.jpg ... Images/2400.jpg and flatten the 28x28 pixel grid of
# each image, row by row, into one 784-element list per image.
image_rows = []

for image_number in range(1, 2401):
    img = plt.imread("Images/" + str(image_number) + ".jpg")
    image_rows.append([img[row, col] for row in range(28) for col in range(28)])

# One row per image.
images = np.asarray(image_rows)
print(images.shape)

(2400, 784)


In [93]:
# Same data with the axes of `images` swapped.
imagesT = images.T
print(imagesT.shape)

(784, 2400)


In [94]:
# Parse the label file into a NumPy array and show it.
labels = np.genfromtxt(fname='Images/Training Labels.txt')
print(labels)

[0. 0. 0. ... 9. 9. 9.]


In [103]:
# Run k-means 30 times with 10 clusters, storing for every run the centers,
# the summed point-to-center distance and the per-cluster member counts.
centers_arr = np.zeros((30, 10, 784))
distances_arr = np.zeros(30)
rnks = np.zeros((30, 10))

for run in range(30):
    run_centers, run_distance, run_counts = kmeans(images, 10)
    centers_arr[run] = run_centers
    distances_arr[run] = run_distance
    rnks[run] = run_counts

# Smallest summed distance reached by any of the runs.
print(distances_arr.min())

Iteration Number : 
0
Iteration Number : 
1
Iteration Number : 
2
Iteration Number : 
3
Iteration Number : 
4
Iteration Number : 
5
Iteration Number : 
6
Iteration Number : 
7
Iteration Number : 
8
Iteration Number : 
9
Iteration Number : 
10
Iteration Number : 
11
Iteration Number : 
12
Iteration Number : 
13
Iteration Number : 
14
Iteration Number : 
15
Iteration Number : 
16
Iteration Number : 
17
Iteration Number : 
18
Iteration Number : 
19
Iteration Number : 
20
Iteration Number : 
21
Iteration Number : 
22
Iteration Number : 
23
Iteration Number : 
24
Iteration Number : 
25
Iteration Number : 
26
Iteration Number : 
27
Iteration Number : 
28
Iteration Number : 
29
Iteration Number : 
30
Iteration Number : 
31
Iteration Number : 
32
Iteration Number : 
33
Iteration Number : 
34
Iteration Number : 
35
Iteration Number : 
36
Iteration Number : 
37
[[0.09090909 0.08300395 0.07114625 ... 0.         0.         0.        ]
 [0.21052632 0.23684211 0.36842105 ... 0.         0.         0.

Iteration Number : 
0
Iteration Number : 
1
Iteration Number : 
2
Iteration Number : 
3
Iteration Number : 
4
Iteration Number : 
5
Iteration Number : 
6
Iteration Number : 
7
Iteration Number : 
8
Iteration Number : 
9
Iteration Number : 
10
Iteration Number : 
11
Iteration Number : 
12
Iteration Number : 
13
Iteration Number : 
14
Iteration Number : 
15
Iteration Number : 
16
Iteration Number : 
17
[[0.31161972 0.15669014 0.20246479 ... 0.         0.         0.        ]
 [0.07826087 0.37391304 0.36521739 ... 0.         0.         0.        ]
 [0.03896104 0.02597403 0.01948052 ... 0.         0.         0.        ]
 ...
 [0.04291845 0.07725322 0.07725322 ... 0.         0.         0.        ]
 [0.13306452 0.18951613 0.2016129  ... 0.         0.         0.        ]
 [0.03030303 0.07878788 0.00606061 ... 0.         0.         0.        ]]
Iteration Number : 
0
Iteration Number : 
1
Iteration Number : 
2
Iteration Number : 
3
Iteration Number : 
4
Iteration Number : 
5
Iteration Number : 


Iteration Number : 
19
Iteration Number : 
20
Iteration Number : 
21
Iteration Number : 
22
Iteration Number : 
23
Iteration Number : 
24
Iteration Number : 
25
Iteration Number : 
26
Iteration Number : 
27
Iteration Number : 
28
[[0.08760684 0.07905983 0.05555556 ... 0.         0.         0.        ]
 [0.17777778 0.54444444 0.51111111 ... 0.         0.         0.        ]
 [0.37269373 0.19188192 0.19188192 ... 0.         0.         0.        ]
 ...
 [0.13181818 0.06363636 0.17272727 ... 0.         0.         0.        ]
 [0.03910615 0.16201117 0.08938547 ... 0.         0.         0.        ]
 [0.21666667 0.19722222 0.2        ... 0.         0.         0.        ]]
Iteration Number : 
0
Iteration Number : 
1
Iteration Number : 
2
Iteration Number : 
3
Iteration Number : 
4
Iteration Number : 
5
Iteration Number : 
6
Iteration Number : 
7
Iteration Number : 
8
Iteration Number : 
9
Iteration Number : 
10
Iteration Number : 
11
Iteration Number : 
12
Iteration Number : 
13
Iteration Numb

Iteration Number : 
0
Iteration Number : 
1
Iteration Number : 
2
Iteration Number : 
3
Iteration Number : 
4
Iteration Number : 
5
Iteration Number : 
6
Iteration Number : 
7
Iteration Number : 
8
Iteration Number : 
9
Iteration Number : 
10
Iteration Number : 
11
Iteration Number : 
12
Iteration Number : 
13
Iteration Number : 
14
Iteration Number : 
15
Iteration Number : 
16
Iteration Number : 
17
Iteration Number : 
18
Iteration Number : 
19
Iteration Number : 
20
Iteration Number : 
21
Iteration Number : 
22
Iteration Number : 
23
Iteration Number : 
24
Iteration Number : 
25
Iteration Number : 
26
Iteration Number : 
27
Iteration Number : 
28
Iteration Number : 
29
Iteration Number : 
30
Iteration Number : 
31
Iteration Number : 
32
Iteration Number : 
33
Iteration Number : 
34
[[0.10704961 0.07832898 0.06788512 ... 0.         0.         0.        ]
 [0.17777778 0.54444444 0.51111111 ... 0.         0.         0.        ]
 [0.41433022 0.17445483 0.25856698 ... 0.         0.       

Iteration Number : 
0
Iteration Number : 
1
Iteration Number : 
2
Iteration Number : 
3
Iteration Number : 
4
Iteration Number : 
5
Iteration Number : 
6
Iteration Number : 
7
Iteration Number : 
8
Iteration Number : 
9
Iteration Number : 
10
Iteration Number : 
11
Iteration Number : 
12
Iteration Number : 
13
Iteration Number : 
14
Iteration Number : 
15
Iteration Number : 
16
Iteration Number : 
17
Iteration Number : 
18
Iteration Number : 
19
Iteration Number : 
20
Iteration Number : 
21
Iteration Number : 
22
Iteration Number : 
23
Iteration Number : 
24
Iteration Number : 
25
Iteration Number : 
26
Iteration Number : 
27
Iteration Number : 
28
Iteration Number : 
29
Iteration Number : 
30
Iteration Number : 
31
Iteration Number : 
32
Iteration Number : 
33
Iteration Number : 
34
Iteration Number : 
35
Iteration Number : 
36
Iteration Number : 
37
Iteration Number : 
38
Iteration Number : 
39
Iteration Number : 
40
Iteration Number : 
41
[[0.03162055 0.07905138 0.01976285 ... 0.   