In [1]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

In [2]:
all_vectors= []
for i in range(5):
    img = Image.open(f'./usps_{i+1}.jpg')
    img_array = np.array(img)
    row_size , column_size = img_array.shape
    for row_start in range(0,row_size , 16):
        for col_start in range(0, column_size, 16):
            # Extract the 16*16 submatrix
            submatrix = img_array[row_start:row_start + 16, col_start:col_start + 16]
            flattened_vec = submatrix.flatten()
            all_vectors.append(flattened_vec)
datas = np.array(all_vectors)
datas.shape

(5610, 256)

In [3]:

def initialize_centroids(data, k):
    indices = np.random.choice(len(data), k, replace=False)
    return data[indices]

def assign_to_clusters(data, centroids):
    distances = np.linalg.norm(data[:, np.newaxis] - centroids, axis=2)
    return np.argmin(distances, axis=1)

def update_centroids(data, labels, k):
    new_centroids = np.zeros((k, data.shape[1]))
    for i in range(k):
        cluster_points = data[labels == i]
        if len(cluster_points) > 0:
            new_centroids[i] = np.mean(cluster_points, axis=0)
    return new_centroids

def kmeans(data, k, max_iters=100):
    centroids = initialize_centroids(data, k)
    
    for _ in range(max_iters):
        labels = assign_to_clusters(data, centroids)
        new_centroids = update_centroids(data, labels, k)
        
        # Check for convergence
        if np.array_equal(centroids, new_centroids):
            break
        
        centroids = new_centroids
    
    return centroids, labels

In [25]:
k = 7

# Run k-means clustering
centroids, labels = kmeans(datas, k)

print("Final Centroids:\n", centroids)
print("Labels:\n", labels)



Final Centroids:
 [[ 2.40263158  3.87368421  5.77894737 ... 26.68289474 27.04342105
  11.18947368]
 [ 6.64986737 19.32360743 30.12732095 ... 13.45225464  6.71087533
   2.8806366 ]
 [ 2.45714286  2.68214286  2.06785714 ...  3.12142857  2.61785714
   2.19642857]
 ...
 [ 1.6912114   2.83729216  2.51187648 ...  2.43230404  2.85985748
   1.64489311]
 [ 3.54739162  6.63041881  6.6098457  ... 11.21675239 12.33063924
   5.66862601]
 [ 2.7595582   4.31605777  5.81053526 ...  3.26168224  3.50382328
   2.16227698]]
Labels:
 [4 4 4 ... 6 1 5]


In [27]:
for i in range(k):
    tmp_centroid = centroids[i,:]
    centroid_matrix = tmp_centroid.reshape((16,16))
    image_data = np.uint8(centroid_matrix)

    image = Image.fromarray(image_data)

    image.save(f"mel2/k=7/centroids/centroid{i+1}.png")
    image.show()

In [28]:
clustered_data = {}

for i in range(k):
    clustered_data[i] = np.where(labels == i)[0].tolist()

# Print the clustered data
for cluster, data_points in clustered_data.items():
    print(f"Cluster {cluster}: {data_points}")

Cluster 0: [47, 80, 202, 206, 280, 307, 313, 394, 446, 520, 586, 936, 1123, 1127, 1128, 1130, 1133, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1147, 1148, 1154, 1155, 1161, 1162, 1163, 1164, 1166, 1167, 1168, 1169, 1170, 1172, 1173, 1174, 1176, 1177, 1178, 1180, 1181, 1182, 1184, 1185, 1188, 1189, 1192, 1194, 1197, 1198, 1201, 1204, 1205, 1207, 1210, 1211, 1212, 1213, 1214, 1216, 1217, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1227, 1229, 1233, 1234, 1237, 1238, 1244, 1247, 1251, 1253, 1255, 1256, 1258, 1259, 1261, 1262, 1263, 1265, 1268, 1272, 1277, 1280, 1286, 1291, 1293, 1296, 1297, 1298, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1312, 1313, 1314, 1316, 1318, 1319, 1323, 1324, 1325, 1326, 1331, 1332, 1333, 1334, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1352, 1353, 1354, 1356, 1359, 1361, 1362, 1365, 1367, 1368, 1370, 1373, 1375, 1376, 1377, 1378, 1379, 1380, 1382, 1385, 1387, 1389, 1391, 1395, 1396, 1398, 1399, 1400, 1401, 1402, 1404, 

In [24]:
import random


dict = {}
dict['0-1'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[0])]).reshape((16,16))))
dict['0-2'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[0])]).reshape((16,16))))
dict['0-3'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[0])]).reshape((16,16))))
dict['0-4'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[0])]).reshape((16,16))))
dict['0-5'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[0])]).reshape((16,16))))

dict['1-1'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[1])]).reshape((16,16))))
dict['1-2'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[1])]).reshape((16,16))))
dict['1-3'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[1])]).reshape((16,16))))
dict['1-4'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[1])]).reshape((16,16))))
dict['1-5'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[1])]).reshape((16,16))))

dict['2-1'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[2])]).reshape((16,16))))
dict['2-2'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[2])]).reshape((16,16))))
dict['2-3'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[2])]).reshape((16,16))))
dict['2-4'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[2])]).reshape((16,16))))
dict['2-5'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[2])]).reshape((16,16))))

dict['3-1'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[3])]).reshape((16,16))))
dict['3-2'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[3])]).reshape((16,16))))
dict['3-3'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[3])]).reshape((16,16))))
dict['3-4'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[3])]).reshape((16,16))))
dict['3-5'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[3])]).reshape((16,16))))

dict['4-1'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[4])]).reshape((16,16))))
dict['4-2'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[4])]).reshape((16,16))))
dict['4-3'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[4])]).reshape((16,16))))
dict['4-4'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[4])]).reshape((16,16))))
dict['4-5'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[4])]).reshape((16,16))))

dict['5-1'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[5])]).reshape((16,16))))
dict['5-2'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[5])]).reshape((16,16))))
dict['5-3'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[5])]).reshape((16,16))))
dict['5-4'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[5])]).reshape((16,16))))
dict['5-5'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[5])]).reshape((16,16))))

dict['6-1'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[6])]).reshape((16,16))))
dict['6-2'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[6])]).reshape((16,16))))
dict['6-3'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[6])]).reshape((16,16))))
dict['6-4'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[6])]).reshape((16,16))))
dict['6-5'] = Image.fromarray(np.uint8(np.array(datas[random.choice(clustered_data[6])]).reshape((16,16))))


for key , val in dict.items():
    val.save(f"mel2/k=7/cluster{key}.png")
