In [None]:
# Import all the necessary packages
import scipy.io
import imageio.v2 as imageio
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import cv2
import time


### First, working with a small dataset to get hands-on with K-means clustering

In [None]:
# Load the MATLAB file 'data.mat' (looked up relative to the working directory)
mat = scipy.io.loadmat('data.mat')

In [None]:
# Pull out the array stored under the key 'X'; its first two columns are plotted below
X = mat['X']

In [None]:
# Scatter the raw points; the data set can be seen to form 3 clusters
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1])
plt.show()

In [None]:
def assignCluster(X,centroid):
    """Assign every sample to its nearest centroid (Euclidean distance).

    Parameters
    ----------
    X : array-like, shape (l, m)
        Samples, one per row.
    centroid : array-like, shape (k, m)
        Current centroid positions, one per row.

    Returns
    -------
    numpy.ndarray, shape (l, 1), dtype int
        1-based index of the closest centroid for each sample. Ties are
        resolved in favour of the lowest centroid index.

    Raises
    ------
    ValueError
        If there are samples to assign but no centroids.
    """
    # Work in floating point so unsigned inputs (e.g. uint8 pixels) cannot
    # wrap around when subtracted from each other.
    X = np.asarray(X, dtype=float)
    centroid = np.asarray(centroid, dtype=float)
    l = X.shape[0]
    k = centroid.shape[0]
    if k == 0 and l > 0:
        raise ValueError("centroid must contain at least one row")

    # Loop over the (few) centroids and vectorise over the (many) samples,
    # keeping only a running minimum so memory stays O(l) instead of O(l*k).
    best_dist = np.full(l, np.inf)
    best_idx = np.zeros(l, dtype=int)
    for j in range(k):
        dist = np.sqrt(np.sum(np.power(X - centroid[j, :], 2), axis=1))
        # Strict '<' keeps the first minimum on ties and never selects a
        # NaN distance (comparisons with NaN are False).
        closer = dist < best_dist
        best_dist[closer] = dist[closer]
        best_idx[closer] = j

    cluster = (best_idx + 1).reshape(l, 1)  # labels are 1-based
    return cluster

In [None]:
def findCentroid(X, cluster, k):
    """Recompute each centroid as the mean of the samples assigned to it.

    Parameters
    ----------
    X : array-like, shape (l, m)
        Samples, one per row.
    cluster : array-like of int, shape (l, 1) or (l,)
        1-based cluster label of every sample.
    k : int
        Number of clusters.

    Returns
    -------
    numpy.ndarray, shape (k, m), dtype float
        New centroid positions. A cluster with no members cannot be averaged
        (it would be 0/0 = NaN), so it is re-seeded at a randomly chosen
        sample instead; if X has no rows it is left at zero.
    """
    X = np.asarray(X, dtype=float)
    l, m = X.shape
    labels = np.asarray(cluster).reshape(-1).astype(int) - 1  # 0-based rows

    centroids = np.zeros((k, m))
    total = np.zeros(k)
    # ufunc.at is unbuffered, so samples sharing a label all accumulate.
    np.add.at(centroids, labels, X)
    np.add.at(total, labels, 1)

    populated = total > 0
    centroids[populated] /= total[populated, None]

    # Re-seed empty clusters so a NaN never enters the next assignment step.
    if l > 0:
        for j in np.flatnonzero(~populated):
            centroids[j] = X[random.randrange(l)]
    return centroids

In [None]:
def getCentroids(clusterNum,X):
    """Pick the initial centroids as clusterNum random samples from X.

    Parameters
    ----------
    clusterNum : int
        Number of centroids to return.
    X : array-like, shape (l, m)
        Samples, one per row.

    Returns
    -------
    numpy.ndarray, shape (clusterNum, m), dtype float
        Initial centroid positions. When X holds at least clusterNum distinct
        rows the centroids are all different, so no cluster starts out as a
        duplicate of another. Otherwise rows are drawn with replacement and
        duplicates are unavoidable.
    """
    X = np.asarray(X)
    distinct = np.unique(X, axis=0)  # de-duplicated rows of X
    if distinct.shape[0] >= clusterNum:
        pool = distinct
        picks = random.sample(range(distinct.shape[0]), clusterNum)
    else:
        # Not enough distinct points: fall back to drawing with replacement.
        pool = X
        picks = [random.randint(0, X.shape[0]-1) for _ in range(clusterNum)]

    centroids = np.zeros([clusterNum, X.shape[1]])
    for i, index in enumerate(picks):
        centroids[i] = pool[index]

    return centroids

In [None]:
class Kmeans:
    """Plain container for the outcome of one clustering run.

    Attributes are stored exactly as passed in:
    K (number of clusters), cluster (per-sample labels), minDist,
    centroids (final centroid positions) and iterations (iterations run).
    """

    def __init__(self, K, cluster, minDist, centroids, iterations):
        # Clustering result
        self.centroids = centroids
        self.cluster = cluster
        # Run metadata
        self.K = K
        self.iterations = iterations
        self.minDist = minDist
        

In [None]:
def KNN(X, clusterNum, maxIter=10, verbose=True):
    """Cluster X into clusterNum groups with the k-means algorithm.

    NOTE: despite its name this is k-means clustering, not k-nearest
    neighbours; the name is kept so existing calls keep working.

    Starting from random initial centroids, the function alternates between
    assigning every sample to its nearest centroid and moving each centroid
    to the mean of its samples. It stops when the centroids no longer change
    or after maxIter iterations, whichever comes first.

    Parameters
    ----------
    X : numpy.ndarray, shape (l, m)
        Samples, one per row.
    clusterNum : int
        Number of clusters.
    maxIter : int, optional
        Upper bound on the number of iterations (default 10, the value that
        used to be hard-coded).
    verbose : bool, optional
        When True (default) print, for every iteration, the seconds spent in
        the assignment step and in the centroid update step.

    Returns
    -------
    Kmeans
        Holds clusterNum, the final labels as returned by assignCluster,
        minDist, the final centroids and the number of iterations run.

    Raises
    ------
    ValueError
        If maxIter is smaller than 1.
    """
    if maxIter < 1:
        raise ValueError("maxIter must be at least 1")

    minDist = 0  # placeholder: never computed, passed through to the result as 0
    centroids = getCentroids(clusterNum,X)
    cluster = None
    i = 0
    while i < maxIter:
        i += 1
        centroids_previous = centroids
        start = time.perf_counter()
        cluster = assignCluster(X,centroids)
        mid = time.perf_counter()
        centroids = findCentroid(X, cluster, clusterNum)
        end = time.perf_counter()
        if verbose:
            print(mid-start,end-mid)
        # Unchanged centroids mean the assignment can no longer change either.
        if np.array_equal(centroids_previous, centroids):
            break

    kmeans = Kmeans(clusterNum,cluster,minDist,centroids,i)
    return kmeans

In [None]:
# Cluster the sample data into 3 groups (KNN is the k-means routine defined above)
kmeans = KNN(X, 3)

In [None]:
# Plot every cluster in its own colour, then overlay the final centroids.
labels = kmeans.cluster.ravel()  # 1-based label per sample
for label in range(1, len(kmeans.centroids) + 1):
    # A boolean mask keeps the selection 2-D even when a cluster is empty,
    # so the column indexing below cannot fail.
    members = X[labels == label]
    plt.scatter(members[:,0],members[:,1])
plt.scatter(kmeans.centroids[:,0],kmeans.centroids[:,1])
plt.show()

### Image compression with K-means

In [None]:
# Read the source picture into an array and display its dimensions
image = imageio.imread('woof_meow.jpg')
image.shape

In [None]:
# Flatten the pixel grid into a 2-D table: one row per pixel, one column per channel
image_2D = image.reshape(-1, image.shape[2])
image_2D.shape

In [None]:
# Cluster all pixel rows into 16 groups; the resulting centroids serve as the colour palette
kmeans_image = KNN(image_2D, 16)

In [None]:
# Compression of image: replace every pixel by the centroid of its cluster.
palette = kmeans_image.centroids
if np.issubdtype(image_2D.dtype, np.integer):
    # Centroids are float means; round to the nearest representable value
    # (instead of truncating) and stay inside the pixel dtype's range.
    limits = np.iinfo(image_2D.dtype)
    palette = np.clip(np.rint(palette), limits.min, limits.max)
# Labels are 1-based; a single fancy-index lookup handles all pixels at once.
image_compress = palette[kmeans_image.cluster.ravel() - 1].astype(image_2D.dtype)

In [None]:
# Sanity check: the compressed data should still have one row per pixel
image_compress.shape

In [None]:
# Fold the flat pixel table back into the original height x width x channels layout
image_compress = image_compress.reshape(image.shape[:3])

In [None]:
# Show the original and the compressed picture side by side
fig, ax = plt.subplots(1, 2, figsize=(9, 9))
panels = [
    (image, 'Original'),
    (image_compress, f'Compressed with K={kmeans_image.centroids.shape[0]}'),
]
for axis, (picture, title) in zip(ax, panels):
    axis.imshow(picture)
    axis.set_title(title)
fig.tight_layout()
plt.show()

In [None]:
# The picture was read with imageio (RGB channel order) while cv2.imwrite
# expects BGR, so swap the channels before saving.
image_comp = cv2.cvtColor(image_compress, cv2.COLOR_RGB2BGR)
# cv2.imwrite reports failure through its return value instead of raising.
if not cv2.imwrite('output.jpg', image_comp):
    raise IOError("cv2.imwrite could not write 'output.jpg'")