## K-Means Image compression from scratch practice tensorflow

In [94]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [95]:
def load_data():
    data = np.load('../datasets/kmeans_image_compression.npy')
    return data

In [107]:

def initialize_centroids(X, k):
    X_min = np.min(X[:, 0])
    X_max = np.max(X[:, 0])

    Y_min = np.min(X[:, 1])
    Y_max = np.max(X[:, 1])

    centroids = np.empty((1, 2))
    for i in range(k):
        x = np.random.uniform(X_min, X_max)
        y = np.random.uniform(Y_min, Y_max)
        np.append(centroids, [x, y])
    return centroids



def calculate_centroids(X, centroids):
    X_centroids = np.zeros(X.shape[0], dtype=int)
    for i in range(X.shape[0]):
        distance = []
        for j in range(len(centroids)):
            norm = np.linalg.norm(X[i] - centroids[j])
            distance.append(norm)

        X_centroids[i] = int(np.argmin(distance))
    return X_centroids


### Implementing Kmeans

In [108]:
'''
Initialize K (number of clusters)
cost_array = []
Iterate through K:
    lowest cost = 10000
    Initialize that number of centroids N times:
        while (centroid is moving):
            Calculate the loss function
            Group the data to the centroid they are closest to
            take mean in every group
            if (centroids === means):
                break
            centroids = means
        calculate cost
        if (cur_cost < lowest_cost):
            cur_cost = lowest_cost
    cost_array.push(lowest_cost)

Plot the elbow graph and select the value of K and its cost
'''

K = 7
iterations = 100
cost_array = []
X = load_data()

for k in range(2, K+1):
    lowest_cost = 100000
    # randomly initializing 100 times for each value of K
    for iteration in range(iterations):
        cost = 0
        centroids = initialize_centroids(X, k)
        while(True):
            X_centroids = calculate_centroids(X, centroids)
            ## calculate mean of each cluster
            means = []
            for kk in range(k):
                cluster_mean = np.zeros((1, 2))
                for i in range(X.shape[0]):
                    cluster_number = X_centroids[i]
                    if cluster_number == kk:
                        cluster_mean += X[i]
                cluster_mean = cluster_mean / X.shape[0]
                means.append(cluster_mean)


            if np.allclose(centroids, means):
                #print('used')
                break
            else:
                #print(f"centroids: {centroids} || means: {means}")
                centroids = means

        ## calculate cost of the final centroids
        X_centroids = calculate_centroids(X, centroids)
        print(f"iteration: {iteration} || X_centroid: {X_centroids}")
        for i in range(len(X_centroids)):
            cost += np.linalg.norm(X[i] - centroids[X_centroids[i]])
        
        if lowest_cost > cost:
            lowest_cost = cost
    print(f"Lowest cost for K = {k}: {lowest_cost}")

iteration: 0 || X_centroid: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0
 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0
 0 0 0 1 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0]
iteration: 1 || X_centroid: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0
 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0

### Image compression

In [98]:
original_image = plt.imread('../datasets/pizza.jpg')