In [None]:
import numpy as np
import matplotlib.pyplot as plt
from utils import *

%matplotlib inline

In [None]:
def find_closest_centroids(X, centroids):
    """
    Assign every example to its nearest centroid (Euclidean distance).

    Args:
        X (ndarray): (m, n) array with one example per row.
        centroids (ndarray): (K, n) array with one centroid per row.

    Returns:
        idx (ndarray): (m,) integer array where idx[i] is the row index in
            `centroids` of the centroid closest to X[i]. When two centroids
            are equally close, the one with the lower index wins.
    """
    K = centroids.shape[0]

    # distances[i, j] = ||X[i] - centroids[j]||. Looping over the (few)
    # centroids and vectorising over the (many) examples avoids the
    # per-example Python loop while keeping memory at m * K floats.
    distances = np.empty((X.shape[0], K))
    for j in range(K):
        distances[:, j] = np.linalg.norm(X - centroids[j], axis=1)

    # argmin returns the first minimum along each row, i.e. the lowest
    # centroid index on ties.
    return np.argmin(distances, axis=1)

In [None]:
# Load the example dataset into X. `load_data` is not defined in this
# notebook; presumably it comes from the `from utils import *` line — confirm.
X = load_data()

In [None]:
# Sanity check: preview the first five entries of X and report its shape.
print("First five elements of X are:\n", X[:5]) 
print('The shape of X is:', X.shape)

In [None]:
# Hand-picked starting centroids: three 2-D points, i.e. three clusters.
initial_centroids = np.array([[3,3], [6,2], [8,5]])

# Assign each row of X to its nearest initial centroid.
idx = find_closest_centroids(X, initial_centroids)

print("First three elements in idx are:", idx[:3])

# NOTE(review): mid-notebook star import. The *_test helpers called in this
# notebook are not defined here and presumably come from `public_tests`.
from public_tests import *

# Run the provided check against the implementation defined above.
find_closest_centroids_test(find_closest_centroids)

In [None]:
def compute_centroids(X, idx, K, previous_centroids=None):
    """
    Compute each centroid as the mean of the examples assigned to it.

    Args:
        X (ndarray): (m, n) array with one example per row.
        idx (array_like): (m,) cluster index assigned to each row of X.
        K (int): number of centroids.
        previous_centroids (ndarray, optional): (K, n) centroids from the
            previous iteration. When given, a cluster with no assigned
            examples keeps its previous centroid.

    Returns:
        centroids (ndarray): (K, n) float array of updated centroids. A
            cluster with no assigned examples is never NaN: it takes the row
            from `previous_centroids` if supplied, otherwise it is left at
            zero.
    """
    n = X.shape[1]
    idx = np.asarray(idx)
    centroids = np.zeros((K, n))

    for k in range(K):
        points = X[idx == k]
        if points.shape[0] > 0:
            centroids[k] = np.mean(points, axis=0)
        elif previous_centroids is not None:
            centroids[k] = previous_centroids[k]
        # Otherwise the row stays at its zero initial value. Taking the mean
        # of an empty selection would produce NaN (plus a RuntimeWarning),
        # and a NaN distance is then picked by np.argmin for every example
        # in the next assignment step.

    return centroids

In [None]:
# Recompute the three centroids from whatever assignments `idx` currently
# holds (the initial-centroid assignments when the notebook is run in order).
K = 3
centroids = compute_centroids(X, idx, K)

print("The centroids are:", centroids)

# compute_centroids_test is not defined in this notebook; presumably it is
# supplied by the earlier `from public_tests import *` — confirm.
compute_centroids_test(compute_centroids)

In [None]:
def run_kMeans(X, initial_centroids, max_iters=10, plot_progress=False):
    """
    Run K-Means on X for a fixed number of iterations.

    Each iteration assigns every example to its closest centroid and then
    moves each centroid to the mean of the examples assigned to it.

    Args:
        X (ndarray): (m, n) array with one example per row.
        initial_centroids (ndarray): (K, n) starting centroids.
        max_iters (int): number of assignment/update iterations to run.
        plot_progress (bool): when True, call `plot_progress_kMeans` once per
            iteration (that helper is not defined in this notebook;
            presumably it comes from `utils`).

    Returns:
        centroids (ndarray): (K, n) centroids after the last iteration.
        idx (ndarray): (m,) index of the closest centroid for each example,
            as computed in the last iteration.
    """
    m = X.shape[0]
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    previous_centroids = centroids
    # Integer dtype so the returned idx can still be used to index
    # `centroids` when max_iters == 0 and the loop never runs.
    idx = np.zeros(m, dtype=int)

    for i in range(max_iters):
        print("K-Means iteration %d/%d" % (i, max_iters-1))

        # Assignment step.
        idx = find_closest_centroids(X, centroids)

        if plot_progress:
            plot_progress_kMeans(X, centroids, previous_centroids, idx, K, i)
            previous_centroids = centroids

        # Update step.
        new_centroids = compute_centroids(X, idx, K)

        # A cluster that received no examples has no meaningful mean; keep
        # its centroid where it was instead of letting an undefined value
        # leak into the next assignment step.
        empty = ~np.isin(np.arange(K), idx)
        if empty.any():
            new_centroids[empty] = centroids[empty]
        centroids = new_centroids

    plt.show()
    return centroids, idx

# Reload the example dataset so this demo does not depend on X from earlier cells.
X = load_data()

# Same hand-picked starting centroids as in the earlier demo cell.
initial_centroids = np.array([[3,3],[6,2],[8,5]])
K = 3

max_iters = 10

# Run K-Means and plot the progress of every iteration.
centroids, idx = run_kMeans(X, initial_centroids, max_iters, plot_progress=True)

In [None]:
def kMeans_init_centroids(X, K):
    """
    Choose K initial centroids by sampling rows of X without replacement.

    Args:
        X (ndarray): array with one example per row.
        K (int): number of centroids to pick.

    Returns:
        centroids (ndarray): the first K rows of X after a random shuffle
            of the row order (fewer than K rows if X has fewer than K rows).
    """
    num_examples = X.shape[0]
    # Shuffle the row indices so no example can be selected twice.
    shuffled_rows = np.random.permutation(num_examples)
    chosen_rows = shuffled_rows[:K]
    return X[chosen_rows]

In [None]:
# Read the image into an array; the path is relative to the working directory.
original_img = plt.imread('bird_small.png')

In [None]:
# Display the image exactly as loaded, before any rescaling.
plt.imshow(original_img)

In [None]:
# Report the array shape; a later cell reshapes it assuming 3 values per pixel.
print("Shape of original_img is:", original_img.shape)

In [None]:
# Rescale the pixel values by 1/255. The final plotting cell multiplies by 255
# again before display, so the two must be changed together.
# NOTE(review): matplotlib documents that imread returns PNG data as floats
# already in [0, 1]; if so, this division is unnecessary — confirm.
# NOTE(review): this overwrites original_img, so re-running the cell divides
# the values by 255 a second time.
original_img = original_img / 255
# Flatten to one row per pixel with 3 columns (height * width, 3).
X_img = np.reshape(original_img, (original_img.shape[0] * original_img.shape[1], 3))

In [None]:
# Cluster the pixel rows of X_img into K groups.
K = 16                       
max_iters = 10               

# Start from K randomly chosen pixels. No random seed is set in the visible
# cells, so the chosen centroids can differ from run to run.
initial_centroids = kMeans_init_centroids(X_img, K) 

# plot_progress is left at its default (False) here.
centroids, idx = run_kMeans(X_img, initial_centroids, max_iters) 

In [None]:
# Inspect the per-pixel centroid assignments returned by run_kMeans.
print("Shape of idx:", idx.shape)
print("Closest centroid for the first five elements:", idx[:5])

In [None]:
# Replace every pixel row by the centroid it was assigned to.
X_recovered = centroids[idx, :] 
# Restore the flat pixel list to the shape of the image array.
X_recovered = np.reshape(X_recovered, original_img.shape) 

In [None]:
# Side-by-side comparison of the original and the reconstructed image.
fig, ax = plt.subplots(1,2, figsize=(8,8))
# NOTE(review): likely redundant — both panels are switched off explicitly below.
plt.axis('off')

# Multiply by 255 to undo the division applied to original_img earlier.
ax[0].imshow(original_img*255)
ax[0].set_title('Original')
ax[0].set_axis_off()

# X_recovered is built from centroids fitted on the divided values, so it is
# multiplied by 255 in the same way.
ax[1].imshow(X_recovered*255)
ax[1].set_title('Compressed with %d colours'%K)
ax[1].set_axis_off()