In [26]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline

# smiley = np.array(Image.open('smiley.jpg'))

In [25]:
def readImage(img):
    
    #preallocate size for X
    X = np.zeros([256, 1024])
    
    #initialize k 
    k = 0
    
    #step size 
    step = 16

    #loop through pixels of image 
    for i in range(0, 512, step):
        for j in range(0, 512, step):
            #split into 1024 blocks of 16x16
            block = img[i:i+step, j:j+step]
            #reshape each block into a vector of length 256
            X[:,k] = np.reshape(block, (1, 256))#total of 1024 vectors
            #sum up k
            k +=1
            
    #return 1024 vectors of length 256
    return X

In [34]:
#function to calculate the closest centroids 
def closestCentroids(img, centroids):
    
    #initialize empty list to hold minimum generators 
    generator = []
    
    #for loop to dynamically create generator lists
    #for different number of clusters 
    for i in range(len(centroids)):
        generator.append([])
        
    #iterate through length of the original image 
    for i in range(len(img)):
        
        #variable iterates and stores each row of image matrix 
        vals = img[i]
        
        #using list comprehension to calculate the euclidean distance from image and each centroid 
        euclid = [abs(vals[0] - centroids[j][0]) + abs(vals[1] - centroids[j][1])
                  + abs(vals[2] - centroids[j][2]) for j in range(len(centroids))]
        
        #add min euclidian distance 
        generator[np.argmin(euclid)].append(vals)
    
    #return min generators 
    return generator

In [35]:
#function to update generators for each iteration of k means 
def updateGenerators(generator, nclusters):
    
    #empty array to hold new generators 
    new_generators = []
    
    #loop through the length of number of clusters 
    for i in range(nclusters):
        
        #append the average generator for each dimension 
        #return a vector with 3 vals 
        new_generators.append(np.average(np.array(generator[i]),axis=0))
        
    #return new generators 
    return new_generators

In [36]:
#import external modules 
import random

#function to perform k means clustering 
def kmeans(img, nclusters):
    
    #create initial generators from original image matrix 
    generator = random.sample(img.tolist(), nclusters)
    
    #define iterations 
    iterations = 10
    
    #run k means for 10 iterations 
    for i in range(iterations):
        
        #calculate closest generators  
        generator= closestCentroids(img, generator)
        #update new generators 
        generator = updateGenerators(generator, nclusters)
        
    return generator

In [50]:
#function to return image to original shape 
def replacePixel(img, generator):
    
    #initialize empty matrix the same size as 
    #the original image matrix 
    new_img = np.zeros(img.shape)
    
    #iterate fot the length of the original matrix 
    for i in range(len(img)):
        
        #variable iterates and stores each row of pixels 
        vals = img[i]
        
        #calculate euclidean distance 
        euclid = [abs(vals[0] - generator[j][0]) + abs(vals[1] - generator[j][1])
                  + abs(vals[2] - generator[j][2]) for j in range(len(generator))]

        #construct new image matrix 
        new_img[i,:]=generator[np.argmin(euclid)]
        
    #return new image 
    return new_img

In [51]:
#function to plot new image vs original image for different number of k 
def plotImages(new_image, nclusters):
    
    original_image = np.array(Image.open('mandrill.png'))

    fig, ax = plt.subplots(1, 2, figsize=(20,20))
    ax[0].imshow(new_image)
    ax[0].set_title('Compressed Image when k = ' + str(nclusters), fontsize=18)
    ax[1].imshow(original_image) 
    ax[1].set_title('Original Image', fontsize=18)
    plt.show()

In [53]:
#read in original image 
img = np.array(Image.open('TrumanEatsLunch.jpg'))
image = readImage(img)

#kmeans with 2 clusters 
nclusters = 20
generator_2 = kmeans(image, nclusters)

# #reconstruct new image
# new_img = replacePixel(img, generator_2)
# #reshape
# new_img=np.reshape(new_img, (512,512,3))
# #plot compared images 
# plotImages(new_img, 2)

In [48]:
generator_2

[array([215.33333333,  78.33333333,  72.        , ...,  39.33333333,
         39.66666667,  34.33333333]),
 array([122.09090909,  70.90909091,  73.45454545, ...,  36.45454545,
         38.45454545,  37.72727273]),
 array([83.05263158, 74.        , 59.73684211, ..., 38.47368421,
        39.36842105, 36.31578947]),
 array([38.9047619 , 37.42857143, 35.        , ..., 43.19047619,
        40.47619048, 46.57142857]),
 array([227.5  ,  85.625,  59.   , ...,  41.125,  40.625,  42.125]),
 array([235.6,  94.4,  82.9, ...,  37.8,  37.5,  40.3]),
 array([238.94117647,  85.52941176,  73.29411765, ...,  39.82352941,
         39.58823529,  38.52941176]),
 array([149. ,  46.5,  34.5, ...,  39. ,  41.5,  39.5]),
 array([115.30434783,  90.91304348,  84.13043478, ...,  37.82608696,
         39.34782609,  29.7826087 ]),
 array([241.28571429,  56.42857143,  37.71428571, ...,  39.42857143,
         41.14285714,  46.57142857]),
 array([240.5       ,  40.54545455,  33.22727273, ...,  39.31818182,
         43