## Importing Libraries

In [1]:
import cv2
import numpy as np
import random
import os

## Required Functions

In [5]:
# function to generate random points
def generateRandomPoints(inputImage, k = 10):
  pixelData = []

  H, W, C = inputImage.shape

  randomX10 = random.sample(range(H), k)
  randomY10 = random.sample(range(W), k)

  randomK10 = list(zip(randomX10, randomY10))

  for point in randomK10:
    pixelData.append([point[0], point[1], inputImage[point[0], point[1], 0], inputImage[point[0], point[1], 1], inputImage[point[0], point[1], 2]])

  return np.array(pixelData)

# function to assign clusters based on distance to each centroid
def assignClusters(inputData, centroids):
  distance2Centroids = []
  for cent in centroids:
    distance = np.sqrt((inputData[2] - cent[2])**2 + (inputData[3] - cent[3])**2 + (inputData[3] - cent[3])**2)
    distance2Centroids.append(distance)

  return distance2Centroids.index(min(distance2Centroids))

def KMeans(inputImage, k = 10, maxIterations = 100):
    # initiating random centroids
    print("Generating Random Centroids...")
    initialCentroids = generateRandomPoints(inputImage, 10)
    print("Generating Random Centroids...Done")

    # while loop to find optimal centroids
    print("\nFinding Optimal Centroids...")
    counter = 1
    while maxIterations > 0: # while loop starts here
      print(f'\n====================Iteration: {counter}====================')
      print(f"Current Centroids (Cluster 1 - 10)(Format: [x, y, r, g, b]): \n{initialCentroids}\n")
      # dictionary to store points belonging to each centroid
      clusterStorage = {i: [] for i in range(k)}
      # loop to assign cluster to each pixel in the image based on rgb values
      for i in range(inputImage.shape[0]): # loop starts here
        for j in range(inputImage.shape[1]): # loop starts here
          # we store x, y, r, g, b values in currentPixelData but only use the RGB values in the assignClusters function
          currentPixelData = [i, j, inputImage[i, j, 0], inputImage[i, j, 1], inputImage[i, j, 2]]
          # assignCluster function returns the cluster number to which the current pixel belongs to
          cluster = assignClusters(currentPixelData, initialCentroids)
          # storing the pixel data in that cluster key in cluster storage
          clusterStorage[cluster].append(currentPixelData)
          # loop ends here
      # loop ends here

      # computing new centroids based on the current set of pixels in a cluster
      newCentroids = [0] * 10
      # looping through cluster storage to calculate new centroids based on mean
      for i in range(k): # loop starts here
        currentClusterCoordinates = clusterStorage[i]
        # if there's only one pixel belonging to a cluster, then that pixel becomes the new centroid
        # else take the mean of all the pixels
        if len(currentClusterCoordinates) > 0:
          if len(currentClusterCoordinates) > 1:
            newCentroids[i] = np.mean(currentClusterCoordinates, axis = 0, dtype = 'int')
          else:
            newCentroids[i] = currentClusterCoordinates[0]
        else:
          newCentroids[i] = initialCentroids[i]
      newCentroids = np.array(newCentroids)
      # loop ends here

      # checking if new centroids are same as the previous centroids
      if (newCentroids == initialCentroids).all() == False:
        initialCentroids = newCentroids
        counter += 1
        maxIterations -= 1
        print("Optimal Centroids Not Found. Reiterating...")
      else:
        print("Optimal Centroids Found. KMeans Converged\n")
        break
    # while loop ends here

    print("\nComputing RGB Values of the final image")
    # taking average of rgb values of all the pixels belonging to a cluster
    inputImageCopy = np.copy(inputImage)
    # looping through each cluster in cluster storage and storing rgb values
    for i in range(k): # loop starts here
        r = []
        g = []
        b = []

        currentClusterCoordinates = clusterStorage[i]
        for coordinate in currentClusterCoordinates: # loop starts here
            r.append(inputImage[coordinate[0], coordinate[1], 0])
            g.append(inputImage[coordinate[0], coordinate[1], 1])
            b.append(inputImage[coordinate[0], coordinate[1], 2])
        # loop ends here

        # taking the average of rbg values
        rVal = np.mean(r)
        gVal = np.mean(g)
        bVal = np.mean(b)

        # replacing the current rgb values with average of rgb values in a cluster
        for coordinate in currentClusterCoordinates: # loop starts here
            inputImageCopy[coordinate[0], coordinate[1], 0] = rVal
            inputImageCopy[coordinate[0], coordinate[1], 1] = gVal
            inputImageCopy[coordinate[0], coordinate[1], 2] = bVal
        # loop ends here
    # loop ends here
    print("Computing RGB values of the final image...Done")

    # saving the output image
    if os.path.exists('whiteTowerKmeans.png'):
        print("File Already Exists. Overwriting New Results")
        os.remove('whiteTowerKmeans.png')
        cv2.imwrite('whiteTowerKmeans.png', inputImageCopy)
    else:
        print("Saving Segmented Image by the name: 'whiteTowerKmeans.png'")
        cv2.imwrite('whiteTowerKmeans.png', inputImageCopy)

## Main Function

In [6]:
def main():

    # loading input image
    white_tower = cv2.imread('white-tower.png')

    # calling the KMeans Function
    KMeans(white_tower, k = 10, maxIterations = 100)

main()

Generating Random Centroids...
Generating Random Centroids...Done

Finding Optimal Centroids...

Current Centroids (Cluster 1 - 10)(Format: [x, y, r, g, b]): 
[[ 262  493  153  150  142]
 [ 149  543  161  137  119]
 [ 190  705  154  146  133]
 [ 213  417  160  152  139]
 [   7 1228  161  122   94]
 [ 354  644  105   94   67]
 [ 595  814   56   55   64]
 [ 528 1232  136  164  198]
 [ 491  753   19   27   27]
 [ 534  305   74  106  117]]

Optimal Centroids Not Found. Reiterating...

Current Centroids (Cluster 1 - 10)(Format: [x, y, r, g, b]): 
[[280 726 150 149 148]
 [107 654 163 136 112]
 [338 684 140 142 145]
 [185 452 170 157 141]
 [108 651 147 120  94]
 [389 662 107  98  76]
 [541 731  50  64  72]
 [544 711 136 166 197]
 [583 589  16  24  27]
 [539 594  71 101 116]]

Optimal Centroids Not Found. Reiterating...

Current Centroids (Cluster 1 - 10)(Format: [x, y, r, g, b]): 
[[237 722 154 148 142]
 [ 97 675 163 135 112]
 [525 588 120 141 165]
 [201 423 172 160 146]
 [131 604 144 118  93