<a href="https://colab.research.google.com/github/radwaahmed20112000/Image-Segmentation/blob/main/Assignment_2_Image_Segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Data Preparation**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread, imshow
import sys
import math
import scipy.io
import cv2
import os
import zipfile
import io
from PIL import Image

In [None]:
# Unpack the dataset archive into ./dataSet. The context manager closes the
# archive handle as soon as extraction finishes instead of leaving it open
# for the rest of the session.
with zipfile.ZipFile('data_.zip') as zf:
  zf.extractall('dataSet')

In [None]:
# Directory that holds the input images; handed to train() further down.
PATH = '/content/dataSet/data_/images/'

Read image and create data frame

In [None]:
# Module-level tables filled in by prepare_image():
#   df  - the feature table passed to the clustering functions
#   df1 - the full per-pixel table (row, column, red, green, blue)
df = None
df1 = None

In [None]:
def prepare_image(image_path, bonus=False):
  """Load an image and publish its per-pixel table through the globals df/df1.

  df1 always holds one row per pixel with the columns
  row, column, red, green, blue (row-major pixel order).
  df is the feature table used for clustering:
    * bonus=False: only the three colour columns.
    * bonus=True: all five columns, with row/column scaled by 0.02.

  Args:
    image_path: path of the image file to open.
    bonus: include the (scaled) pixel coordinates as features.
  """
  global df, df1

  # Read the pixels straight into a (height, width, 3) array. The previous
  # version resized the picture to a transposed (height, width) size and then
  # reshaped the flat data, which scrambled pixels relative to the row-major
  # ground-truth maps (and relied on Image.ANTIALIAS, removed in Pillow 10).
  with Image.open(image_path) as colourImg:
    colourArray = np.asarray(colourImg.convert("RGB"))
  size = colourArray.shape[:2]

  # (height, width, 2) grid of row/column indices, stacked in front of RGB.
  indicesArray = np.moveaxis(np.indices(size), 0, 2)
  allArray = np.dstack((indicesArray, colourArray)).reshape((-1, 5))

  df1 = pd.DataFrame(allArray, columns=["row", "column", "red", "green", "blue"])
  if not bonus:
    df = df1.iloc[:,2:]
  else:
    # Work on a copy so scaling the coordinates (and any column added to df1
    # later, such as 'labels') does not leak between df and df1.
    df = df1.copy()
    df['row'] = df['row'] * 0.02
    df['column'] = df['column'] * 0.02

In [None]:
def read_segmentations(path, i):
  """Store ground-truth segmentation `i` in df1['labels'] and return its top label.

  Args:
    path: ground-truth file path without the ".mat" extension.
    i: index of the segmentation inside the file's 'groundTruth' entry.

  Returns:
    The largest label value of the segmentation as an int, or -1 when the file
    holds no segmentation with index `i` (df1 is left untouched in that case).
  """
  file = scipy.io.loadmat(path + ".mat")
  ground_truth = file['groundTruth'][0]

  # ">=" instead of "==" so any index past the end reports -1 rather than
  # raising IndexError.
  if i >= len(ground_truth):
    return -1

  segmap = ground_truth[i][0][0][0].flatten()

  df1['labels'] = segmap

  # ndarray.max() avoids iterating the array element by element in Python.
  return int(segmap.max())

# **Ground Truth Visualisation**

Plot ground truth of labels

In [None]:
def visualize_image(img):
  """Show an image followed by every ground-truth segmentation stored for it.

  Args:
    img: common path prefix; `img + ".jpg"` is the picture and
      `img + ".mat"` is the ground-truth file.

  The picture goes in the first cell of a 3-column grid and the segmentations
  start on the second grid row. All segmentations present in the file are
  drawn, so files with fewer (or more) than five no longer fail or get cut off.
  """
  file = scipy.io.loadmat(img + ".mat")
  ground_truth = file['groundTruth'][0]
  img2 = plt.imread(img + ".jpg")

  fig = plt.figure(figsize=(20, 40))

  columns = 3
  # Keep the original 6-row grid and only grow it when more rows are needed.
  rows = max(6, 1 + math.ceil(len(ground_truth) / columns))

  fig.add_subplot(rows, columns, 1)
  plt.imshow(img2)
  plt.title("Image",size = 30)

  for number, entry in enumerate(ground_truth, start=1):
    # Cells 1..columns form the picture's row; segmentations follow it.
    fig.add_subplot(rows, columns, columns + number)
    plt.imshow(entry[0][0][0])
    plt.title("Segmentation " + str(number),size = 20)

# **K-means Algorithm**

In [None]:
import random
def random_initialize(D, U):
  """Fill U in place with uniform random values drawn from D's value range.

  Args:
    D: pandas DataFrame; its overall minimum and maximum bound the draws.
    U: 2-D array that is overwritten cell by cell.
  """
  # The bounds do not change while U is filled, so compute them once instead
  # of scanning the whole frame for every cell.
  low = D.min().min()
  high = D.max().max()
  for i in range(U.shape[0]):
    for j in range(U.shape[1]):
      U[i][j] = random.uniform(low, high)

In [None]:
def mean_calc(C, D):
  """Return the column-wise mean of the rows of D selected by C.

  Args:
    C: positional row indices belonging to one cluster.
    D: pandas DataFrame of features.

  Returns:
    A pandas Series with one mean per column of D, or a zero vector
    (numpy array) when C is empty.
  """
  if len(C) == 0:
    return np.zeros(len(D.columns))

  # Average over the actual cluster size; the previous code always divided
  # the sum by 3.
  return D.iloc[list(C)].mean(axis=0)

In [None]:
def is_convergent(U, U_new, thres):
  """Return True when no centroid moved by more than `thres`.

  The movement of centroid i is the Euclidean norm of U[i] - U_new[i].
  """
  moved_too_far = any(
      np.linalg.norm(old - U_new[idx]) > thres for idx, old in enumerate(U)
  )
  return not moved_too_far

In [None]:
from sklearn.metrics.pairwise import euclidean_distances

def cluster_assignment(D, U, C,labels):
  """Assign every row of D to its nearest centroid in U.

  Both outputs are written in place: the row position is appended to
  C[nearest] and labels[position] is set to the index of that centroid.
  """
  distances = euclidean_distances(D, U)

  # One argmin per row, computed for all rows at once.
  nearest = np.argmin(distances, axis=1)

  for position, cluster_number in enumerate(nearest):
    C[cluster_number].append(position)
    labels[position] = cluster_number

In [None]:
def k_means(D, K, T, th = 0):
  """Cluster the rows of D into K groups with Lloyd-style iterations.

  Args:
    D: pandas DataFrame with one row per sample.
    K: number of clusters.
    T: maximum number of assignment/update rounds.
    th: convergence threshold on each centroid's movement.

  Returns:
    (clusters, labels): clusters is a list of K lists of row positions and
    labels holds the cluster index of every row of D.
  """
  col = len(D.columns)

  U = np.zeros((K, col), dtype = float)
  random_initialize(D, U)

  # One label per row of D (previously hard-coded to 154401 entries).
  labels = [0] * len(D)
  # Defined up front so T == 0 still returns a well-formed result.
  C = [[] for _ in range(K)]

  for t in range(T):
    C = [[] for _ in range(K)]
    cluster_assignment(D, U, C,labels)

    # Start from the current centroids so an empty cluster keeps its position
    # instead of collapsing to the origin.
    U_new = U.copy()
    for i in range (K):
      if C[i]:
        U_new[i] = mean_calc(C[i], D)

    if is_convergent(U, U_new, th):
      break

    # Carry the updated centroids into the next round; without this every
    # round repeated the assignment against the random initial centroids.
    U = U_new

  # Always return the same (clusters, labels) pair; the early-exit path used
  # to return the clusters alone, which broke callers that unpack two values.
  return C,labels

# **Entropy**

In [None]:
# Largest class label of the ground truth currently being scored. It is
# overwritten by calculate_entropy() and read by cluster_entropy().
max_class_label = 1

In [None]:
def cluster_entropy(cluster):
  """Return the entropy (base 2) of the class labels inside one cluster.

  Args:
    cluster: sequence of rows whose LAST element is the class label.

  Returns:
    The sum of -p * log2(p) over the class labels 1..max_class_label + 1,
    where p is the share of rows carrying that label; 0 for an empty cluster.
  """
  n_cluster = len(cluster)
  if n_cluster == 0:
    # Nothing to measure; also avoids indexing an empty array and dividing
    # by zero.
    return 0

  # Pull out the label column once instead of re-slicing it for every class.
  class_column = np.array(cluster)[:, -1]

  entropy = 0
  for i in range(max_class_label + 1):
    # The label tested in iteration i is i + 1.
    ni = np.count_nonzero(class_column == i + 1)

    prob = ni/n_cluster

    if prob != 0:
      entropy -= prob * math.log2(prob)

  return entropy

In [None]:
def calculate_segmentation_entropy(clusters):
  """Return the size-weighted sum of cluster entropies.

  Each non-empty cluster contributes cluster_entropy(cluster) weighted by its
  share of the rows in the global df.
  """
  pixel_total = df.shape[0]

  weighted = 0
  for members in clusters:
    size = len(members)
    if size != 0:
      weighted += (size / pixel_total) * cluster_entropy(members)

  return weighted

In [None]:
def _attach_current_labels(segmentations):
  """Rebuild each cluster from df1 so its last column is the current 'labels'."""
  refreshed = []
  for cluster in segmentations:
    positions = [getattr(row, 'name', None) for row in cluster]
    if not positions or any(p is None for p in positions):
      # Not rows taken from df1 (or an empty cluster): use the data as given.
      refreshed.append(cluster)
    else:
      refreshed.append(df1.loc[positions].to_numpy())
  return refreshed


def calculate_entropy(image_path, segmentations):
  """Print and return the entropy of a clustering averaged over the ground truths.

  Args:
    image_path: ground-truth file path without the ".mat" extension.
    segmentations: clusters as produced by create_segment (one list of
      df1 rows per cluster).

  Returns:
    The mean of calculate_segmentation_entropy over the ground-truth
    segmentations that exist for the image (at most five); 0.0 if none exist.
  """
  global max_class_label

  total = 0
  evaluated = 0

  for i in range(5):
    # read_segmentations writes ground truth i into df1['labels'].
    max_class_label = read_segmentations(image_path, i)
    if max_class_label == -1 : break

    # The rows inside `segmentations` were copied out of df1 before this
    # ground truth was loaded, so their last column is not its label.
    # Re-read the rows from df1 so every ground truth is scored on its own
    # labels.
    labelled = _attach_current_labels(segmentations)

    total += calculate_segmentation_entropy(labelled)
    evaluated += 1

  # Average over the segmentations actually found, not a fixed 5.
  total_avg = total/evaluated if evaluated else 0.0
  print(total_avg)
  return total_avg

# **F-Measure**

In [None]:
def get_values_of_labels(path, i):
  """Return the distinct labels of ground-truth segmentation `i` and their counts.

  Args:
    path: ground-truth file path without the ".mat" extension.
    i: index of the segmentation inside the file's 'groundTruth' entry.

  Returns:
    (unique, counts) as returned by np.unique(..., return_counts=True) on the
    flattened map, or (-1, -1) when the file has no segmentation with index i.
  """
  file = scipy.io.loadmat(path + ".mat")
  ground_truth = file['groundTruth'][0]

  # ">=" so every out-of-range index yields the sentinel instead of only the
  # index that is exactly one past the end.
  if i >= len(ground_truth):
    return -1, -1

  segmap = ground_truth[i][0][0][0].flatten()
  unique,counts = np.unique(segmap, return_counts=True)

  return unique,counts

In [None]:
def get_segmentation(path, i):
  """Return ground-truth segmentation `i` from `path + ".mat"` as a flat array."""
  ground_truth = scipy.io.loadmat(path + ".mat")['groundTruth'][0]
  return ground_truth[i][0][0][0].flatten()

In [None]:
def change_clusters(segmentation,labels):
  """Replace every label value in `segmentation` by its position in `labels`.

  Args:
    segmentation: numpy array of label values.
    labels: sequence of the label values to renumber; labels[j] becomes j.

  Returns:
    A new array of the same shape (the input itself when `labels` is empty).
  """
  original = segmentation
  remapped = segmentation

  for j, label in enumerate(labels):
    # Match against the untouched input: matching against the partially
    # remapped array would re-map values written by an earlier step whenever
    # a new index equals a later label.
    remapped = np.where(original == label, j, remapped)

  return remapped

In [None]:
def prepare_for_f_measure(clusters,segmentation):
  """Stack predicted cluster ids on top of the ground-truth labels.

  Args:
    clusters: one sequence of members per cluster. Members that carry an
      integer `.name` (e.g. rows taken with DataFrame.iloc) are placed at that
      position so the prediction lines up with `segmentation`.
    segmentation: flat ground-truth label array, one entry per pixel.

  Returns:
    A 2 x N array: row 0 is the predicted cluster id, row 1 the ground truth.

  The input clusters are no longer overwritten with their ids.
  """
  cluster_ids = []
  positions = []
  for cluster_id, cluster in enumerate(clusters):
    for member in cluster:
      cluster_ids.append(cluster_id)
      positions.append(getattr(member, 'name', None))

  predicted = np.array(cluster_ids)
  total = len(positions)

  # Re-order into pixel order only when the members identify every pixel
  # exactly once; otherwise fall back to cluster order.
  can_align = (
      total > 0
      and all(isinstance(p, (int, np.integer)) and not isinstance(p, bool)
              for p in positions)
      and sorted(positions) == list(range(total))
  )
  if can_align:
    by_pixel = np.empty(total, dtype=predicted.dtype)
    by_pixel[positions] = predicted
    predicted = by_pixel

  return np.vstack((predicted, segmentation))

In [None]:
def generate_clusters_for_f_measure(clusters, labels):
  """Group predicted cluster ids by ground-truth label.

  Args:
    clusters: 2 x N array; row 0 holds predicted ids, row 1 the ground-truth
      label index of the same pixel.
    labels: array whose size is the number of ground-truth labels.

  Returns:
    (true_clusters, predicted_clusters): predicted_clusters[i] lists the
    predicted ids of all pixels with ground-truth label i, and
    true_clusters[i] is a list of the same length filled with i.
  """
  label_count = labels.size
  predicted_clusters = [[] for _ in range(label_count)]

  for position, assigned in enumerate(clusters[0]):
    predicted_clusters[clusters[1][position]].append(assigned)

  true_clusters = [
      [label] * len(members) for label, members in enumerate(predicted_clusters)
  ]

  return true_clusters,predicted_clusters

In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import fbeta_score
def f_measure(true_clusters, predicted_clusters):
  """Return the mean over all label groups of the best per-class F-beta score.

  Args:
    true_clusters: list of ground-truth label lists, one per label.
    predicted_clusters: list of predicted-id lists, aligned with true_clusters.

  Returns:
    The average of max(fbeta_score(..., average=None, beta=0.5)) over the
    groups, or 0 when there are no groups.
  """
  if len(true_clusters) == 0:
    return 0

  f = 0
  # Score every group with its own data; the previous loop evaluated the
  # group at index 1 on every iteration.
  # NOTE(review): this compares ground-truth label indices with cluster ids
  # directly - confirm the two numberings are meant to be comparable.
  for truth, predicted in zip(true_clusters, predicted_clusters):
    f += np.amax(fbeta_score(truth, predicted, average=None, beta=0.5))

  return f/len(true_clusters)

In [None]:
import collections
def accuracy(unique,count,clusters):
  """Print how often each value occurs in `clusters`.

  `unique` and `count` are accepted but not used. Returns None.
  """
  print(collections.Counter(clusters))

# **Segmentation**

In [None]:
from PIL import Image
import numpy as np
from random import randint

In [None]:
def create_segment(clusters, D):
  """Turn clusters of row positions into clusters of the rows themselves.

  Returns one list per cluster, holding D.iloc[position] for each position
  in that cluster, in the same order.
  """
  return [[D.iloc[position] for position in cluster] for cluster in clusters]

In [None]:
from pandas import *

def plot_segmentations(labels, image_path):
  """Display per-pixel cluster labels as an image.

  Args:
    labels: flat sequence with one label per pixel, in row-major order.
    image_path: path of the source image; only its dimensions are used.
  """
  # Take the grid shape from the image instead of assuming 481 x 321, so
  # landscape images are not drawn scrambled. The handle is closed right away.
  with Image.open(image_path) as im:
    width, height = im.size

  labels = np.array(labels)
  labels = labels.reshape((height, width))

  plt.imshow(labels)
  plt.show()

# **K-ways Implementation**

In [None]:
import pandas as pd 
from sklearn.neighbors import kneighbors_graph
from scipy.sparse import csr_matrix 
from scipy import linalg 
import scipy.sparse
from scipy.sparse.linalg import inv 
from scipy.sparse.linalg import eigs 
from scipy.sparse import diags
from sklearn.preprocessing import normalize

In [None]:
def k_ways(X, k, t):
  """Spectral (normalized-cut) clustering on the 5-nearest-neighbour graph.

  Args:
    X: feature table with one row per sample.
    k: number of clusters, and number of eigenvectors used for the embedding.
    t: maximum number of k-means rounds run on the embedding.

  Returns:
    Whatever k_means returns for the row-normalised spectral embedding.
  """
  A = kneighbors_graph(X, 5, mode='connectivity', include_self=False)
  # "j is a neighbour of i" is not symmetric; make the graph undirected.
  A = A.maximum(A.T)

  # Degree matrix = row sums of the adjacency matrix. The previous code filled
  # it with k (the cluster count), which is unrelated to the graph.
  degrees = np.asarray(A.sum(axis=1)).ravel()
  inv_delta = diags(1.0 / degrees)

  # Random-walk matrix D^-1 A = I - D^-1 L. Its eigenvectors with the LARGEST
  # eigenvalues are the eigenvectors of the normalised Laplacian D^-1 L with
  # the SMALLEST eigenvalues, which is what the normalised cut needs.
  # (The previous `L.multiply(inv_delta)` was an element-wise product that
  # reduced the matrix to the identity.)
  transition = inv_delta.dot(A)

  eigen_values, U = eigs(transition, k=k, which='LR')
  U = U.real

  Y = normalize(U, norm='l1', axis=1)
  Y = pd.DataFrame(Y)

  return k_means(Y, k, t)

In [None]:
def cluster_data(model, data, k, t):
  """Run `model(data, k, t)` and expand its clusters into rows of df1.

  Returns:
    (segments, labels): segments comes from create_segment(clusters, df1);
    labels is passed through from the model unchanged.
  """
  clusters, labels = model(data, k, t)
  segments = create_segment(clusters, df1)
  return segments, labels

# **Big Picture**

In [None]:
# Image ids used by the comparison cells below; ".jpg" / ".mat" are appended
# by the code that opens the files.
images = ["12003", "12074", "15004", "15088", "16052"]

Select a set of five images and display their corresponding ground truth against your segmentation results using K-means at K=5.

In [None]:
# K-means (K=5) result next to the ground truth for every selected image.
for image in images:
  prepare_image(image + ".jpg")
  # cluster_data returns (segments, labels); plotting needs the per-pixel
  # labels, not the whole tuple.
  segmentations, labels = cluster_data(k_means, df, 5, 2)
  print("Model Segmenatations:")
  # plot_segmentations opens the path it is given, so it needs the extension.
  plot_segmentations(labels, image + ".jpg")
  print("Ground Truth:")
  visualize_image(image)

Select the same five images and display their corresponding ground
truth against your segmentation results using Normalized-cut for the 5-NN graph, at K=5. 

In [None]:
# Normalized-cut (5-NN graph, K=5) result next to the ground truth.
for image in images:
  prepare_image(image + ".jpg")
  # cluster_data returns (segments, labels); unpack so the entropy is computed
  # on the clusters only.
  segmentations, labels = cluster_data(k_ways, df, 5, 2)
  print("Normalized Cut Segmenatations:")
  # Show the segmentation itself, as the cell description asks.
  plot_segmentations(labels, image + ".jpg")
  entropy = calculate_entropy(image, segmentations)
  print("Ground Truth:")
  visualize_image(image)

Select the same five images and contrast your segmentation results using Normalized-cut for the 5-NN graph, at K=5 versus using K-means at K=5.

In [None]:
# Contrast both models on every image: cluster, announce the model, then
# report its average entropy against the ground truth.
for image in images:
  prepare_image(image + ".jpg")
  for heading, model in (("k-means Segmenatations:", k_means),
                         ("Normalized-Cuts Segmenatations:", k_ways)):
    segmentations, labels = cluster_data(model, df, 5, 2)
    print(heading)
    entropy = calculate_entropy(image, segmentations)

k-means Segmenatations:
Entropy For Segmentation 1 :
0.022728536162962155
Entropy For Segmentation 2 :
0.022728536162962155
Entropy For Segmentation 3 :
4.642851305939226
Entropy For Segmentation 4 :
0.022728536162962155
Entropy For Segmentation 5 :
0.029904565549490045
0.9481882959955206




Normalized-Cuts Segmenatations:
Entropy For Segmentation 1 :
1.8517122506428028
Entropy For Segmentation 2 :
1.8517122506428028
Entropy For Segmentation 3 :
1.8517122506428028
Entropy For Segmentation 4 :
1.8517122506428028
Entropy For Segmentation 5 :
1.8517122506428028
1.8517122506428028
k-means Segmenatations:
Entropy For Segmentation 1 :
0.0
Entropy For Segmentation 2 :
0.0005222669713557943
Entropy For Segmentation 3 :
0.0005222669713557943
Entropy For Segmentation 4 :
0.0
Entropy For Segmentation 5 :
0.0
0.0002089067885423177




Normalized-Cuts Segmenatations:
Entropy For Segmentation 1 :
1.192740092159276
Entropy For Segmentation 2 :
2.1657912013750167
Entropy For Segmentation 3 :
2.1657912013750167
Entropy For Segmentation 4 :
2.165683551948847
Entropy For Segmentation 5 :
2.1657912013750167
1.9711594496466347
k-means Segmenatations:
Entropy For Segmentation 1 :
1.5405950355979479
Entropy For Segmentation 2 :
1.8070024758406813
Entropy For Segmentation 3 :
0.836443922495028
Entropy For Segmentation 4 :
1.7673279033072342
Entropy For Segmentation 5 :
1.4621676312946863
1.4827073937071156




Normalized-Cuts Segmenatations:
Entropy For Segmentation 1 :
4.108838067628112
Entropy For Segmentation 2 :
4.108838067628112
Entropy For Segmentation 3 :
2.167281505047353
Entropy For Segmentation 4 :
4.108838067628112
Entropy For Segmentation 5 :
4.108838067628112
3.7205267551119605
k-means Segmenatations:
Entropy For Segmentation 1 :
0.24160106292971112
Entropy For Segmentation 2 :
0.18099873666712196
Entropy For Segmentation 3 :
0.27863796289658177
Entropy For Segmentation 4 :
0.1271633209617461
Entropy For Segmentation 5 :
0.2094149685483987
0.2075632104007119




Normalized-Cuts Segmenatations:
Entropy For Segmentation 1 :
0.47359067561790696
Entropy For Segmentation 2 :
0.4699124913879095
Entropy For Segmentation 3 :
0.47359067561790696
Entropy For Segmentation 4 :
0.45978143555369927
Entropy For Segmentation 5 :
0.47359067561790696
0.4700931907590659
k-means Segmenatations:
Entropy For Segmentation 1 :
0.04821198356493735
Entropy For Segmentation 2 :
0.02885880706214397
Entropy For Segmentation 3 :
0.01745115914721027
Entropy For Segmentation 4 :
0.010118610155292718
Entropy For Segmentation 5 :
0.021982147070198684
0.025324541399956595




Normalized-Cuts Segmenatations:
Entropy For Segmentation 1 :
1.7260806972279037
Entropy For Segmentation 2 :
1.7260806972279037
Entropy For Segmentation 3 :
1.7260806972279037
Entropy For Segmentation 4 :
1.0602716832352443
Entropy For Segmentation 5 :
1.7260806972279037
1.592918894429372


In [None]:
from sklearn.cluster import SpectralClustering

# K-means versus scikit-learn's SpectralClustering on the same images.
for image in images:
  prepare_image(image + ".jpg")
  # cluster_data returns (segments, labels); the plot needs the labels.
  segmentations, labels = cluster_data(k_means, df, 5, 2)
  print("k-means Segmenatations:")
  plot_segmentations(labels, image + ".jpg")

  # n_neighbors is only used with the nearest-neighbours affinity; the default
  # affinity is 'rbf', which ignores it.
  clustering = SpectralClustering(n_clusters=5, affinity='nearest_neighbors',
      n_neighbors=5, random_state=0).fit(df)
  # Plot the spectral labels themselves. The previous code passed the fitted
  # estimator to create_segment and then re-plotted the k-means result.
  plot_segmentations(clustering.labels_, image + ".jpg")

# **Testing to Get The Best K (50 Images)**

In [None]:
def train(folder, ground_truth_dir='/content/dataSet/data_/ground_Truth/'):
  """Evaluate k-means for several K on every image in `folder` and report the best K.

  For each image and each K in [3, 5, 7, 9, 11] the clustering is scored by
  its average entropy and average F-measure against the ground truths. The K
  with the lowest entropy and the K with the highest F-measure each get one
  vote per image; the K with the most votes is printed as the best, followed
  by the per-K averages over all images.

  Args:
    folder: directory containing the image files.
    ground_truth_dir: directory containing the matching ".mat" files
      (same base name as the image).
  """
  ks = [3, 5, 7, 9, 11]
  count = [0] * len(ks)
  entropy_average = [0] * len(ks)
  f_measure_average = [0] * len(ks)
  l = 0

  for filename in os.listdir(folder):
    print("Image " + filename)
    l += 1
    image = os.path.join(folder, filename)
    # Ground-truth files are named after the image without its extension.
    image_mat = os.path.join(ground_truth_dir, os.path.splitext(filename)[0])
    prepare_image(image)

    k_entropys = []
    f_measures = []

    for counter, k in enumerate(ks):
      print("k = " + str(k))
      segmentations, labels = cluster_data(k_means, df, k, 1)
      entropy = calculate_entropy(image_mat, segmentations)
      print("Average Entropy = " + str(entropy))
      k_entropys.append(entropy)

      total_f = 0
      evaluated = 0
      for i in range(5):
        unique, c = get_values_of_labels(image_mat, i)
        # (-1, -1) marks "no segmentation with this index".
        if isinstance(c, int): break
        seg = get_segmentation(image_mat, i)
        seg = change_clusters(seg, unique)
        clusters = prepare_for_f_measure(segmentations, seg)

        true_clusters,predicted_clusters = generate_clusters_for_f_measure(clusters, unique)

        total_f += f_measure(true_clusters, predicted_clusters)
        evaluated += 1

      # Average over the ground truths actually found rather than a fixed 5.
      mean_f = total_f/evaluated if evaluated else 0
      print("Average F-Measure = " + str(mean_f))
      f_measures.append(mean_f)
      entropy_average[counter] += entropy
      f_measure_average[counter] += mean_f

    index_min_entropy = k_entropys.index(min(k_entropys))
    index_max_f = f_measures.index(max(f_measures))
    count[index_min_entropy] += 1
    count[index_max_f] += 1

  if l == 0:
    print("No images found in " + folder)
    return

  index_max_count = count.index(max(count))
  print("Best K = " + str(ks[index_max_count]))
  print("Average entropy for each k = [3, 5, 7, 9, 11] :")
  # Divide element-wise by the number of images processed; dividing the list
  # itself by 50 raised a TypeError.
  print([total/l for total in entropy_average])
  print("Average F-measure for each k = [3, 5, 7, 9, 11] :")
  print([total/l for total in f_measure_average])

In [None]:
# Run the K comparison over every image in the dataset folder.
train(PATH)

In [None]:
# Bonus: cluster on colour plus scaled pixel coordinates.
prepare_image("353013.jpg", bonus=True)
# cluster_data returns (segments, labels); calculate_entropy expects only the
# clusters.
segmentations, labels = cluster_data(k_means, df, 4, 1)
print(calculate_entropy("353013", segmentations))

In [None]:
# Same image clustered on colour only, for comparison with the bonus run.
prepare_image("353013.jpg", bonus=False)
segmentations, labels = cluster_data(k_means, df, 4, 1)
colour_only_entropy = calculate_entropy("353013", segmentations)
print(colour_only_entropy)

_____________________________________________________


Good Results and Bad Results

In [None]:
# Number of clusters intended for the showcase below.
k = 5

# Image ids picked as examples of poor and good segmentations.
bad_results = ["15004", "16052", "55067"]
good_results = ["12003", "124084", "25098"]

print("Bad Results: ")
# TODO: unfinished - iterate over bad_results (and good_results) and show each one.
# for img in bad_results:
  
