# Segmentation


## Import 

In [1]:
import pandas
import numpy as np
import random
from scipy.signal import convolve2d
import cv2 

ModuleNotFoundError: No module named 'cv2'

## k-means clustering via Lloyd's algorithm
* For grey-scale images
* Based on Lloyd's algorithm from the slides
* The only feature used is the pixel intensity

In [None]:
def Kmeans_lloyd(k, image, random_seed, patience = 2):
    """Cluster the pixel intensities of a grey image with Lloyd's k-means.

    Only the intensity values are used; the position of a pixel in the image
    plays no role in the clustering.

    Parameters
    ----------
    k : int
        Number of clusters (at least 1).
    image : array-like
        Grey-scale image (any shape).
    random_seed : int
        Seed for the random initialisation of the centroids. The same seed
        always gives the same result.
    patience : int, optional
        Number of consecutive iterations in which the centroids must stay
        numerically unchanged before the loop stops.

    Returns
    -------
    clusters : numpy.ndarray
        Cluster index (0 .. k-1) of every pixel, same shape as ``image``.
    centroids : numpy.ndarray
        Final centroid intensities, shape (1, k).

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or ``image`` contains no pixels.
    """
    image = np.asarray(image)
    if k < 1:
        raise ValueError("k must be at least 1")
    if image.size == 0:
        raise ValueError("image must contain at least one pixel")

    # Kept so the stdlib generator is still seeded as before.
    random.seed(random_seed)
    # The centroids are drawn with NumPy, so NumPy's generator is the one that
    # has to be seeded for the initialisation to be reproducible.
    rng = np.random.default_rng(random_seed)

    # Extract the intensity values; the relative position of the pixels does
    # not matter for the clustering.
    shape = image.shape
    pixels = image.reshape(-1, 1).astype(float)  # shape (N, 1)
    centroids = rng.uniform(pixels.min(), pixels.max(), size=k).reshape(1, -1)  # shape (1, k)

    change_count = 0  # consecutive iterations without centroid movement

    while True:
        # Distance from every pixel to every centroid: (N, 1) - (1, k) -> (N, k)
        distances = np.abs(pixels - centroids)

        # Put each pixel in the cluster belonging to the nearest centroid.
        clusters = np.argmin(distances, axis=1)

        # Recompute the centroids. An empty cluster keeps its previous
        # centroid: the mean of zero pixels is NaN, which would poison every
        # later distance computation and keep the loop from ever converging.
        new_centroids = centroids.copy()
        for i in range(k):
            cluster_pixels = pixels[clusters == i]
            if cluster_pixels.size > 0:
                new_centroids[0, i] = cluster_pixels.mean()

        # Stopping condition: the centroids stayed put (within a small
        # tolerance) for `patience` iterations in a row.
        if np.allclose(new_centroids, centroids):
            change_count += 1
        else:
            change_count = 0

        centroids = new_centroids

        if change_count >= patience:
            break

    # Give the labels the layout of the input image.
    clusters = clusters.reshape(shape)

    return clusters, centroids


# Otsu thresholding algorithm
* Based on the 1979_otsu_IEEESys.pdf document and the slides segmentation.pdf

The algorithm was fairly hard to follow, so where relevant I have noted which equation from the 1979_otsu_IEEESys paper each step corresponds to.


In [None]:
def Otsu(image):
    """Return the Otsu threshold of a grey image.

    The image is binned into 256 intensity levels (0..255). For every
    candidate threshold ``t`` the pixels are split into class 0 (levels
    ``<= t``) and class 1 (levels ``> t``), and the threshold with the largest
    between-class variance is returned. If several thresholds share the
    maximum, the smallest one is returned.

    Parameters
    ----------
    image : array-like
        Grey-scale image. Values outside [0, 256) are ignored by the
        histogram.

    Returns
    -------
    int
        The selected threshold level. 0 is returned when no split into two
        non-empty classes exists (empty image or a single occupied level).
    """
    # Compute histogram
    histogram, _ = np.histogram(image, bins=256, range=(0, 256))

    total = histogram.sum()
    if total == 0:
        # Nothing to threshold; avoid dividing by zero below.
        return 0

    # Normalize the histogram and regard it as a probability distribution
    # P(i) = h(i) / N
    p_i = histogram / total
    levels = np.arange(len(p_i))

    w0 = np.cumsum(p_i)             # cumulative probability of class 0 (equation 2)
    w1 = 1.0 - w0                   # probability of class 1 (equation 3)
    mu_k = np.cumsum(levels * p_i)  # cumulative first moment up to each level
    # Mean intensity taken from the same histogram, so it is always
    # consistent with mu_k.
    total_mean = mu_k[-1]

    # A threshold is only meaningful when both classes contain pixels. The
    # test uses integer counts so it does not depend on float rounding.
    counts = np.cumsum(histogram)
    valid = (counts > 0) & (counts < total)
    if not valid.any():
        return 0

    mu0 = mu_k[valid] / w0[valid]                 # mean of class 0 (equation 4)
    mu1 = (total_mean - mu_k[valid]) / w1[valid]  # mean of class 1 (equation 5)

    # Between-class variance (equation 14); invalid thresholds stay at 0.
    variance = np.zeros(len(p_i))
    variance[valid] = w0[valid] * w1[valid] * (mu1 - mu0) ** 2

    # argmax returns the first maximum, i.e. the smallest best threshold.
    return int(np.argmax(variance))

## cleaning/denoising algorithm

After reading the description, I realised that the cleaning step can be implemented as a convolution: a kernel sums the values of the neighbouring pixels, and the class of each pixel is then chosen by comparing that sum to a vote threshold.

In [None]:
def denoising_algorithm(image, vote_threshold):
    """Relabel every pixel by a vote among its eight neighbours.

    For each pixel the values of the eight surrounding pixels are summed (the
    pixel itself is excluded, and positions outside the image count as 0).
    The output pixel is 1 when that sum is at least ``vote_threshold`` and 0
    otherwise.

    Parameters
    ----------
    image : 2-D array
        Input image. Presumably a binary 0/1 segmentation, in which case the
        sum is the number of neighbours labelled 1 (confirm against caller).
    vote_threshold : number
        Minimum neighbour sum needed for a pixel to be set to 1.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of 0/1 values with the same shape as ``image``.
    """
    # 3x3 neighbourhood of ones with the centre pixel left out of the vote.
    neighbour_mask = np.ones((3, 3), dtype=int)
    neighbour_mask[1, 1] = 0

    # Sum of the neighbours for every pixel, zero-padded at the border.
    neighbour_votes = convolve2d(
        image, neighbour_mask, mode='same', boundary='fill', fillvalue=0
    )

    passed = neighbour_votes >= vote_threshold
    return passed.astype(np.uint8)
    


## Feature extraction function

In [None]:
def grey_scale_conversion(image):
    """Return a grey-scale version of ``image``.

    The conversion is done by OpenCV with the ``COLOR_BGR2GRAY`` code, so the
    input is presumably a colour image with its channels in BGR order
    (confirm against how the images are loaded).
    """
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Import images

## Perform segmentation