In [1]:
## Import necessary libraries here (You can add libraries you want to use here)
from skimage.util import img_as_float
from skimage import io, color
from skimage import draw
import matplotlib.pyplot as plt
import argparse, time
import math, copy
from math import sqrt
import numpy as np
import scipy.io
import cv2
from google.colab.patches import cv2_imshow
from PIL import Image, ImageDraw
import time
from sklearn.mixture import GaussianMixture
import plotly.graph_objects as go

In [2]:
def show_image(img, scale=1.0):
    plt.figure(figsize=scale* plt.figaspect(1))
    plt.imshow(img, interpolation='nearest')
    plt.gray() 
    plt.axis('off')
    plt.show()

# Part 1: SLIC Superpixels (50 Points)

## Overview

Superpixel algorithms group pixels into perceptually meaningful regions while respecting potential object contours, and thereby can replace the rigid pixel grid structure. Due to the reduced complexity, superpixels are becoming popular for various computer vision applications, e.g., multiclass object segmentation, depth estimation, human pose estimation, and object localization.

In this problem, you will implement a simple superpixel algorithm called Simple Linear Iterative Clustering (SLIC) that clusters pixels in the five-dimensional color and pixel coordinate space (e.g., r, g, b, x, y). The algorithm starts with a collection of K cluster centers initialized at an equally sampled regular grid on the image of N pixels. For each cluster, you define for a localized window 2S x 2S centered at the cluster center, where S = sqrt(N/K) is the roughly the space between the seed cluster centers. Then, you check whether the pixel within the 2S x 2S local window should be assigned to the cluster center or not (by comparing the distance in 5D space to the cluster center). Once you loop through all the clusters, you can update the cluster center by averaging over the cluster members. Iterate the pixel-to-cluster assignment process till convergence or maximum iterations reached.

Reference Paper: http://www.kev-smith.com/papers/SMITH_TPAMI12.pdf

You can refer to the following slide covered in Lecture_17_Segmentation.pptx here: https://drive.google.com/file/d/1bRmNBXgK1_kWcY-nhw_As5fP10aRhuDC/view?usp=share_link






### Data
**WARNING: Colab deletes all files everytime runtime is disconnected. Make sure to re-download the inputs when it happens.**

In [None]:
# Download Data -- run this cell only one time per runtime
!gdown 1jbg2VMZ9yAJMHQNRCTgqZM1PyQRtcPyV
!unzip "/content/Part1_SLIC.zip" -d "/content/" 

### Helper Functions:

In [90]:
# A class to initialize the super pixels, of the form - [l,a,b,h,w].
class SuperPixel(object):

    def __init__(self, l=0, a=0, b=0, h=0, w=0):
        self.update(l, a, b, h, w)
        self.pixels = []

    def update(self, l, a, b, h, w):
        self.l = l
        self.a = a
        self.b = b
        self.h = h
        self.w = w

# Function which returns an object of class SuperPixel
def make_SuperPixel(h, w,img):
    return SuperPixel(img[h,w][0],img[h,w][1],img[h,w][2], h, w)

def display_clusters(img, clusters):
    image = np.copy(img)
    unq_clusters = []
    count = 0
    for c in clusters:
        for p in c.pixels:
            image[p[0],p[1]][0] = c.l
            image[p[0],p[1]][1] = c.a
            image[p[0],p[1]][2] = c.b
        image[int(c.h), int(c.w)][0] = 0
        image[int(c.h), int(c.w)][1] = 0
        image[int(c.h), int(c.w)][2] = 0
        count += 1
        cur_cl = [int(c.h), int(c.w)]
        if cur_cl not in unq_clusters:
          unq_clusters.append(cur_cl)
    rgb_arr = color.lab2rgb(image) 
    show_image(rgb_arr)

### Code (15 pts)

In [100]:
# Functions for Simple Linear Iterative Clustering (SLIC)

def initialize_cluster_centers(S, image, img_h, img_w, clusters):
    # Initialize "clusters" by sampling pixels at an equally sampled regular grid (distanced by S)
    # on the image of N pixels. N = img_h X img_w. Hint: clusters.append(make_superPixel(h, w, image)) to create each cluster/super-pixel.
    for h in range(1, img_h-1, S):
      for w in range(1, img_w-1, S):
        clusters.append(make_SuperPixel(h, w, image))
    return clusters

def relocate_cluster_center_at_lowgrad(clusters, image):
    # To Do:
    # for each cluster c, reassign cluster to the pixel having smallest gradient value.
    # Step 1: compute gradient wrt cluster-center c.h, c.w in 3X3 neighborhood of cluster center.
    # Step 2: Similarly, compute gradient for each pixel in 3X3 spatial neighborhood of cluster c.
    # Step 3. Reassign cluster-center to the pixel (x,y) having the lowest gradient. 
    # Hint: c.update(img[x,y][0], img[h,w][1], img[x,y][2], x, y)
    gray = image[:,:,0]
    for cluster in clusters:
      grad_x = gray[cluster.h-1:cluster.h+2, cluster.w-1:cluster.w+2] - gray[cluster.h-1:cluster.h+2, cluster.w:cluster.w+3]
      grad_y = gray[cluster.h-1:cluster.h+2, cluster.w-1:cluster.w+2] - gray[cluster.h:cluster.h+3, cluster.w-1:cluster.w+2]
      grad = abs(grad_x) + abs(grad_y)
      cluster_center = np.argmin(grad)
      cluster_center = [cluster_center//3, cluster_center%3]
      new_h = cluster.h+cluster_center[0]-1
      new_w = cluster.w+cluster_center[1]-1
      img_pixel = image[new_h, new_w]
      cluster.update(img_pixel[0], img_pixel[1], img_pixel[2], new_h, new_w)
    return None

# def assign_cluster(clusters, S, image, img_h, img_w, cluster_tag, dis):
def assign_cluster(clusters, S, image, img_h, img_w, dis):
    # To Do: Compare each pixel to cluster center within 2S pixel distance and assign to nearest cluster using the "distance metric"
    # (involving both color and spatial dimensions of pixel and cluster, 
    # Hints: 
    # 1. use "dis" matrix for comparing distances.
    # 2. You can use c.pixels.append((h, w)) to keep track of cluster-assignments.
    global l_range, a_range, b_range
    two_S = 2*S
    four_S_sq = two_S**2
    for h in range(img_h):
      for w in range(img_w):
        min_distance = np.inf
        selected_cluster = None
        for cluster in clusters:
          diff_h = h-cluster.h
          diff_w = w-cluster.w
          if (diff_h**2 + diff_w**2) > four_S_sq:
            continue
          distance = (abs(diff_h)/img_h + abs(diff_w)/img_w)/2 +\
                     3*(abs(image[h,w][0]-cluster.l)/l_range + abs(image[h,w][1]-cluster.a)/a_range +\
                     abs(image[h,w][2]-cluster.b)/b_range)/3
          if distance < min_distance:
            min_distance = distance
            selected_cluster = cluster
        selected_cluster.pixels.append([h, w])
    return None

def update_clusters(clusters):
    # To Do: For each cluster, update the cluster center with mean of the pixels assigned (c.pixels)
    for cluster in clusters:
      # Get the center location at sub-pixel level.
      (new_h, new_w) = np.mean(cluster.pixels, axis=0)
      nbd = image[int(new_h//1):int((new_h+2)//1), int(new_w//1):int((new_w+2)//1)]
      # Get the lab-value at the sub-pixel level using bilinear interpolation.
      new_l = (new_h%1)*((new_w%1)*nbd[1,1,0]+(1-(new_w%1))*nbd[1,0,0]) + (1-(new_h%1))*((new_w%1)*nbd[0,1,0]+(1-(new_w%1))*nbd[0,0,0])
      new_a = (new_h%1)*((new_w%1)*nbd[1,1,1]+(1-(new_w%1))*nbd[1,0,1]) + (1-(new_h%1))*((new_w%1)*nbd[0,1,1]+(1-(new_w%1))*nbd[0,0,1])
      new_b = (new_h%1)*((new_w%1)*nbd[1,1,2]+(1-(new_w%1))*nbd[1,0,2]) + (1-(new_h%1))*((new_w%1)*nbd[0,1,2]+(1-(new_w%1))*nbd[0,0,2])
      cluster.update(new_l, new_a, new_b, new_h, new_w)
    return None

def compute_res_error(old_clusters, new_clusters):
    error = 0.0
    # error = Compute L1 distance between previous cluster-centres and new cluster centers.
    assert len(old_clusters) == len(new_clusters), 'Cluster sizes ({}&{}) not matching! :('.formta(len(old_clusters),len(new_clusters))
    for i in range(len(old_clusters)):
      error += abs(old_clusters[i].h-new_clusters[i].h)+abs(old_clusters[i].w-new_clusters[i].w) + \
               abs(old_clusters[i].l-new_clusters[i].l) + abs(old_clusters[i].a-new_clusters[i].a) + abs(old_clusters[i].b-new_clusters[i].b)
    return error

def slic_algorithm(S, image, img_h, img_w, clusters,dis,k):

    clusters = initialize_cluster_centers(S, image, img_h, img_w, clusters)

    # Move centers to position in 3x3 window with smallest gradient.
    relocate_cluster_center_at_lowgrad(clusters, image)

    res_err = 123456789.0 # init residual_error with a very large value (choose as per your understanding.)
    threshold = 10000*k/64

    iter = 0
    iter_times = []
    while(1):
      start_time = time.time()
      iter += 1
      assign_cluster(clusters, S, image, img_h, img_w, dis)
      old_clusters = copy.deepcopy(clusters)
      update_clusters(clusters)
      res_err = compute_res_error(old_clusters, clusters)
      iter_time = time.time() - start_time      
      print('Res Err after {} iters = {}. Last iter took {} s.'.format(iter, res_err, iter_time))
      iter_times.append(iter_time)
      if (res_err<threshold) or (iter>25):
        break

    return clusters, iter_times

In [None]:
all_iter_times = []
for img_name in [10081]: #10081, 3063, 70011, 80085, 43033, 20069, 65084, 49024, 70090, 69000
  img_path = "/content/Part1_SLIC/BSD_data/images/{}.jpg".format(img_name)

  # Load image and convert it from an unsigned 8-bit integer to a floating point data type.
  image = img_as_float(io.imread(img_path))

  # convert RGB to LAB
  image = color.rgb2lab(image)
  l_range = np.amax(image[:,:,0]) - np.amin(image[:,:,0])
  a_range = np.amax(image[:,:,1]) - np.amin(image[:,:,1])
  b_range = np.amax(image[:,:,2]) - np.amin(image[:,:,2])
  img_h = image.shape[0] #  Height
  img_w = image.shape[1] #  Width

  # k: Number of clusters/superpixels.
  for k in [64]:#, 256, 1024]:
    N = img_h * img_w  # Total pixels in the image
    S = int(math.sqrt(N /k)) # Average size of each superpixel

    clusters = []
    dis = np.full((img_h, img_w), np.inf) # Distance bwteen pixels and cluster is initialized as infinity at the beginning.

    clusters, iter_times = slic_algorithm(S, image, img_h, img_w, clusters, dis, k)
    iter_time = np.mean(iter_times)
    print('Mean runtime per iter for K={} for image {} = {}'.format(k, img_name, iter_time))
    all_iter_times.append(iter_time)
    display_clusters(image, clusters)
print('Mean runtime across all iterations across all k values across 10 imgs =', np.mean(all_iter_times))

### Write-up (35 pts)
* a) [5 points] Explain your distance function for measuring the similarity between a pixel and cluster in the 5D space.
* b) [5 points] Choose one image, try three different weights on the color and spatial feature and show the three segmentation results. Describe what you observe.
* c) [5 points] Choose one image, show the error (1) at the initialization and (2) at convergence. Note: error - distance to cluster center in the 5D space.
* d) [10 points] Choose one image and show three superpixel results with different number of K, e.g., 64, 256, 1024 and run time for each K.
* e)  [10 points] Run your algorithms on the subset (50 images) of Berkeley Segmentation Dataset (BSD) with K = 64, 256 and 1024 and report averaged run-time per image for the BSD.


- a) `distance_in_5D = color_weight * (dis_in_l + dis_in_a + dis_in_b)/3 + spatial_weight * (dis_in_h + dis_in_w)/2`   --- (1)
  - `dis_in_x` is the normalized distance in the dimension corresponding to `x`. i.e, `dis_in_x` can vary from `0` to `1`.
  - Which is achieved by using the following formmula: `Xi = (Xi-min_X) / (max_X-min_X)`
  - Finally, the expression in (1) calculates distance by giving custom weights to the spatial & color features (More in next section of the write-up). 
  - Since, there are `3` color dimensions, the sum of the distances in color dimensions is divided by `3`. Similarly, the sum of the distances in spatial dimensions is divided by `2` - so that if same value is chosen for `color_weight` & `spatial_weight`, color & spatial features are weighted equally.
- b) Three different weights are experimented with (k=64 for all the below imgs):
  - (i)  Higher weight to color features (`color_weight=2` & `spatial_weight=1`):
    - <img src="https://drive.google.com/uc?id=1wh-pakEeC30ZlSm6avfiCnoDitW-ci7f" width="400"/>
  - (ii) Higher weight to spatial features (`color_weight=1` & `spatial_weight=2`):
    - <img src="https://drive.google.com/uc?id=17uLylnz-bEwtVHR5jakx86dIFU_Qq0Sg" width="400"/>
  - (ii)   Equal weights (`color_weight=1` & `spatial_weight=1`):
    - <img src="https://drive.google.com/uc?id=1jvB6UyIwyiBaocwl79ttJuKLsKUOIaAD" width="400"/>
  
  - <u>OBSERVATIONS</u>:
    - If higher weight is given to spatial features, nearby pixels are getting clustered, without a lot of consideration to the color-similarity of the pixels. The resultant image has bigger super-pixel for each color & some colors are vanishing as well. If the weight for spatial features is further increased, more & more colors are expected to vanish.
    - Higher weight to color features caused the super-pixels to be smaller in size and show more of the true colors. If the weight for color features is further increased, super pixels are expected to get even smaller & more true colors are expected to be retained.
    - Finally, equal weights to spatial & color features finds the middle ground between both.

- c) For the image 10081.jpg for `k=64` and with equal weights to spatial & color features, the change in cluster coordinates in the 5D space due to the 1st iteration of the algo & due to the last iteration are `1796.94` & `31.19` respectively. This shows that the algorithm is converging.
- d) The following are the superpixel results along with average run-time per iteration for the image 10081.jpg with:
  - `k=64` (runtime per iteration = `31.14` s):
    - <img src="https://drive.google.com/uc?id=1jvB6UyIwyiBaocwl79ttJuKLsKUOIaAD" width="400"/>
  - `k=256` (runtime per iteration = `68.12` s):
    - <img src="https://drive.google.com/uc?id=1PxAyy6YS8TIFfmibENXXcapfgsSEPXBM" width="400"/>
  - `k=1024` (runtime per iteration = `225.6` s):
    - <img src="https://drive.google.com/uc?id=1YSIJ5WKUqdtlgnJOYtQp5ytFumTbuGPK" width="400"/>

- e) The average runtime per image per iteration is `104.07` s.
- The average runtime per image per iteration for:
  - `k=64`   is `29.2` s
  - `k=256`  is `66.4` s
  - `k=1024` is `216.6` s



NOTE:
1) I am updating cluster centers at sub-pixel level after every iteration and getting the pixel values using BILINEAR INTERPOLATION.

### Extra Credit (20 pts)
* f) [upto 10 points] Implement and report two types of metrics (1) boundary recall and (2) under-segmentation error with K = 64, 256 and 1024 on the BSD (last part of write-up).
* g) [upto 10 points] Try to improve your result from first part of extra-credit. You may try different color space (e.g., CIELab, HSV) (See Sec 4.5 in the paper), richer image features (e.g., gradients) or any other ideas you come up with. Report the accuracy on boundary recall and under-segmentation error with K = 256. Compare the results with first-part of extra-credit and explain why you get better results.

### Hints 
- For main part, you CAN NOT use any library such as skimage / cv2 to perform or implement SLIC segmentation.
- For better visualization, you may use external function (skimage.segmentation.mark_boundaries) for your purpose.
- For EXTRA CREDIT part, you are allowed to use external library for metrics (boundary recall and under-segmentation error)

# Part 2: Graph-cut Segmentation (50 points)

## Overview

Let us apply Graph-cuts for foreground/background segmentation. In the “cat” image, you are given a rough polygon of a foreground cat. Apply graph-cut based method to see if we can get a better segmentation!

Firstly, use the provided polygon to obtain an estimate of foreground and background color likelihood. You may choose the likelihood distribution (e.g., color histograms or color mixture of Gaussians.).

Secondly, apply graph-cut code (cv2.grabcut) to do better segmentation. You can use the provided bounding-boxes and use "cv2 package" for the implementation sake. You are required to draw interesting conclusions on how it performs on different samples.

## Data 

In [3]:
# Download Data -- run this cell only one time per runtime
# !gdown 1ObpNoshjKMcB7SFvrTuoKe61IE-HG95n
!gdown 1Tw_gk0yylwl2X1leubz0mlERpxsqLiub
!unzip "/content/Part2_GraphCut.zip" -d "/content/"

Downloading...
From: https://drive.google.com/uc?id=1Tw_gk0yylwl2X1leubz0mlERpxsqLiub
To: /content/Part2_GraphCut.zip
  0% 0.00/264k [00:00<?, ?B/s]100% 264k/264k [00:00<00:00, 90.6MB/s]
Archive:  /content/Part2_GraphCut.zip
   creating: /content/Part2_GraphCut/
  inflating: /content/Part2_GraphCut/dog.jpg  
  inflating: /content/__MACOSX/Part2_GraphCut/._dog.jpg  
  inflating: /content/Part2_GraphCut/messi.jpg  
  inflating: /content/__MACOSX/Part2_GraphCut/._messi.jpg  
  inflating: /content/Part2_GraphCut/.DS_Store  
  inflating: /content/__MACOSX/Part2_GraphCut/._.DS_Store  
  inflating: /content/Part2_GraphCut/plane.jpg  
  inflating: /content/__MACOSX/Part2_GraphCut/._plane.jpg  
  inflating: /content/Part2_GraphCut/bird.jpg  
  inflating: /content/__MACOSX/Part2_GraphCut/._bird.jpg  
  inflating: /content/Part2_GraphCut/cat.jpg  
  inflating: /content/__MACOSX/Part2_GraphCut/._cat.jpg  
  inflating: /content/Part2_GraphCut/cat_poly.mat  
  inflating: /content/__MACOSX/Part2_Gr

## Code (15 pts)

In [30]:
def poly2mask(vertex_row_coords, vertex_col_coords, shape):
    fill_row_coords, fill_col_coords = draw.polygon(vertex_row_coords, vertex_col_coords, shape)
    mask = np.zeros(shape, dtype=np.bool)
    mask[fill_row_coords, fill_col_coords] = True
    return mask

# Get a normalized version of the given histograms (divide by sum)
def normalize_histograms(histograms):
  return np.float32([h / h.sum() for h in histograms])

In [86]:
def mask_for_fg(img, img_mask, fg_image):
  # to generate foreground region (and everything else masked out).
  fg_image = np.zeros_like(img)
  fg_image[img_mask] = img[img_mask]
  return fg_image

def mask_for_bg(img, img_mask):
  # generate background region (and everything else masked out).
  bg_image = np.zeros_like(img)
  bg_image[img_mask] = img[img_mask]
  return bg_image

def visualize_likelihood_map(gmm_bg, gmm_fg, img, num_gaussians):
  reshaped_img = img.reshape((-1,3))
  log_likelihood_bg = -gmm_bg.score_samples(reshaped_img).reshape((img.shape[0], -1))
  log_likelihood_fg = -gmm_fg.score_samples(reshaped_img).reshape((img.shape[0], -1))
  x=np.array([i for i in range(log_likelihood_fg.shape[1])])
  y=[i for i in range(log_likelihood_fg.shape[0])]
  y.reverse()
  y=np.array(y)
  fig = go.Figure(data=go.Heatmap(x=x, y=y, z=log_likelihood_fg, text=np.around(log_likelihood_fg, 2).astype('<U4'), texttemplate="%{text}", textfont={'color':'black'}))
  fig.update_layout(title=dict(text='NLL map for fg with a GMM of {} Gaussians'.format(num_gaussians), x=0.5), width=img.shape[1], height=img.shape[0], \
                    xaxis_visible=False, xaxis_showticklabels=False, yaxis_visible=False, yaxis_showticklabels=False)
  fig.show()
  fig = go.Figure(data=go.Heatmap(x=x, y=y, z=log_likelihood_bg, text=np.around(log_likelihood_bg, 2).astype('<U4'), texttemplate="%{text}", textfont={'color':'black'}))
  fig.update_layout(title=dict(text='NLL map for bg with a GMM of {} Gaussians'.format(num_gaussians), x=0.5), width=img.shape[1], height=img.shape[0], \
                    xaxis_visible=False, xaxis_showticklabels=False, yaxis_visible=False, yaxis_showticklabels=False)
  fig.show()
  return None

def gaussian_model(img, num_gaussians):
  # fit gaussian model on a given image.
  reshaped_img = img.reshape((-1,3))
  gmm_model = GaussianMixture(num_gaussians, random_state=2)
  gmm_labels = gmm_model.fit_predict(reshaped_img)
  return gmm_model, gmm_labels

def run_grabcut(img, bbox):
  # You can use opencv cv2.grabCut algorithm with "cv2.GC_INIT_WITH_RECT" to implement this.
  grabcut_mask = np.zeros(img.shape[:2],np.uint8)
  grabcut_mask[bbox[0]:bbox[0]+bbox[2], bbox[1]:bbox[1]+bbox[3]] = 1
  rect = (bbox[1], bbox[0], bbox[3], bbox[2])
  bgModel = np.zeros((1, 65), np.float64)
  fgModel = np.zeros((1, 65), np.float64)
  grabcut_mask, bgModel, fgModel = cv2.grabCut(img, grabcut_mask, rect, None, None, 1000, cv2.GC_INIT_WITH_RECT)
  grabcut_mask = np.where((grabcut_mask == 2)|(grabcut_mask == 0), 0, 1).astype('uint8')
  return (grabcut_mask, bgModel, fgModel)

In [87]:
### Main Block: ###

# read image
img_name = 'cat'
img_path = '/content/Part2_GraphCut/cat.jpg'
img = cv2.imread(img_path)

# read image_mask
poly = scipy.io.loadmat('/content/Part2_GraphCut/cat_poly.mat')['poly']
img_mask = poly2mask(poly[:,1], poly[:,0], (img.shape[0],img.shape[1]))


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations



In [72]:
# 1. Fit Gaussian mixture model for foreground regions.

# Generate image (fg_image) with background masked using img_mask
fg_image = None
fg_image = mask_for_fg(img, img_mask, fg_image)
num_gaussians = 18
gmm_fg, fg_gmm_labels = gaussian_model(fg_image, num_gaussians)


# 2. Fit Gaussian mixture model for background regions.
# Generate image (fg_image) with foreground masked using img_mask
bg_image = mask_for_bg(img, ~img_mask)
gmm_bg, bg_gmm_labels = gaussian_model(bg_image, num_gaussians)

In [None]:
# 3. Compute and visualize the foreground cost and background cost as an image.
# Foreground label cost: -log Pr[Image | foreground model]
# Foreground label cost: -log Pr[Image | background model]
visualize_likelihood_map(gmm_bg, gmm_fg, img, num_gaussians)

In [None]:
# 4. Run grabcut algorithm.
bboxes = {'cat':(80,80,320,250), 'dog':(50,300,600,300), 'plane': (180,80,250,800), 'messi':(10,80,600,500), 'bird':(100,100,350,250)}
bboxes = {'cat':(80,80,320,250)}
for img_name in bboxes.keys():
  img_path = '/content/Part2_GraphCut/{}.jpg'.format(img_name)
  img = io.imread(img_path)
  bbox = bboxes[img_name]
  (grabcut_mask, bgModel, fgModel) = run_grabcut(img, bbox)
  show_image(grabcut_mask)
  masked_img = img*grabcut_mask[:,:,np.newaxis]
  show_image(masked_img)

### Write-up (35 points)

- a) [5 points] Explain your foreground and background likelihood function.
- b) [10 points] Your foreground and background likelihood map. 

Display P(foreground|pixel) as an intensity map (bright = confident foreground).
- c) [15 points] Implement grabcut segmentation using cv2 package and draw insights on how the segmentation performs on different sample images (hard v/s easy).
- d) [5 points] Try grabcut algorithm with different "iterCount" to see if results improve in certain cases.

- a) I modelled the foreground & background pixels separately as a mixture of n-Gaussians (I tried various values for `n`) using the polygons provided for the ground truth of cat. This is the Maximum Likelihood Estimation step.
  - The RGB values of each pixel are used to form a 3D feature vector for each pixel. This feature vector is used for the said modelling.
- b) Then, the obtained models for the foreground & the background are used to perform probabilistic inference to generate the probability that a pixel belongs either to the foreground or to the background.
  - Later on, negative of log is applied on these probabilities to obtain the final maps.
  - The below images show the Negative log-likelihood (NLL) maps for foreground & background when different number of gaussians are used for modelling.
  - <img src="https://drive.google.com/uc?id=1wAFDZMl7iwoyvQFcGkP-ig6NdMzdudnM" width="400"/>
  - <img src="https://drive.google.com/uc?id=1LBtD0Xc5l_cjJKsPpxc55b8AppfzFz-z" width="400"/>
  - <img src="https://drive.google.com/uc?id=1-G9TzrKxDPsipn-qjCsDaRQG5NctPu0Z" width="400"/>
  - <img src="https://drive.google.com/uc?id=18rrsM66nqf3z2WNcG4gBadEppdzWVxRq" width="400"/>
  - <u> NOTE: </u>
    - Since these are NLL maps, smaller value (darker color) means higher probability.
  - <u> OBSERVATIONS: </u>
    - The sky-blue colored windowsill is predicted as background more confidently when the background is modelled using 5 Gaussians.
  - <u> INFERENCE: </u>
    - The distribution of pixel intensities of the background is multi-modal with at least 5 modes.

- c) Grabcut Segmentation:
  - <img src="https://drive.google.com/uc?id=1ZOG3bxXikTX3Mc6HWFQGavVhlW7yTLst" width="400"/>
  - <img src="https://drive.google.com/uc?id=1hgz_Yt3l44RQ5mFZrbbhrvjztVXn_cJE" width="400"/>
  - <img src="https://drive.google.com/uc?id=1qoFxpw8Lo8szqOAYqPAb6nNwyQxecSQu" width="400"/>
  - <img src="https://drive.google.com/uc?id=1HgQF3tWyQF6VT7wBZ_bHNN19j0MyYyga" width="400"/>
  - <img src="https://drive.google.com/uc?id=1rH-irMgizbpVpR94RQAdEzg0EslNxPtU" width="400"/>
  - <u>NOTE1:</u>
    - messi.jpg is an easy photo as the foreground is distinclty different from the background. But, as can be seen in the above segmentation result, Messi's left palm didn't get segmented-out even though the palm has similarities with those parts of the foreground that did get segmented out. This is due to the provided window (the one with width=500).
    - The window is supposed to cover the foreground object completely (as the algorithm considers everything which lies outside the window as background) but the provided window didn't do so (as can be seen below):
    - <img src="https://drive.google.com/uc?id=1PC8enrtTGHd-mRgyXHhBgcxTeQuiQN-b" width="400"/>
    - Instead, if the window passed to the `grab-cut` function completely covers the foreground object, the left palm did get segmented out. The below images are the rectified window & the segmentation result using the rectified window.
    - <img src="https://drive.google.com/uc?id=1KNTGVq7UxWLC2T8g1XYVYpM1524APz87" width="400"/>
    - <img src="https://drive.google.com/uc?id=1uKNimq5SuJ_xyMrQTgAtvZDiVkbBzqw7" width="400"/>
    - Further, it can be observed in the above image that the grass between the left fingers has a brownish hue (unlike the rest of the background) and consequently got segmented as foreground. And, the grass between right fingers has a green hue (like the rest of the background) and didn't get segmented-out even though it's a very small patch.
  - <u>NOTE2:</u>
    - A similar argument can be made about the cat image in which the brick wall has a similar color distribution as the fur of the cat & hence it got segmented out.
    - But, there's <i>some</i> distinction between the color distributions and it can be expected that the brick wall can be segmented away as background with a little feedback to the grab-cut function.
  - <u>NOTE3:</u>
    - The segmentation of the plane image is noisy because the image is blurry.
  - d) Improvement with more iterations:
    - The above results are generated for `iterCount=5`. The below image is the segmentation result for running with `iterCount=1000`.
    - <img src="https://drive.google.com/uc?id=16FR8ipLJg6Tj2XEod0kA-dFeTp3bv0OG" width="400"/>
    - As can be observed, this result is cleaner than the previous result. Particularly, the edges of the windowsill are not included in the result & the edges of the cat's ears got rid of the noise.

### Hints 
- You may refer to https://docs.opencv.org/4.x/dd/dfc/tutorial_js_grabcut.html to implement run_grabcut()