# Imports

In [None]:
## Import necessary libraries here
import cv2
import numpy as np
from scipy.io import loadmat
from scipy import ndimage
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
import copy
import os
import time
%matplotlib inline

# Feature Detection & Tracking

## Overview

- This problem involves implementing a corner detector and feature tracker that track features from a sequence of hotel images.
- In the figure below, the left image shows the result of the 1st part (keypoint selection) and the right image shows the result of the 2nd part (feature tracking).

<img src="https://drive.google.com/uc?id=11Vc092sR0YPG68Jvca724951CrrEGb31" width="700"/>

## Data

**WARNING: Colab deletes all files every time the runtime is disconnected. Make sure to re-download the inputs when that happens.**

In [None]:
# Download Data -- run this cell only one time per runtime
if not os.path.exists('/content/part1_images/hotel.seq41.png'):
  !gdown 1fT0H-FbbDZnjMfCJHZcscpcwAXHhGgNw
  !unzip "/content/part1_images.zip" -d "/content/"
  !gdown 1r-Pdino6MRLCEWX_sQOgd8D5AVsRc7Ym
  # Load Initial Key Points
  data = loadmat('/content/initial_keypoints.mat')
  X0 = data['Xo']
  Y0 = data['Yo']

## Helper Functions

In [None]:
def readImages(folder, num_images):
  """Read the first `num_images` frames of the hotel sequence into one array.

  folder: path prefix; it is joined to the file name by plain string
    concatenation, so it has to end with a path separator.
  num_images: how many frames to read (hotel.seq0.png, hotel.seq1.png, ...).

  output: np.float32 array with the frames stacked along the first axis.
  """
  frame_paths = (f'{folder}hotel.seq{idx}.png' for idx in range(num_images))
  return np.array([cv2.imread(path) for path in frame_paths], dtype=np.float32)

def compute_H(patch):
  """Compute the 2x2 second moment matrix of a 2-D patch.

  Gradients are forward differences. Both gradient maps are cropped to the
  common (rows-1, cols-1) region so they can be multiplied element-wise.

  output: (H, Ix, Iy) where H = [[sum(Ix^2), sum(Ix*Iy)], [sum(Ix*Iy), sum(Iy^2)]]
    and Ix / Iy are the cropped x / y gradient maps.
  """
  anchor = patch[:-1, :-1]          # pixel (p, q), excluding the last row & column
  Ix = patch[:-1, 1:] - anchor      # right neighbour minus pixel
  Iy = patch[1:, :-1] - anchor      # lower neighbour minus pixel
  sum_xx = np.sum(Ix * Ix)
  sum_yy = np.sum(Iy * Iy)
  sum_xy = np.sum(Ix * Iy)
  H = np.array([[sum_xx, sum_xy],
                [sum_xy, sum_yy]])
  return H, Ix, Iy

def pad_the_patch(patch):
  """Replicate the last column, then the last row, of a patch.

  The result has one more row and one more column than the input, so that the
  forward-difference gradients taken on it cover the original patch size.
  """
  widened = np.concatenate((patch, patch[:, -1:]), axis=1)     # duplicate last column
  return np.concatenate((widened, widened[-1:, :]), axis=0)    # duplicate last row

# Load the whole hotel sequence (51 frames: hotel.seq0.png ... hotel.seq50.png)
folder = '/content/part1_images/'
im = readImages(folder=folder, num_images=51)


## 1.1 Keypoint Selection

- For the first frame in the sequence, the second moment matrix is used to locate strong corners to use as keypoints.
- These points will be tracked throughout the sequence in the second part of the problem.
- A fine-tuned threshold is chosen so that edges and noisy patches are ignored.
- Then a local non-maxima suppression (NMS) is performed over a 5x5 window centered at each point.
- This gave several hundred good points to track.

### Code

In [None]:
def apply_NMS(f_values, window_size):
  '''
  Local Non-Maxima Suppression on a 2-D response map.

  A window spanning window_size//2 pixels on each side of the centre is slid
  over the map (stride 1). The centre pixel is kept as a keypoint only when it
  is the single largest value of its window; a tie for the maximum rejects
  the window.

  f_values: 2-D array of corner responses
  window_size: size of the neighbourhood (the caller in this file uses 5)

  output: array of the same shape as f_values holding 255 at keypoints and 0
    elsewhere. The border of window_size//2 pixels is never marked.
  '''
  n_rows, n_cols = f_values.shape
  radius = window_size // 2
  keypoints = np.zeros((n_rows, n_cols))
  for r in range(radius, n_rows - radius):
    for c in range(radius, n_cols - radius):
      window = f_values[r - radius:r + radius + 1, c - radius:c + radius + 1]
      peak = window.max()
      # the maximum must occur exactly once in the window ...
      peak_is_unique = np.count_nonzero(window == peak) == 1
      # ... and it must sit at the centre pixel
      if peak_is_unique and f_values[r, c] == peak:
        keypoints[r, c] = 255
  return keypoints

def getKeypoints(img, tau):
  '''
  Detecting keypoints using Harris corner criterion

  For every 5x5 window (stride 1) the second moment matrix H is built and the
  response det(H)/trace(H) is computed. Responses not exceeding tau are
  dropped, the response map is scaled by its maximum, and local non-maxima
  suppression (apply_NMS, same 5x5 window) selects the final keypoints.

  img: input BGR image (converted to grayscale internally)
  tau: threshold on the un-normalised response det(H)/trace(H)

  output: list of N [x, y] keypoints (x & y are 0-indexed with upper left pixel of the image being (0,0). i.e, OpenCV frame convention),
    ordered row by row. No keypoint is reported within 2 pixels of the image border.
  '''
  # Work in float32 so that the gradient differences cannot wrap around when an
  # integer-typed image is passed in.
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)
  (rows, cols) = img.shape
  window_size = 5
  half_size = window_size//2
  f_values = np.zeros_like(img)
  for i in range(half_size, rows-half_size):
    for j in range(half_size, cols-half_size):
      patch = img[i-half_size:i+half_size+1, j-half_size:j+half_size+1]
      H, _, _ = compute_H(pad_the_patch(patch))
      trace = np.trace(H)
      if trace <= 0:
        # Perfectly flat patch: det/trace would be 0/0 (NaN + RuntimeWarning).
        # Such a patch can never be a corner, so its response stays 0.
        continue
      harris_operator = np.linalg.det(H) / trace
      if harris_operator > tau:
        f_values[i, j] = harris_operator
  peak = np.amax(f_values)
  if peak > 0:
    # Skip the normalisation when nothing passed the threshold; dividing by a
    # zero maximum would fill the map with NaN.
    f_values = f_values/peak
  keypoints = apply_NMS(f_values, window_size)
  print('Number of keypoints selected:', np.sum(keypoints)/255)

  # argwhere yields (row, col) pairs in row-major order; flip to (x, y) and
  # convert to plain Python ints so the points can be handed to OpenCV drawing calls.
  return np.argwhere(keypoints == 255)[:, ::-1].tolist()

# Threshold on the raw Harris response det(H)/trace(H) used by getKeypoints.
tau = 3.15*255
# tau is tuned as below: <tau> -> <number of keypoints it gave>
# (the listed values appear to be the multiplier applied to 255, like the 3.15 above -- TODO confirm)
# 0.17->1102 0.25->1061 0.55->991 0.85->865 0.95->813 1.15->740 3.15->201
keypoints_opencv = getKeypoints(im[0], tau)

# add plots for the write-up
# Convert frame 0 to grayscale and back to 3 channels so that the markers can be drawn in colour.
display_img = cv2.cvtColor(im[0], cv2.COLOR_BGR2GRAY)
(rows, cols) = display_img.shape
display_img = cv2.cvtColor(display_img, cv2.COLOR_GRAY2BGR)
# Draw every detected keypoint as a filled (thickness=-1) circle; key_pt is [x, y].
for key_pt in keypoints_opencv:
  display_img = cv2.circle(display_img, (key_pt[0], key_pt[1]), radius=3, color=(0, 255, 0), thickness=-1)
cv2_imshow(display_img)

### Write-up

#### Algorithm to get KeyPoints:
- A 5x5 window is slid on the grayscale image (with a stride of 1) & the 2nd moment matrix `H` is calculated using the below formula:

  - <img src="https://drive.google.com/uc?id=1_W_a9FwMjDlyTWAXLOrXvsFBcIEeZfli" height=400 align="center"/>
  - where:
    - `i_x` & `i_y` are the gradients in x & y directions.
    - The gradient `i_x` is calculated as `intensity of pixel (p+1,q) - intensity of pixel (p,q)`
    - The summations in the formulas for `A`,`B`&`C` run over the 5x5 window.

- The harris operator is calculated by the below formula:
  - <img src="https://drive.google.com/uc?id=1gBxEZQ1yttjpwMHdpNpbFhq4kZlQJPUr" height=200 align="center"/>
  - It is calculated over all the 5x5 windows.
- ##### Thresholding:
  - Pixels for which the Harris operator yielded a value greater than `tau` were marked as potential keypoints & the rest were discarded.
  - `tau = 803.25` was selected after a bit of tuning.
- ##### Normalization:
  - Then, the resultant harris operator matrix (of the same size as image) is divided by its maximum value.
- Finally, non-maxima suppression is done (in the below way):
  - If a pixel is strictly greater than every other pixel in the 5x5 neighborhood centered around it, it is retained. Else, it is discarded.
- Note:
  - No padding is done on the original image.
  - Due to this, no keypoints will be detected in the `window_size//2` = 2 columns or rows along the border.

#### Visualization of Keypoints detected:
- The below image shows the final keypoints obtained by following the above method.
- `201` keypoints were selected for `tau = 803.25`.
- <img src="https://drive.google.com/uc?id=1thCSU_TWlgDcGmzrcDMjZ43CszK2cQl3" align="center"/>

## 1.2 Feature Tracking

- Apply the Kanade-Lucas-Tomasi tracking procedure to track the keypoints found in part 1 throughout the hotel sequence. 

<img src="https://drive.google.com/uc?id=1JsyiJlWH6xnXW0Jf0rTHkRoeNn2Mo-Hm" width="500"/>

- Some keypoints will move out of the image frame over the course of the sequence.
- Portions of the trajectories of such points which fall outside the image frame are discarded.

In [None]:
def trackPoints(pt_x, pt_y, im, ws, out_of_bound_pts):
  '''
  Tracking initial points (pt_x, pt_y) across the image sequence

  pt_x, pt_y: x / y coordinates of the N keypoints in the first frame
    (any array-like; flattened, so (N,), (1,N) and (N,1) are all accepted)
  im: image sequence, either (num_frames, H, W) or (num_frames, H, W, C).
    For the 4-D case only channel 0 is used.
  ws: patch window size, forwarded to getNextPoints
  out_of_bound_pts: list forwarded to getNextPoints on every frame

  Outputs:
    array of shape [Number of keypoints] x [Number of frames] x [2];
    entry [n, t] is the [x, y] position of keypoint n in frame t.

  Raises:
    ValueError: if im is neither 3-D nor 4-D.
  '''
  pt_x = np.asarray(pt_x, dtype=np.float32).ravel()
  pt_y = np.asarray(pt_y, dtype=np.float32).ravel()
  im = np.asarray(im)
  if im.ndim == 4:
    im = im[:, :, :, 0]
  elif im.ndim != 3:
    raise ValueError(
        f'im must have shape (num_frames, H, W) or (num_frames, H, W, C), got {im.shape}')
  # Cast on both paths: the patches are differenced later on, which would wrap
  # around for unsigned integer images.
  im = im.astype('float32')

  N = pt_x.size
  nim = len(im)
  track_x = np.zeros((N, nim))
  track_y = np.zeros((N, nim))
  track_x[:, 0] = pt_x
  track_y[:, 0] = pt_y
  # Each frame is tracked from the positions found in the previous one.
  for t in range(nim-1):
    track_x[:, t+1], track_y[:, t+1] = getNextPoints(track_x[:, t], track_y[:, t], im[t,:,:], im[t+1,:,:], ws, out_of_bound_pts)

  return np.stack((track_x, track_y), axis=2)

def getNextPoints(xs, ys, im1, im2, ws, out_of_bound_pts, max_iters=100):
  '''
  Iterative Lucas-Kanade feature tracking

  xs, ys: keypoint positions in im1; they are also the initial guess in im2
  im1, im2: two consecutive single-channel frames (2-D arrays)
  ws: patch window size
  out_of_bound_pts: list, modified in place -- the index of a keypoint is
    appended when an update would move it outside the image
  max_iters: upper bound on the refinement iterations per keypoint, so that a
    non-converging point cannot stall the whole run

  output: (x_next_frame, y_next_frame), two lists with the tracked keypoint
    positions in im2. A keypoint whose update would leave the frame, or whose
    second moment matrix is singular, keeps the last valid position.
  '''
  threshold = 0.5
  x_next_frame = []
  y_next_frame = []
  (rows, cols) = im1.shape
  for idx, (x, y) in enumerate(zip(xs, ys)):
    x_dash, y_dash = x, y
    # Template patch and its gradients are fixed for this keypoint, so they
    # are extracted once instead of on every iteration.
    template = cv2.getRectSubPix(im1, (ws,ws), (x,y))
    H, Ix, Iy = compute_H(pad_the_patch(template))
    for _ in range(max_iters):
      It = cv2.getRectSubPix(im2, (ws,ws), (x_dash,y_dash)) - template
      # Solve H [u, v]^T = -[sum(Ix*It), sum(Iy*It)]^T for the displacement.
      b = [-np.sum(Ix*It), -np.sum(Iy*It)]
      try:
        u, v = np.linalg.solve(H, b)
      except np.linalg.LinAlgError:
        # Singular H (e.g. a textureless patch): no displacement can be
        # estimated, so stop refining this point.
        break
      new_x_dash = x_dash + u
      new_y_dash = y_dash + v
      if new_y_dash<0 or new_y_dash>rows-1 or new_x_dash<0 or new_x_dash>cols-1:
        out_of_bound_pts.append(idx)
        break
      x_dash = new_x_dash
      y_dash = new_y_dash
      # Converged once the update is small in magnitude in both directions.
      # The magnitude matters: a large negative step must not end the loop.
      if not (abs(u) > threshold or abs(v) > threshold):
        break
    x_next_frame.append(x_dash)
    y_next_frame.append(y_dash)
  return x_next_frame, y_next_frame

# Patch window size used by the tracker.
ws = 7
# Keypoints from part 1 as an array with columns [x, y].
keypoints_list = copy.copy(np.array(keypoints_opencv))
# Filled by getNextPoints (through trackPoints) with the indices of keypoints
# whose update would have left the image.
out_of_bound_pts = []
tracked_pts = trackPoints(pt_x=keypoints_list[:,0], pt_y=keypoints_list[:,1], im=im, ws=ws, out_of_bound_pts=out_of_bound_pts)

# plot your results
# All three visualisations are drawn on separate copies of the first frame:
#   display_img1: every keypoint in frame 0 and its tracked position in frame 1
#   display_img2: the full trajectory of 20 randomly chosen keypoints
#   display_img3: the full trajectory of every keypoint listed in out_of_bound_pts
display_img1 = copy.copy(im[0])
display_img2 = copy.copy(im[0])
display_img3 = copy.copy(im[0])
# Indices of 20 keypoints picked at random (not seeded here, so the choice changes between runs).
rand_20_tracked_pts = np.random.permutation(np.arange(tracked_pts.shape[0]))[:20]
for i in range(tracked_pts.shape[0]):
  # tracked_pt holds one [x, y] row per frame for keypoint i.
  tracked_pt = tracked_pts[i]
  display_img1 = cv2.circle(display_img1, (int(tracked_pt[0][0]), int(tracked_pt[0][1])), radius=2, color=(0, 255, 0), thickness=-1)
  display_img1 = cv2.circle(display_img1, (int(tracked_pt[1][0]), int(tracked_pt[1][1])), radius=2, color=(0, 0, 255), thickness=-1)
  if i in rand_20_tracked_pts:
    for consec_frame_pts in tracked_pt:
      display_img2 = cv2.circle(display_img2, (int(consec_frame_pts[0]), int(consec_frame_pts[1])), radius=2, color=(0, 0, 255), thickness=-1)
  if i in out_of_bound_pts:
    for consec_frame_pts in tracked_pt:
      display_img3 = cv2.circle(display_img3, (int(consec_frame_pts[0]), int(consec_frame_pts[1])), radius=2, color=(0, 0, 255), thickness=-1)

cv2_imshow(display_img1)
cv2_imshow(display_img2)
cv2_imshow(display_img3)

### Write-up

- The below results show the following:
  - The keypoints at the first frame (as green) and the tracked keypoints at the second frame (as red) on the first frame of the sequence.
  - The tracked path over the sequence of frames for 20 random keypoints.
  - The points which have moved out of frame at some point along the sequence displayed on the first frame.

- <img src="https://drive.google.com/uc?id=1kz8e8M1_RTHjEpZVAtdn8rew49lDUsbI" height=400 align="center"/>

- <img src="https://drive.google.com/uc?id=1SbVyEFGKRlysslutuPNzkoj2rB0Y12c1" height=400 align="center"/>

- <img src="https://drive.google.com/uc?id=1aRwEDLJ39ZgMVjPk8F1xHjHQ5UZEzRMG" height=400 align="center"/>
