# Imports

In [None]:
## Import necessary libraries here
import cv2
import numpy as np
from scipy.io import loadmat
from scipy import ndimage
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
import copy
import os
import time
%matplotlib inline

# Object Instance Recognition

## Overview
- This problem explores Lowe-style object instance recognition.
    - The ratio of the distance from a given feature vector to its 1st nearest neighbor to the distance to its 2nd nearest neighbor is used as the thresholding function.
- Expected Result
<img src="https://drive.google.com/uc?id=1T5-qI0qPk2FHuesLIvWQ1jqRI9T9-WeC" width="1000"/>

## Data

In [None]:
# Download Data -- run this cell only one time per runtime
!gdown 10ByzpFbB-z178VGjwmCwc95wInD8vpNM # SIFT Features
!gdown 1KLWGMtDEMNNrmzd3Qezrs2-NQR52OfoU # Stop sign image 1
!gdown 13y-o1vdGN6CqqPuUcgU7pIxODTxrYS7J # Stop sign image 2

## Code

In [None]:
def compare(x):
  """Sort key: return the first element of ``x``.

  The matching loop sorts ``[distance, index]`` pairs with this key, so the
  pairs end up ordered by distance.
  """
  first = x[0]
  return first

## Load the two stop-sign images.
img1 = cv2.imread('/content/stop1.jpg')
img2 = cv2.imread('/content/stop2.jpg')
# cv2.imread returns None instead of raising when a file cannot be read;
# fail here with a clear message rather than later on `img.shape`.
if img1 is None or img2 is None:
  raise FileNotFoundError(
      "Could not read '/content/stop1.jpg' and/or '/content/stop2.jpg'; "
      "run the data download cell first.")

## The .mat file contains:
## Descriptor1, Descriptor2: SIFT features from image 1 and image 2
## Frame1, Frame2: position, scale, rotation of keypoints
data = loadmat('/content/SIFT_features.mat')

# Transpose so that axis 0 indexes keypoints: the matching code iterates over
# Descriptor1.shape[0] and indexes Frame1/Frame2 by keypoint first.
Frame1 = np.transpose(data['Frame1'])
Frame2 = np.transpose(data['Frame2'])
# Descriptors are cast to float32 before they are subtracted from each other.
# NOTE(review): presumably they are stored as an unsigned integer type, where
# subtraction would wrap around -- confirm against the .mat file.
Descriptor1 = np.transpose(data['Descriptor1']).astype('float32')
Descriptor2 = np.transpose(data['Descriptor2']).astype('float32')


## Match every descriptor of image 1 against all descriptors of image 2.
## Two acceptance tests are applied independently:
##   - NN:  the distance to the nearest neighbor is below `threshold_NN`
##   - 2NN: the ratio (nearest distance / second-nearest distance) is below
##          `threshold_2NN`
## Each accepted match is stored as [index in image 1, index in image 2].
matches_NN = []
matches_2NN = []
threshold_NN = 120
threshold_2NN = 0.548
# Not used by the matching loop below; kept in case other cells reference it.
data_type = [['distance', 'float32'], ['idx', int]]
for i in range(Descriptor1.shape[0]):
  # Euclidean distance from descriptor i to every descriptor of image 2.
  dist = np.linalg.norm(Descriptor2 - Descriptor1[i], axis=1)
  # Indices of the (up to) two closest descriptors. A stable sort keeps the
  # lowest index first among equal distances.
  closest = np.argsort(dist, kind='stable')[:2]
  if closest.size == 0:
    break  # image 2 has no descriptors, so nothing can match

  nearest = int(closest[0])
  nearest_dist = float(dist[nearest])
  if nearest_dist < threshold_NN:
    matches_NN.append([i, nearest])

  if closest.size < 2:
    continue  # the ratio test needs a second neighbor
  second_dist = float(dist[closest[1]])
  # A zero second-nearest distance means both neighbors coincide with the
  # query: the match is ambiguous and the ratio is undefined, so skip it
  # instead of dividing by zero.
  if second_dist > 0 and (nearest_dist / second_dist) < threshold_2NN:
    matches_2NN.append([i, nearest])

## Display the matched keypoints
def _draw_matches(canvas, matches, x_offset):
  """Draw one green line per match on `canvas` and return the canvas.

  `matches` holds [index into Frame1, index into Frame2] pairs. The first two
  columns of a frame row are used as the x and y pixel coordinates. Points of
  image 2 are shifted right by `x_offset` because image 2 sits to the right
  of image 1 on the canvas.
  """
  for idx1, idx2 in matches:
    start = (int(Frame1[idx1][0]), int(Frame1[idx1][1]))
    end = (int(Frame2[idx2][0]) + x_offset, int(Frame2[idx2][1]))
    canvas = cv2.line(canvas, start, end, (0, 255, 0), 2)
  return canvas


(rows1, cols1, _) = img1.shape
(rows2, cols2, _) = img2.shape
# Side-by-side canvas: tall enough for the taller image, wide enough for both.
combined_img = np.zeros((max(rows1, rows2), cols1+cols2, 3))
# Each image is written only into its own rows, so images of different
# heights both fit; the rows below the shorter image stay black.
combined_img[:rows1, :cols1, :] = img1
combined_img[:rows2, cols1:cols1+cols2, :] = img2
# Independent copy so each matching method is drawn on its own canvas.
combined_img2 = copy.copy(combined_img)

print(len(matches_NN), len(matches_2NN))
combined_img = _draw_matches(combined_img, matches_NN, cols1)
combined_img2 = _draw_matches(combined_img2, matches_2NN, cols1)

cv2_imshow(combined_img)
cv2_imshow(combined_img2)

## Write-up
<!-- 
(5 pts) Display:

1. the matches by thresholding nearest neighbor distances.

2. the matches by thresholding the distance ratio. 

(5 pts) Describe the differences of (1) and (2). -->

#### Results:
 - <u>Method-1:</u> Matches by thresholding nearest neighbor distances:
 - <img src="https://drive.google.com/uc?id=1LxXW19UoMUcZb0zmVWYnU1rJh5tREOGh" align="center"/>
 
 - <u>Method-2:</u> Matches by thresholding distance ratio:
 - <img src="https://drive.google.com/uc?id=1EJzAiMPLDfL5VSTBSlUtRp0bEGwdumvo" align="center"/>

#### Comparison:
  - The distance ratio method is better at telling apart features that look similar but belong to different points.
  - It has a lower false-positive rate.
  - It can be observed (as shown below) that the nearest neighbor distance method falsely matched 2 SIFT descriptors on the white border of the stop sign, whereas the distance ratio method did not make this mistake.
  - <img src="https://drive.google.com/uc?id=10vRqvAc6b3J651YPGjcsmMzLKciIoufH" align="center"/>
  