In [153]:
import cv2
import numpy as np
from itertools import count
from skimage.measure import ransac
from skimage.transform import FundamentalMatrixTransform, EssentialMatrixTransform
import matplotlib.pyplot as plt

vid_path = '5.hevc'
w = 1164
h = 874
F = 910

K = np.array([
    [F, 0, w//2],
    [0, F, h//2],
    [0, 0, 1]])
K_inv = np.linalg.inv(K)


def normalize(x):
  x = add_third(x)
  #np.dot(Kinv, add_ones(pts).T).T[:, 0:2]
  return (K_inv @ x.T).T[:, 0:2]
def denormalize(x):
  return (K @ x)[:-1]

def add_third(x):
  return np.pad(x, pad_width = ((0,0), (0,1)) ,constant_values=1)
def remove_third(x):
  return x[:,[0,1]]

def generate_frames(vid_path):
    video = cv2.VideoCapture(vid_path, cv2.CAP_FFMPEG)
    _, prev_frame = video.read()
    for t in count():
      ret, curr_frame = video.read()
      if not ret:
        break
      yield prev_frame, curr_frame
      prev_frame = curr_frame
    video.release()
    cv2.destroyAllWindows()

def extractFeatures(frame):
  orb = cv2.ORB_create()
  # only works on b/w images
  pts = cv2.goodFeaturesToTrack(np.mean(frame,axis=2).astype(np.uint8), 3000, 0.01, minDistance=7)
  # we need kp class to feed it into ORB.compute to get descriptors
  kps = [cv2.KeyPoint(x=f[0][0], y=f[0][1], size=10) for f in pts]
  kps, des = orb.compute(frame, kps)
  return np.array([(kp.pt[0], kp.pt[1]) for kp in kps]), des

def bfmatcher(kps, dess):
  bf = cv2.BFMatcher(cv2.NORM_HAMMING)
  # find closest descriptors between frames, hamming norm for ORB (BRIEF, BRISK...)
  # they are binary string types
  # L1, L2 for SIFT/SURF
  matches = bf.knnMatch(dess[0], dess[1], k=2)
  res = []
  # DMatch obj: distance (the lower the better)
  # trainIdx: index of the descriptor in train desc
  # queryIdx: index of the descriptor in query desc
  # imgIdx: index of the train image
  for m,n in matches:
    # Lowe's ratio test
    # https://stackoverflow.com/questions/51197091/how-does-the-lowes-ratio-test-work
    if m.distance < 0.75 * n.distance:
        kp1 = kps[0][m.queryIdx]
        kp2 = kps[1][m.trainIdx]
        res.append((kp1, kp2))
  res = np.array(res)
  # prune the outliers by fitting
  assert len(res)>=8, 'not enough points'
  model, inliers = ransac((normalize(res[:,0]), normalize(res[:,1])),
                          #FundamentalMatrixTransform, 
                          EssentialMatrixTransform,
                          min_samples=8,
                          residual_threshold=0.0005, 
                          max_trials=500)
  
  print (f'good kps: {len(res[inliers])/len(res)*100:.2f}% out of {len(res)}')
  return res[inliers,0], res[inliers,1]

In [154]:
for i, (p, c) in enumerate(generate_frames(vid_path)):
  
  kps_1, des_1 = extractFeatures(p)
  kps_2, des_2 = extractFeatures(c)

  p1, p2 = bfmatcher([kps_1, kps_2], [des_1, des_2])
  for p in p2:
    cv2.circle(c, (int(p[0]), int(p[1])), 1, (255,255,0)) 
  
  for k1, k2 in zip(p1, p2):
    cv2.line(c, tuple(k1.astype(int)), tuple(k2.astype(int)), (0,200,200), 1)
  
  cv2.imshow('v', c)
  key = cv2.waitKey(1)
  if key == ord('q'):
    break

cv2.destroyAllWindows()

good kps: 35.79% out of 271
good kps: 51.26% out of 199
good kps: 52.53% out of 198
good kps: 60.56% out of 180
good kps: 56.59% out of 182
good kps: 61.50% out of 213
good kps: 64.97% out of 197
good kps: 63.18% out of 201
good kps: 64.50% out of 231
good kps: 68.95% out of 190
good kps: 51.23% out of 162
good kps: 58.14% out of 172
good kps: 63.10% out of 168
good kps: 59.64% out of 166
good kps: 48.04% out of 204
good kps: 62.98% out of 208
good kps: 63.16% out of 190
good kps: 50.00% out of 200
good kps: 55.86% out of 222
good kps: 62.08% out of 240
good kps: 58.57% out of 251
good kps: 49.07% out of 216
good kps: 57.33% out of 232
good kps: 51.23% out of 244
good kps: 54.85% out of 299
good kps: 42.55% out of 235
good kps: 53.25% out of 231
