# Object detection in a video using SIFT

# In[1]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
from time import time

class region_selector:
    '''Let the user select a rectangular region of an image with the mouse.

    Creating an instance opens a window named 'image', displays ``img`` and
    blocks until the user has dragged a rectangle with the left mouse button
    or pressed ESC. All OpenCV windows are destroyed before the constructor
    returns.

    Attributes:
        img: the image passed to the constructor; it is never drawn on once
            a drag has started (drawing happens on copies).
        img2draw: the image currently shown in the window.
        drawing: True while the left button is held down.
        initial_x, initial_y: where the current drag started (-1 before any).
        end_selection: True once a rectangle has been completed.
        track_window: ``(x, y, w, h)`` of the selected rectangle, with
            ``(x, y)`` its upper-left corner, or None when the user left
            with ESC before completing a selection.
    '''

    def __init__(self, img):
        '''Show ``img`` and block until a region is selected or ESC is pressed.'''
        self.img = img
        self.img2draw = img
        self.drawing = False
        self.initial_x = -1
        self.initial_y = -1
        self.end_selection = False
        # Defined up front so callers can test for a cancelled selection
        # instead of hitting an AttributeError.
        self.track_window = None

        cv2.namedWindow('image')
        cv2.setMouseCallback('image', self.draw_rectangle)

        while not self.end_selection:
            cv2.imshow('image', self.img2draw)
            # 27 is the ESC key code: leave without a selection.
            if cv2.waitKey(1) & 0xFF == 27:
                break
        cv2.destroyAllWindows()

    def draw_rectangle(self, event, x, y, flags, param):
        '''Mouse callback: track the drag and record the final rectangle.

        ``flags`` and ``param`` are part of the OpenCV callback signature and
        are not used.
        '''
        if event == cv2.EVENT_LBUTTONDOWN:
            self.drawing = True
            self.initial_x = x
            self.initial_y = y

        elif event == cv2.EVENT_MOUSEMOVE:
            if self.drawing:
                self._draw_preview(x, y)

        elif event == cv2.EVENT_LBUTTONUP:
            if not self.drawing:
                # Release without a matching press in this window: there is
                # no valid start corner, so ignore it.
                return
            self.drawing = False
            self._draw_preview(x, y)

            ix, iy = self.initial_x, self.initial_y
            # The drag may go in any direction; normalise to upper-left
            # corner plus non-negative width/height.
            self.track_window = (min(ix, x), min(iy, y), abs(ix - x), abs(iy - y))
            self.end_selection = True

    def _draw_preview(self, x, y):
        '''Redraw the rectangle from the drag start to (x, y) on a fresh copy.'''
        self.img2draw = self.img.copy()
        cv2.rectangle(self.img2draw, (self.initial_x, self.initial_y),
                      (x, y), (0, 255, 0), 2)

def get_matched_image(img_scene, kp_obj, des_object, img_obj, sift,
                      min_matches=10, ransac_reproj_threshold=3.0):
    '''Match keypoints from the object to keypoints detected in img_scene.

    Args:
        img_scene: image in which to look for the object.
        kp_obj: keypoints of the object, as returned by
            ``sift.detectAndCompute`` on ``img_obj``.
        des_object: descriptors belonging to ``kp_obj`` (may be None when the
            object has no keypoints).
        img_obj: image of the object; only used for drawing and for its size.
        sift: detector exposing ``detectAndCompute(image, mask)``.
        min_matches: a homography is only estimated when there are more than
            this many matches.
        ransac_reproj_threshold: reprojection threshold passed to
            ``cv2.findHomography``.

    Returns:
        If there are enough matches and a homography is found: a copy of
        ``img_scene`` with the projected outline of the object drawn on it.
        Otherwise: the ``cv2.drawMatches`` visualisation of the object and
        the scene with whatever matches were found.
    '''
    kp_scene, des_scene = sift.detectAndCompute(img_scene, None)

    good = []
    # detectAndCompute yields None descriptors when an image has no
    # keypoints; matching against None would raise inside OpenCV.
    if des_object is not None and des_scene is not None:
        # crossCheck=True returns only matching points p1 and p2 where p2 is
        # the closest point to p1 and p1 is also the closest to p2.
        # NOTE: Lowe's ratio test would be the alternative filter; it needs
        # k=2 and therefore crossCheck=False.
        bf = cv2.BFMatcher(crossCheck=True)
        matches = bf.knnMatch(des_object, des_scene, k=1)
        # With cross-checking some query descriptors get no match at all.
        good = [m[0] for m in matches if m]

    T = None
    # findHomography needs at least 4 correspondences regardless of
    # min_matches.
    if len(good) > max(min_matches, 3):
        obj_pts = np.zeros([len(good), 2], dtype=np.float32)
        scene_pts = np.zeros([len(good), 2], dtype=np.float32)
        for i, m in enumerate(good):
            obj_pts[i] = kp_obj[m.queryIdx].pt
            scene_pts[i] = kp_scene[m.trainIdx].pt

        # T is the transformation matrix (None when RANSAC cannot find a
        # model); the inlier mask is not needed here.
        T, _ = cv2.findHomography(obj_pts, scene_pts, cv2.RANSAC,
                                  ransacReprojThreshold=ransac_reproj_threshold)

    if T is None:
        # Too few matches or no homography: show the raw matches instead.
        return cv2.drawMatches(img_obj, kp_obj, img_scene, kp_scene, good, None)

    # Find transformation of rectangle delimiting object.
    # shape[:2] works for both grayscale and colour object images.
    h, w = img_obj.shape[:2]
    obj_bounds = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
    obj_bounds_in_scene = cv2.perspectiveTransform(obj_bounds, T)

    # Draw on a copy so the caller's scene image is left untouched.
    return cv2.polylines(img_scene.copy(), [np.int32(obj_bounds_in_scene)],
                         True, 255, 3, cv2.LINE_AA)

# In[2]:
# Capture first frame for object selection
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the default camera (index 0)")

    # Live preview: the frame on screen when 'q' is pressed is the one the
    # object is selected from.
    while True:
        ret, frame = cap.read()
        if not ret:
            raise RuntimeError("Could not read a frame from the camera")
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Let the user select the object
    rs = region_selector(frame)
    # The selector only has a track_window once a rectangle was completed;
    # leaving it with ESC means there is nothing to track.
    track_window = getattr(rs, 'track_window', None)
    if track_window is None:
        raise SystemExit("No region selected")
    upper_left_x, upper_left_y, w, h = track_window

    img_obj = frame[upper_left_y:upper_left_y+h, upper_left_x:upper_left_x+w]
    if img_obj.size == 0:
        raise SystemExit("Selected region is empty")
    # VideoCapture delivers frames in BGR channel order.
    img_obj_g = cv2.cvtColor(img_obj, cv2.COLOR_BGR2GRAY)

    # SIFT detector object for both images. SIFT lives in the main module
    # from OpenCV 4.4 on; older builds only expose it through the contrib
    # xfeatures2d module.
    if hasattr(cv2, 'SIFT_create'):
        sift_create = cv2.SIFT_create
    else:
        sift_create = cv2.xfeatures2d.SIFT_create
    sift = sift_create(nfeatures=0, nOctaveLayers=3, contrastThreshold=0.02,
                       edgeThreshold=10, sigma=1.6)
    kp_obj, des_obj = sift.detectAndCompute(img_obj_g, None)
    if des_obj is None:
        raise SystemExit("No SIFT features found in the selected region")

    while True:
        # Capture frame
        ret, frame = cap.read()
        if not ret:
            # Camera disconnected or stream ended.
            break
        frame_g = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Resize image for better performance
        frame_g_res = cv2.resize(frame_g, (640, 360))
        img_matches_RANSAC = get_matched_image(frame_g_res,
                                               kp_obj, des_obj,
                                               img_obj_g, sift)
        # Display the resulting frame
        cv2.imshow('frame', img_matches_RANSAC)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even on errors.
    cap.release()
    cv2.destroyAllWindows()