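## Object detection using SIFT (`cv2.SIFT()` is not available in OpenCV 3.0+; SIFT lives in the contrib `xfeatures2d` module for 3.0–4.3 and in the main module again from 4.4)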

In [None]:
import cv2
import numpy as np


def sift_detector(new_image, image_template):
    """Count the SIFT matches between an input image and a template.

    Args:
        new_image: BGR image; it is converted to grayscale before detection.
        image_template: template image, passed to SIFT unchanged (so it is
            expected to be grayscale already).

    Returns:
        The number of matches that pass Lowe's ratio test. Returns 0 when
        either image yields no usable descriptors.
    """
    image1 = cv2.cvtColor(new_image, cv2.COLOR_BGR2GRAY)
    image2 = image_template

    # Create the SIFT detector. The constructor moved between OpenCV
    # releases, so pick whichever one this build provides.
    if hasattr(cv2, 'SIFT_create'):
        sift = cv2.SIFT_create()                # OpenCV 4.4+
    elif hasattr(cv2, 'xfeatures2d'):
        sift = cv2.xfeatures2d.SIFT_create()    # OpenCV 3.x / 4.x contrib
    else:
        sift = cv2.SIFT()                       # OpenCV 2.4

    # Obtain the keypoints and descriptors using SIFT.
    # Descriptors are vectors that store information about the keypoints;
    # they are what actually gets matched.
    keypoints_1, descriptors_1 = sift.detectAndCompute(image1, None)
    keypoints_2, descriptors_2 = sift.detectAndCompute(image2, None)

    # A textureless image produces no keypoints and `None` descriptors, which
    # would crash the matcher. k=2 also needs at least two template
    # descriptors to pick neighbours from.
    if descriptors_1 is None or descriptors_2 is None:
        return 0
    if len(descriptors_1) == 0 or len(descriptors_2) < 2:
        return 0

    # Define parameters for the FLANN matcher.
    FLANN_INDEX_KDTREE = 0
    # More trees make the index more complex and slower.
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=3)
    # Number of index traversals per query: higher is more precise but slower.
    search_params = dict(checks=100)

    # Create the FLANN matcher object.
    flann = cv2.FlannBasedMatcher(index_params, search_params)

    # For every descriptor of image 1, find its two nearest neighbours among
    # the template descriptors.
    matches = flann.knnMatch(descriptors_1, descriptors_2, k=2)

    # FLANN matching is fast but not very accurate, so filter with Lowe's
    # ratio test: keep a match only when the best neighbour is clearly closer
    # than the second best.
    good_matches = []
    for pair in matches:
        if len(pair) < 2:
            # Fewer than two neighbours returned: the ratio test is undefined.
            continue
        best, second = pair
        if best.distance < 0.7 * second.distance:
            good_matches.append(best)

    # Only the number of good matches matters to the caller.
    return len(good_matches)

# 1. Start the webcam stream.
cap = cv2.VideoCapture(0)

# 2. Load our image template, this is our reference image (flag 0 = grayscale).
image_template = cv2.imread('images/box_in_scene.png', 0)
if image_template is None:
    # cv2.imread returns None instead of raising; fail with a clear message
    # and do not leave the camera open.
    cap.release()
    raise IOError("Could not read template image 'images/box_in_scene.png'")

# Our threshold to indicate object detection.
# We use 10 since the SIFT detector returns few false positives: more than
# 10 matches is treated as 'object detected'.
threshold = 10

try:
    while True:

        # 3. Get webcam images; stop when the camera delivers no frame.
        ret, frame = cap.read()
        if not ret:
            break

        # 4. Get height and width of webcam frame
        height, width = frame.shape[:2]

        # 4.1 Define ROI box dimensions. Integer division keeps the values
        # usable as pixel coordinates and slice indices on Python 3.
        # Note: top_left_y is the larger y value (3/4 of the height) and
        # bottom_right_y the smaller one (1/4 of the height).
        top_left_x = width // 3
        top_left_y = (height // 2) + (height // 4)
        bottom_right_x = (width // 3) * 2
        bottom_right_y = (height // 2) - (height // 4)

        # 4.2 Draw rectangular window for our region of interest
        cv2.rectangle(frame, (top_left_x, top_left_y), (bottom_right_x, bottom_right_y), 255, 3)

        # 5. Crop the window of observation defined above; this region is
        # what gets compared against the template.
        cropped = frame[bottom_right_y:top_left_y, top_left_x:bottom_right_x]

        # 6. Flip frame orientation horizontally (mirror mode feels more natural).
        frame = cv2.flip(frame, 1)

        # 7. Get number of SIFT matches between the cropped region and the template.
        matches = sift_detector(cropped, image_template)

        # 8. Display status string showing the current number of matches
        # between the cropped region and the template.
        cv2.putText(frame, str(matches), (450, 450), cv2.FONT_HERSHEY_COMPLEX, 2, (0, 255, 0), 1)

        # 9. If matches exceed our threshold then object has been detected
        if matches > threshold:
            cv2.rectangle(frame, (top_left_x, top_left_y), (bottom_right_x, bottom_right_y), (0, 255, 0), 3)
            cv2.putText(frame, 'Object Found', (50, 50), cv2.FONT_HERSHEY_COMPLEX, 2, (0, 255, 0), 2)

        cv2.imshow('Object Detector using SIFT', frame)
        if cv2.waitKey(1) == 13:  # 13 is the Enter Key
            break
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

#### Flannbased matching is quite fast, but not the most accurate. Other matching methods include:

BruteForce... Flannbased보다는 accurate하다고.

- BruteForce
- BruteForce-SL2 (not in the documentation, BUT this is the one that skips the square root!)
- BruteForce-L1
- BruteForce-Hamming
- BruteForce-Hamming(2)


## ORB 사용하기.

그런데, ORB를 써서 결과를 내보면 위의 SIFT보다는 결과가 좋지 않다. 잘못된 이미지를 detected표시할 수 있음.

& 단순 이미지를 떠나서, 만약 rotation of image같은 걸 detect하고 싶다면 CNN 형태의 solution이 필요할 수 있다. ORB 대신.

https://www.cs.toronto.edu/~guerzhoy/oriviz/crv17.pdf



In [14]:
import cv2
import numpy as np


def ORB_detector(new_image, image_template):
    """Count the ORB matches between an input image and a template.

    Args:
        new_image: BGR image; it is converted to grayscale before detection.
        image_template: template image, passed to ORB unchanged (so it is
            expected to be grayscale already).

    Returns:
        The number of cross-checked brute-force matches. Returns 0 when
        either image yields no descriptors.
    """
    image1 = cv2.cvtColor(new_image, cv2.COLOR_BGR2GRAY)

    # Create an ORB detector that retains at most 1000 features.
    orb = cv2.ORB_create(1000)

    # Detect keypoints of the input image.
    # The second argument is an optional mask; None means no mask is used.
    (kp1, des1) = orb.detectAndCompute(image1, None)

    # Detect keypoints of template image
    (kp2, des2) = orb.detectAndCompute(image_template, None)

    # A featureless image yields `None` descriptors, which would make the
    # matcher raise; report zero matches instead.
    if des1 is None or des2 is None:
        return 0

    # Create matcher.
    # Note we're no longer using FLANN-based matching: ORB descriptors are
    # binary, so Hamming distance is the metric, with cross-checking enabled.
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    # Do matching
    matches = bf.match(des1, des2)

    # Only the count is returned, so the matches are not sorted here.
    # To improve quality, sort by `distance` (smaller is better) and discard
    # the large-distance matches before counting.
    return len(matches)

cap = cv2.VideoCapture(0)

# Load our image template, this is our reference image (flag 0 = grayscale).
image_template = cv2.imread('./MasteringComputerVision-V1.03/Master OpenCV/images/box_in_scene.png', 0) 
# image_template = cv2.imread('images/kitkat.jpg', 0) 
if image_template is None:
    # cv2.imread returns None instead of raising; fail with a clear message
    # and do not leave the camera open.
    cap.release()
    raise IOError("Could not read the template image; check the path passed to cv2.imread")

# Our threshold to indicate object detection.
# For new images or lighting conditions you may need to experiment a bit.
# Note: the ORB detector keeps at most 1000 features, so 350 is essentially
# a minimum 35% match.
threshold = 350

try:
    while True:

        # Get webcam images; stop when the camera delivers no frame.
        ret, frame = cap.read()
        if not ret:
            break

        # Get height and width of webcam frame
        height, width = frame.shape[:2]

        # Define ROI box dimensions as integer pixel coordinates.
        # Note: top_left_y is the larger y value (3/4 of the height) and
        # bottom_right_y the smaller one (1/4 of the height).
        top_left_x = int(width / 3)
        top_left_y = int((height / 2) + (height / 4))
        bottom_right_x = int((width / 3) * 2)
        bottom_right_y = int((height / 2) - (height / 4))

        top = (top_left_x, top_left_y)
        bottom = (bottom_right_x, bottom_right_y)

        # Draw rectangular window for our region of interest
        cv2.rectangle(frame, top, bottom, 255, 3)

        # Crop window of observation we defined above
        cropped = frame[bottom_right_y:top_left_y, top_left_x:bottom_right_x]

        # Flip frame orientation horizontally
        frame = cv2.flip(frame, 1)

        # Get number of ORB matches
        matches = ORB_detector(cropped, image_template)

        # Display status string showing the current no. of matches
        output_string = "Matches = " + str(matches)
        cv2.putText(frame, output_string, (50, 450), cv2.FONT_HERSHEY_COMPLEX, 2, (250, 0, 150), 2)

        # If matches exceed our threshold then object has been detected
        if matches > threshold:
            cv2.rectangle(frame, top, bottom, (0, 255, 0), 3)
            cv2.putText(frame, 'Object Found', (50, 50), cv2.FONT_HERSHEY_COMPLEX, 2, (0, 255, 0), 2)

        cv2.imshow('Object Detector using ORB', frame)

        if cv2.waitKey(1) == 13:  # 13 is the Enter Key
            break
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()