In [1]:
import cv2
import numpy as np

from utils import displayImage, snap

In [2]:
# Preprocessing the template Image


def templatePrep(imgP, e1, d1, e2, d2, size=(160, 120)):
    """Build a Canny edge-map template from an image file.

    The image is converted to grayscale and Otsu-thresholded into a binary
    mask; the mask is dilated ``d1`` times with a 5x5 kernel; the masked
    grayscale image is shrunk to ``size`` and its Canny edges are returned.

    Parameters
    ----------
    imgP : str
        Path of the template image, read with ``cv2.imread``.
    e1, e2, d2 : any
        Currently unused. They belonged to erosion / second-pass steps that
        are disabled; kept so existing callers keep working.
    d1 : int
        Number of dilation iterations applied to the Otsu mask.
    size : tuple of int, optional
        ``(width, height)`` of the returned template. The default 160x120
        was chosen for a 640x480 camera; change it to suit your camera.

    Returns
    -------
    numpy.ndarray
        Edge map produced by ``cv2.Canny`` on the resized, masked image.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot load ``imgP``.
    """
    img = cv2.imread(imgP)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError('Could not read template image: {}'.format(imgP))

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, threshImage = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    mask = (threshImage > 0).astype('uint8')
    kernel = np.ones((5, 5), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=d1)

    # Shrink first, then extract edges from the *resized* image. Previously the
    # resized image was discarded and Canny ran on the full-resolution input.
    res = cv2.resize(gray * mask, size, interpolation=cv2.INTER_AREA)
    return cv2.Canny(res, 40, 60, L2gradient=True)

# One edge-map template per gesture, each prepared with 12 dilation passes.
hand, fist, thumb = [
    templatePrep(path, None, 12, None, None)
    for path in ('./templates/hand.png', './templates/fist.png', './templates/thumbsUp.png')
]

# Stacked in the order hand, fist, thumb; positions in this array are the
# template indices used to look up a drawing colour below.
templates = np.array([hand, fist, thumb])

# Rectangle colour per template index (presumably BGR, since frames are
# handled as BGR elsewhere in this notebook -- confirm).
colors = dict(enumerate([(0, 0, 255), (0, 255, 0), (255, 0, 0)]))

In [3]:
# Visual sanity check: pass each prepared template to displayImage.
# displayImage is imported from the project-local `utils` module; how it
# renders the image is not visible from this notebook.
displayImage(thumb)
displayImage(fist)
displayImage(hand)

In [4]:
def matchTemplate(img, temp):
    """Find where ``temp`` matches ``img`` best.

    Runs ``cv2.matchTemplate`` with the ``TM_CCOEFF_NORMED`` method and picks
    the maximum of the resulting response map.

    Parameters
    ----------
    img : numpy.ndarray
        Image to search in.
    temp : numpy.ndarray
        Template to search for.

    Returns
    -------
    tuple
        ``(max_loc, max_val)`` as reported by ``cv2.minMaxLoc``: the location
        of the highest response and the response value there.
    """
    response = cv2.matchTemplate(img, temp, cv2.TM_CCOEFF_NORMED)
    _, best_score, _, best_loc = cv2.minMaxLoc(response)
    return best_loc, best_score

In [None]:
# Live detection: grab frames from camera 0, mask out the background using the
# first frame as reference, match every template against a small image pyramid
# and draw a rectangle around the best match. Press 'q' to quit.
cap = cv2.VideoCapture(0)

ret, ref = cap.read()
if not ret:
    # Without a reference frame nothing below can work; free the device first.
    cap.release()
    raise RuntimeError('Could not read the reference frame from the camera')
ref = cv2.cvtColor(ref, cv2.COLOR_BGR2GRAY) # Keep the initial frame as reference to remove some background elements
kernel = np.ones((3,3),np.uint8)
# NOTE(review): templatePrep returns Canny edge maps, but the frames matched
# below are masked grayscale images and cannyEdge is never applied to them.
# Confirm whether the pyramid levels were meant to be edge maps as well.
cannyEdge = lambda x: cv2.Canny(x, 40, 60, L2gradient=True)


try:
    while True:

        ret, frame = cap.read()
        if not ret:
            # The camera stopped delivering frames; leave the loop cleanly.
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        diff = cv2.absdiff(ref, gray)  # Difference between current frame and the reference frame

        # First pass: Otsu threshold on the difference image, then clean the mask.
        thresh, threshImage = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        mask = (threshImage > 0).astype('uint8')
        mask = cv2.erode(mask, kernel, iterations=5)
        mask = cv2.dilate(mask, kernel, iterations=10)

        # Second pass: Otsu threshold on the masked grayscale image.
        thresh, threshImage = cv2.threshold(mask*gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        mask = (threshImage > 0).astype('uint8')
        mask = cv2.erode(mask, kernel, iterations=5)
        mask = cv2.dilate(mask, kernel, iterations=5)

        img = mask*gray # masking the original gray image to completely remove background
        pyr = [img] # pyramid: full size, then 1/2 and 1/4 scale
        for i in range(2):
            pyr.append(cv2.resize(img, None, fx=0.5**(i+1), fy=0.5**(i+1), interpolation=cv2.INTER_AREA))

        # Score every (template, pyramid level) pair, tracking both indices
        # explicitly instead of decoding them from a flattened position.
        candidates = []
        for tIndex, temp in enumerate(templates):
            for pyIndex, level in enumerate(pyr):
                if level.shape[0] < temp.shape[0] or level.shape[1] < temp.shape[1]:
                    # cv2.matchTemplate needs the image to be at least template-sized.
                    continue
                loc, val = matchTemplate(level, temp)
                candidates.append((val, loc, tIndex, pyIndex))

        if candidates:
            # max() keeps the first of equal scores, same as vals.index(max(vals)).
            val, loc, tIndex, pyIndex = max(candidates, key=lambda c: c[0])

            # Box size comes from the matched template; the location was found
            # at pyramid level pyIndex, so scale back to full-frame coordinates.
            tH, tW = templates[tIndex].shape[:2]
            scale = 2**pyIndex
            # Plain Python ints: some OpenCV builds reject NumPy integer points.
            tLeft = (int(loc[0]*scale), int(loc[1]*scale))
            bRight = (int((loc[0]+tW)*scale), int((loc[1]+tH)*scale))

            cv2.rectangle(frame, tLeft, bRight, colors[tIndex], 2)

        cv2.imshow('detections', frame)
        cv2.imshow('img', img)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()