In [1]:
import cv2
import numpy as np

from utils import displayImage, snap

In [2]:
# Preprocessing the template Image


def templatePrep(imgP, e1, d1, e2, d2, size=(160, 120)):
    """Turn a template image file into a small Canny edge map for matching.

    The image is converted to grayscale and segmented with Otsu's threshold.
    The resulting binary mask is dilated ``d1`` times with a 5x5 kernel, the
    grayscale image is multiplied by that mask, resized to ``size`` and
    finally run through Canny edge detection.

    Parameters
    ----------
    imgP : str
        Path of the template image on disk.
    e1, e2, d2 :
        Currently unused. They are kept so that existing calls keep working;
        the erosion / second-pass morphology steps they were meant to
        control are disabled.
    d1 : int
        Number of dilation iterations applied to the Otsu mask.
    size : tuple of int, optional
        ``(width, height)`` of the returned template. The default 160x120
        was chosen for a camera resolution of 640x480; change the template
        size according to your camera.

    Returns
    -------
    numpy.ndarray
        Edge image produced by ``cv2.Canny`` on the resized, masked template.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` could not load ``imgP``.
    """
    img = cv2.imread(imgP)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise FileNotFoundError(f"Could not read template image: {imgP!r}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, threshImage = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # 0/1 mask so that multiplying keeps the gray values inside the mask.
    mask = (threshImage > 0).astype('uint8')
    kernel = np.ones((5, 5), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=d1)

    res = cv2.resize(gray * mask, tuple(size), interpolation=cv2.INTER_AREA)
    return cv2.Canny(res, 40, 60, L2gradient=True)

# Edge templates for the four supported gestures; each Otsu mask is dilated
# 12 times before the edges are extracted.
hand, fist, thumb, yo = [
    templatePrep(templatePath, None, 12, None, None)
    for templatePath in (
        './templates/hand.png',
        './templates/fist.png',
        './templates/thumbsUp.png',
        './templates/yo.png',
    )
]

# Stacked so that the first axis indexes the gesture, in the same order as
# the `colors` and `km` lookups below.
templates = np.array([hand, fist, thumb, yo])

# Annotation colour per template index.
colors = dict(enumerate([
    (0, 0, 255),
    (0, 255, 0),
    (255, 0, 0),
    (0, 255, 255),
]))

# Text label per template index.
km = dict(enumerate(["hand", "fist", "thumbs up", "yo"]))

In [3]:
# Preview the four preprocessed templates (the Canny edge maps returned by
# templatePrep). displayImage comes from the project's utils module;
# presumably it renders the image it is given -- confirm in utils.
displayImage(thumb)
displayImage(fist)
displayImage(hand)
displayImage(yo)

In [4]:
def matchTemplate(img, temp):
    """Locate the strongest normalised-correlation match of ``temp`` in ``img``.

    Runs ``cv2.matchTemplate`` with ``cv2.TM_CCOEFF_NORMED`` and returns
    ``(location, score)`` for the maximum of the response map, as reported
    by ``cv2.minMaxLoc``.
    """
    response = cv2.matchTemplate(img, temp, cv2.TM_CCOEFF_NORMED)
    _, bestScore, _, bestLoc = cv2.minMaxLoc(response)
    return bestLoc, bestScore

In [5]:
def getBestMatch(frame, refEdge, thresh):
    """Find the best-matching gesture template in ``frame`` and annotate it.

    The frame's edge map is compared with ``refEdge``; only edge pixels that
    differ from the reference are kept. Every template is then matched
    against a three-level image pyramid (full, half and quarter resolution)
    and the single best (template, level) score is selected. If that score
    reaches ``thresh``, a labelled rectangle is drawn on ``frame`` in place.

    This function reads the module-level names ``cannyEdge``, ``templates``,
    ``colors``, ``km`` and ``matchTemplate``; all of them must be defined
    before it is called.

    Parameters
    ----------
    frame : numpy.ndarray
        BGR frame; it is modified in place when a detection is drawn.
    refEdge : numpy.ndarray
        Edge map of the reference (background) frame, compared element-wise
        with the edge map of ``frame``.
    thresh : float
        Minimum score required to draw a detection. Passing ``np.inf``
        disables drawing and just reports the best score.

    Returns
    -------
    tuple
        ``(best_score, img)`` where ``img`` is the edge image with the
        reference edges removed.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    grayEdge = cannyEdge(gray)

    # Zero out every edge pixel that is identical in the reference edge map.
    img = grayEdge * (grayEdge != refEdge)

    # Image pyramid: the original plus two successively halved copies.
    pyr = [img]
    for i in range(2):
        factor = 0.5 ** (i + 1)
        pyr.append(cv2.resize(img, None, fx=factor, fy=factor, interpolation=cv2.INTER_AREA))

    nTemplates, nLevels = len(templates), len(pyr)
    locs = [[None] * nLevels for _ in range(nTemplates)]
    vals = np.zeros((nTemplates, nLevels))
    for i in range(nTemplates):
        for j in range(nLevels):
            locs[i][j], vals[i, j] = matchTemplate(pyr[j], templates[i])

    # Pass argmax as a scalar so unravel_index returns scalars; converting
    # 1-element arrays with int() is deprecated in recent NumPy releases.
    tIdx, level = (int(k) for k in np.unravel_index(np.argmax(vals), vals.shape))
    best = vals[tIdx, level]

    if best >= thresh:
        # Box size comes from the matched template rather than a constant.
        tH, tW = templates[tIdx].shape[:2]
        # The match was found in an image shrunk by 2**level, so scale the
        # box back up to full-frame coordinates.
        scale = 2 ** level
        x, y = locs[tIdx][level]
        # Plain Python ints: some OpenCV builds reject NumPy integer points.
        tLeft = (int(x) * scale, int(y) * scale)
        bRight = (int(x + tW) * scale, int(y + tH) * scale)

        cv2.rectangle(frame, tLeft, bRight, colors[tIdx], 2)
        cv2.putText(frame, km[tIdx], tLeft, cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors[tIdx], 2)
    return best, img


In [8]:
# Live demo: grab frames from the default camera, annotate the best gesture
# match and record the annotated frame side by side with the edge image.
cap = cv2.VideoCapture(0)
out = None  # video writer; created once the frame size is known
kernel = np.ones((2,2),np.uint8)  # NOTE(review): not used in the visible code


def cannyEdge(x):
    """Canny edge detector with the thresholds used for the templates."""
    return cv2.Canny(x, 40, 60, L2gradient=True)


try:
    # Keep the initial frame as reference to remove some background elements.
    ok, ref = cap.read()
    if not ok:
        raise RuntimeError("Could not read a reference frame from camera 0")
    ref = cv2.medianBlur(ref, 5)
    ref = cv2.cvtColor(ref, cv2.COLOR_BGR2GRAY)

    refEdge = cannyEdge(ref)
    displayImage(refEdge)

    # Calibration: an infinite threshold suppresses drawing, and the best
    # score of this frame becomes the detection threshold for the loop.
    ok, tFrame = cap.read()
    if not ok:
        raise RuntimeError("Could not read a calibration frame from camera 0")
    tFrame = cv2.medianBlur(tFrame, 5)
    thresh, _ = getBestMatch(tFrame, refEdge, np.inf)

    # Each output frame is the camera frame and the edge image side by side,
    # so the writer must be twice the frame width.
    frameH, frameW = tFrame.shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output.avi', fourcc, 30, (2 * frameW, frameH))

    while True:
        ret, frame = cap.read()
        if not ret:
            # Camera stopped delivering frames; finish cleanly.
            break
        frame = cv2.medianBlur(frame, 5)

        _, img = getBestMatch(frame, refEdge, thresh)

        cimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        outF = np.hstack((frame, cimg))
        out.write(outF)
        cv2.imshow('out', outF)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera, finalise the video file and close the window,
    # even when an error interrupts the loop.
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()