# PA2: Gesture Recognition Project

## Requirements

In this assignment, you are asked to design and implement algorithms that recognize at least three hand shapes in a video stream (such as making a fist, thumbs up, thumbs down, pointing with an index finger etc.) or gestures (such as waving with one or both hands, swinging, drawing something in the air etc.). You must design a graphical display that responds to the recognition of the hand shapes or gestures and write a report that includes quantitative results (e.g., a confusion matrix, ROC analysis, etc.). 
*For this assignment, we will use "recognition" and "detection" interchangeably for gestures. We also use the term "gesture" to represent "hand shape."

*To simplify your task, you may want to select gestures that are sufficiently "different." For example, "scissor," "paper," and "rock" are three clearly different hand shapes used in a well-known game. Your gestures, however, should not only include pointing with different numbers of fingers.

*Your gesture should include at least one static hand shape and one dynamic gesture.

For your system, you may want to use some of the following computer vision techniques that we discussed in the computer vision lectures and labs:

1. how to access video camera input with OpenCV
2. background differencing: D(x,y,t) = |I(x,y,t)-I(x,y,0)|
    - https://docs.opencv.org/3.0-beta/modules/imgproc/doc/motion_analysis_and_object_tracking.html
3. frame-to-frame differencing: D’(x,y,t) = |I(x,y,t)-I(x,y,t-1)|
4. template matching (e.g., create templates of a closed hand and an open hand)
5. motion energy templates (union of binary difference images over a window of time)
6. skin-color detection (e.g., thresholding red and green pixel values)
7. horizontal and vertical projections to find bounding boxes of "movement blobs" or "skin-color blobs"
8. size, position, and orientation of "movement blobs" or "skin-color blobs"
9. circularity of "movement blobs" or "skin-color blobs"
10. tracking the position and orientation of moving objects

In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
from collections import deque

In [None]:
def _load_template(path):
    """Load a grayscale template image and binarize it.

    Args:
        path: File path of the template image.

    Returns:
        The thresholded (0/255) single-channel template, or None when
        ``cv2.imread`` could not read the file.
    """
    template = cv2.imread(path, 0)
    if template is None:
        # cv2.imread reports an unreadable file by returning None, not raising.
        return None
    # cv2.threshold returns (retval, image); the image must be captured,
    # otherwise the template is left un-thresholded.
    _, template = cv2.threshold(template, 20, 255, cv2.THRESH_BINARY)
    return template


def _classify_hand_shape(skin_mask, templates, min_score=0.3):
    """Pick the template that best matches the skin mask.

    Args:
        skin_mask: Binary skin mask produced by ``mySkinDetect``.
        templates: Sequence of ``(label, template, color)`` tuples.
        min_score: Score a template must exceed to be reported.

    Returns:
        ``(label, color)`` of the highest-scoring template above
        ``min_score``, or ``("sign not detected", (0, 0, 0))`` when none
        qualifies.
    """
    best_label = "sign not detected"
    best_color = (0, 0, 0)
    best_score = min_score
    for label, template, color in templates:
        scores = cv2.matchTemplate(skin_mask, template, cv2.TM_CCOEFF_NORMED)
        _, score, _, _ = cv2.minMaxLoc(scores)
        # Strict comparison: on a tie the earlier template keeps the label.
        if score > best_score:
            best_score = score
            best_label = label
            best_color = color
    return best_label, best_color


def main():
    """Run the live hand-shape recognition demo on camera 0.

    Each frame is resized to 130x130 and mirrored, then run through skin
    detection, template matching against the "okay", "cool" and "hi"
    templates, frame differencing and a three-frame motion energy image.
    Each result is shown in its own window. Press ``q`` to quit.

    Returns:
        0 when the loop ends (``q`` pressed or the stream stops delivering
        frames), -1 when the camera cannot be opened, the first frame cannot
        be read, or a template image cannot be loaded.
    """
    frame_size = (130, 130)
    template_specs = (
        ("okay", "okay.png", (255, 0, 0)),
        ("cool", "thumb.png", (0, 0, 255)),
        ("hi", "hi.png", (0, 255, 0)),
    )

    cap = cv2.VideoCapture(0)
    try:
        # if not successful, exit program
        if not cap.isOpened():
            print("Cannot open the video cam")
            return -1

        success, prev_frame = cap.read()
        if not success:
            print("Cannot read a frame from video stream")
            return -1

        templates = []
        for label, path, color in template_specs:
            template = _load_template(path)
            if template is None:
                print("Cannot read template image " + path)
                return -1
            templates.append((label, template, color))

        # create the display windows
        cv2.namedWindow("MyVideo0", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("MyVideo0", prev_frame)
        cv2.namedWindow("FrameDifferencing", cv2.WINDOW_AUTOSIZE)
        cv2.namedWindow("MotionHistory", cv2.WINDOW_AUTOSIZE)
        cv2.namedWindow("SkinDetection", cv2.WINDOW_AUTOSIZE)

        # Mirror the first frame like every later frame so the first
        # frame difference compares like with like.
        prev_frame = cv2.flip(cv2.resize(prev_frame, frame_size), 1)

        # Sliding window of the last three difference images; maxlen makes
        # append() drop the oldest entry automatically.
        blank = np.zeros(
            (prev_frame.shape[0], prev_frame.shape[1], 1), dtype="uint8"
        )
        motion_history = deque((blank.copy() for _ in range(3)), maxlen=3)

        font = cv2.FONT_HERSHEY_SIMPLEX

        while True:
            success, curr_frame = cap.read()
            # Check before touching the frame: a failed read yields no image.
            if not success:
                print("Cannot read a frame from video stream")
                break
            curr_frame = cv2.flip(cv2.resize(curr_frame, frame_size), 1)

            # b) Skin color detection
            skin_mask = mySkinDetect(curr_frame)
            cv2.imshow('SkinDetection', skin_mask)

            # Static hand-shape recognition by template matching.
            label, color = _classify_hand_shape(skin_mask, templates)

            # TODO: dynamic (motion) gesture detection is not implemented.

            # Draw on a copy so the label text never leaks into the frame
            # differencing below.
            annotated = curr_frame.copy()
            cv2.putText(annotated, label, (50, 60), font, 2, color, 3,
                        cv2.LINE_AA)
            cv2.imshow("MyVideo0", annotated)

            # c) Frame-to-frame differencing
            dest = myFrameDifferencing(prev_frame, curr_frame)
            cv2.imshow("FrameDifferencing", dest)

            # d) Motion History
            motion_history.append(dest)
            cv2.imshow('MotionHistory', myMotionEnergy(motion_history))

            prev_frame = curr_frame

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        return 0
    finally:
        # Runs on every exit path so the camera is always released.
        cap.release()
        cv2.destroyAllWindows()
        cv2.waitKey(1)


        
        
        
        

1. find and segment the hand region 
    - separate foreground from background (running averages)
    - 
2. count the number of fingers in the region 

In [None]:
def myFrameDifferencing(prev, curr):
    """Return a binary mask of the pixels that changed between two frames.

    Args:
        prev: Earlier frame; converted with ``cv2.COLOR_BGR2GRAY``, so a
            BGR image is expected.
        curr: Later frame, same size and type as ``prev``.

    Returns:
        Single-channel image that is 255 where the grayscale absolute
        difference exceeds 50 and 0 elsewhere.
    """
    gray_delta = cv2.cvtColor(cv2.absdiff(curr, prev), cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (retval, image); only the image is needed.
    return cv2.threshold(gray_delta, 50, 255, cv2.THRESH_BINARY)[1]

In [None]:
def myMotionEnergy(mh):
    """Build a motion energy image: the union of binary difference images.

    Args:
        mh: Non-empty sequence (e.g. a deque) of single-channel difference
            images of equal height and width, each shaped ``(h, w)`` or
            ``(h, w, 1)``. Every frame in the sequence is used, so the
            window length is simply ``len(mh)``.

    Returns:
        ``uint8`` array of shape ``(h, w, 1)`` that is 255 wherever at least
        one frame equals 255 and 0 elsewhere.

    Raises:
        IndexError: If ``mh`` is empty.
    """
    frames = [np.asarray(frame) for frame in mh]
    height, width = frames[0].shape[:2]

    moving = np.zeros((height, width, 1), dtype=bool)
    for frame in frames:
        # Reshape so (h, w) and (h, w, 1) frames can be mixed freely.
        moving |= frame.reshape(height, width, 1) == 255

    dst = np.zeros((height, width, 1), dtype="uint8")
    dst[moving] = 255
    return dst

In [None]:
def mySkinDetect(src):
    """Return a binary mask of skin-colored pixels using an RGB rule.

    A pixel counts as skin when all of the following hold:
    ``r > 95``, ``g > 40``, ``b > 20``, ``max(r, g, b) - min(r, g, b) > 15``,
    ``abs(r - g) > 15``, ``r > g`` and ``r > b``.

    Args:
        src: Image array of shape ``(h, w, c)`` with ``c >= 3`` whose first
            three channels are read as blue, green, red in that order.

    Returns:
        ``uint8`` array of shape ``(h, w, 1)`` that is 255 for skin pixels
        and 0 elsewhere.
    """
    # Widen to a signed type so r - g and max - min cannot wrap around the
    # way uint8 arithmetic would.
    bgr = np.asarray(src)[:, :, :3].astype(np.int32)
    b = bgr[:, :, 0]
    g = bgr[:, :, 1]
    r = bgr[:, :, 2]
    spread = bgr.max(axis=2) - bgr.min(axis=2)

    skin = (
        (r > 95) & (g > 40) & (b > 20)
        & (spread > 15)
        & (np.abs(r - g) > 15)
        & (r > g) & (r > b)
    )

    dst = np.zeros((bgr.shape[0], bgr.shape[1], 1), dtype="uint8")
    dst[skin] = 255
    return dst

In [None]:
# Start the demo only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

## Results Analysis