In [1]:
'''
CS585 Assignment 2
CS585 Image and Video Computing
Lifu Zhang, Hin Lui Shum
--------------
This program implements the following:
	a) Reading a stream of images from a webcam, and displaying the video
	b) Skin color detection
	c) Hand gesture comparison
	d) Graphical response upon detection
--------------
'''
import cv2
import sys
import numpy as np

In [2]:
# templates for different hand gestures
# Each file is loaded as a single-channel grayscale image; the i-th template
# is labelled with template_id[i] and drawn with color[i].
template_paths = ['./yeah1.jpg', './fist1.jpg', './palm1.jpg', './thumb1.jpg']
template_id = ['Scissors','Rock','Paper','Good']

templates = []
for template_path in template_paths:
    template_img = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread does not raise on a missing/unreadable file, it returns None.
    # Fail here with the offending path instead of crashing later on `.shape`.
    if template_img is None:
        raise FileNotFoundError("Cannot read template image: " + template_path)
    templates.append(template_img)

# colors for drawing rectangles (one 3-component color per template)
color = [[255,0,0],[0,255,0],[255,255,0],[0,255,255]]

In [3]:
# Function that detects whether a saved hand gesture is detected
# param binary The source binary image from skin detection
# param src The source color image from webcam

def gesture_matching(binary, src):
    '''
    Match every gesture template against the binary image and annotate src.

    For each entry of the module-level `templates` list the normalized
    cross-correlation map (cv2.TM_CCOEFF_NORMED) is computed. Peaks whose score
    reaches the threshold are taken one at a time, strongest first; after a
    peak is reported, a window of roughly the template size centred on it is
    zeroed in the score map so that the same hand is not reported again.

    A rectangle in color[i] and the label template_id[i] are drawn on `src`
    for each reported peak.

    Returns `src`, which is drawn on in place (the same object is returned).
    '''
    # minimum correlation score that counts as a detection
    threshold = 0.7

    # loop through the available templates
    for i, template in enumerate(templates):
        w, h = template.shape[::-1]
        # Searching and finding the template in the binary screen
        res = cv2.matchTemplate(binary, template, cv2.TM_CCOEFF_NORMED)

        # There can never be more detections than scores above the threshold,
        # so this count is a safe upper bound for the peak-picking loop.
        max_detections = np.count_nonzero(res >= threshold)
        for _ in range(max_detections):
            _, max_val, _, max_loc = cv2.minMaxLoc(res)
            # Check the score of the peak we are about to draw (not the
            # previous one), so no box is drawn for a below-threshold peak.
            if max_val < threshold:
                break
            x, y = max_loc

            # Suppress the neighbourhood of this peak. The start indices are
            # clamped at 0: a negative start would wrap around and select an
            # empty slice, leaving the peak in place to be found again.
            res[max(0, y - h//2):y + h//2 + 1, max(0, x - w//2):x + w//2 + 1] = 0

            # draw rectangle and print text about the detected gesture
            cv2.rectangle(src, (x, y), (x + w + 1, y + h + 1), color[i])
            cv2.putText(src, str(template_id[i]), (x + w + 1, y + h + 1),
                        cv2.FONT_HERSHEY_PLAIN, 1.0, (255,255,255))

    return src
    

In [4]:
# Function that classifies each pixel as skin / non-skin from its three channel values
# param src The source color image
def my_skin_detect(src):
    '''
    Build a binary skin mask using fixed per-channel threshold rules.

    Channels 0, 1, 2 of src are compared against 20, 40 and 95 respectively;
    this matches blue, green, red if src is in OpenCV's BGR order (assumed,
    not checked here). A pixel is marked as skin only when every rule holds.

    Returns a uint8 array with the spatial shape of src: 255 where all rules
    hold, 0 elsewhere.

    Surveys of skin color modeling and detection techniques:
    Vezhnevets, Vladimir, Vassili Sazonov, and Alla Andreeva. "A survey on pixel-based skin color detection techniques." Proc. Graphicon. Vol. 3. 2003.
    Kakumanu, Praveen, Sokratis Makrogiannis, and Nikolaos Bourbakis. "A survey of skin-color modeling and detection methods." Pattern recognition 40.3 (2007): 1106-1122.
    '''
    ch0 = src[:, :, 0]
    ch1 = src[:, :, 1]
    ch2 = src[:, :, 2]

    # difference between the brightest and darkest channel of every pixel
    channel_spread = src.max(axis=-1) - src.min(axis=-1)

    rules = (
        ch0 > 20,
        ch1 > 40,
        ch2 > 95,
        channel_spread > 15,
        abs(ch2 - ch1) > 15,
        ch2 > ch1,
        ch2 > ch0,
    )
    is_skin = np.logical_and.reduce(rules)

    skin_mask = np.zeros(np.shape(src)[:-1], dtype=np.uint8)
    skin_mask[is_skin] = 255
    return skin_mask

In [5]:
# ----------------
# a) Reading a stream of images from a webcam, and displaying the video
# ----------------
# For more information on reading and writing video: http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html
# open the video camera no. 0
cap = cv2.VideoCapture(0)

# if not successful, exit program
if not cap.isOpened():
    print("Cannot open the video cam")
    sys.exit()

# From here on the camera is held open. Everything runs inside try/finally so
# the capture device and the windows are always released, even when a read
# fails or an OpenCV call raises (otherwise the camera stays locked by the
# notebook kernel until it is restarted).
try:
    # create a window called "Original" for source webcam
    cv2.namedWindow("Original", cv2.WINDOW_AUTOSIZE)

    # read a first frame from video; stop here if it fails, since frame0
    # would not be a valid image for imshow / np.shape below
    ret, frame0 = cap.read()
    if not ret:
        print("Cannot read a frame from video stream")
        sys.exit()

    # show the frame in the "Original" window
    cv2.imshow("Original", frame0)

    # create windows for skin detection and gesture detection
    cv2.namedWindow("Skin", cv2.WINDOW_AUTOSIZE)
    cv2.namedWindow("Frame", cv2.WINDOW_AUTOSIZE)

    # NOTE(review): the motion history is built but never read in the code
    # visible here, and every list entry refers to the same fMH1 array.
    # Kept unchanged in case other cells rely on these names - confirm.
    len_history = 7
    my_motion_history = []
    fMH1 = np.zeros(np.shape(frame0)[:-1], dtype=np.uint8)
    for i in range(len_history):
        my_motion_history.append(fMH1)

    while True:
        # read a new frame from video
        ret, frame = cap.read()
        # if not successful, break loop
        if not ret:
            print("Cannot read a frame from video stream")
            break

        cv2.imshow("Original", frame)

        # ----------------
        # b) Skin color detection
        # ----------------

        frame_dst = my_skin_detect(frame)
        cv2.imshow("Skin", frame_dst)

        # ---------------
        # c) hand gesture detection
        # d) Graphical response upon detection
        # ---------------

        match_template = gesture_matching(frame_dst, frame)
        cv2.imshow("Frame", match_template)

        # wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop.
        # Only the low byte of the waitKey result is compared with the key code.
        if cv2.waitKey(30) & 0xFF == 27:
            print("esc key is pressed by user")
            break
finally:
    # When everything done (or on any error), release the capture
    cap.release()
    cv2.destroyAllWindows()

esc key is pressed by user
