# Artificial Intelligence Spring 2019, Lab 7

# This program introduces the following concepts:

*		a) Reading a stream of images from a webcamera, and displaying the video (learned in lab 6)
*		b) Skin color detection (learned in lab 6)
*		c) Background differencing
*		d) Visualizing motion history


In [1]:
import numpy as np
import cv2
from collections import deque
import os
import math

In [2]:
def mse(src, dst):
    #mean squared error
    err = np.sum(pow((src.astype("float") - dst.astype("float")),2))
    err /= float(src.shape[0] * src.shape[1])

    return err

# skin color detection

In [3]:
# Function that detects whether a pixel belongs to the skin based on RGB values
# src - the source color image
# dst - the destination grayscale image where skin pixels are colored white and the rest are colored black
def mySkinDetect(src):
    # Surveys of skin color modeling and detection techniques:
    # 1. Vezhnevets, Vladimir, Vassili Sazonov, and Alla Andreeva. "A survey on pixel-based skin color detection techniques." Proc. Graphicon. Vol. 3. 2003.
    # 2. Kakumanu, Praveen, Sokratis Makrogiannis, and Nikolaos Bourbakis. "A survey of skin-color modeling and detection methods." Pattern recognition 40.3 (2007): 1106-1122.
    dst = np.zeros((src.shape[0], src.shape[1], 1), dtype = "uint8")
    for i in range(src.shape[0]):
        for j in range(src.shape[1]):
            #b,g,r = src[i,j]
            b = int(src[i,j][0])
            g = int(src[i,j][1])
            r = int(src[i,j][2])
            if(r>95 and g>40 and b>20 and max(r,g,b)-min(r,g,b)>15 and abs(r-g)>15 and r>g and r>b):
                dst[i,j] = 255
    return dst

# frame-to-frame differencing

In [4]:
# Function that does frame differencing between the current frame and the previous frame
# prev - the previous color image
# curr - the current color image
# dst - the destination grayscale image where pixels are colored white if the corresponding pixel intensities in the current
# and previous image are not the same
def myFrameDifferencing(prev, curr):
    # For more information on operation with arrays: 
    # http://docs.opencv.org/modules/core/doc/operations_on_arrays.html
    dst = cv2.absdiff(prev, curr)
    dst = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)
    _, dst = cv2.threshold(dst, 50, 255, cv2.THRESH_BINARY)
    return dst

# motion energy templates
* example 1: the bottom row displays a cumulative binary motion energy image sequence corresponding to the frames above
![title](mh1.png)
* example 2: pixel intensity is a function of the motion history at that location, where brighter values correspond to more recent motion, three actions: sit-down, arms-raise, crouch-down
![title](mh2.png)

In [5]:
# Function that accumulates the frame differences for a certain number of pairs of frames
# mh - vector of frame difference images
# dst - the destination grayscale image to store the accumulation of the frame difference images
def myMotionEnergy(mh):
    # the window of time is 3
    mh0 = mh[0]
    mh1 = mh[1]
    mh2 = mh[2]
    dst = np.zeros((mh0.shape[0], mh0.shape[1], 1), dtype = "uint8")
    for i in range(mh0.shape[0]):
        for j in range(mh0.shape[1]):
            if mh0[i,j] == 255 or mh1[i,j] == 255 or mh2[i,j] == 255:
                dst[i,j] = 255
    return dst

In [6]:
def backgroundDifferencing(curr):
    # For more information on operation with arrays: 
    # http://docs.opencv.org/modules/core/doc/operations_on_arrays.html
    background = cv2.imread("background.png")
    dst = cv2.absdiff(curr, background)
    dst = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)
    _, dst = cv2.threshold(dst, 50, 255, cv2.THRESH_BINARY)
    return dst

In [7]:
def getEdges(src):
    
    #get edges of skin detection image
    left=src.shape[1]
    right=0
    down=0
    up=src.shape[0]
    for i in range(src.shape[0]):
        for j in range(src.shape[1]):
            if src[i][j]==255:
                if j>right:
                    right=j
                elif j<left:
                    left=j
                elif i>down:
                    down=i
                elif i<up:
                    up=i
    return left,right,up,down
#(0, 100, 0, 150)

In [8]:
def genstureSort(src,mSrc,gestures):
    #convert to skin color
    output=np.copy(src)
    black=np.copy(mSrc)
    _, black = cv2.threshold(black,0,0,cv2.THRESH_BINARY)
    src=mySkinDetect(src)
    #cv2.imshow("skinDetect",cv2.resize(src,(300,200)))
    gesture=None
    minimum=os.sys.maxint
    err=mse(black,mSrc)
    #check for motion
    if err>2500:
        left,right,up,down=getEdges(mSrc)
        cv2.putText(output,"wave", (5,90), cv2.FONT_HERSHEY_SIMPLEX, .35, (255,255,255))
        cv2.rectangle(output,(left,up),(right,down), 255, 2)
    else:
        #otherwise template matching
        for template in gestures:
            result=cv2.matchTemplate(src,template.img,cv2.TM_SQDIFF)
            minV, maxV, minL, maxL = cv2.minMaxLoc(result)
            if minV<minimum:
                minimum=minV
                gesture=template         
        #cv2.imshow("thegesture",gesture.img)
        cv2.rectangle(output,minL,(minL[0]+gesture.img.shape[1],minL[1]+gesture.img.shape[0]), 255, 2)
        cv2.putText(output,gesture.gesture[:5], (5,90), cv2.FONT_HERSHEY_SIMPLEX, .35, (255,255,255))
    return output

In [9]:
class Gesture:
    
    #Gesture Class
    def __init__(self,img):
        self.gesture=img
        if img[:5]=="peace":
            self.img=mySkinDetect(cv2.resize(cv2.imread(img),(58,55)))
        elif img[:5]=="thumb":
            self.img=mySkinDetect(cv2.resize(cv2.imread(img),(50,58)))
        else:
            self.img=mySkinDetect(cv2.resize(cv2.imread(img),(63,85)))

In [10]:
def getGestures():
    i=0
    gestures=[]
    gNumber=0
    
    #read in gestures
    while(True):
        if gNumber==0:
            if os.path.isfile("peace"+str(i)+".png"):
                gestures.append(Gesture("peace"+str(i)+".png"))
                i+=1
                continue
            else:
                gNumber=1
                i=0
        elif gNumber==1:
            if os.path.isfile("thumb"+str(i)+".png"):
                gestures.append(Gesture("thumb"+str(i)+".png"))
                i+=1
                continue
            else:
                gNumber=2
                i=0
        else:
            break
    
    return gestures

In [11]:
def main():
    # a) Reading a stream of images from a webcamera, and displaying the video
    # open the video camera no. 0
    # for more information on reading and writing video: http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html
    
    gestures=getGestures()
    cap = cv2.VideoCapture(0)
    
    #if not successful, exit program
    if not cap.isOpened():
        print("Cannot open the video cam")
        return -1

    # read a new frame from video
    success, prev_frame = cap.read()
    
    #if not successful, exit program
    if not success:
        print("Cannot read a frame from video stream")
        return -1
    cv2.namedWindow("frame", cv2.WINDOW_AUTOSIZE)
    
    prev_frame = cv2.resize(prev_frame,(150,100))
    
    fMH1 = np.zeros((prev_frame.shape[0], prev_frame.shape[1], 1), dtype = "uint8")
    fMH2 = fMH1.copy()
    fMH3 = fMH1.copy()
    myMotionHistory = deque([fMH1, fMH2, fMH3]) 
    while(True):
        #read a new frame from video
        success, curr_frame = cap.read()
        curr_frame = cv2.resize(curr_frame,(150,100))
        if not success:
            print("Cannot read a frame from video stream")
            break
    
        cv2.imshow('frame',curr_frame)

        # c) Background differencing
        frameDest = myFrameDifferencing(prev_frame, curr_frame)
        myMotionHistory.popleft()
        myMotionHistory.append(frameDest)
        myMH = myMotionEnergy(myMotionHistory)
        myMH = cv2.resize(myMH,(150,100))
        #cv2.imshow('myMotionHistory',cv2.resize(myMH,(300,200)))
        
        # output with gesture detection
        output=genstureSort(curr_frame,myMH,gestures)
        output=cv2.resize(output,(150,100))
        cv2.imshow("output",output)
        prev_frame = curr_frame
        
        # wait for 'q' key press. If 'q' key is pressed, break loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)
    return 0

In [12]:
if __name__ == "__main__":
    main()

In [37]:
i=0
gestures=[]
gNumber=0
    
while(True):
    if gNumber==0:
        if os.path.isfile("peace"+str(i)+".png"):
            gestures.append(Gesture("peace"+str(i)+".png"))
            i+=1
            continue
        else:
            gNumber=1
            i=0
    elif gNumber==1:
        if os.path.isfile("thumb"+str(i)+".png"):
            gestures.append(Gesture("thumb"+str(i)+".png"))
            i+=1
            continue
        else:
            gNumber=2
            i=0
    else:
        break
    print(i)
        
peacesym=cv2.resize(cv2.imread("thumbsup.png"),(150,100))
print(peacesym.shape)
out=genstureSort(peacesym,gestures)
cv2.imshow("output",out)
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.waitKey(1)

0
0
(100, 150, 3)


TypeError: genstureSort() takes exactly 3 arguments (2 given)

In [11]:
cap = cv2.VideoCapture(0)
    
#if not successful, exit program
if not cap.isOpened():
    print("Cannot open the video cam")

    # read a new frame from video
success, prev_frame = cap.read()
cv2.imshow("backstart",prev_frame)

In [None]:
def genstureSort(src,mSrc,gestures):
    output=np.copy(src)
    black=np.copy(mSrc)
    _, black = cv2.threshold(black,0,0,cv2.THRESH_BINARY)
    src=mySkinDetect(src)
    gesture=None
    minimum=os.sys.maxint
    err=mse(black,mSrc)
    if err>3000:
        cv2.putText(output,"wave", (5,90), cv2.FONT_HERSHEY_SIMPLEX, .35, (255,255,255))
    else:
        for template in gestures:
            result=cv2.matchTemplate(src,template.img,cv2.TM_SQDIFF)
            minV, maxV, minL, maxL = cv2.minMaxLoc(result)
            if minV<minimum:
                minimum=minV
                gesture=template         
        #cv2.imshow("thegesture",gesture.img)
        cv2.rectangle(output,minL,(minL[0]+gesture.img.shape[1],minL[1]+gesture.img.shape[0]), 255, 2)
        cv2.putText(output,gesture.gesture[:5], (5,90), cv2.FONT_HERSHEY_SIMPLEX, .35, (255,255,255))
    return output