## Creating our own dataset

In [1]:
import cv2
import numpy as np
import os

In [2]:
# Width and height, in pixels, that every saved gesture image is resized to.
image_x = 50
image_y = 50

In [3]:
def create_folder(folder_name):
    """Create ``folder_name`` if nothing exists at that path yet.

    Missing parent directories are created as well, so a nested path such as
    "gestures/5" works even when "gestures" has not been created before.

    Args:
        folder_name: Path of the directory to create.
    """
    if not os.path.exists(folder_name):
        # exist_ok=True tolerates another process creating the directory
        # between the existence check above and this call.
        os.makedirs(folder_name, exist_ok=True)

In [4]:
def store_images(g_id, total_pics=1200):
    """Record thresholded hand-gesture images from the webcam into gestures/<g_id>/.

    A live preview window shows the mirrored webcam frame with a green
    rectangle marking the region of interest, plus a second window with the
    binary mask of that region. Pressing 'c' toggles capturing on and off.
    Once capturing has been on for more than 50 frames, every frame whose
    largest contour inside the region has an area above 10000 is cropped to
    that contour's bounding box, resized to (image_x, image_y) and saved as
    "<n>.jpg". The function returns once ``total_pics`` images were saved.

    Args:
        g_id: Gesture label; ``str(g_id)`` names the sub-folder of "gestures".
        total_pics: Number of images to save before stopping.

    Raises:
        RuntimeError: If the webcam cannot be opened or a frame cannot be read.
    """
    # Region of interest: only what is inside this rectangle is analysed,
    # which keeps the rest of the scene from being picked up as a gesture.
    x, y, w, h = 300, 50, 350, 350

    # Loop invariants, built once instead of on every frame.
    skin_lower = np.array([2, 50, 60])      # lower HSV bound of the skin-colour filter
    skin_upper = np.array([25, 150, 255])   # upper HSV bound of the skin-colour filter
    kernel_square = np.ones((5, 5), np.uint8)

    out_dir = "gestures/" + str(g_id)

    cap = cv2.VideoCapture(0)  # Open webcam
    if not cap.isOpened():
        cap.release()
        raise RuntimeError("Could not open webcam (device 0)")

    pic_no = 0
    capturing = False
    frames = 0  # frames elapsed since capturing was switched on

    try:
        # Create the parent first so this works on a fresh checkout too.
        create_folder("gestures")
        create_folder(out_dir)

        while pic_no < total_pics:
            grabbed, frame = cap.read()
            if not grabbed:
                raise RuntimeError("Failed to read a frame from the webcam")

            # Flip horizontally so the preview behaves like a mirror.
            frame = cv2.flip(frame, 1)

            # Keep only pixels whose HSV value falls inside the skin-colour range.
            hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            skin_mask = cv2.inRange(hsv, skin_lower, skin_upper)
            skin_only = cv2.bitwise_and(frame, frame, mask=skin_mask)
            gray = cv2.cvtColor(skin_only, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (5, 5), 0)

            # Dilation grows the bright (hand) regions; the morphological
            # close that follows fills the small dark holes left inside them.
            dilated = cv2.dilate(blurred, kernel_square, iterations=2)
            closed = cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, kernel_square)

            # Binarise to black & white, then crop to the region of interest.
            _, thresh = cv2.threshold(closed, 30, 255, cv2.THRESH_BINARY)
            thresh = thresh[y:y + h, x:x + w]

            # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
            # (contours, hierarchy); index -2 is the contour list in both.
            contours = cv2.findContours(
                thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE
            )[-2]

            # Only the contours of the *current* frame are considered, so a
            # frame without any contour can neither crash nor reuse old data.
            if frames > 50 and len(contours) > 0:
                contour = max(contours, key=cv2.contourArea)
                if cv2.contourArea(contour) > 10000:
                    # Smallest upright rectangle that covers the hand.
                    x1, y1, w1, h1 = cv2.boundingRect(contour)
                    pic_no += 1
                    save_img = cv2.resize(
                        thresh[y1:y1 + h1, x1:x1 + w1], (image_x, image_y)
                    )
                    cv2.putText(frame, "Capturing...", (30, 60),
                                cv2.FONT_HERSHEY_TRIPLEX, 2, (127, 255, 255))
                    cv2.imwrite(os.path.join(out_dir, str(pic_no) + ".jpg"), save_img)

            # Preview: region-of-interest outline and running image counter.
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, str(pic_no), (30, 400),
                        cv2.FONT_HERSHEY_TRIPLEX, 1.5, (127, 127, 255))
            cv2.imshow("Capturing gesture", frame)
            cv2.imshow("thresh", thresh)

            # Mask to the low byte; some platforms set higher bits in the key code.
            keypress = cv2.waitKey(1) & 0xFF
            if keypress == ord('c'):
                capturing = not capturing
                if not capturing:
                    # Switching off resets the warm-up counter.
                    frames = 0
            if capturing:
                frames += 1
    finally:
        # Always free the camera and close the preview windows, including
        # when the loop is left through an exception or a kernel interrupt.
        cap.release()
        cv2.destroyAllWindows()

In [5]:
# Ask which gesture is being recorded, then start the webcam capture session.
print("Press 'c' to start recording")
# Strip stray whitespace so " 5" and "5" end up in the same folder.
g_id = input("Enter gesture no: ").strip()
if not g_id:
    # An empty label would write the images straight into "gestures/" and
    # mix them with the per-gesture sub-folders.
    raise ValueError("Gesture no must not be empty")
store_images(g_id)

Press 'c' to start recording
Enter gesture no: 5
