In [1]:
import cv2
import os
import re
import time
import numpy as np

In [2]:
# Name of this notebook, used to derive the folder the data is stored next to
notebook_name = "Image Collection.ipynb"

# A bare file name has no directory component, so this evaluates to the empty
# string and every path built from it is relative to the working directory
script_dir = os.path.split(notebook_name)[0]

# Root folder that receives the collected images
data_dir = os.path.join(script_dir, "Data")

In [3]:
# Gestures to collect; each one gets its own subfolder inside the data folder
gestures = ["1", "2", "3", "A", "B"]

# Target number of images to end up with for every gesture
desired_amount = {
    "1": 200,
    "2": 200,
    "3": 200,
    "A": 100,
    "B": 100,
}

# Number of images already present per gesture; starts at zero for all of them
current_amount = dict.fromkeys(gestures, 0)

In [4]:
# Create the Data folder if it does not exist yet
os.makedirs(data_dir, exist_ok=True)

for gesture in gestures:

    # Create a subfolder per each gesture if it does not exist yet
    gesture_dir = os.path.join(data_dir, gesture)
    os.makedirs(gesture_dir, exist_ok=True)

    # Only files named "<gesture>_<run>.<extension>" take part in the numbering;
    # anything else found in the folder (e.g. OS thumbnail caches) is ignored
    # instead of aborting the cell while parsing its name
    name_pattern = re.compile(rf"{re.escape(gesture)}_(\d+)\.([^.]+)")
    numbered = []
    for file_name in os.listdir(gesture_dir):
        match = name_pattern.fullmatch(file_name)
        if match:
            numbered.append((int(match.group(1)), match.group(2), file_name))

    # Sort numerically by run number ("A_10.jpg" must come after "A_9.jpg")
    numbered.sort(key=lambda entry: entry[0])

    # Make sure that the ordering is continuous and shift files down if any
    # skips are present
    # (e.g. "A_1.jpg", "A_2.jpg", ... instead of "A_1.jpg", "A_3.jpg", ...)
    # The first file keeps its run number; every following file has to be
    # exactly one above its predecessor
    last_run = 0
    for position, (run, extension, file_name) in enumerate(numbered):
        if position == 0:
            last_run = run
            continue

        expected_run = last_run + 1
        if run != expected_run:
            new_name = gesture + "_" + str(expected_run) + "." + extension
            os.rename(os.path.join(gesture_dir, file_name),
                      os.path.join(gesture_dir, new_name))
        last_run = expected_run

    # The highest run number after the shift is where the collection resumes
    # (0 if the subfolder holds no images yet)
    current_amount[gesture] = last_run

paths = {gesture: os.path.join(data_dir, gesture) for gesture in gestures}

In [5]:
# Pixel bounds of the region that is cropped from the web camera image
rect_left, rect_right = 225, 425
rect_top, rect_bottom = 275, 475

# Corners as (x, y) points: top-left, top-right, bottom-left, bottom-right
rect = [
    (rect_left, rect_top),
    (rect_right, rect_top),
    (rect_left, rect_bottom),
    (rect_right, rect_bottom),
]

In [184]:
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)

# Encapsulate the whole process so that the camera and all windows are always
# released, no matter whether the run completes, is aborted or raises
try:

    # Establish the windows and place them accordingly
    cv2.namedWindow("Camera view")
    cv2.resizeWindow("Camera view", 640, 480)
    cv2.moveWindow("Camera view", 15, 200)

    cv2.namedWindow("Grayscale view")
    cv2.resizeWindow("Grayscale view", 480, 360)
    cv2.moveWindow("Grayscale view", 655, 30)

    cv2.namedWindow("Binary view")
    cv2.resizeWindow("Binary view", 480, 360)
    cv2.moveWindow("Binary view", 655, 430)

    cv2.namedWindow("Example")
    cv2.resizeWindow("Example", 380, 270)
    cv2.moveWindow("Example", 1125, 280)

    # Set once the whole collection has to stop (Esc key or camera failure)
    stop_requested = False

    # Perform the data collecting process for each gesture in the given gesture list
    for gesture in gestures:

        # "current" counts processed frames, "counter" is the run number of the
        # next image to save; both resume after the images already on disk
        current = current_amount[gesture] + 1
        counter = current
        end = desired_amount[gesture]
        first_frame_shown = False

        # Continue until the respective subfolder has the designated number of samples
        while counter <= end:
            ret, frame = cap.read()

            # Without a frame none of the remaining gestures can be collected
            # either, so stop the whole process instead of only this gesture
            if not ret:
                print("There has been a problem retrieving your frame")
                stop_requested = True
                break

            # Flip horizontally so the preview behaves like a mirror
            frame = cv2.flip(frame, 1)

            # End the process for the current gesture in case the "q" key is hit
            key = cv2.waitKey(1)
            if key == ord("q"):
                break

            # End the whole process in case the "Esc" key is hit
            if key == ord("\x1b"):
                stop_requested = True
                break

            # Create the grayscale version
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Binarize it: Gaussian blur (7x7) followed by a fixed threshold at 127.
            # Alternatives that were explored earlier and are no longer part of
            # this cell:
            #   - running-average background mask + absdiff + binary threshold
            #   - fastNlMeansDenoising (5, 15, 7) + 2 median blurs (5)
            #     + adaptive mean thresholding
            #   - morphological clean-up of the mask (erode / dilate / gradient)
            frame_gray = cv2.GaussianBlur(frame_gray, (7, 7), 0)
            frame_binary = cv2.threshold(frame_gray, 127, 255, cv2.THRESH_BINARY)[1]

            # Show all images
            # Live view with the crop rectangle and the gesture label above it
            cv2.rectangle(frame, rect[0], rect[3], (0, 255, 0), 2)
            txt = gesture.capitalize()
            cv2.putText(frame, txt, (rect[0][0], rect[0][1] - 15),
                        cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 0), 2)
            cv2.imshow("Camera view", frame)

            # Grayscale version (already blurred at this point)
            cv2.imshow("Grayscale view", cv2.resize(frame_gray, (480, 360)))

            # Binary version
            cv2.imshow("Binary view", cv2.resize(frame_binary, (480, 360)))

            # Pause once at the start of every gesture
            if not first_frame_shown:
                time.sleep(1)
                first_frame_shown = True

            # To reduce the number of almost identical frames, only save every 4th frame
            if current % 4 == 0:

                # Create the naming for the file, i.e. ("gesture_run-number.jpg")
                img_name = gesture + "_" + str(counter) + ".jpg"
                img_path = os.path.join(paths[gesture], img_name)

                # Crop the rectangle from the binary frame, inset by 2 px on
                # every side (presumably to mirror the 2 px outline drawn on the
                # camera view - TODO confirm)
                crop = frame_binary[(rect[0][1] + 2):(rect[2][1] - 2),
                                    (rect[0][0] + 2):(rect[1][0] - 2)]

                # imwrite reports failure through its return value
                if not cv2.imwrite(img_path, crop):
                    print("Something went wrong during this attempt:",
                          f"gesture - {gesture}, run - {counter}")

                counter += 1

            current += 1

        if stop_requested:
            break

# Report unexpected errors, but let them propagate so the traceback stays visible
except Exception:
    print("A fatality has occured, the program will now terminate")
    raise

# Close the camera and all windows on every exit path
finally:
    cap.release()
    cv2.destroyAllWindows()