Import OpenCV for computer vision, NumPy for array operations, and PyAutoGUI to interact with the computer.

In [92]:
import time
import cv2 as cv
import numpy as np
import pyautogui

Get webcam input.

In [93]:
cap = cv.VideoCapture(0)

Create a MOG2 Background Subtractor and make a kernel for it.

In [94]:
fgbg = cv.createBackgroundSubtractorMOG2()
kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (3, 3))

Create a buffer to store previous distances between the LEDs.

In [95]:
distances = []

Keep track of frame times for an FPS counter.

In [96]:
prev_frame_time = 0
new_frame_time = 0

Apply the MOG2 Background Subtractor to a frame, isolating movement.

In [97]:
def apply_mask(frame):
    fgmask = fgbg.apply(frame)
    fgmask = cv.morphologyEx(fgmask, cv.MORPH_OPEN, kernel)
    fgmask = cv.morphologyEx(fgmask, cv.MORPH_CLOSE, kernel)
    return fgmask

Apply a binary threshold to a frame, isolating white pixels.

In [98]:
def apply_threshold(frame):
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    _, thresh = cv.threshold(gray, 240, 255, cv.THRESH_BINARY)
    return thresh

Find the centers of an array of contours.

In [99]:
def find_centers(contours):
    centers = []
    for contour in contours:
        M = cv.moments(contour)
        cx = int(M["m10"] / M["m00"])
        cy = int(M["m01"] / M["m00"])
        centers.append((cx, cy))
    return centers

Display the centers on the frame.

In [100]:
def draw_centers(frame, centers):
    for center in centers:
        cv.circle(frame, center, 5, (0, 0, 255), -1)
        cv.putText(frame, str(center), center, cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

Find the distances between the centers.

In [101]:
def find_distances(centers):
    return [np.linalg.norm(np.array(centers[i]) - np.array(centers[i + 1])) for i in range(0, len(centers) - 1, 2)]

Display a connection between the centers, and display the distance.

In [102]:
def connect_centers(frame, centers, distance):
    if len(centers) == 2:
        cv.line(frame, centers[0], centers[1], (0, 255, 0), 2)
        cv.putText(frame, str(distance), (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)


Main logic loop, that generates webcam frames and applys gesture detection to them.

In [103]:
while True:
        _, frame = cap.read()
        frame = cv.flip(frame, 1)
        fgmask = apply_mask(frame)
        thresh = apply_threshold(frame)
        merged = cv.bitwise_and(thresh, thresh, mask=fgmask)
        contours, _ = cv.findContours(merged, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
        if len(contours) > 0:
            contours = [c for c in contours if cv.contourArea(c) > 100]
            centers = find_centers(contours)
            center = np.mean(centers, axis=0).astype(int)
            distances = find_distances(centers)
            draw_centers(frame, centers)
            connect_centers(frame, centers, distances)
            # click if distances are close
            if len(distances) > 0 and distances[0] < 50:
                pyautogui.click()
                cv.putText(frame, "CLICK", (10, 60), cv.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            if len(centers) > 0:
                screen_width, screen_height = pyautogui.size()
                frame_width, frame_height = frame.shape[1], frame.shape[0]
                mouse_x = int(center[0] / frame_width * screen_width)
                mouse_y = int(center[1] / frame_height * screen_height)
                cv.circle(frame, (int(center[0]), int(center[1])), 5, (0, 255, 0), -1)
                pyautogui.FAILSAFE = False
                pyautogui.moveTo(mouse_x, mouse_y)
        new_frame_time = time.time()
        fps = 1 / (new_frame_time - prev_frame_time)
        prev_frame_time = new_frame_time
        cv.putText(
            frame,
            "FPS: " + str(fps),
            (7, 70),
            cv.FONT_HERSHEY_SIMPLEX,
            2,
            (100, 255, 0),
            3,
            cv.LINE_AA,
        )
        if cv.waitKey(1) & 0xFF == ord("q"):
            break
        cv.imshow("webcam", frame)
        if cv.waitKey(10) & 0xFF == ord("q"):
            cap.release()
            break

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Destroy the camera display.
The final `waitKey(1)` is required due to a macOS bug.

In [104]:
cv.destroyAllWindows()
cv.waitKey(1)

-1