# Virtual Mouse using Finger Tracking

In [1]:
import cv2
import mediapipe as mp
import pyautogui
import numpy as np
import math
import time

Configuration Constants

In [2]:
WEBCAM_ID = 0
CAM_WIDTH = 640
CAM_HEIGHT = 480
SMOOTHING = 7
FRAME_REDUCTION = 100

Main Function

In [3]:
def main():
    # Initialization
    p_time = 0

    # Previous and current locations for smoothing
    p_loc_x, p_loc_y = 0, 0
    c_loc_x, c_loc_y = 0, 0

    # Initialize webcam
    cap = cv2.VideoCapture(WEBCAM_ID)
    if not cap.isOpened():
        print(f"Error: Could not open webcam with ID {WEBCAM_ID}.")
        return
    cap.set(3, CAM_WIDTH)
    cap.set(4, CAM_HEIGHT)

    # Get screen dimensions
    screen_width, screen_height = pyautogui.size()
    pyautogui.FAILSAFE = False # Disable the failsafe to prevent accidental program termination

    # Initialize MediaPipe Hands
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(
        max_num_hands=1,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.5
    )
    mp_draw = mp.solutions.drawing_utils

    print("AI Virtual Mouse started. Show your hand to the camera.")
    print("Move your index finger to move the cursor.")
    print("Bring your middle finger close to your index finger to click.")
    print("Press 'q' to quit.")

    # Main Loop
    try:
        while True:
            # Read a frame from the webcam
            success, frame = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                continue

            # Flip the frame horizontally for a more intuitive, mirror-like experience
            frame = cv2.flip(frame, 1)

            # Process the frame to find hand landmarks
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(frame_rgb)

            # If a hand is detected, process the landmarks
            if results.multi_hand_landmarks:
                hand_landmarks = results.multi_hand_landmarks[0] # Get landmarks for the first hand

                # Get coordinates for index and middle finger tips
                index_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                middle_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]

                # Convert normalized landmark coordinates to frame coordinates
                h, w, c = frame.shape
                ix, iy = int(index_finger_tip.x * w), int(index_finger_tip.y * h)
                mx, my = int(middle_finger_tip.x * w), int(middle_finger_tip.y * h)

                # Map Hand Coordinates to Screen Coordinates
                screen_x = np.interp(ix, (FRAME_REDUCTION, w - FRAME_REDUCTION), (0, screen_width))
                screen_y = np.interp(iy, (FRAME_REDUCTION, h - FRAME_REDUCTION), (0, screen_height))

                # Smooth the coordinates to reduce jitter
                c_loc_x = p_loc_x + (screen_x - p_loc_x) / SMOOTHING
                c_loc_y = p_loc_y + (screen_y - p_loc_y) / SMOOTHING

                # Move the mouse
                pyautogui.moveTo(c_loc_x, c_loc_y)
                p_loc_x, p_loc_y = c_loc_x, c_loc_y # Update previous location

                # Gesture Recognition: Check for click
                # Calculate the distance between the index and middle finger tips
                click_distance = math.hypot(mx - ix, my - iy)

                # Draw visuals on the frame for feedback
                cv2.circle(frame, (ix, iy), 15, (255, 0, 255), cv2.FILLED) # Circle on index finger

                # If the distance is small, it's a click
                if click_distance < 30: # Threshold distance for a click
                    cv2.circle(frame, (ix, iy), 15, (0, 255, 0), cv2.FILLED) # Green circle indicates click
                    pyautogui.click()
                    time.sleep(0.2) # Small delay to prevent multiple clicks

                # Draw all hand landmarks for visualization
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Display FPS
            c_time = time.time()
            fps = 1 / (c_time - p_time)
            p_time = c_time
            cv2.putText(frame, f'FPS: {int(fps)}', (20, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3)

            # Display the frame in a window
            cv2.imshow("AI Virtual Mouse", frame)

            # Check for quit command
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        print("\nShutting down...")
        cap.release()
        cv2.destroyAllWindows()
        hands.close()

In [4]:
if __name__ == "__main__":
    main()

AI Virtual Mouse started. Show your hand to the camera.
Move your index finger to move the cursor.
Bring your middle finger close to your index finger to click.
Press 'q' to quit.

Shutting down...


KeyboardInterrupt: 