# In [1]:
import cv2
import mediapipe as mp
import pyautogui # Simulates keyboard presses for slide control
import time # Used to implement cooldown timing.
import numpy as np

class GestureSlideController:
    """Control presentation slides with horizontal hand swipes seen by a webcam.

    Each frame is mirrored, passed to MediaPipe Hands, and the x-position of
    one hand landmark is tracked.  When the smoothed position moves more than
    ``movement_threshold`` pixels between two consecutive frames, an arrow-key
    press is sent through ``pyautogui``: a move to the left presses "right"
    (next slide), a move to the right presses "left" (previous slide).

    Attributes:
        prev_x: Smoothed x-position from the previous frame, or ``None`` when
            no hand is currently being tracked.
        action_cooldown: Minimum number of seconds between two key presses.
        last_action_time: ``time.time()`` of the last key press (or of the
            moment tracking started).
        movement_threshold: Minimum frame-to-frame movement, in pixels, that
            counts as a swipe.
        position_history: Most recent raw x-positions used for smoothing.
        history_size: Maximum number of samples kept in ``position_history``.
    """

    # Index of the MediaPipe hand landmark that is tracked.  Landmark 9 is the
    # middle-finger MCP joint (the wrist is landmark 0).
    TRACKED_LANDMARK = 9

    # A raw frame-to-frame jump larger than this many pixels is treated as the
    # tracker re-acquiring the hand somewhere else, not as a swipe.
    MAX_JUMP = 200

    def __init__(self):
        """Open the default camera and create the MediaPipe hand tracker."""
        self.cap = cv2.VideoCapture(0)
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.5
        )
        self.mp_draw = mp.solutions.drawing_utils

        # Gesture parameters
        # None (not 0) marks "no previous position": 0 is a valid pixel column.
        self.prev_x = None
        self.action_cooldown = 0.1
        self.last_action_time = 0
        self.movement_threshold = 30

        # Smoothing
        self.position_history = []
        self.history_size = 5

        # UI elements
        self.font = cv2.FONT_HERSHEY_SIMPLEX

    def _reset_tracking(self) -> None:
        """Forget the previous position and the smoothing window."""
        self.prev_x = None
        self.position_history = []

    def smooth_position(self, current_x: int) -> int:
        """Return the moving average of the most recent x-positions.

        Appends ``current_x`` to ``position_history``, drops the oldest sample
        once more than ``history_size`` are stored, and returns the mean of
        the stored samples truncated to an int.
        """
        self.position_history.append(current_x)
        if len(self.position_history) > self.history_size:
            self.position_history.pop(0)
        return int(np.mean(self.position_history))

    def draw_ui(self, img) -> None:
        """Draw the title, instructions, guide lines and cooldown onto ``img``.

        ``img`` is modified in place.  The two yellow lines sit
        ``movement_threshold`` pixels either side of the centre line; they are
        only a visual indication of the threshold distance and are not used
        by the gesture detection itself.
        """
        h, w, _ = img.shape

        # Title and instructions
        cv2.putText(img, "Hand Gesture Slide Controller", (10, 30), self.font, 0.7, (0, 255, 0), 2)
        cv2.putText(img, "Move hand LEFT for next slide", (10, 60), self.font, 0.5, (255, 255, 255), 1)
        cv2.putText(img, "Move hand RIGHT for previous slide", (10, 80), self.font, 0.5, (255, 255, 255), 1)
        cv2.putText(img, "Press 'q' to quit", (10, 100), self.font, 0.5, (255, 255, 255), 1)
        cv2.putText(img, "Press 'r' to reset tracking", (10, 120), self.font, 0.5, (255, 255, 255), 1)

        # Centre line
        center_x = w // 2
        cv2.line(img, (center_x, 0), (center_x, h), (100, 100, 100), 1)

        # Threshold guide lines
        left_zone = center_x - self.movement_threshold
        right_zone = center_x + self.movement_threshold
        cv2.line(img, (left_zone, 0), (left_zone, h), (0, 255, 255), 1)
        cv2.line(img, (right_zone, 0), (right_zone, h), (0, 255, 255), 1)

        # Remaining cooldown, shown only while a cooldown is running
        elapsed = time.time() - self.last_action_time
        if elapsed < self.action_cooldown:
            remaining = self.action_cooldown - elapsed
            cv2.putText(img, f"Cooldown: {remaining:.1f}s", (10, h-20), self.font, 0.5, (0, 0, 255), 1)

    def process_gesture(self, lm_list) -> None:
        """Update tracking from one frame and press a key if a swipe occurred.

        Args:
            lm_list: Sequence of ``(x, y)`` pixel positions, one per hand
                landmark, or an empty sequence when no hand is visible in the
                frame (which resets all tracking state).
        """
        current_time = time.time()

        # No hand in this frame: drop all tracking state.
        if not lm_list:
            self._reset_tracking()
            self.last_action_time = 0
            return

        raw_x = lm_list[self.TRACKED_LANDMARK][0]

        # First frame after the hand (re)appears: take a fresh baseline and
        # start a cooldown so the appearance itself cannot trigger a slide.
        if self.prev_x is None:
            self.position_history = [raw_x]
            self.prev_x = raw_x
            self.last_action_time = current_time
            return

        # Implausibly large raw jump: restart from the new position instead of
        # interpreting it as a swipe.
        last_raw = self.position_history[-1] if self.position_history else self.prev_x
        if abs(raw_x - last_raw) > self.MAX_JUMP:
            self.position_history = [raw_x]
            self.prev_x = raw_x
            return

        current_x = self.smooth_position(raw_x)

        # During the cooldown keep the baseline current, so that movement made
        # while cooling down is not counted as one big swipe afterwards.
        if current_time - self.last_action_time <= self.action_cooldown:
            self.prev_x = current_x
            return

        diff = current_x - self.prev_x

        if diff < -self.movement_threshold:
            # Hand moved left -> next slide (right arrow key)
            print(f"Next Slide (Hand moved LEFT by {abs(diff)} pixels)")
            pyautogui.press("right")
            self.last_action_time = current_time
        elif diff > self.movement_threshold:
            # Hand moved right -> previous slide (left arrow key)
            print(f"Previous Slide (Hand moved RIGHT by {diff} pixels)")
            pyautogui.press("left")
            self.last_action_time = current_time

        self.prev_x = current_x

    def run(self) -> None:
        """Run the capture / detect / draw / display loop until 'q' is pressed.

        Each iteration reads a webcam frame, mirrors it, runs hand detection,
        draws the UI, feeds the landmarks to ``process_gesture`` and shows the
        result.  Pressing 'r' resets the tracking state.  The loop also ends
        if a frame cannot be read from the camera.
        """
        print("Starting Gesture Slide Controller...")
        print("Make sure your presentation software is active!")

        while True:
            success, img = self.cap.read()
            if not success:
                print("Failed to read from camera")
                break

            img = cv2.flip(img, 1)  # Mirror so on-screen motion matches the user's
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            results = self.hands.process(img_rgb)

            self.draw_ui(img)

            if results.multi_hand_landmarks:
                frame_h, frame_w = img.shape[:2]
                for handLms in results.multi_hand_landmarks:
                    self.mp_draw.draw_landmarks(img, handLms, self.mp_hands.HAND_CONNECTIONS)

                    # Convert normalised landmark coordinates to pixels
                    lm_list = [(int(lm.x * frame_w), int(lm.y * frame_h)) for lm in handLms.landmark]

                    self.process_gesture(lm_list)

                    # Highlight the tracked landmark
                    cv2.circle(img, lm_list[self.TRACKED_LANDMARK], 10, (255, 0, 0), -1)
            else:
                # Tell the gesture logic the hand is gone so it can reset.
                self.process_gesture([])

            cv2.imshow("Gesture Slide Controller", img)

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('r'):  # Reset position
                self._reset_tracking()
                print("Position reset")

    def cleanup(self) -> None:
        """Close the hand tracker, release the webcam and close all windows."""
        try:
            self.hands.close()
        finally:
            # Always release the camera and windows, even if closing the
            # tracker fails.
            self.cap.release()
            cv2.destroyAllWindows()

# Usage
def main():
    """Create the controller, run it until it exits, and always clean up.

    Ctrl+C is handled by printing a short message; the webcam and windows
    are released in every case.
    """
    slide_controller = GestureSlideController()
    try:
        slide_controller.run()
    except KeyboardInterrupt:
        print("\nStopping controller...")
    finally:
        slide_controller.cleanup()


if __name__ == "__main__":
    main()

# Output:
# Starting Gesture Slide Controller...
# Make sure your presentation software is active!
