# In [None]:
import cv2
import mediapipe as mp
import numpy as np
import math #To calculate distances between fingers

# Picture that the gestures zoom in and out of.
# cv2.imread returns None (it does not raise) when the file cannot be read,
# so fall back to a plain white 400x400, 3-channel uint8 canvas in that case.
img_to_zoom = cv2.imread("prysh.jpg")
if img_to_zoom is None:
    img_to_zoom = np.full((400, 400, 3), 255, dtype=np.uint8)

# MediaPipe setup: the hand-tracking solution module, its drawing helpers,
# and the detector instance used for every frame.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Track at most two hands. min_detection_confidence is the score (0 to 1) a
# detection must exceed to be accepted; lower-scoring detections are dropped
# to avoid false positives.
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.7,
)



# Webcam (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Surface the failure: without this, the first failed read simply ends
    # the program with no explanation.
    print("Warning: could not open webcam (device 0)")

# Zoom state and limits
zoom_scale = 1.0  # current zoom factor; starts at 1.0x
min_zoom = 0.2  # lower zoom limit (20% of the original size)
max_zoom = 5.0  # upper zoom limit (500% of the original size)
prev_distance = None  # thumb-index distance from the previous frame; None means no baseline yet
gesture_reset_ready = False  # becomes True once the middle-finger reset gesture has been seen
dead_zone = 5  # frame-to-frame distance changes of 5 px or less are ignored

# Main capture loop. For each frame: mirror the webcam image, run MediaPipe
# Hands on it, update the zoom state from the detected gestures, then show the
# annotated webcam frame and the zoomed picture. Press "q" to quit.
#
# Gestures:
#   - Left hand visible: zoom snaps back to 1.0x.
#   - Right hand with the middle fingertip above its PIP joint: arms pinch
#     zooming and clears the pinch baseline.
#   - Right hand with the middle finger lowered (after arming): changes in the
#     thumb-index distance adjust the zoom.
#
# The try/finally guarantees the camera and the windows are released even when
# the loop is interrupted (KeyboardInterrupt, OpenCV error, ...).
try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame available (camera missing or stream ended).
            break

        img = cv2.flip(img, 1)  # horizontal flip: mirror view
        h, w, _ = img.shape
        # The frame is converted from BGR to RGB before being handed to MediaPipe.
        results = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        if results.multi_hand_landmarks and results.multi_handedness:
            hand_landmarks = results.multi_hand_landmarks

            # Map the "Left"/"Right" labels to positions in hand_landmarks.
            # If the same label appears twice, the last occurrence wins.
            right_hand_index = None
            left_hand_index = None
            for i, handedness in enumerate(results.multi_handedness):
                label = handedness.classification[0].label
                if label == "Right":
                    right_hand_index = i
                elif label == "Left":
                    left_hand_index = i

            # Left hand: reset the zoom to 1.0x and drop the pinch baseline.
            if left_hand_index is not None:
                zoom_scale = 1.0
                prev_distance = None
                cv2.putText(img, "Left hand detected - Reset Zoom", (10, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

            # Right hand: pinch (thumb tip to index tip) controls the zoom.
            if right_hand_index is not None:
                hand = hand_landmarks[right_hand_index]
                mp_draw.draw_landmarks(img, hand, mp_hands.HAND_CONNECTIONS)

                # Landmark coordinates are normalised; scale them by the frame
                # size to get pixel positions.
                thumb_tip = hand.landmark[mp_hands.HandLandmark.THUMB_TIP]
                index_tip = hand.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                middle_tip = hand.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
                middle_pip = hand.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_PIP]

                x_thumb = int(thumb_tip.x * w)
                y_thumb = int(thumb_tip.y * h)
                x_index = int(index_tip.x * w)
                y_index = int(index_tip.y * h)
                y_middle_tip = int(middle_tip.y * h)
                y_middle_pip = int(middle_pip.y * h)

                # Highlight the two fingertips that form the pinch.
                cv2.circle(img, (x_thumb, y_thumb), 8, (255, 0, 255), cv2.FILLED)
                cv2.circle(img, (x_index, y_index), 8, (255, 0, 255), cv2.FILLED)

                # Pinch distance in pixels.
                distance = math.hypot(x_index - x_thumb, y_index - y_thumb)

                if y_middle_tip < y_middle_pip:
                    # Image y grows downwards, so "tip above PIP" means a
                    # smaller y. This is the reset gesture: arm zooming and
                    # forget the old baseline so the next pinch starts fresh.
                    gesture_reset_ready = True
                    prev_distance = None
                    cv2.putText(img, "Reset gesture detected", (10, 140),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
                elif gesture_reset_ready:
                    # Zooming is only applied once the reset gesture has been
                    # seen at least once.
                    if prev_distance is not None:
                        diff = distance - prev_distance  # > 0: fingers moved apart
                        if abs(diff) > dead_zone:
                            # 0.005 zoom units per pixel of pinch change,
                            # clamped to [min_zoom, max_zoom].
                            zoom_scale += diff * 0.005
                            zoom_scale = max(min_zoom, min(max_zoom, zoom_scale))
                    # NOTE(review): the baseline advances every frame, so pinch
                    # motion slower than dead_zone px/frame never zooms --
                    # confirm this is intended.
                    prev_distance = distance
        else:
            # No hand in view: drop the baseline so a reappearing hand does not
            # cause a zoom jump.
            prev_distance = None

        # Scale the picture by the current zoom factor.
        zoomed_img = cv2.resize(img_to_zoom, None, fx=zoom_scale, fy=zoom_scale)
        zh, zw = zoomed_img.shape[:2]

        # Centre-crop to at most the webcam frame size (w x h). When the zoomed
        # picture is smaller than the frame the offsets are 0 and the slice
        # returns the whole picture.
        crop_x = max(0, (zw - w) // 2)
        crop_y = max(0, (zh - h) // 2)
        zoomed_crop = zoomed_img[crop_y:crop_y + h, crop_x:crop_x + w]

        # Display
        cv2.putText(img, f"Zoom: {zoom_scale:.2f}x", (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 0, 255), 3)

        cv2.imshow("Webcam", img)
        cv2.imshow("Zoom Image", zoomed_crop)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even on error/interrupt.
    cap.release()
    cv2.destroyAllWindows()
