# Gesture Volume Control

# In [2]:
import cv2
import mediapipe as mp
import numpy as np
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
import math

# Webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail here with a clear message instead of crashing later on a None frame.
    raise RuntimeError("Could not open the webcam (device index 0).")

# Icon that is pasted onto the frame while the mute gesture is held.
# cv2.imread does not raise when the file is missing or unreadable; it returns
# None, which would otherwise surface as an opaque error inside cv2.resize.
mute_icon = cv2.imread("mute.png", cv2.IMREAD_COLOR)
if mute_icon is None:
    cap.release()
    raise FileNotFoundError("Could not load 'mute.png' from the working directory.")
icon_resized = cv2.resize(mute_icon, (50, 50))
x_offset, y_offset = 50, 80  # Top-left corner (in pixels) where the icon is placed

# MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()
mp_draw = mp.solutions.drawing_utils

# Get system volume interface
# GetSpeakers() returns the default audio output device, i.e. whatever the
# system is currently using for playback (speakers, headphones, ...).
devices = AudioUtilities.GetSpeakers()
# Activate the volume interface of that device:
#   IAudioEndpointVolume._iid_ - identifies which interface is requested
#                                (here, the one that controls volume).
#   CLSCTX_ALL                 - allow activation in any context
#                                (in-process or out-of-process).
#   None                       - no extra activation parameters.
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
# Activate() hands back a generic interface pointer; cast() (from ctypes)
# reinterprets it as a pointer to IAudioEndpointVolume so that its
# volume-specific methods can be called.
volume_control = cast(interface, POINTER(IAudioEndpointVolume))

# Get volume range
# GetVolumeRange() returns (min, max, step); these values are determined by the
# audio hardware, e.g. (-65.25, 0.0, 0.03125). The [:2] slice keeps only the
# first two values, (min, max).
vol_min, vol_max = volume_control.GetVolumeRange()[:2]

# Pinch ratio = thumb-tip/index-tip distance divided by wrist/index-base distance.
# Ratios at or below min_ratio map to vol_min, at or above max_ratio to vol_max.
min_ratio = 0.3
max_ratio = 0.9
# Main loop: grab a frame, detect hands, turn the thumb/index pinch into a
# system volume level and draw the overlay. Press 'q' in the window to quit.
# The try/finally guarantees the camera and the window are released even if
# something inside the loop raises.
try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame was delivered (camera unplugged, busy or closed); img is
            # unusable, so stop instead of crashing inside cv2.flip.
            break

        img = cv2.flip(img, 1)  # Mirror horizontally so the preview acts like a mirror
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR -> RGB before MediaPipe
        results = hands.process(img_rgb)

        if results.multi_hand_landmarks:
            h, w = img.shape[:2]  # Same for every hand in this frame
            for hand_landmarks in results.multi_hand_landmarks:
                # Scale each landmark's x/y by the frame size to get pixel positions.
                lm_list = [
                    (int(lm.x * w), int(lm.y * h))
                    for lm in hand_landmarks.landmark
                ]

                x0, y0 = lm_list[0]  # Wrist
                x5, y5 = lm_list[5]  # Index base (MCP)
                x1, y1 = lm_list[4]  # Thumb tip
                x2, y2 = lm_list[8]  # Index tip

                # Mark the two fingertips that form the pinch.
                cv2.circle(img, (x1, y1), 10, (255, 0, 0), cv2.FILLED)
                cv2.circle(img, (x2, y2), 10, (255, 0, 0), cv2.FILLED)

                # Pinch distance, and a hand-size reference used to normalise it
                # so the ratio does not depend on how far the hand is from the camera.
                hand_size_ref = math.hypot(x5 - x0, y5 - y0)
                distance = math.hypot(x2 - x1, y2 - y1)

                if hand_size_ref != 0:  # Guard the division below
                    ratio = distance / hand_size_ref
                    cv2.putText(img, f"Ratio: {ratio:.2f}", (50, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

                    # Mute gesture: fingertips (almost) touching.
                    # NOTE(review): this threshold is in raw pixels, unlike the
                    # normalised ratio used for the volume mapping, so it varies
                    # with the hand's distance from the camera - confirm intent.
                    if distance < 55:
                        volume_control.SetMasterVolumeLevel(vol_min, None)
                        icon_h, icon_w = icon_resized.shape[:2]
                        roi = img[y_offset:y_offset + icon_h,
                                  x_offset:x_offset + icon_w]
                        # Paste the icon only if it fits entirely inside the
                        # frame; a clipped slice would raise a broadcast error.
                        if roi.shape == icon_resized.shape:
                            roi[:] = icon_resized
                    else:
                        # Map the pinch ratio to the device's volume range and apply it.
                        vol = np.interp(ratio, [min_ratio, max_ratio], [vol_min, vol_max])
                        volume_control.SetMasterVolumeLevel(vol, None)

                        # Draw volume bar: outline plus a fill whose top edge
                        # rises from y=400 (min) to y=150 (max).
                        vol_bar = np.interp(ratio, [min_ratio, max_ratio], [400, 150])
                        cv2.rectangle(img, (50, 150), (85, 400), (0, 255, 0), 3)
                        cv2.rectangle(img, (50, int(vol_bar)), (85, 400),
                                      (0, 255, 0), cv2.FILLED)

                        # Draw distance text
                        cv2.putText(img, f'{int(distance)}', (50, 450),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

                # Draw the hand skeleton for every detected hand, including while
                # the mute gesture is held (it used to vanish in that state).
                mp_draw.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        cv2.imshow("Volume Control", img)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
