In [2]:
# Import libraries
# Standard library
import platform
import shutil
import subprocess
import sys
from ctypes import cast, POINTER

# Third-party (kept in their original relative order; NOTE(review): comtypes/pycaw
# and sounddevice presumably both touch platform audio state on import, so the
# order between them is deliberately left untouched)
import numpy as np
import cv2
import mediapipe as mp
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
import sounddevice as sd

In [3]:
def get_landmarks(img):
    ''' Draw the landmarks of the detected hands on the given image,
        and get the coordinates of the THUMB_TIP(4) and of the INDEX_TIP(8) of the first hand.

        Parameters
        ----------
        img : 3-channel image array in OpenCV's BGR channel order (the caller passes
              frames read from cv2.VideoCapture). The landmarks are drawn onto it in place.

        Returns
        -------
        list : [(x, y) of the thumb tip, (x, y) of the index tip] in pixel coordinates
               of `img`, or an empty list when no hand is detected.
    '''
    height, width = img.shape[:2]
    tip_indexes = (4, 8)  # THUMB_TIP, INDEX_FINGER_TIP

    # For using the Mediapipe hand tracking solution
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # Build the hand tracking model once and reuse it on later calls: creating a new
    # model for every frame is slow and the previous instances were never closed.
    hands = getattr(get_landmarks, '_hands', None)
    if hands is None:
        hands = mp_hands.Hands()
        get_landmarks._hands = hands

    # MediaPipe processes RGB images while OpenCV frames are BGR, so convert a copy
    # for inference and keep drawing on the original BGR image.
    results = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # The landmarks of the detected hands (None when nothing was detected)
    landmarks = results.multi_hand_landmarks
    if not landmarks:
        return []

    # Draw the landmarks of every detected hand on the original image
    for hand_landmarks in landmarks:
        mp_drawing.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # Landmark coordinates are normalized; scale those of the first hand to pixels
    first_hand = landmarks[0].landmark
    return [
        (round(first_hand[index].x * width), round(first_hand[index].y * height))
        for index in tip_indexes
    ]

In [4]:
def resize_img(img, resize_ratio=1):
    ''' Resize a given image by a uniform scale factor.

        Parameters
        ----------
        img : image array whose first two dimensions are (height, width); both
              grayscale (2-D) and multi-channel (3-D) images are accepted.
        resize_ratio : positive factor applied to both the width and the height.

        Returns
        -------
        The image returned by cv2.resize for the scaled size.

        Raises
        ------
        ValueError : if resize_ratio is not strictly positive.
    '''
    if resize_ratio <= 0:
        raise ValueError(f'resize_ratio must be positive, got {resize_ratio!r}')

    # Only the first two dimensions are needed, so images without a channel axis work too
    height, width = img.shape[:2]

    # Truncate like before, but never drop below one pixel: cv2.resize rejects empty sizes
    new_width = max(1, int(width * resize_ratio))
    new_height = max(1, int(height * resize_ratio))

    # cv2.resize takes the target size as (width, height)
    return cv2.resize(img, (new_width, new_height))

In [None]:
# Tunable parameters of the demo
RESIZE_RATIO = 1                         # scale factor applied to every captured frame
PINCH_RANGE_PX = (5, 95 * RESIZE_RATIO)  # thumb-index distance (pixels) mapped onto 0..100 %
BAR_MARGIN = 100                         # distance of the volume bar from the frame borders
BAR_WIDTH = 40                           # width of the volume bar in pixels
MARKER_COLOR = (255, 0, 255)             # colour of the fingertip markers and of the line
WINDOW_NAME = 'My Camera'


def _make_volume_setter():
    '''Return a callable taking a level in [0, 1] that sets the system master volume.

    The platform-specific setup is done once here instead of once per frame.
    When the required command-line tool is missing, a warning is printed and a
    no-op callable is returned so the visualisation keeps working.
    '''
    system = platform.system()

    if system == 'Windows':
        # Get the default audio device and its endpoint volume interface
        devices = AudioUtilities.GetSpeakers()
        interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
        endpoint = cast(interface, POINTER(IAudioEndpointVolume))
        return lambda level: endpoint.SetMasterVolumeLevelScalar(level, None)

    # sounddevice offers no system volume control, so use the platform's own tools
    if system == 'Darwin':
        tool = 'osascript'

        def build_command(percent):
            return ['osascript',
                    '-e', f'set volume output volume {percent}',
                    '-e', 'set volume without output muted']
    else:
        tool = 'amixer'

        def build_command(percent):
            return ['amixer', '-q', 'sset', 'Master', f'{percent}%', 'unmute']

    if shutil.which(tool) is None:
        print(f'Warning: {tool!r} was not found; the system volume will not be changed.')
        return lambda level: None

    def set_volume(level):
        subprocess.run(build_command(round(level * 100)), check=False,
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    return set_volume


# Open the VideoCapture
cap = cv2.VideoCapture(0)

# Check if the VideoCapture is opened successfully
if not cap.isOpened():
    print('Error: Could not open the camera.')
    sys.exit()

try:
    set_volume = _make_volume_setter()
    last_percent = None  # last volume that was applied, to avoid redundant system calls

    # Capture frames from the camera, and process them
    while True:
        ok, frame = cap.read()
        if not ok or frame is None:
            # The camera was disconnected or the stream ended
            print('Error: Could not read a frame from the camera.')
            break

        resized_img = resize_img(frame, RESIZE_RATIO)
        coord_lms = get_landmarks(resized_img)

        # If there is a hand on the current frame
        if coord_lms:
            thumb_tip, index_tip = coord_lms
            height, width = resized_img.shape[:2]

            # Mark both fingertips and draw a line between them
            cv2.circle(resized_img, thumb_tip, 10, MARKER_COLOR, cv2.FILLED)
            cv2.circle(resized_img, index_tip, 10, MARKER_COLOR, cv2.FILLED)
            cv2.line(resized_img, thumb_tip, index_tip, MARKER_COLOR, 4)

            # Map the thumb-index distance onto a volume percentage (clamped to 0..100)
            pinch_px = np.linalg.norm(np.array(thumb_tip) - np.array(index_tip))
            percent = round(np.interp(pinch_px, PINCH_RANGE_PX, [0, 100]))

            # Top edge of the filled part of the bar: the louder, the higher
            bar_bottom = height - BAR_MARGIN
            bar_top = round(np.interp(percent, [0, 100], [bar_bottom, BAR_MARGIN]))

            # Control the system's volume, only when the value actually changed
            if percent != last_percent:
                set_volume(percent / 100)
                last_percent = percent

            # Draw rectangles that represent the volume
            bar_left = width - BAR_MARGIN
            bar_right = bar_left + BAR_WIDTH
            cv2.rectangle(resized_img, (bar_left, bar_bottom), (bar_right, BAR_MARGIN), (255,0,0), 2)
            cv2.putText(resized_img, f'{percent}%', (bar_left, BAR_MARGIN - 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
            cv2.rectangle(resized_img, (bar_left, bar_bottom), (bar_right, bar_top), (0,255,0), cv2.FILLED)

        # Display the captured frame after processing
        cv2.imshow(WINDOW_NAME, resized_img)

        # Press 'q' key to quit the VideoCapture
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the VideoCapture object and close the window, even after an error
    cap.release()
    cv2.destroyAllWindows()