In [1]:
import mediapipe as mp
import cv2

In [2]:
mpHands = mp.solutions.hands
mpDraw = mp.solutions.drawing_utils

In [3]:
class HandDetector:
    def __init__(self,max_num_hands=2,min_detection_confidence = 0.5, min_tracking_confidence=0.5):
        self.hands = mpHands.Hands(max_num_hands = max_num_hands, min_detection_confidence = min_detection_confidence, min_tracking_confidence=min_tracking_confidence)
    def findHandLandMarks(self,image, handNumber=0, draw=False): #image is on which hand landmarks would be detected. The handNumber is used if multiple hands are there in image, so this function would return landmarks only for the specified hand number. Boolean parameter draw decides if we want the mediapipe to draw those landmarks on our image.
        originalImage = image
        if(image is not None):
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.hands.process(image)
        landMarkList = [] #empty list which will contain final result from this function.
    
        if results.multi_hand_landmarks:
            hand = results.multi_hand_landmarks[handNumber] #results.multi_hand_landmarks returns landMarks for all the hands that were detected, so passing the handNumber to it gives you data for the correct hand.
        
            for id, landMark in enumerate(hand.landmark):#landMark holds x,y,z ratios of single landmark #hand.landmark gives 21 landmarks for the selected hand. we iterate 21 points where id holds the id for each of the landmark.
                imgH, imgW, imgC = originalImage.shape  #height, width, channel for image
                xPos, yPos = int(landMark.x * imgW), int(landMark.y * imgH)      #Mediapipe returns ration of the image dimensions as landmark information, so convert here to x and y coordinate of the pixel of the landmark.
                landMarkList.append([id,xPos,yPos])  #findHandLandMarks() would return: oth index->id of landmark, 1st index-> x coordinate of landmark, 2nd index-> x coordinate of landmark

            
            if draw:
                mpDraw.draw_landmarks(originalImage, hand, mpHands.HAND_CONNECTIONS)
        
        return landMarkList
            

In [8]:
def findHandLandMarks(self,image, handNumber=0, draw=False): #image is on which hand landmarks would be detected. The handNumber is used if multiple hands are there in image, so this function would return landmarks only for the specified hand number. Boolean parameter draw decides if we want the mediapipe to draw those landmarks on our image.
    originalImage = image
    if(image is not None):
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = self.hands.process(image)
    landMarkList = [] #empty list which will contain final result from this function.
    
    if results.multi_hand_landmarks:
        hand = results.multi_hand_landmarks[handNumber] #results.multi_hand_landmarks returns landMarks for all the hands that were detected, so passing the handNumber to it gives you data for the correct hand.
        
        for id, landMark in enumerate(hand.landmark):#landMark holds x,y,z ratios of single landmark #hand.landmark gives 21 landmarks for the selected hand. we iterate 21 points where id holds the id for each of the landmark.
            imgH, imgW, imgC = originalImage.shape  #height, width, channel for image
            xPos, yPos = int(landMark.x * imgW), int(landMark.y * imgH)      #Mediapipe returns ration of the image dimensions as landmark information, so convert here to x and y coordinate of the pixel of the landmark.
            landMarkList.append([id,xPos,yPos])  #findHandLandMarks() would return: oth index->id of landmark, 1st index-> x coordinate of landmark, 2nd index-> x coordinate of landmark
            
            
        if draw:
            mpDraw.draw_landmarks(originalImage, hand, mpHands.HAND_CONNECTIONS)
        
        return landMarkList
            

# Volume Controller

In [17]:
!pip install pycaw

Collecting pycaw
  Downloading pycaw-20181226.tar.gz (5.7 kB)
Collecting enum34
  Downloading enum34-1.1.10-py3-none-any.whl (11 kB)
Building wheels for collected packages: pycaw
  Building wheel for pycaw (setup.py): started
  Building wheel for pycaw (setup.py): finished with status 'done'
  Created wheel for pycaw: filename=pycaw-20181226-py3-none-any.whl size=6497 sha256=f65d5531d76ba306bdcff3855e118bf298953e0304531515f34f4eec4061e212
  Stored in directory: c:\users\prash\appdata\local\pip\cache\wheels\dc\11\93\a8f4f331966f5a23ea9150999db050821e91a0a3273069df49
Successfully built pycaw
Installing collected packages: enum34, pycaw
Successfully installed enum34-1.1.10 pycaw-20181226


In [4]:
#from MediapipeVolumeController import HandDetector  #from handDetector import HandDetector #for importing custom class from other file
#import cv2
import math
import numpy as np
#python package pycaw to control system volume
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

In [5]:
#creating instance of our custom HandDetector class
handDetector = HandDetector(min_detection_confidence = 0.7)

In [6]:
webcamFeed = cv2.VideoCapture(0)

In [7]:
#Volume related initializations
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_,CLSCTX_ALL,None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
print(volume.GetVolumeRange()) #min volume -62.25 and max volume is 0.0

(-65.25, 0.0, 0.03125)


In [8]:
while True:
    status, image = webcamFeed.read()
    handLandmarks = HandDetector()
    handLandmarks = handLandmarks.findHandLandMarks(image=image,draw=True)
    
    if(len(handLandmarks)!= 0):
        #for volume control we need 4th and 8th landmark for thumb and firstfinger.
        x1,y1 = handLandmarks[4][1], handLandmarks[4][2]
        x2,y2 = handLandmarks[8][1], handLandmarks[8][2]
        length = math.hypot(x2-x1,y2-y1)
        print(length)
        
        #Hand range length- 50 to 250
        
        volumeValue = np.interp(length, [50,300],[-62.25,0.01]) #converting length to proportionate to volume range
        volume.SetMasterVolumeLevel(volumeValue,None)
        
        cv2.circle(image,(x1,y1),15,(305,0,305),cv2.FILLED)
        cv2.circle(image,(x2,y2),15,(305,0,305),cv2.FILLED)
        cv2.line(image,(x1,y1),(x2,y2),(305,0,305),3)
        
    cv2.imshow("Volume",image)
    cv2.waitKey(1)

88.64536084872123
11.6619037896906
11.6619037896906
8.06225774829855
8.06225774829855
45.541190146942796
45.221676218380054
45.221676218380054
54.74486277268397
47.539457296018846
27.730849247724095
27.730849247724095
10.19803902718557
13.453624047073712
30.0
30.0
32.202484376209235
27.80287754891569
27.80287754891569
35.0
34.43835071544513
24.758836806279895
24.758836806279895
28.30194339616981
22.825424421026653
35.84689665786985
27.202941017470888
27.202941017470888
26.400757564888174
22.203603311174515
28.600699292150182
28.600699292150182
27.459060435491963
31.622776601683793
31.622776601683793
28.30194339616981
30.528675044947498
30.528675044947498
35.22782990761707
4.242640687119286
30.870698080866262
30.870698080866262
37.73592452822641
24.041630560342618
27.459060435491963
25.553864678361276
27.018512172212596
27.018512172212596
28.844410203711917
27.202941017470888
23.60084744241189
29.20616373302047
42.80186911806539
29.068883707497267
17.720045146669353
17.720045146669353
2

KeyboardInterrupt: 

In [None]:
webcamFeed.release()
#webcamFeed.destroyAllWindows()