# Final Year Project Code

# In [1]:
import cv2
import numpy as np
import mediapipe as mp
import time
import autopy

# --- Configuration and state -------------------------------------------------
w_cam, h_cam = 640, 480  # requested webcam capture width and height, in pixels
w_screen, h_screen = autopy.screen.size()  # screen size, unpacked as (width, height)
# Margin, in pixels, between the webcam image border and the rectangle in which
# hand movement is tracked (the main loop maps that rectangle onto the screen).
frame = 100
smoothening = 7  # each cursor step covers 1/smoothening of the remaining distance
prev_loc_x = prev_loc_y = 0  # smoothed cursor position from the previous iteration
current_loc_x = current_loc_y = 0  # smoothed cursor position for the current iteration

# --- Webcam --------------------------------------------------------------------
# Index 0 selects the default camera; 1, 2, ... select other attached cameras.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail here with a clear message instead of crashing later on an empty frame.
    raise RuntimeError("Could not open webcam (device index 0)")
# Named property constants replace the raw ids 3, 4 and 10 (same values).
cap.set(cv2.CAP_PROP_FRAME_WIDTH, w_cam)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h_cam)
cap.set(cv2.CAP_PROP_BRIGHTNESS, 100)

# --- MediaPipe hand tracking ---------------------------------------------------
mp_hands = mp.solutions.hands
# Only one hand is needed to drive the cursor, so limit detection to one.
hands = mp_hands.Hands(max_num_hands=1)
mp_draw = mp.solutions.drawing_utils  # helper used to draw the detected landmarks and connections

# --- FPS bookkeeping -----------------------------------------------------------
previous_time = 0
current_time = 0

# Main loop: read a webcam frame, locate the hand, move the mouse pointer to the
# mapped position, and show the annotated frame until 'q' is pressed.
# The try/finally guarantees the camera and window are released even if an
# exception is raised mid-loop.
try:
    while True:
        success, img = cap.read()
        if not success or img is None:
            # No frame was delivered (camera unavailable or stream ended);
            # stop cleanly instead of passing None to cvtColor.
            break

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Hands class only uses RGB images
        results = hands.process(img_rgb)  # multi_hand_landmarks is None when no hand is found

        if results.multi_hand_landmarks:
            # Hands was created with max_num_hands=1, so use the first (only) hand.
            hand_lms = results.multi_hand_landmarks[0]
            # Landmarks come in a fixed order (ids 0-20); id 9 is the point tracked here.
            lm = hand_lms.landmark[9]

            # lm.x / lm.y are fractions of the image width / height; convert to pixels.
            h, w = img.shape[:2]
            cix, ciy = int(lm.x * w), int(lm.y * h)

            cv2.circle(img, (cix, ciy), 15, (255, 0, 255), cv2.FILLED)  # highlight the tracked landmark

            # Active region in which hand movement is mapped onto the screen.
            # It is derived from the actual frame size so that it stays consistent
            # with cix/ciy even if the camera ignored the requested resolution.
            cv2.rectangle(img, (frame, frame), (w - frame, h - frame), (255, 0, 255), 2)

            # Map the active region onto the full screen; np.interp clamps values
            # outside the region to the screen edges.
            csx = np.interp(cix, (frame, w - frame), (0, w_screen))
            csy = np.interp(ciy, (frame, h - frame), (0, h_screen))

            # Smooth the motion: move only 1/smoothening of the way to the target.
            current_loc_x = prev_loc_x + (csx - prev_loc_x) / smoothening
            current_loc_y = prev_loc_y + (csy - prev_loc_y) / smoothening

            # The x coordinate is inverted, hence it is subtracted from the screen width.
            # Clamp to [0, size - 1] because autopy rejects points on or beyond the
            # screen edge (e.g. x == w_screen when the smoothed x is 0).
            mouse_x = min(max(w_screen - current_loc_x, 0), w_screen - 1)
            mouse_y = min(max(current_loc_y, 0), h_screen - 1)
            autopy.mouse.move(mouse_x, mouse_y)

            # Draw all landmarks of the hand and the connections between them.
            mp_draw.draw_landmarks(img, hand_lms, mp_hands.HAND_CONNECTIONS)

            prev_loc_x, prev_loc_y = current_loc_x, current_loc_y

        # FPS from the time between consecutive iterations; guard against a zero
        # interval on clocks with coarse resolution.
        current_time = time.time()
        elapsed = current_time - previous_time
        fps = 1 / elapsed if elapsed > 0 else 0
        previous_time = current_time

        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_COMPLEX, 2, (0, 0, 255), 3)
        cv2.imshow("Video", img)

        # waitKey adds a short delay between frames and reports key presses; 'q' quits.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()  # needed to close the window when run from a Jupyter notebook