In [None]:
# %conda install -c conda-forge selenium
# %pip install webdriver_manager

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementNotInteractableException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.firefox import GeckoDriverManager

import mediapipe as mp
from mediapipe.framework.formats import landmark_pb2
from mediapipe import solutions

import cv2 as cv
import numpy as np
import time, random, threading, math

In [3]:
MARGIN = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

URL = "https://www.google.com/search?q=snake"

In [4]:
def waitFinishLoad(driver, byLoader=By.ID, valueLoader=None, dissapear=False, extra_wait=False):
    if valueLoader is None: return
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((byLoader, valueLoader)))
    # if dissapear: WebDriverWait(driver, 10).until(EC.invisibility_of_element((byLoader, valueLoader)))
    if extra_wait: time.sleep(random.choice([0.6, 0.5, 0.8, 0.7, 0.9, 0.4]))

def get_elem(driver, by, value):
    return driver.find_element(by, value)

def get_elements(driver, by, value):
    return driver.find_elements(by, value)

def click(driver, by, value, wait=True, byLoader=By.ID, valueLoader=None, extra_wait=False):
    if wait: waitFinishLoad(driver, byLoader, valueLoader, extra_wait)
    get_elem(driver, by, value).click()

def click_script(driver, value, attr = "OnClick",by=By.ID, wait=True, byLoader=By.ID, valueLoader=None, extra_wait=False):
    if wait: waitFinishLoad(driver, byLoader, valueLoader, extra_wait)
    driver.execute_script(get_elem(driver, by, value).get_dom_attribute(attr))

In [5]:
def gestos(driver, direction):
    action = ActionChains(driver)
    if direction == 'UP': action.send_keys(Keys.ARROW_UP)
    elif direction == 'RIGHT': action.send_keys(Keys.ARROW_RIGHT)
    elif direction == 'DOWN': action.send_keys(Keys.ARROW_DOWN)
    elif direction == 'LEFT': action.send_keys(Keys.ARROW_LEFT)
    if direction == None: 
         click(driver, By.XPATH, value=f"//div[contains(@jsname, 'NSjDf')]", byLoader=By.XPATH, valueLoader=f"//div[contains(@jsname, 'NSjDf')]")
    action.perform()

def calc_distance(wrist, destino):
    return math.sqrt((wrist.x-destino.x)**2 + (wrist.y-destino.y)**2 + (wrist.z-destino.z)**2) 

def is_figner_up(finger_pip, finger_tip, wrist):
    dist_pip = calc_distance(wrist,finger_pip)
    dist_tip = calc_distance(wrist,finger_tip)

    return dist_tip > dist_pip

def draw_landmarks_on_image(driver, rgb_image, detection_result):
    hand_landmarks_list = detection_result.hand_landmarks
    direction = ""
    annotated_image = np.copy(rgb_image)

  	# Loop through the detected hands to visualize.
    for idx in range(len(hand_landmarks_list)):
        hand_landmarks = hand_landmarks_list[idx]

        wrist = hand_landmarks[0]
        thumb = hand_landmarks[4]  

        if(thumb.x < wrist.x and math.sqrt((thumb.y - wrist.y)**2) < math.sqrt((thumb.x - wrist.x)**2))*0.05:direction = "RIGHT" 
        if(thumb.x > wrist.x and math.sqrt((thumb.y - wrist.y)**2) < math.sqrt((thumb.x - wrist.x)**2))*0.05:direction = "LEFT"  
        if(thumb.y < wrist.y and math.sqrt((thumb.x - wrist.x)**2) < math.sqrt((thumb.y - wrist.y)**2))*0.05:direction = "UP"    
        if(thumb.y > wrist.y and math.sqrt((thumb.x - wrist.x)**2) < math.sqrt((thumb.y - wrist.y)**2))*0.05:direction = "DOWN"  

		
        if is_figner_up(hand_landmarks[18], hand_landmarks[20], wrist): direction = None

        t = threading.Thread(target=gestos, args=[driver, direction])
        t.start()
        t.join()    

        # Draw the hand landmarks.
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend([
          landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
          annotated_image,
          hand_landmarks_proto,
          solutions.hands.HAND_CONNECTIONS,
          solutions.drawing_styles.get_default_hand_landmarks_style(),
          solutions.drawing_styles.get_default_hand_connections_style())    
        
        # Get the top left corner of the detected hand's bounding box.
        height, width, _ = annotated_image.shape
        x_coordinates = [landmark.x for landmark in hand_landmarks]
        y_coordinates = [landmark.y for landmark in hand_landmarks]
        text_x = int(min(x_coordinates) * width)
        text_y = int(min(y_coordinates) * height) - MARGIN  

        # Draw direction
        cv.putText(annotated_image, f"{direction}",(text_x, text_y), cv.FONT_HERSHEY_DUPLEX,FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv.LINE_AA)
    
    return annotated_image

In [6]:
GeckoDriverManager().install()
driver = webdriver.Firefox()

In [7]:
driver.get(url=URL) 
click(driver, By.ID, "L2AGLb", valueLoader="L2AGLb")
click(driver, By.XPATH, value=f"//div[contains(@jsname, 'ZC7Tjb')]", byLoader=By.XPATH, valueLoader=f"//div[contains(@data-loaded, 'true')]")
click(driver, By.XPATH, value=f"//div[contains(@jsname, 'NSjDf')]")

In [8]:
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path="model/hand_landmarker.task"),
    running_mode=VisionRunningMode.VIDEO,
    num_hands=1)

In [9]:
cam = cv.VideoCapture(0) 
cv.namedWindow("Cam") 

with HandLandmarker.create_from_options(options) as landmarker:

    while cam.isOpened():  
        # Read frames from videoCaptura and show
        _, frame = cam.read() 
        cv.imshow("Cam", frame)

        # The landmarker is initialized. 
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
        hand_landmarker_result = landmarker.detect_for_video(mp_image, int(round(time.time() * 100)))
        annotated_image = draw_landmarks_on_image(driver, mp_image.numpy_view(), hand_landmarker_result)
        cv.imshow("Salida", annotated_image)

        # Press esc to exit
        if cv.waitKey(10) & 0xFF == 27: cam.release()
    
    cv.destroyAllWindows()

Exception in thread Thread-47 (gestos):
Traceback (most recent call last):
  File "c:\Users\nicol\miniconda3\envs\ia\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "c:\Users\nicol\miniconda3\envs\ia\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\nicol\AppData\Local\Temp\ipykernel_21448\607062062.py", line 8, in gestos
  File "C:\Users\nicol\AppData\Local\Temp\ipykernel_21448\2723431773.py", line 15, in click
  File "c:\Users\nicol\miniconda3\envs\ia\lib\site-packages\selenium\webdriver\remote\webelement.py", line 94, in click
    self._execute(Command.CLICK_ELEMENT)
  File "c:\Users\nicol\miniconda3\envs\ia\lib\site-packages\selenium\webdriver\remote\webelement.py", line 395, in _execute
    return self._parent.execute(command, params)
  File "c:\Users\nicol\miniconda3\envs\ia\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 354, in execute
    self.error_handler.check_response(response)
  File 