In [1]:
#!pip install -q mediapipe==0.10.0
#!curl -o pose_landmarker.task -sSL https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_heavy/float16/1/pose_landmarker_heavy.task
#!curl -o image.jpg -sSL https://cdn.pixabay.com/photo/2019/03/12/20/39/girl-4051811_960_720.jpg


In [2]:
import random
from math import acos, degrees

import cv2
import mediapipe as mp
import numpy as np

mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh
mp_hands = mp.solutions.hands

def palm_centroid(coordinates_list):
    """Return the integer (x, y) centroid of a list of 2-D points.

    Args:
        coordinates_list: Sequence of ``[x, y]`` coordinate pairs.

    Returns:
        A tuple ``(x, y)`` holding the column-wise mean of the points,
        each component truncated to ``int``.
    """
    mean_point = np.asarray(coordinates_list).mean(axis=0)
    return int(mean_point[0]), int(mean_point[1])

def _landmarks_to_pixels(hand_landmarks, indices, width, height):
    """Return ``[x, y]`` pixel coordinates for the landmarks at *indices*."""
    return [
        [
            int(hand_landmarks.landmark[index].x * width),
            int(hand_landmarks.landmark[index].y * height),
        ]
        for index in indices
    ]


def _thumb_is_extended(thumb_coordinates, min_angle=150):
    """Return True when the angle at the middle thumb point exceeds *min_angle* degrees."""
    p1 = np.array(thumb_coordinates[0])
    p2 = np.array(thumb_coordinates[1])
    p3 = np.array(thumb_coordinates[2])
    l1 = np.linalg.norm(p2 - p3)
    l2 = np.linalg.norm(p1 - p3)
    l3 = np.linalg.norm(p1 - p2)
    # Pixel coordinates are truncated to int, so neighbouring landmarks can
    # coincide; the angle is then undefined and the thumb is reported as down
    # instead of dividing by zero.
    if l1 == 0 or l3 == 0:
        return False
    # Law of cosines for the angle at p2. Clamp to acos' domain because
    # floating-point rounding can push the ratio slightly outside [-1, 1].
    cos_angle = (l1**2 + l3**2 - l2**2) / (2 * l1 * l3)
    cos_angle = min(1.0, max(-1.0, cos_angle))
    return degrees(acos(cos_angle)) > min_angle


def fingers_up_down(hand_results, thumb_points, palm_points, fingertips_points, finger_base_points, height, width, frame):
    """Classify each finger of the detected hand(s) as raised or folded.

    The thumb is considered raised when the angle formed by its three
    landmarks is greater than 150 degrees. Each remaining finger is
    considered raised when its fingertip is farther from the palm centroid
    than its base landmark is.

    The palm centroid and the hand landmarks are drawn onto ``frame`` as a
    side effect.

    Args:
        hand_results: Object exposing ``multi_hand_landmarks`` (as returned by
            ``mp_hands.Hands.process``).
        thumb_points: Three landmark indices describing the thumb.
        palm_points: Landmark indices used to compute the palm centroid.
        fingertips_points: Landmark indices of the fingertips.
        finger_base_points: Landmark indices of the finger bases, in the same
            order as ``fingertips_points``.
        height: Frame height in pixels.
        width: Frame width in pixels.
        frame: Image the annotations are drawn on (modified in place).

    Returns:
        A boolean array ``[thumb, *fingers]`` for the last hand processed, or
        ``None`` when no hand is present.
    """
    fingers = None
    drawing_styles = mp.solutions.drawing_styles
    for hand_landmarks in hand_results.multi_hand_landmarks or ():
        # Coordinates are rebuilt for every hand so that a second hand does
        # not inherit (or try to append to) the first hand's data.
        coordinates_thumb = _landmarks_to_pixels(hand_landmarks, thumb_points, width, height)
        coordinates_palm = _landmarks_to_pixels(hand_landmarks, palm_points, width, height)
        coordinates_ft = np.array(_landmarks_to_pixels(hand_landmarks, fingertips_points, width, height))
        coordinates_fb = np.array(_landmarks_to_pixels(hand_landmarks, finger_base_points, width, height))

        # Thumb
        thumb_finger = _thumb_is_extended(coordinates_thumb)

        # Index, middle, ring and pinky
        nx, ny = palm_centroid(coordinates_palm)
        cv2.circle(frame, (nx, ny), 3, (0, 255, 0), 2)
        coordinates_centroid = np.array([nx, ny])
        # Distances from the palm centroid to each fingertip / finger base
        d_centroid_ft = np.linalg.norm(coordinates_centroid - coordinates_ft, axis=1)
        d_centroid_fb = np.linalg.norm(coordinates_centroid - coordinates_fb, axis=1)
        fingers = np.append(thumb_finger, d_centroid_ft - d_centroid_fb > 0)

        mp_drawing.draw_landmarks(
            frame,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            drawing_styles.get_default_hand_landmarks_style(),
            drawing_styles.get_default_hand_connections_style())
    return fingers

def play_paint_game():
    """Run the webcam "paint with your nose" game until the user exits.

    Hand gestures control the game (finger states are ordered
    ``[thumb, index, middle, ring, pinky]``):

    * thumb only  -> activates gesture control and switches the side panel
      to the instructions image;
    * fist        -> starts a new nose trace;
    * index only  -> stops drawing and picks a random colour;
    * pinky only  -> exits;
    * anything else stops drawing.

    While drawing, the nose position (face-mesh landmark 4) is recorded every
    frame and all traces are rendered with the current colour. Pressing ESC
    also exits.

    Raises:
        FileNotFoundError: If one of the side-panel images cannot be read.
    """
    # Gesture patterns
    TO_ACTIVATE = np.array([True, False, False, False, False])  # thumb up
    PIEDRA = np.array([False, False, False, False, False])  # fist
    COLOR = np.array([False, True, False, False, False])
    EXIT = np.array([False, False, False, False, True])

    thumb_points = [1, 2, 4]
    # Index, middle, ring and pinky
    palm_points = [0, 1, 2, 5, 9, 13, 17]
    fingertips_points = [8, 12, 16, 20]
    finger_base_points = [6, 10, 14, 18]

    display_size = (800, 600)  # (width, height) of the camera panel

    def load_panel(path):
        """Read a side-panel image and match its height to the camera panel."""
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        # hconcat needs equal row counts; done once here instead of per frame.
        if image.shape[0] != display_size[1]:
            image = cv2.resize(image, display_size)
        return image

    # Images to show
    image1 = load_panel("images/imagen_inicio.jpg")
    image2 = load_panel("images/instruccion_paint.jpg")
    show_instructions = True

    color = (255, 0, 0)

    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    try:
        with mp_face_mesh.FaceMesh() as face_mesh, mp_hands.Hands(
                static_image_mode=False,
                max_num_hands=2,
                min_detection_confidence=0.5) as hands:

            nose_traces = []  # every trace drawn so far, each a list of (x, y)
            current_trace = None  # trace currently receiving nose positions
            drawing_nose_trace = False  # whether nose positions are being recorded
            hand_detection_active = False  # whether gesture control is enabled

            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.flip(frame, 1)
                height, width, _ = frame.shape
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                results_hands = hands.process(frame_rgb)
                if results_hands.multi_hand_landmarks is not None:
                    # Computed once per frame and reused for every gesture check.
                    fingers = fingers_up_down(results_hands, thumb_points, palm_points, fingertips_points, finger_base_points, height, width, frame)
                    if not hand_detection_active and np.array_equal(fingers, TO_ACTIVATE):
                        hand_detection_active = True  # thumb up enables gesture control
                        show_instructions = False
                    if hand_detection_active:
                        for hand_landmarks in results_hands.multi_hand_landmarks:
                            mp_drawing.draw_landmarks(
                                frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(0, 255, 255), thickness=3, circle_radius=5),
                                mp_drawing.DrawingSpec(color=(255, 0, 255), thickness=4, circle_radius=5))

                        if np.array_equal(fingers, PIEDRA):
                            # Start a new trace when a fist is detected
                            if not drawing_nose_trace:
                                current_trace = []
                                nose_traces.append(current_trace)
                                drawing_nose_trace = True
                        elif np.array_equal(fingers, COLOR):
                            drawing_nose_trace = False
                            color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                        elif np.array_equal(fingers, EXIT):
                            break
                        else:
                            drawing_nose_trace = False

                # Draw the traces recorded so far
                for trace in nose_traces:
                    for start, end in zip(trace, trace[1:]):
                        cv2.line(frame, start, end, color, 2)

                # Face detection and storage of the nose position
                results_face = face_mesh.process(frame_rgb)
                if results_face.multi_face_landmarks:
                    for face_landmarks in results_face.multi_face_landmarks:
                        nose = face_landmarks.landmark[4]
                        cx, cy = int(nose.x * width), int(nose.y * height)
                        # The trace is stored once and extended in place, so
                        # memory grows linearly with the number of points.
                        if drawing_nose_trace:
                            current_trace.append((cx, cy))
                        cv2.circle(frame, (cx, cy), 5, (0, 255, 0), -1)

                resized_image = cv2.resize(frame, display_size)
                side_panel = image1 if show_instructions else image2
                # Concatenate images horizontally
                n_image = cv2.hconcat([side_panel, resized_image])
                cv2.imshow("MediaPipe Pose", n_image)

                if cv2.waitKey(1) & 0xFF == 27:
                    break
    finally:
        # Always free the camera and close windows, even after an error.
        cap.release()
        cv2.destroyAllWindows()

Documentation:
https://github.com/google/mediapipe/blob/master/docs/solutions/face_mesh.md
https://omes-va.com/mediapipe-hands-python/
https://omes-va.com/deteccion-de-rostros-mediapipe-python/