In [5]:
#!pip install -q mediapipe==0.10.0
#!curl -o pose_landmarker.task -sSL https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_heavy/float16/1/pose_landmarker_heavy.task
#!curl -o image.jpg -sSL https://cdn.pixabay.com/photo/2019/03/12/20/39/girl-4051811_960_720.jpg


In [6]:
import cv2
import mediapipe as mp
import numpy as np
import pygame
from math import acos, degrees

# Module-level setup: bring up pygame audio, load every sound used by the game
# and create short aliases for the MediaPipe solution modules. All names
# defined here are read as globals by play_music_game() / fingers_up_down().

# Initialize pygame
pygame.init()

# Load the background music file (path is relative to the current working
# directory; loading fails with FileNotFoundError when it is missing)
background_music_file = "sounds/base.mp3"
# NOTE(review): pygame.init() above normally initializes the mixer already;
# the explicit call is kept so the mixer is guaranteed to be ready here.
pygame.mixer.init()
background_music = pygame.mixer.Sound(background_music_file)

# Set the initial volume for background music (pygame volumes range 0.0-1.0)
background_music_volume = 0.3
background_music.set_volume(background_music_volume)

# Sound files played when a circle is touched during pose detection.
# The list order defines which circle triggers which sound (index i -> circle i).
sound_files = [
    "sounds/sound1.mp3",
    "sounds/sound2.mp3",
    "sounds/sound3.mp3",
    "sounds/sound4.mp3",
    "sounds/sound5.mp3",
    "sounds/sound6.mp3",
    # Add more sound files as needed (play_music_game uses at most the first 8)
]


# Load every file eagerly into a pygame Sound object, keeping the list order
sounds = [pygame.mixer.Sound(file) for file in sound_files]

# Volume applied to a circle's sound when it is triggered
pose_detection_volume = 0.5

# Short aliases for the MediaPipe modules used for drawing and detection
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands

def _pixel_distance(x1, y1, x2, y2):
    """Return the Euclidean distance between pixel points (x1, y1) and (x2, y2)."""
    return np.hypot(x1 - x2, y1 - y2)


def _landmark_to_pixel(landmark, frame_width, frame_height):
    """Convert a normalized MediaPipe landmark to integer (x, y) pixel coordinates."""
    return int(landmark.x * frame_width), int(landmark.y * frame_height)


def play_music_game():
    """Run the webcam music game until the player leaves it.

    The game has two phases:

    1. Hand phase: each mirrored webcam frame is passed to MediaPipe Hands and
       classified with ``fingers_up_down``. The game starts once the result
       equals ``TO_ACTIVATE`` (thumb flag set, the other four flags cleared).
    2. Pose phase: the background music loops, the pose skeleton is drawn on a
       black canvas together with a ring of circles (one per entry in
       ``sounds``, at most 8). When pose landmark 19, 20, 31 or 32 (index
       fingers and foot tips in the MediaPipe Pose model) enters a circle, the
       matching sound is played once; it can be played again after all of
       those landmarks have left the circle. Moving landmark 19 or 20 onto
       the yellow "Exit" circle ends the game, as does pressing Esc.

    The camera frame is shown next to an instruction image. The webcam, the
    OpenCV windows and the background music are always released/stopped on
    the way out, including when an exception is raised.

    Raises:
        FileNotFoundError: if one of the two instruction images cannot be read.
    """
    circle_exit_x, circle_exit_y = 40, 40
    display_size = (800, 600)  # (width, height) of the camera panel

    # Hand landmark indices handed to fingers_up_down().
    # Thumb
    thumb_points = [1, 2, 4]
    # Index, middle, ring and little finger
    palm_points = [0, 1, 2, 5, 9, 13, 17]
    fingertips_points = [8, 12, 16, 20]
    finger_base_points = [6, 10, 14, 18]
    # Finger combination that starts the game: only the thumb flag is set.
    TO_ACTIVATE = np.array([True, False, False, False, False])

    # Geometry of the playing field (pixels, in the un-resized camera frame).
    radius = 230
    thickness = 2
    circle_radius = 35
    threshold_distance = 30
    num_circles = min(len(sounds), 8)
    # Per-circle "currently touched" state, used to play each sound once per touch.
    circle_touched = [False] * num_circles

    # Side images shown next to the camera frame.
    image_paths = ("images/imagen_inicio.jpg", "images/instruccion_music.jpg")
    image1 = cv2.imread(image_paths[0])
    image2 = cv2.imread(image_paths[1])
    for path, loaded in zip(image_paths, (image1, image2)):
        # cv2.imread signals failure by returning None instead of raising.
        if loaded is None:
            raise FileNotFoundError(f"Could not read image '{path}'.")

    def fit_side_panel(panel):
        # hconcat needs equal row counts; resize once here instead of every frame.
        if panel.shape[0] != display_size[1]:
            return cv2.resize(panel, display_size)
        return panel

    image1 = fit_side_panel(image1)
    image2 = fit_side_panel(image2)

    # For webcam input:
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        # Without this check the read loop below would spin forever.
        cap.release()
        print("Could not open the webcam.")
        return

    exit_requested = False
    sound_background_playing = False
    hand_detection_active = True

    try:
        with mp_pose.Pose(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as pose, mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            min_detection_confidence=0.5) as hands:

            while True:
                success, image = cap.read()
                if not success:
                    print("Ignoring empty camera frame.")
                    continue

                # Mirror the frame so the player sees themselves as in a mirror.
                image = cv2.flip(image, 1)

                # MediaPipe expects RGB. Only the RGB copy is marked read-only;
                # the BGR frame stays writeable because it is drawn on below.
                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image_rgb.flags.writeable = False

                if hand_detection_active:
                    # Process hands
                    height, width, _ = image.shape
                    results_hands = hands.process(image_rgb)
                    if results_hands.multi_hand_landmarks is not None:
                        fingers = fingers_up_down(results_hands, thumb_points, palm_points, fingertips_points, finger_base_points, height, width, image)
                        # array_equal is False (never an error) on None/shape mismatch.
                        if fingers is not None and np.array_equal(fingers, TO_ACTIVATE):
                            hand_detection_active = False
                else:
                    # Process pose
                    if not sound_background_playing:
                        background_music.play(-1)  # -1 loops indefinitely
                        sound_background_playing = True

                    results = pose.process(image_rgb)
                    if results.pose_landmarks is not None:
                        # Draw the skeleton on a black canvas instead of the camera frame.
                        image = np.zeros_like(image)
                        mp_drawing.draw_landmarks(
                            image,
                            results.pose_landmarks,
                            mp_pose.POSE_CONNECTIONS,
                            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())

                        frame_height, frame_width = image.shape[:2]
                        center_coordinates = (frame_width // 2, frame_height // 2)
                        cv2.circle(image, center_coordinates, radius, (0, 255, 0), thickness)

                        landmarks = results.pose_landmarks.landmark
                        hand_points = [_landmark_to_pixel(landmarks[index], frame_width, frame_height)
                                       for index in (19, 20)]
                        foot_points = [_landmark_to_pixel(landmarks[index], frame_width, frame_height)
                                       for index in (31, 32)]
                        trigger_points = hand_points + foot_points

                        cv2.circle(image, (circle_exit_x, circle_exit_y), circle_radius, (0, 255, 255), -1)
                        cv2.putText(image, "Exit", (circle_exit_x - 15, circle_exit_y + 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2, cv2.LINE_AA)

                        # Only the hands can trigger the exit; checked once per
                        # frame, independently of how many sound circles exist.
                        exit_requested = any(
                            _pixel_distance(x, y, circle_exit_x, circle_exit_y) < threshold_distance
                            for x, y in hand_points)

                        for i in range(num_circles):
                            # Spread the circles evenly along the big ring.
                            angle_rad = 2 * np.pi * i / num_circles
                            circle_x = int(center_coordinates[0] + radius * np.cos(angle_rad))
                            circle_y = int(center_coordinates[1] + radius * np.sin(angle_rad))
                            cv2.circle(image, (circle_x, circle_y), circle_radius, (255, 0, 0), -1)

                            if exit_requested:
                                continue

                            touched_now = any(
                                _pixel_distance(x, y, circle_x, circle_y) < threshold_distance
                                for x, y in trigger_points)
                            # Play only on the not-touched -> touched transition
                            # so a held position does not restart the sound each frame.
                            if touched_now and not circle_touched[i]:
                                sounds[i].set_volume(pose_detection_volume)
                                sounds[i].play()
                            circle_touched[i] = touched_now

                resized_image = cv2.resize(image, display_size)
                imAux = image1 if hand_detection_active else image2

                # Concatenate images horizontally
                n_image = cv2.hconcat([imAux, resized_image])

                cv2.imshow("MediaPipe Pose", n_image)
                # 27 is the Esc key.
                if cv2.waitKey(5) & 0xFF == 27 or exit_requested:
                    break
    finally:
        # Release resources even if a frame raised.
        background_music.stop()
        cap.release()
        cv2.destroyAllWindows()
    
def palm_centroid(coordinates_list):
    """Return the centroid of a list of 2-D points as a tuple of ints.

    Args:
        coordinates_list: sequence of ``[x, y]`` points.

    Returns:
        ``(x, y)`` where each component is the per-axis mean truncated
        towards zero by ``int``.
    """
    mean_point = np.array(coordinates_list).mean(axis=0)
    return tuple(int(mean_point[axis]) for axis in (0, 1))

def fingers_up_down(hand_results, thumb_points, palm_points, fingertips_points, finger_base_points, height, width, frame):
    """Classify the fingers of the detected hand(s) as extended or folded.

    Every detected hand is analysed on its own and drawn on ``frame`` (palm
    centroid dot plus the MediaPipe hand skeleton).

    Args:
        hand_results: result of ``mp_hands.Hands.process``; its
            ``multi_hand_landmarks`` attribute is read.
        thumb_points: three landmark indices along the thumb; the angle at
            the middle one decides whether the thumb is extended.
        palm_points: landmark indices whose centroid approximates the palm centre.
        fingertips_points: landmark indices of the finger tips.
        finger_base_points: landmark indices compared against the tips, in
            the same finger order as ``fingertips_points``.
        height: frame height in pixels, used to de-normalize ``y``.
        width: frame width in pixels, used to de-normalize ``x``.
        frame: image that is drawn on in place.

    Returns:
        A boolean NumPy array ``[thumb, finger_1, ..., finger_n]`` (``True`` =
        extended) for the last hand in ``multi_hand_landmarks``, or ``None``
        when no hand was detected.
    """
    fingers = None
    if not hand_results.multi_hand_landmarks:
        return fingers

    def landmark_pixels(hand_landmarks, indices):
        # Landmarks are normalized to [0, 1]; scale them to pixel coordinates.
        return [
            [int(hand_landmarks.landmark[index].x * width),
             int(hand_landmarks.landmark[index].y * height)]
            for index in indices
        ]

    for hand_landmarks in hand_results.multi_hand_landmarks:
        # Coordinates are rebuilt for every hand so that one hand's points
        # never leak into the classification of the next one.
        coordinates_thumb = landmark_pixels(hand_landmarks, thumb_points)
        coordinates_palm = landmark_pixels(hand_landmarks, palm_points)
        coordinates_ft = np.array(landmark_pixels(hand_landmarks, fingertips_points))
        coordinates_fb = np.array(landmark_pixels(hand_landmarks, finger_base_points))

        ##########################
        # Thumb
        p1 = np.array(coordinates_thumb[0])
        p2 = np.array(coordinates_thumb[1])
        p3 = np.array(coordinates_thumb[2])
        l1 = np.linalg.norm(p2 - p3)
        l2 = np.linalg.norm(p1 - p3)
        l3 = np.linalg.norm(p1 - p2)
        # Angle at p2 via the law of cosines.
        denominator = 2 * l1 * l3
        if denominator == 0:
            # Coincident points: the angle is undefined, treat the thumb as folded.
            thumb_finger = np.array(False)
        else:
            # Clamp to acos' domain; rounding can push the ratio just outside [-1, 1].
            cos_angle = float(np.clip((l1**2 + l3**2 - l2**2) / denominator, -1.0, 1.0))
            angle = degrees(acos(cos_angle))
            # An almost straight thumb (> 150 degrees) counts as extended.
            thumb_finger = np.array(angle > 150)

        ################################
        # Index, middle, ring and little finger
        nx, ny = palm_centroid(coordinates_palm)
        cv2.circle(frame, (nx, ny), 3, (0, 255, 0), 2)
        coordinates_centroid = np.array([nx, ny])
        # Distances from the palm centre to each tip and to each base joint
        d_centrid_ft = np.linalg.norm(coordinates_centroid - coordinates_ft, axis=1)
        d_centrid_fb = np.linalg.norm(coordinates_centroid - coordinates_fb, axis=1)
        # A finger is extended when its tip is farther from the palm than its base.
        fingers = np.append(thumb_finger, d_centrid_ft - d_centrid_fb > 0)
        mp_drawing.draw_landmarks(
            frame,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    return fingers

FileNotFoundError: No file 'sounds/base.mp3' found in working directory 'e:\VC\P7\minigames'.

Documentation:
https://github.com/google/mediapipe/blob/master/docs/solutions/pose.md
