In [1]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.15-cp310-cp310-macosx_11_0_x86_64.whl.metadata (9.7 kB)
Collecting jax (from mediapipe)
  Downloading jax-0.4.34-py3-none-any.whl.metadata (22 kB)
Collecting jaxlib (from mediapipe)
  Downloading jaxlib-0.4.34-cp310-cp310-macosx_10_14_x86_64.whl.metadata (983 bytes)
Collecting opencv-contrib-python (from mediapipe)
  Downloading opencv_contrib_python-4.10.0.84-cp37-abi3-macosx_12_0_x86_64.whl.metadata (20 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.15-cp310-cp310-macosx_11_0_x86_64.whl (50.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.6/50.6 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading sounddevice-0.5.1-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl (107 kB)
Downloading jax-0.4.34-py3-none-any.whl (2.1 MB)
[2K   [90

In [2]:
import cv2
import numpy as np
import mediapipe as mp

# Configuración de MediaPipe
mp_hands = mp.solutions.hands.Hands(
    static_image_mode=False, 
    max_num_hands=1, 
    min_detection_confidence=0.5, 
    min_tracking_confidence=0.5
)
cap = cv2.VideoCapture(0)  # Iniciar la cámara web

# Almacenamiento de datos y etiquetas
data, labels = [], []

def capture_gesture(label):
    """Captura gestos y guarda los landmarks junto con la etiqueta."""
    print(f"Grabando gesto {label}. Presiona 'q' para detener.")
    
    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # Refleja la imagen horizontalmente
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = mp_hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                landmarks = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]).flatten()
                data.append(landmarks)  # Guardar landmarks
                labels.append(label)    # Guardar etiqueta del gesto

                # Dibujar los puntos en la mano detectada
                mp.solutions.drawing_utils.draw_landmarks(
                    frame, hand_landmarks, mp.solutions.hands.HAND_CONNECTIONS
                )

        cv2.imshow('Grabación de Gestos', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

# Captura de gestos con etiquetas 0: Piedra, 1: Papel, 2: Tijeras
for i, gesture in enumerate(["Piedra (0)", "Papel (1)", "Tijeras (2)"]):
    input(f"Presiona Enter para grabar el gesto: {gesture}")
    capture_gesture(i)

# Guardar los datos y etiquetas en archivos .npy
cap.release()
cv2.destroyAllWindows()
np.save('rps_dataset.npy', np.array(data))
np.save('rps_labels.npy', np.array(labels))
print("Dataset guardado con éxito en 'rps_dataset.npy' y 'rps_labels.npy'.")


I0000 00:00:1729366253.762061  247417 gl_context.cc:357] GL version: 2.1 (2.1 ATI-6.1.13), renderer: AMD Radeon Pro 5500M OpenGL Engine
W0000 00:00:1729366253.779455  253657 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1729366253.793188  253657 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Grabando gesto 0. Presiona 'q' para detener.




Grabando gesto 1. Presiona 'q' para detener.
Grabando gesto 2. Presiona 'q' para detener.
Dataset guardado con éxito en 'rps_dataset.npy' y 'rps_labels.npy'.


# train-gesture-classifier

In [4]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Cargar los datos
X = np.load('rps_dataset.npy')
y = np.load('rps_labels.npy')

# División del dataset en entrenamiento y validación
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Definir el modelo con la capa de entrada explícita
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')  # 3 clases: piedra, papel, tijeras
])

# Compilar el modelo
model.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])

# Entrenar el modelo
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=32)

# Guardar el modelo en formato Keras
model.save('rps_model.keras')
print("Modelo entrenado y guardado como 'rps_model.keras'")


Epoch 1/30
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4624 - loss: 1.0000 - val_accuracy: 0.5877 - val_loss: 0.8859
Epoch 2/30
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6992 - loss: 0.8162 - val_accuracy: 0.7325 - val_loss: 0.6615
Epoch 3/30
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7555 - loss: 0.6176 - val_accuracy: 0.7632 - val_loss: 0.5176
Epoch 4/30
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8108 - loss: 0.4641 - val_accuracy: 0.8947 - val_loss: 0.4198
Epoch 5/30
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8851 - loss: 0.3712 - val_accuracy: 0.9211 - val_loss: 0.3152
Epoch 6/30
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9290 - loss: 0.2950 - val_accuracy: 0.9386 - val_loss: 0.2323
Epoch 7/30
[1m29/29[0m [32m━━━━━━━━━━

# rock-paper-scissors.

In [5]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp

# Cargar el modelo entrenado
model = tf.keras.models.load_model('rps_model.keras')

# Inicializar MediaPipe para la detección de manos
mp_hands = mp.solutions.hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

# Nombres de las clases
class_names = ['Piedra', 'Papel', 'Tijeras']

# Captura desde la cámara web
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.flip(frame, 1)  # Refleja la imagen para una interacción natural
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = mp_hands.process(frame_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            landmarks = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]).flatten()
            
            # Realizar la predicción
            prediction = model.predict(landmarks[np.newaxis, ...])
            class_id = np.argmax(prediction)
            class_name = class_names[class_id]

            # Dibujar los puntos clave y el nombre de la clase
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp.solutions.hands.HAND_CONNECTIONS)
            cv2.putText(frame, class_name, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Mostrar la imagen con las predicciones
    cv2.imshow('Piedra, Papel o Tijeras', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


I0000 00:00:1729366706.692745  247417 gl_context.cc:357] GL version: 2.1 (2.1 ATI-6.1.13), renderer: AMD Radeon Pro 5500M OpenGL Engine
W0000 00:00:1729366706.706919  261539 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1729366706.719324  261539 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14