In [2]:
import os
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
import matplotlib.pyplot as plt
import time
# Directorios para entrenamiento, validación y prueba
train_dir = 'new_dataset/train' 
NP_PATH = 'new_dataset/NP_PATH'

actions = []

# creating data training folders

In [17]:
## creating folders for the landmarks for each frame of each video
try:
    for action in os.listdir(train_dir):
        action_path = os.path.join(train_dir, action)
        if os.path.isdir(action_path):
            print(f"Processing action: {action}")
            actions.append(action)
    for action in actions:
        action_path=os.path.join(NP_PATH, action)
        os.makedirs(action_path, exist_ok=True)
        print(f'creando:{action_path}')
        for video in range(5):
            video_path=os.path.join(action_path, str(video))
            os.makedirs(video_path, exist_ok=True)
            print(f'creando:{video_path}')
except Exception as e:
    print(e)

Processing action: Aprender
Processing action: Ayuda
Processing action: Bien
Processing action: Buenas noches
Processing action: Buenos dias
Processing action: Comer
Processing action: Como estas
Processing action: Como te llamas
Processing action: Cuando
Processing action: Donde
Processing action: Entiendo
Processing action: Feliz
Processing action: Gracias
Processing action: Hasta luego
Processing action: Hola
Processing action: Igual
Processing action: Mal
Processing action: Me gusta
Processing action: No
Processing action: No puedo
Processing action: No quiero
Processing action: Ocupado
Processing action: Otra vez
Processing action: Para que
Processing action: Perdon
Processing action: Por favor
Processing action: Por que
Processing action: Pregunta
Processing action: Puedo
Processing action: Que
Processing action: Que pasa
Processing action: Quien
Processing action: Quiero
Processing action: Si
Processing action: Tu
Processing action: Yo
Processing action: Yo no se
Processing acti

# Preprocesing data training

In [18]:
mp_holistic = mp.solutions.holistic  # Modelo Holístico

def draw_landmarks(image, results_holistic):
    mp_drawing = mp.solutions.drawing_utils  # Utilidades de dibujo

    # Configuración para líneas más delgadas
    landmark_drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
    connection_drawing_spec = mp_drawing.DrawingSpec(thickness=1)

    if results_holistic.pose_landmarks:
        mp_drawing.draw_landmarks(
            image, results_holistic.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
            landmark_drawing_spec=landmark_drawing_spec,
            connection_drawing_spec=connection_drawing_spec)
    
    if results_holistic.left_hand_landmarks:
        mp_drawing.draw_landmarks(
            image, results_holistic.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
            landmark_drawing_spec=landmark_drawing_spec,
            connection_drawing_spec=connection_drawing_spec)
    
    if results_holistic.right_hand_landmarks:
        mp_drawing.draw_landmarks(
            image, results_holistic.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
            landmark_drawing_spec=landmark_drawing_spec,
            connection_drawing_spec=connection_drawing_spec)
    
    return image

def extract_keypoints(results_holistic):
    # Extracción de keypoints de la pose
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results_holistic.pose_landmarks.landmark]).flatten() if results_holistic.pose_landmarks else np.zeros(33*4)
    
    # Extracción de keypoints de la mano izquierda
    lh = np.zeros(21*3)
    if results_holistic.left_hand_landmarks:
        lh = np.array([[res.x, res.y, res.z] for res in results_holistic.left_hand_landmarks.landmark]).flatten()
    
    # Extracción de keypoints de la mano derecha
    rh = np.zeros(21*3)
    if results_holistic.right_hand_landmarks:
        rh = np.array([[res.x, res.y, res.z] for res in results_holistic.right_hand_landmarks.landmark]).flatten()
    
    return np.concatenate([pose, lh, rh])
    
def process_all_videos(root_path, sequence_length):
    """
    Procesa todos los videos en los subdirectorios de la ruta raíz especificada.
    
    Args:
    root_path (str): Ruta al directorio raíz que contiene las carpetas de videos.
    sequence_length (int): Número de frames a procesar por video.
    NP_PATH (str): Ruta donde se guardarán los archivos numpy con los keypoints.
    """
    # Configuración de MediaPipe
    actions = []

    with mp_holistic.Holistic(
        static_image_mode=False,
        model_complexity=2,                 # 0 para más rápido, 2 para más preciso
        smooth_landmarks=True,
        min_detection_confidence=0.7,       # Aumentar el umbral para reducir falsos positivos
        min_tracking_confidence=0.7) as holistic:
        
        for action in os.listdir(root_path):
            action_path = os.path.join(root_path, action)
            if os.path.isdir(action_path):
                print(f"Processing action: {action}")
                num_video =0
                for video in os.listdir(action_path):
                    if video.endswith(('.mp4', '.avi', '.mov')):
                        video_path = os.path.join(action_path, video)
                        process_video(video_path, holistic, action, num_video, sequence_length)
                        num_video+=1
        print("Processed actions:", actions)

def process_video(video_path, holistic, action, video, sequence_length):
    """
    Procesa un único video y extrae los landmarks.
    
    Args:
    video_path (str): Ruta al archivo de video a procesar.
    hands (mediapipe.solutions.hands.Hands): Instancia de MediaPipe Hands.
    pose (mediapipe.solutions.pose.Pose): Instancia de MediaPipe Pose.
    action (str): Nombre de la acción (directorio padre del video).
    video (str): Nombre del archivo de video.
    sequence_length (int): Número de frames a procesar por video.
    NP_PATH (str): Ruta donde se guardarán los archivos numpy con los keypoints.
    """
    cap = cv2.VideoCapture(video_path)
    
    # Crear una ventana para mostrar el video
    window_name = f"Processing: {action} - {video}"
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(window_name, 640, 720)

    for frame_num in range(sequence_length):
        success, image = cap.read()
        if not success:
            break
        
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results_holistic = holistic.process(image_rgb)
        
        # Dibujar los landmarks en la imagen
        image_with_landmarks = draw_landmarks(image, results_holistic)
        
        # Mostrar la imagen procesada
        cv2.imshow(window_name, image_with_landmarks)
        
        if frame_num == 0:
            cv2.waitKey(500)
        else:
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        
        keypoints = extract_keypoints(results_holistic)
        npy_path = os.path.join(NP_PATH, action, str(video), str(frame_num))
        np.save(npy_path, keypoints)
    
    cap.release()
    cv2.destroyWindow(window_name)
    print(f"Processed {frame_num + 1} frames from {video_path}")
    # Agregar una pausa de 2 segundos entre videos
    time.sleep(2)




In [19]:
# running the function

process_all_videos(train_dir, 34) 


Processing action: Aprender




Processed 34 frames from new_dataset/train\Aprender\Aprender.mp4
Processed 34 frames from new_dataset/train\Aprender\Aprender2.mp4
Processed 34 frames from new_dataset/train\Aprender\Aprender3.mp4
Processed 34 frames from new_dataset/train\Aprender\Aprender4.mp4
Processed 34 frames from new_dataset/train\Aprender\Aprender5.mp4
Processing action: Ayuda
Processed 34 frames from new_dataset/train\Ayuda\Ayuda.mp4
Processed 34 frames from new_dataset/train\Ayuda\Ayuda2.mp4
Processed 34 frames from new_dataset/train\Ayuda\Ayuda3.mp4
Processed 34 frames from new_dataset/train\Ayuda\Ayuda4.mp4
Processed 34 frames from new_dataset/train\Ayuda\Ayuda5.mp4
Processing action: Bien
Processed 34 frames from new_dataset/train\Bien\Bien.mp4
Processed 34 frames from new_dataset/train\Bien\Bien2.mp4
Processed 34 frames from new_dataset/train\Bien\Bien3.mp4
Processed 34 frames from new_dataset/train\Bien\Bien4.mp4
Processed 34 frames from new_dataset/train\Bien\Bien5.mp4
Processing action: Buenas noches
P

In [12]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import pandas as pd
import json


In [13]:
actions = list(pd.Series(actions).unique())
label_map = {label:num for num, label in enumerate(actions)}
with open('datos.json', 'w') as f:
    json.dump(label_map, f)

In [47]:
label_map

{'Aprender': 0,
 'Ayuda': 1,
 'Bien': 2,
 'Buenas noches': 3,
 'Buenos dias': 4,
 'Comer': 5,
 'Como estas': 6,
 'Como te llamas': 7,
 'Cuando': 8,
 'Donde': 9,
 'Entiendo': 10,
 'Feliz': 11,
 'Gracias': 12,
 'Hasta luego': 13,
 'Hola': 14,
 'Igual': 15,
 'Mal': 16,
 'Me gusta': 17,
 'No': 18,
 'No puedo': 19,
 'No quiero': 20,
 'Ocupado': 21,
 'Otra vez': 22,
 'Para que': 23,
 'Perdon': 24,
 'Por favor': 25,
 'Por que': 26,
 'Pregunta': 27,
 'Puedo': 28,
 'Que': 29,
 'Que pasa': 30,
 'Quien': 31,
 'Quiero': 32,
 'Si': 33,
 'Tu': 34,
 'Yo': 35,
 'Yo no se': 36,
 'Yo se': 37}

In [48]:
try:
    sequences, labels = [], []
    for action in actions:
        print(action)
        for sequence in np.array(os.listdir(os.path.join(NP_PATH, action))).astype(int):
            print(sequence)
            window = []
            for frame_num in range(34):
                print(frame_num)
                res = np.load(os.path.join(NP_PATH, action, str(sequence), "{}.npy".format(frame_num)))
                window.append(res)
            sequences.append(window)
            labels.append(label_map[action])
except Exception as e:
    print(e)

Aprender
0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
1
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
2
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
3
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
4
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
Ayuda
0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
1
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
2
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
3
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
4
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
Bien
0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15


In [49]:
np.array(sequences).shape

(380, 34, 258)

In [50]:
np.array(labels).shape

(380,)

In [51]:
X = np.array(sequences)
y = to_categorical(labels).astype(int)
val_x, y_val=X, y

In [56]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.07)

In [59]:
y_test.shape

(19, 38)

In [33]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.initializers import HeNormal
from tensorflow.keras.regularizers import l2 
from tensorflow.keras.optimizers import Adam
from keras import Input

In [58]:
print(len(actions))

76


In [31]:
import matplotlib.pyplot as plt

def plot_training_history(history):
    # Crear una figura con dos subplots
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))

    # Graficar la pérdida
    ax1.plot(history.history['loss'], label='Pérdida de entrenamiento')
    ax1.plot(history.history['val_loss'], label='Pérdida de validación')
    ax1.set_title('Pérdida del modelo')
    ax1.set_ylabel('Pérdida')
    ax1.set_xlabel('Época')
    ax1.legend(loc='upper right')
    ax1.grid(True)

    # Graficar la precisión
    ax2.plot(history.history['categorical_accuracy'], label='Precisión de entrenamiento')
    ax2.plot(history.history['val_categorical_accuracy'], label='Precisión de validación')
    ax2.set_title('Precisión del modelo')
    ax2.set_ylabel('Precisión')
    ax2.set_xlabel('Época')
    ax2.legend(loc='lower right')
    ax2.grid(True)

    # Ajustar el espacio entre subplots
    plt.tight_layout()

    # Mostrar la figura
    plt.show()



# model

In [134]:

log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)
# reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)

# Definir el callback de Early Stopping
early_stopping = EarlyStopping(
    monitor='val_loss',  # Métrica a monitorear
    patience=30,         # Número de épocas a esperar antes de detener si no hay mejora
    restore_best_weights=True,  # Restaurar los mejores pesos encontrados
    verbose=1            # Mostrar mensaje cuando se detenga el entrenamiento
)

model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(34, 258)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(Dropout(0.3))
model.add(LSTM(64, return_sequences=False, kernel_regularizer=l2(0.03)))
model.add(BatchNormalization())  # Normalización para mejorar la convergencia
model.add(Dropout(0.5))  # Regularización para evitar overfitting
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.03)))
model.add(Dense(38, activation='softmax'))  # 38 clases




# model = Sequential()
# model.add(Input(shape=(34,258)))
# model.add(LSTM(32, return_sequences=True, activation='relu', ))
# model.add(LSTM(64, return_sequences=True, activation='relu'))
# model.add(BatchNormalization())
# model.add(LSTM(128, activation='relu'))
# model.add(BatchNormalization())
# model.add(Dropout(0.3))
# model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))
# model.add(Dense(38, activation='softmax'))
# model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])



model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])



In [135]:
# Ajustar el modelo con Early Stopping
history = model.fit(X_train, y_train, 
          epochs=200,
          validation_split= 0.1,#(X_train, y_train),  # Usar 20% de los datos para validación
          callbacks=[tb_callback, early_stopping]) #reduce_lr  

# Evaluar en el conjunto de prueba
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Loss en test: {test_loss}, Accuracy en test: {test_accuracy}')

Epoch 1/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 94ms/step - categorical_accuracy: 0.0259 - loss: 10.8955 - val_categorical_accuracy: 0.0270 - val_loss: 9.6225
Epoch 2/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - categorical_accuracy: 0.0522 - loss: 9.6596 - val_categorical_accuracy: 0.0541 - val_loss: 8.7583
Epoch 3/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - categorical_accuracy: 0.0472 - loss: 8.6013 - val_categorical_accuracy: 0.0000e+00 - val_loss: 8.0342
Epoch 4/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - categorical_accuracy: 0.0412 - loss: 7.7777 - val_categorical_accuracy: 0.0541 - val_loss: 7.3993
Epoch 5/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - categorical_accuracy: 0.0723 - loss: 7.0049 - val_categorical_accuracy: 0.0811 - val_loss: 6.9017
Epoch 6/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

# Tensorboard plot

In [65]:

from tensorboard import program
import webbrowser

# Inicia TensorBoard
tb = program.TensorBoard()

tb.configure(argv=[None, '--logdir', 'Logs/train'])
url = tb.launch()
print(f"TensorBoard iniciado en {url}")


TensorBoard iniciado en http://localhost:6009/


In [136]:
model.summary()

In [138]:
results = model.predict(X_test)
results

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


array([[1.98033609e-04, 3.20326799e-05, 2.26779284e-05, 7.27552251e-05,
        5.35340281e-04, 2.03473814e-04, 3.26003018e-03, 8.02992785e-04,
        1.14111484e-04, 9.95197297e-07, 5.88561932e-04, 2.32762759e-04,
        1.86203197e-05, 1.53595360e-03, 2.76260474e-03, 5.26276199e-06,
        1.96198511e-04, 4.19232201e-05, 3.43729334e-05, 2.62289104e-04,
        4.50384250e-05, 1.20146615e-05, 8.69852556e-06, 8.55157734e-04,
        7.72986550e-06, 2.87425428e-05, 3.23133645e-05, 9.67147025e-06,
        8.99109116e-04, 1.27874682e-05, 4.64988407e-04, 8.98902908e-06,
        2.17530469e-05, 5.29528887e-04, 2.44791568e-06, 1.54664158e-04,
        5.17603382e-02, 9.34225082e-01],
       [6.10791312e-06, 5.26504184e-04, 4.25635653e-06, 1.00784084e-04,
        1.28346011e-02, 5.68178657e-05, 5.98604083e-06, 5.66117989e-04,
        3.30203875e-05, 2.02157826e-05, 2.15757708e-03, 7.03215413e-03,
        1.16686365e-02, 7.66300887e-04, 6.27145937e-07, 6.11231080e-04,
        4.03115928e-06,

In [141]:
actions[np.argmax(results[2])]

'Mal'

In [142]:
actions[np.argmax(y_test[2])]

'Mal'

In [98]:
model.save('lector_model(95acc-95val_acc-95test).keras')

In [133]:
## deleting model from the buffer
del model

In [105]:
import keras
model=keras.models.load_model('lector_model(99acc-97val_acc).keras')

In [106]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [107]:
yhat = model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 317ms/step


In [108]:
ytrue = np.argmax(y_test, axis=1).tolist()

yhat = np.argmax(yhat, axis=1).tolist()

In [49]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[9, 0],
        [0, 1]],

       [[9, 0],
        [0, 1]],

       [[9, 0],
        [0, 1]],

       [[9, 0],
        [0, 1]],

       [[9, 0],
        [0, 1]],

       [[9, 0],
        [0, 1]],

       [[9, 0],
        [0, 1]],

       [[9, 0],
        [0, 1]],

       [[9, 0],
        [0, 1]],

       [[9, 0],
        [0, 1]]], dtype=int64)

In [109]:
accuracy_score(ytrue, yhat)

1.0

In [110]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [4]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
from collections import deque
import time


last_prediction_time = 0
prediction_interval = 1

sequence = []
sentence = []
predictions = []
threshold = 0.5
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
hands = mp_hands.Hands()
pose = mp_pose.Pose()

# def draw_landmarks(image, results_hands, results_pose):
#     mp_drawing = mp.solutions.drawing_utils
#     mp_hands = mp.solutions.hands
#     mp_pose = mp.solutions.pose
#     if results_pose.pose_landmarks:
#         mp_drawing.draw_landmarks(
#             image, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS)
#     if results_hands.multi_hand_landmarks:
#         for hand_landmarks in results_hands.multi_hand_landmarks:
#             mp_drawing.draw_landmarks(
#                 image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
#     return image
def draw_landmarks(image, results_hands, results_pose):
    mp_drawing = mp.solutions.drawing_utils
    mp_hands = mp.solutions.hands
    mp_pose = mp.solutions.pose

    # Configuración para líneas más delgadas
    landmark_drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
    connection_drawing_spec = mp_drawing.DrawingSpec(thickness=1)

    if results_pose.pose_landmarks:
        mp_drawing.draw_landmarks(
            image, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=landmark_drawing_spec,
            connection_drawing_spec=connection_drawing_spec)
    
    if results_hands.multi_hand_landmarks:
        for hand_landmarks in results_hands.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                landmark_drawing_spec=landmark_drawing_spec,
                connection_drawing_spec=connection_drawing_spec)
    
    return image

def extract_keypoints(results_hands, results_pose):
    # # pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results_pose.pose_landmarks.landmark]).flatten() if results_pose.pose_landmarks else np.zeros(33*4)
    
    # # if results_hands.multi_hand_landmarks:
    # #     lh = np.array([[res.x, res.y, res.z] for res in results_hands.multi_hand_landmarks[0].landmark]).flatten()
    # #     if len(results_hands.multi_hand_landmarks) > 1:
    # #         rh = np.array([[res.x, res.y, res.z] for res in results_hands.multi_hand_landmarks[1].landmark]).flatten()
    # #     else:
    # #         rh = np.zeros(21*3)
    # # else:
    # #     lh = np.zeros(21*3)
    # #     rh = np.zeros(21*3)
    
    # # return np.concatenate([pose, lh, rh])
    # pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results_pose.pose_landmarks.landmark]).flatten() if results_pose.pose_landmarks else np.zeros(33*4)
    # lh = np.array([[res.x, res.y, res.z] for res in results_hands.left_hand_landmarks.landmark]).flatten() if results_hands.left_hand_landmarks else np.zeros(21*3)
    # rh = np.array([[res.x, res.y, res.z] for res in results_hands.right_hand_landmarks.landmark]).flatten() if results_hands.right_hand_landmarks else np.zeros(21*3)
    # return np.concatenate([pose, lh, rh])
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results_pose.pose_landmarks.landmark]).flatten() if results_pose.pose_landmarks else np.zeros(33*4)
    
    lh = np.zeros(21*3)
    rh = np.zeros(21*3)
    
    if results_hands.multi_hand_landmarks:
        for idx, hand_landmarks in enumerate(results_hands.multi_hand_landmarks):
            hand = np.array([[res.x, res.y, res.z] for res in hand_landmarks.landmark]).flatten()
            if idx == 0:
                lh = hand
            elif idx == 1:
                rh = hand
    
    return np.concatenate([pose, lh, rh])


def prob_viz(res, actions, image, color):
    output_image = image.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_image, (0, 60+num*40), (int(prob*100), 90+num*40), color, -1)
        cv2.putText(output_image, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
    return output_image

# Define un solo color para todas las acciones (por ejemplo, azul)
action_color = (255, 0, 0)  # BGR format (Blue)




# Captura de video
cap = cv2.VideoCapture(0)

while cap.isOpened():
    success, image = cap.read()
    if not success:
        break
    
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results_hands = hands.process(image_rgb)
    results_pose = pose.process(image_rgb)
    
    # Dibujar los landmarks en la imagen
    image_with_landmarks = draw_landmarks(image, results_hands, results_pose)
    
    # Verificar si se detecta una mano
    if results_hands.multi_hand_landmarks:
        # Extraer keypoints
        keypoints = extract_keypoints(results_hands, results_pose)
        
        # Añadir keypoints a la secuencia y mantener solo los últimos 33
        sequence.append(keypoints)
        sequence = sequence[-33:]
        current_time = time.time()
        if len(sequence) == 33 and current_time - last_prediction_time >= prediction_interval:
            # Realizar predicción
            input_data = np.expand_dims(sequence, axis=0)
            res = model.predict(input_data)[0]
            predicted_action = actions[np.argmax(res)]
            print(predicted_action)
            predictions.append(np.argmax(res))
            sequence.clear()  # Limpiar la secuencia cuando no se detecta mano
            
            # Lógica de visualización
            if np.unique(predictions[-10:])[0] == np.argmax(res):
                if res[np.argmax(res)] > threshold:
                    if len(sentence) > 0:
                        if predicted_action != sentence[-1]:
                            sentence.append(predicted_action)
                    else:
                        sentence.append(predicted_action)
            
            
            # Visualizar probabilidades
            image_with_landmarks = prob_viz(res, actions, image_with_landmarks, action_color)
            last_prediction_time = current_time
        # Mostrar la predicción en la imagen
        cv2.rectangle(image_with_landmarks, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image_with_landmarks, ' '.join(sentence), (3,30), 
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    else:
        # No se detecta mano
        sequence.clear()  # Limpiar la secuencia cuando no se detecta mano
        cv2.putText(image_with_landmarks, 'No hand detected', (10, 30), 
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
    
    # Mostrar la imagen procesada
    cv2.imshow('Real-time Prediction', image_with_landmarks)
    
    # Salir si se presiona 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

ModuleNotFoundError: No module named 'mediapipe.python._framework_bindings'

In [58]:
cap.release()
cv2.destroyAllWindows()