In [1]:
import cv2
import mediapipe as mp
import os
import numpy as np

In [2]:
# Shared MediaPipe Hands detector used by extract_landmarks, configured for
# still images: at most one hand per image, 0.5 minimum detection confidence.
hands = mp.solutions.hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.5,
    static_image_mode=True,
)

In [3]:
# Root folder whose sub-folders (one per gesture label) contain the images.
# It can be overridden with the ASL_HANDS_DIR environment variable so the
# notebook is not tied to one machine; the default is the original path.
manos = os.environ.get(
    "ASL_HANDS_DIR",
    "C:/Users/sebastian/limpio/primera parte/pipe_entrenamiento/pipe5/manos",
)

In [4]:
# Module-level accumulators filled by extract_landmarks: one flattened
# landmark list per detected hand, and the label it was stored with.
data, labels = [], []

In [5]:
def extract_landmarks(image_path, label):
  """Run hand detection on one image and record its landmarks under ``label``.

  For every hand reported by the module-level ``hands`` detector, a flat list
  with three values (x, y, z) per landmark is appended to the module-level
  ``data`` list and ``label`` is appended to ``labels``.

  Files that OpenCV cannot decode, and images in which no hand is detected,
  are skipped and add nothing to ``data`` / ``labels``.

  Args:
    image_path: Path of the image file to read.
    label: Label stored alongside each landmark vector from this image.

  Returns:
    None. Results are accumulated in ``data`` and ``labels``.
  """
  imagen = cv2.imread(image_path)
  if imagen is None:
    # cv2.imread returns None (it does not raise) for unreadable or non-image
    # files; without this guard cvtColor would abort the whole extraction run.
    print(f"⚠️  No se pudo leer la imagen, se omite: {image_path}")
    return

  # OpenCV loads images as BGR; convert to RGB before running the detector.
  imagen_rgb = cv2.cvtColor(imagen, cv2.COLOR_BGR2RGB)
  result = hands.process(imagen_rgb)

  if not result.multi_hand_landmarks:
    return

  for hand_landmarks in result.multi_hand_landmarks:
    landmarks = []
    for landmark in hand_landmarks.landmark:
      landmarks.extend([landmark.x, landmark.y, landmark.z])
    data.append(landmarks)
    labels.append(label)
     
# One sub-folder per gesture; the folder name is used as the label.
# os.listdir returns entries in arbitrary order, so sort to make the
# processing order (and therefore the sample order) reproducible.
gesture_folders = sorted(
    f for f in os.listdir(manos) if os.path.isdir(os.path.join(manos, f))
)



In [6]:
# Walk every gesture folder and extract landmarks from each file in it.
for label in gesture_folders:
  folder_path = os.path.join(manos, label)
  print(f"Procesando carpeta '{label}'...")
  # os.listdir returns entries in arbitrary order; sort so the samples are
  # collected in the same order on every run.
  for filename in sorted(os.listdir(folder_path)):
    image_path = os.path.join(folder_path, filename)
    if not os.path.isfile(image_path):
      continue  # nested directories cannot be read as images
    extract_landmarks(image_path, label)

Procesando carpeta 'A'...
Procesando carpeta 'B'...
Procesando carpeta 'C'...
Procesando carpeta 'D'...
Procesando carpeta 'E'...
Procesando carpeta 'F'...
Procesando carpeta 'G'...
Procesando carpeta 'H'...
Procesando carpeta 'I'...
Procesando carpeta 'J'...
Procesando carpeta 'K'...
Procesando carpeta 'L'...
Procesando carpeta 'M'...
Procesando carpeta 'N'...
Procesando carpeta 'O'...
Procesando carpeta 'P'...
Procesando carpeta 'Q'...
Procesando carpeta 'R'...
Procesando carpeta 'S'...
Procesando carpeta 'T'...
Procesando carpeta 'U'...
Procesando carpeta 'V'...
Procesando carpeta 'W'...
Procesando carpeta 'X'...
Procesando carpeta 'Y'...
Procesando carpeta 'Z'...


In [7]:
# Turn the accumulated Python lists into NumPy arrays and report their size.
X = np.asarray(data, dtype=np.float32)
y = np.asarray(labels)
print(f"Datos guardados: {len(X)} muestras, {X.shape[1]} características por muestra.")
print(f"Etiquetas guardadas: {np.unique(y).size} clases únicas -> {np.unique(y)}")

Datos guardados: 60347 muestras, 63 características por muestra.
Etiquetas guardadas: 26 clases únicas -> ['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R'
 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z']


In [51]:
# Persist the feature matrix and the label vector next to the notebook.
np.save(file="X_hand_landmarks.npy", arr=X)
np.save(file="y_labels.npy", arr=y)

In [29]:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import json

In [30]:
# Quick summary of the dataset that the training cells will use.
print(f"Datos cargados: {len(X)} muestras, {np.unique(y).size} clases")


Datos cargados: 60347 muestras, 26 clases


In [31]:
def normalize_hand_landmarks(landmarks_batch):
    """Make a batch of hand landmarks translation- and scale-invariant.

    Each sample is read as 21 points of 3 coordinates. The first point (the
    wrist) is subtracted from every point, and the result is divided by the
    largest distance between the wrist and any of the other 20 points.
    Samples whose largest distance is not above 1e-6 (all points coincide)
    are only translated, not scaled.

    Args:
        landmarks_batch: Array-like of shape (n_samples, 63). Floating-point
            input keeps its dtype; any other dtype is promoted to float64 so
            the normalized values are not truncated.

    Returns:
        A new array with the same shape as the input holding the normalized
        landmarks. The input is not modified.
    """
    batch = np.asarray(landmarks_batch)
    if not np.issubdtype(batch.dtype, np.floating):
        # np.zeros_like on integer input would silently truncate the result.
        batch = batch.astype(np.float64)

    points = batch.reshape(batch.shape[0], 21, 3)

    # Use the wrist (landmark 0) as the origin of every sample.
    relative = points - points[:, :1, :]

    # Scale: largest wrist-to-point distance, ignoring the wrist itself.
    max_distance = np.linalg.norm(relative[:, 1:, :], axis=2).max(axis=1)

    # Dividing by 1 leaves degenerate samples untouched.
    scale = np.where(max_distance > 1e-6, max_distance, 1.0)
    scale = scale.astype(batch.dtype, copy=False)

    normalized = relative / scale[:, None, None]
    return normalized.reshape(batch.shape)

In [32]:
# Normalize the raw landmarks before they are split and fed to the model.
print("🔄 Normalizando landmarks...")
X_normalized = normalize_hand_landmarks(landmarks_batch=X)


🔄 Normalizando landmarks...


In [33]:
# Encode the A-Z string labels as integer class ids.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)


In [34]:
# Check how many classes the encoder found (26 expected for A-Z).
n_classes = le.classes_.shape[0]
print(f"📊 Clases detectadas: {n_classes}")
print(f"Letras: {sorted(le.classes_)}")


📊 Clases detectadas: 26
Letras: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']


In [35]:
# Warn, without stopping the notebook, when the encoder did not find exactly
# the 26 classes expected for the letters A-Z.
if n_classes != 26:
    print(f"⚠️  Advertencia: Se esperaban 26 clases, se encontraron {n_classes}")


In [36]:
# Stratified train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_normalized,
    y_encoded,
    stratify=y_encoded,
    random_state=42,
    test_size=0.15,  # keep more data for training
)

In [37]:
# Report how many samples ended up in each split.
print(f"📊 Train: {len(X_train)} muestras | Test: {len(X_test)} muestras")
# Model builder for the ASL classifier
def create_asl_model(input_dim, n_classes):
    """Build the uncompiled dense classifier used for ASL recognition.

    Args:
        input_dim: Length of each input feature vector.
        n_classes: Number of units in the softmax output layer.

    Returns:
        A ``keras.Sequential`` model laid out as: Input -> BatchNorm ->
        Dense(256)/Dropout(0.4)/BatchNorm -> Dense(128)/Dropout(0.3)/BatchNorm
        -> Dense(64)/Dropout(0.2) -> softmax output named ``predictions``.
    """
    # Input followed by an extra normalization layer.
    layers = [
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.BatchNormalization(),
    ]

    # Hidden stack as (units, dropout rate, whether BatchNorm follows).
    hidden_spec = ((256, 0.4, True), (128, 0.3, True), (64, 0.2, False))
    for units, drop_rate, add_batch_norm in hidden_spec:
        layers.append(keras.layers.Dense(units, activation='relu'))
        layers.append(keras.layers.Dropout(drop_rate))
        if add_batch_norm:
            layers.append(keras.layers.BatchNormalization())

    # Output layer.
    layers.append(
        keras.layers.Dense(n_classes, activation='softmax', name='predictions')
    )

    return keras.Sequential(layers)

📊 Train: 51294 muestras | Test: 9053 muestras


In [38]:
# Instantiate the model for the training feature width and class count.
model = create_asl_model(input_dim=X_train.shape[1], n_classes=n_classes)


In [39]:
# Integer class ids are used as targets, hence the sparse cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

In [40]:
# Print a heading, then the layer-by-layer summary of the compiled model.
print("\n🏗️  Arquitectura del modelo:")
model.summary()


🏗️  Arquitectura del modelo:


In [41]:
# Training callbacks
callbacks = [
    # Stop when validation accuracy has not improved for 15 epochs and
    # restore the best weights seen.
    keras.callbacks.EarlyStopping(
        monitor='val_accuracy', patience=15, restore_best_weights=True, verbose=1
    ),
    # Halve the learning rate when validation loss stalls for 8 epochs,
    # never going below 1e-6.
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=8, min_lr=1e-6, verbose=1
    ),
]

In [42]:
# Train, holding out 20% of the training data for validation.
print("\n🚀 Entrenando modelo ASL...")
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,  # larger batch size for better convergence
    epochs=100,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)


🚀 Entrenando modelo ASL...
Epoch 1/100
[1m642/642[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.6570 - loss: 1.1894 - val_accuracy: 0.9685 - val_loss: 0.1121 - learning_rate: 0.0010
Epoch 2/100
[1m642/642[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.9334 - loss: 0.2177 - val_accuracy: 0.9751 - val_loss: 0.0866 - learning_rate: 0.0010
Epoch 3/100
[1m642/642[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.9445 - loss: 0.1763 - val_accuracy: 0.9744 - val_loss: 0.0784 - learning_rate: 0.0010
Epoch 4/100
[1m642/642[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.9536 - loss: 0.1538 - val_accuracy: 0.9752 - val_loss: 0.0685 - learning_rate: 0.0010
Epoch 5/100
[1m642/642[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.9599 - loss: 0.1347 - val_accuracy: 0.9839 - val_loss: 0.0617 - learning_rate: 0.0010
Epoch 6/100
[1m642/642[0m [32m

In [45]:
# Write the trained model to "modelo.h5" in the current working directory.
model.save("modelo.h5")



In [47]:
# 3. Save the class-index -> label mapping so it can be used from JavaScript
label_mapping = dict(enumerate(le.classes_))
with open('labels.json', 'w') as f:
    f.write(json.dumps(label_mapping))
print("✅ Mapeo de etiquetas guardado como 'labels.json'")

✅ Mapeo de etiquetas guardado como 'labels.json'


In [48]:
# 4. Model metadata for the JavaScript side
model_info = dict(
    input_shape=X_train.shape[1],
    num_classes=n_classes,
    classes=le.classes_.tolist(),
    normalization_info=dict(
        type='hand_landmarks_relative_to_wrist',
        description='Landmarks normalized relative to wrist position and scaled by max finger distance',
    ),
)

with open('model_info.json', 'w') as f:
    f.write(json.dumps(model_info, indent=2))
print("✅ Información del modelo guardada como 'model_info.json'")


✅ Información del modelo guardada como 'model_info.json'
