In [None]:
# Create a virtual environment and activate it (Windows).
# NOTE(review): each `!` line runs in its own shell process, so the activation
# below presumably does not carry over to the running kernel — confirm whether
# these two lines are needed here, or run them in a terminal before launching
# Jupyter.
!python -m venv venv
!.\venv\Scripts\activate

# Install the dependencies with the %pip magic.
# TODO(review): pin package versions so the notebook is reproducible.
%pip install torch torchvision opencv-python

## 1. Preparar el dataset

In [5]:
import os
import cv2
import numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing pipeline applied to every image: resize to 224x224, then
# convert to a tensor.
transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

# Build the dataset from the image folder and wrap it in a shuffling loader
# that yields batches of 32.
data_dir = 'asl_alphabet_train'
dataset = datasets.ImageFolder(root=data_dir, transform=transform)
dataloader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

## 2. Definir el modelo

In [6]:
import torch.nn as nn
import torchvision.models as models

# Number of output classes: A-Z plus SPACE, DELETE and NOTHING.
NUM_CLASSES = 29

# Start from a ResNet18 pretrained on ImageNet. The `weights=` enum replaces
# the deprecated `pretrained=True` flag (torchvision >= 0.13); IMAGENET1K_V1 is
# the checkpoint that the old flag loaded.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Replace the final fully connected layer with a fresh classifier sized for
# our classes; its input width is taken from the existing layer.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, NUM_CLASSES)

# Alternative: build a small CNN from scratch instead of fine-tuning ResNet18.
# NOTE(review): this snippet uses `F` without importing it; uncommenting it
# also requires `import torch.nn.functional as F`. The 64 * 56 * 56 flatten
# size matches 224x224 inputs after the two 2x max-pool steps (224 -> 112 -> 56).
# class SimpleCNN(nn.Module):
#     def __init__(self):
#         super(SimpleCNN, self).__init__()
#         self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
#         self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
#         self.fc1 = nn.Linear(64 * 56 * 56, 512)
#         self.fc2 = nn.Linear(512, 29)
    
#     def forward(self, x):
#         x = F.relu(self.conv1(x))
#         x = F.max_pool2d(x, 2)
#         x = F.relu(self.conv2(x))
#         x = F.max_pool2d(x, 2)
#         x = x.view(-1, 64 * 56 * 56)
#         x = F.relu(self.fc1(x))
#         x = self.fc2(x)
#         return x

# model = SimpleCNN()

## 3. Entrenar el modelo

In [None]:
import torch.optim as optim

# Use a GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")
# Re-applies the current intra-op thread count (the value itself is unchanged).
torch.set_num_threads(torch.get_num_threads())

# Move the model's parameters to the selected device.
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print("Empezando entrenamiento: ")
# Put the model in training mode explicitly. Later cells call model.eval(), so
# re-running this cell after them would otherwise train with BatchNorm layers
# frozen in inference mode.
model.train()

# Train for a fixed number of epochs, reporting mean batch loss and accuracy
# over the training data after each one.
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in dataloader:
        # Batches must live on the same device as the model.
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Index of the highest logit per sample is the predicted class.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Loss is averaged over batches; accuracy over individual samples.
    epoch_loss = running_loss / len(dataloader)
    epoch_accuracy = 100 * correct / total
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

Usando dispositivo: cpu
Empezando entrenamiento: 


## 4. Evaluar el modelo

In [None]:
# Evaluate the model.
# NOTE: this iterates over the same dataloader used for training, so the number
# reported is training accuracy; swap in a separate validation set for an
# honest estimate.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in dataloader:
        # The model was moved to `device` for training; the batches must be
        # moved as well or the forward pass fails on a GPU machine.
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total}%')

## 5. Implementar la detección en tiempo real

In [None]:
import cv2

# Open the default camera and classify frames in real time until the stream
# ends or the user presses 'q'.
cap = cv2.VideoCapture(0)

# `transforms.Resize` inside `transform` rejects NumPy arrays, so each frame is
# converted to a PIL image first (the same type ImageFolder feeds it in training).
to_pil = transforms.ToPILImage()

# Inference mode only needs to be set once, not on every frame.
model.eval()

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV delivers BGR frames, while the training images were loaded as
        # RGB; convert so the channels match what the model was trained on.
        # `transform` then handles the resize to 224x224 and tensor conversion.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = transform(to_pil(rgb)).unsqueeze(0).to(device)

        # Predict the class of the current frame.
        with torch.no_grad():
            outputs = model(img)
            predicted = outputs.argmax(dim=1)

        # Draw the predicted label on the original (BGR) frame and show it.
        label = dataset.classes[predicted.item()]
        cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('ASL Alphabet Recognition', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()