In [None]:
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, models
from deepface import DeepFace
from collections import deque, Counter
import time

# ============================
# CONFIG GENERAL
# ============================

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Device: {device}")

# Emotion labels; the position in this list is the class index.
class_names = ["angry", "fear", "happy", "neutral", "sad", "surprise"]

# Model emotion -> key of the monkey image displayed for it.
# "fear" shares the "sorprendido" image and "sad" is shown as "pensando".
emotion_to_mono = dict(
    angry="enfadado",
    fear="sorprendido",
    happy="feliz",
    neutral="neutral",
    sad="pensando",
    surprise="sorprendido",
)

# Side length, in pixels, of the square input image.
IMG_SIZE = 224

# Preprocessing pipeline for a face crop given as an RGB array:
# array -> PIL image -> grayscale replicated to 3 channels -> square resize
# -> float tensor -> per-channel normalization.
_preprocess_steps = [
    transforms.ToPILImage(),
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    # These values match the commonly used ImageNet channel statistics.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(_preprocess_steps)

# ============================
# MODELO RESNET50 EMOCIONES
# ============================

def get_model(num_classes=6):
    """Build a ResNet-50 with a custom classification head.

    The backbone is created with the IMAGENET1K_V1 weights and its final
    fully connected layer is replaced by a Dropout/Linear/ReLU/BatchNorm/
    Dropout/Linear stack ending in ``num_classes`` outputs.

    Args:
        num_classes: Number of output classes of the final linear layer.

    Returns:
        The assembled ``nn.Module``. Parameters whose name contains
        ``layer1`` or ``layer2`` have ``requires_grad`` set to False;
        every other parameter has it set to True.
    """
    net = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

    # Replace the stock classifier with the custom head.
    head_layers = [
        nn.Dropout(0.5),
        nn.Linear(net.fc.in_features, 512),
        nn.ReLU(),
        nn.BatchNorm1d(512),
        nn.Dropout(0.4),
        nn.Linear(512, num_classes),
    ]
    net.fc = nn.Sequential(*head_layers)

    # Freeze the first two residual stages; leave everything else trainable.
    frozen_stages = ("layer1", "layer2")
    for param_name, param in net.named_parameters():
        param.requires_grad = not any(stage in param_name for stage in frozen_stages)

    return net

# Build the network on the selected device, then restore the trained weights.
model = get_model(num_classes=len(class_names))
model = model.to(device)

# strict=True makes loading fail if the checkpoint keys do not match the
# architecture exactly.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
state_dict = torch.load("best_model_emotions.pth", map_location=device)
model.load_state_dict(state_dict, strict=True)

# Inference mode for layers such as Dropout and BatchNorm.
model.eval()

@torch.no_grad()
def predict_emotion(face_bgr):
    """Classify the emotion shown in a face crop.

    Args:
        face_bgr: Face image as a BGR array (it is converted to RGB here
            before preprocessing).

    Returns:
        A tuple ``(label, probs)`` where ``label`` is the entry of
        ``class_names`` with the highest probability and ``probs`` is the
        NumPy array of softmax probabilities for all classes.
    """
    rgb = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2RGB)

    # Preprocess and add the batch dimension expected by the model.
    batch = transform(rgb).unsqueeze(0).to(device)

    scores = model(batch)
    probs = F.softmax(scores, dim=1).cpu().numpy()[0]

    best = int(probs.argmax())
    return class_names[best], probs

# ============================
# CARGAR IMÁGENES DE MONOS
# ============================

# Monkey image key -> file path. Declared once so the loader and the
# warning below always refer to the same file.
_MONO_IMAGE_PATHS = {
    'neutral': '../images/mono_neutral.webp',
    'feliz': '../images/mono_feliz.png',
    'sorprendido': '../images/mono_sorprendido.jpg',
    'pensando': '../images/mono_pensando.jpg',
    'enfadado': '../images/mono_enfadado.jpg',
}

# cv2.imread signals a failed load by returning None rather than raising,
# so every entry is checked explicitly afterwards.
monos = {nombre: cv2.imread(ruta) for nombre, ruta in _MONO_IMAGE_PATHS.items()}

for nombre, img in monos.items():
    if img is None:
        # Report the exact path that failed (directory and extension included).
        print(f"AVISO: No se pudo cargar {_MONO_IMAGE_PATHS[nombre]}")

# ============================
# FUNCIÓN REDIMENSIONAR
# ============================

def redimensionar_imagen(img, alto_objetivo):
    """Resize an image to a target height while keeping its aspect ratio.

    Args:
        img: Image array whose first two dimensions are (height, width),
            or None.
        alto_objetivo: Desired output height in pixels.

    Returns:
        The resized image, or None when ``img`` is None or has zero height
        or width (there is nothing to scale in that case).
    """
    if img is None:
        return None

    alto, ancho = img.shape[:2]
    if alto == 0 or ancho == 0:
        # An empty image would otherwise cause a division by zero below.
        return None

    escala = alto_objetivo / alto
    # int() truncates, so a very narrow image could end up with width 0,
    # which cv2.resize rejects; keep at least one pixel column.
    nuevo_ancho = max(1, int(ancho * escala))
    return cv2.resize(img, (nuevo_ancho, alto_objetivo))

# ============================
# BUCLE WEBCAM LADO A LADO
# ============================

def _detect_face_box(frame):
    """Return the first detected face as a clamped (x, y, w, h), or None."""
    detections = DeepFace.extract_faces(
        img_path=frame,
        detector_backend="opencv",
        enforce_detection=False,
    )
    if not detections:
        return None

    det = detections[0]

    # With enforce_detection=False DeepFace reports "no face found" as a
    # region covering the whole image with confidence 0 instead of raising.
    # Treat that as "no detection" so the classifier is not run on the
    # full frame. NOTE(review): confirm against the installed DeepFace version.
    confidence = det.get("confidence")
    if confidence is not None and confidence <= 0:
        return None

    fa = det["facial_area"]
    # Clamp the box so the crop always lies inside the frame and is non-empty.
    x = max(0, fa["x"])
    y = max(0, fa["y"])
    w = max(1, min(fa["w"], frame.shape[1] - x))
    h = max(1, min(fa["h"], frame.shape[0] - y))
    return x, y, w, h


def main():
    """Run the side-by-side webcam loop until 'q' is pressed.

    Each frame is mirrored, the first detected face is classified with
    ``predict_emotion``, the label is smoothed over the last five frames,
    and the annotated frame is shown next to the matching monkey image.
    The camera and the window are released even if the loop raises.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: No se pudo abrir la webcam")
        cap.release()
        return

    print("Webcam iniciada - Modo LADO A LADO con tu modelo")
    print("Presiona 'q' para salir")

    historial = deque(maxlen=5)
    mono_cache = {}    # (monkey key, frame height) -> resized monkey image
    last_error = None  # last reported failure, to avoid one message per frame
    prev_time = time.time()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Mirror the image; draw only on a copy so the face crop is taken
            # from clean pixels.
            frame = cv2.flip(frame, 1)
            annotated_frame = frame.copy()

            label = "neutral"

            # 1. Face detection and emotion prediction (best effort: any
            #    failure falls back to "neutral" for this frame).
            try:
                face_box = _detect_face_box(frame)
                if face_box is not None:
                    x, y, w, h = face_box
                    label, _ = predict_emotion(frame[y:y + h, x:x + w])
                    cv2.rectangle(annotated_frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            except Exception as exc:
                label = "neutral"
                error_text = repr(exc)
                if error_text != last_error:
                    print(f"AVISO: fallo en deteccion/prediccion: {error_text}")
                    last_error = error_text

            # 2. Temporal smoothing: most frequent label in the recent history.
            historial.append(label)
            label_suavizado = Counter(historial).most_common(1)[0][0]

            # 3. Overlay the smoothed state and the instantaneous FPS.
            cv2.putText(annotated_frame, f"Estado: {label_suavizado.upper()}",
                        (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 3)

            now = time.time()
            fps = 1.0 / (now - prev_time + 1e-6)  # epsilon avoids division by zero
            prev_time = now
            cv2.putText(annotated_frame, f"FPS: {fps:.1f}",
                        (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            # 4. Pick the monkey image; fall back to the neutral one when the
            #    mapped image is missing or failed to load (stored as None).
            mono_key = emotion_to_mono.get(label_suavizado, "neutral")
            mono_img = monos.get(mono_key)
            if mono_img is None:
                mono_key = "neutral"
                mono_img = monos.get(mono_key)

            # Resize once per (image, height) instead of on every frame.
            altura_frame = annotated_frame.shape[0]
            cache_key = (mono_key, altura_frame)
            if cache_key not in mono_cache:
                mono_cache[cache_key] = redimensionar_imagen(mono_img, altura_frame)
            mono_resized = mono_cache[cache_key]

            # 5. Combine side by side; show only the webcam if no image exists.
            if mono_resized is not None:
                combined = np.hstack((annotated_frame, mono_resized))
            else:
                combined = annotated_frame

            cv2.imshow('Espejo con el mono (Modelo propio)', combined)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even on errors or
        # a keyboard interrupt.
        cap.release()
        cv2.destroyAllWindows()

# Start the webcam loop only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Device: cpu
Webcam iniciada - Modo LADO A LADO con tu modelo
Presiona 'q' para salir
Finalizado
