### SOLO UNA PERSONA

In [10]:
import cv2
import mediapipe as mp
import numpy as np
from collections import deque
from dataclasses import dataclass
from enum import Enum
import time
from datetime import datetime
import os


# ==================== INICIALIZACIÓN MEDIAPIPE ====================
# Short aliases for the MediaPipe solution modules used by the rest of the file.
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands
mp_face = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles


# Detector instances are created once, at import time, and shared by every
# frame processed in main(). Pose and Hands are configured with
# static_image_mode=False.
pose = mp_pose.Pose(
    model_complexity=1,
    enable_segmentation=False,
    smooth_landmarks=True,
    static_image_mode=False,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.7,
)

hands = mp_hands.Hands(
    max_num_hands=2,
    static_image_mode=False,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.6,
)

face_detection = mp_face.FaceDetection(min_detection_confidence=0.5)


# ==================== SISTEMA DE PLANOS ====================
class TipoPlano(Enum):
    """Enumeration of the shot types (framings) the system knows about."""

    # Distance-based framings.
    EXTREME_WIDE = 1
    WIDE = 2
    FULL = 3
    COWBOY = 4
    MEDIUM = 5
    MEDIUM_CLOSEUP = 6
    CLOSEUP = 7
    EXTREME_CLOSEUP = 8

    # Orientation / camera-angle framings.
    OVER_SHOULDER = 9
    BACK_SHOT = 10
    LOW_ANGLE = 11
    HIGH_ANGLE = 12


# Shot catalogue, keyed by shot identifier (the same strings the gesture
# classifier and the automatic detector return). Each entry holds:
#   'zoom'           - zoom factor handed to the framer by main()
#   'nombre'         - display name, shown in the control panel and console
#   'tipo'           - the matching TipoPlano member
#   'descripcion'    - short description shown under the name in the panel
#   'y_offset'       - vertical shift added to the tracked centre (same units
#                      as the centre coordinates)
#   'gesto_manual'   - label of the hand gesture that selects the shot, or
#                      'N/A (solo AUTO)' for shots with no gesture
#   'deteccion_auto' - descriptive text only; not read by the code visible here
PLANOS = {
    'EXTREME_WIDE': {
        'zoom': 0.7,
        'nombre': 'Plano General Extremo',
        'tipo': TipoPlano.EXTREME_WIDE,
        'descripcion': 'Contexto completo + entorno',
        'y_offset': 0.05,
        'gesto_manual': 'Pulgar abajo üëé',
        'deteccion_auto': 'Persona muy lejos o en borde del frame'
    },
    'WIDE': {
        'zoom': 0.9,
        'nombre': 'Plano General',
        'tipo': TipoPlano.WIDE,
        'descripcion': 'Cuerpo completo + entorno',
        'y_offset': 0.0,
        'gesto_manual': 'Pu√±o cerrado ‚úä',
        'deteccion_auto': 'Cuerpo completo visible con espacio'
    },
    'FULL': {
        'zoom': 1.0,
        'nombre': 'Plano Entero',
        'tipo': TipoPlano.FULL,
        'descripcion': 'De pies a cabeza',
        'y_offset': 0.0,
        'gesto_manual': '1 dedo ‚òùÔ∏è',
        'deteccion_auto': 'Pies y cabeza visibles ajustados'
    },
    'COWBOY': {
        'zoom': 1.3,
        'nombre': 'Plano Americano',
        'tipo': TipoPlano.COWBOY,
        'descripcion': 'Desde rodillas',
        'y_offset': -0.05,
        'gesto_manual': '4 dedos üññ',
        'deteccion_auto': 'Visible desde rodillas hacia arriba'
    },
    'MEDIUM': {
        'zoom': 1.5,
        'nombre': 'Plano Medio',
        'tipo': TipoPlano.MEDIUM,
        'descripcion': 'Cintura hacia arriba',
        'y_offset': -0.08,
        'gesto_manual': '3 dedos ü§ü',
        'deteccion_auto': 'Visible desde cintura/caderas'
    },
    'MEDIUM_CLOSEUP': {
        'zoom': 1.8,
        'nombre': 'Plano Medio Corto',
        'tipo': TipoPlano.MEDIUM_CLOSEUP,
        'descripcion': 'Pecho hacia arriba',
        'y_offset': -0.10,
        'gesto_manual': '5 dedos (mano abierta) üñêÔ∏è',
        'deteccion_auto': 'Solo torso superior visible'
    },
    'CLOSEUP': {
        'zoom': 2.2,
        'nombre': 'Primer Plano',
        'tipo': TipoPlano.CLOSEUP,
        'descripcion': 'Cara y hombros',
        'y_offset': -0.08,
        'gesto_manual': 'Paz (V) ‚úåÔ∏è',
        'deteccion_auto': 'Cara cercana, hombros anchos'
    },
    'EXTREME_CLOSEUP': {
        'zoom': 2.8,
        'nombre': 'Primer√≠simo Plano',
        'tipo': TipoPlano.EXTREME_CLOSEUP,
        'descripcion': 'Solo rostro',
        'y_offset': -0.05,
        'gesto_manual': 'Rock and roll ü§ò',
        'deteccion_auto': 'Cara muy cercana ocupando frame'
    },
    'OVER_SHOULDER': {
        'zoom': 1.6,
        'nombre': 'Sobre el Hombro',
        'tipo': TipoPlano.OVER_SHOULDER,
        'descripcion': 'Desde hombro lateral',
        'y_offset': -0.08,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Perfil con hombro prominente'
    },
    'BACK_SHOT': {
        'zoom': 1.2,
        'nombre': 'Plano de Espaldas',
        'tipo': TipoPlano.BACK_SHOT,
        'descripcion': 'Vista posterior completa',
        'y_offset': 0.0,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Persona de espaldas detectada'
    },
    'LOW_ANGLE': {
        'zoom': 1.3,
        'nombre': 'Contrapicado',
        'tipo': TipoPlano.LOW_ANGLE,
        'descripcion': 'Desde abajo mirando arriba',
        'y_offset': 0.1,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Persona en parte inferior del frame'
    },
    'HIGH_ANGLE': {
        'zoom': 1.3,
        'nombre': 'Picado',
        'tipo': TipoPlano.HIGH_ANGLE,
        'descripcion': 'Desde arriba mirando abajo',
        'y_offset': -0.15,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Persona en parte superior del frame'
    }
}


def print_resumen_planos():
    """Print the startup banner: manual gestures, auto-mode hints and keys.

    The manual-gesture table is generated from PLANOS; shots whose gesture
    label is 'N/A (solo AUTO)' are left out of it.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print("\n" + heavy_rule)
    print("üìã SISTEMA DE C√ÅMARA INTELIGENTE")
    print(heavy_rule)

    print("\nüé≠ MODO MANUAL (Gestos de mano):")
    print(light_rule)
    for plano in PLANOS.values():
        if plano['gesto_manual'] == 'N/A (solo AUTO)':
            continue
        print(f"  {plano['nombre']:25} ‚Üí {plano['gesto_manual']}")

    print("\nü§ñ MODO AUTO (Detecci√≥n autom√°tica):")
    print(light_rule)
    for hint in (
        "  ‚Ä¢ Ac√©rcate a la c√°mara ‚Üí Primer Plano / Extremo",
        "  ‚Ä¢ Al√©jate de la c√°mara ‚Üí Plano Entero / General",
        "  ‚Ä¢ Mu√©vete al borde ‚Üí General Extremo",
        "  ‚Ä¢ G√≠rate de espaldas ‚Üí Plano de Espaldas",
    ):
        print(hint)

    print("\n‚å®Ô∏è  CONTROLES:")
    print(light_rule)
    for shortcut in (
        "  'm' - Cambiar entre AUTO/MANUAL",
        "  'h' - HOLD: Congelar plano actual",
        "  's' - Capturar screenshot",
        "  'g' - Mostrar/ocultar grid",
        "  'c' - Cambiar c√°mara",
        "  'r' - Reset sistema",
        "  ESC - Salir",
    ):
        print(shortcut)

    print("\n" + heavy_rule + "\n")


# ==================== GESTOS MANUALES ====================
def contar_dedos(hand_landmarks):
    """Count how many fingers of one detected hand are extended.

    Args:
        hand_landmarks: object exposing ``.landmark``, an indexable sequence
            of at least 21 points with ``x`` and ``y`` attributes. Indices
            4, 8, 12, 16 and 20 are read as the fingertips; 17 is taken to
            be the base of the pinky (MediaPipe hand landmark numbering).

    Returns:
        int: number of extended fingers, from 0 to 5.
    """
    lm = hand_landmarks.landmark

    # Thumb: it opens sideways, away from the pinky side of the palm. Compare
    # horizontal distances to the pinky base instead of a signed x comparison,
    # which is only right for one hand and counts the other hand's folded
    # thumb as extended.
    thumb_extended = abs(lm[4].x - lm[17].x) > abs(lm[3].x - lm[17].x)

    # Other fingers: extended when the tip has a smaller y than the joint two
    # indices below it (smaller y is treated as higher in the image).
    others_extended = sum(lm[tip].y < lm[tip - 2].y for tip in (8, 12, 16, 20))

    return int(thumb_extended) + others_extended


def detectar_rock_and_roll(hand_landmarks):
    """Return True for the "rock and roll" sign.

    The sign is index and pinky raised with middle and ring folded. A finger
    counts as raised when its tip has a smaller y than the joint two indices
    below it. The thumb is not inspected.

    Args:
        hand_landmarks: object exposing ``.landmark``, an indexable sequence
            of points with a ``y`` attribute (indices 6-20 are read).

    Returns:
        bool: whether the four fingers match the sign.
    """
    lm = hand_landmarks.landmark

    # ASCII-only local names: the previous accented identifier does not
    # survive a wrong file encoding.
    index_up = lm[8].y < lm[6].y
    middle_down = lm[12].y > lm[10].y
    ring_down = lm[16].y > lm[14].y
    pinky_up = lm[20].y < lm[18].y

    return index_up and middle_down and ring_down and pinky_up


def detectar_pulgar_arriba(hand_landmarks):
    """Return True for a thumbs-up: thumb tip above its joint, other fingers folded.

    Args:
        hand_landmarks: object exposing ``.landmark``, an indexable sequence
            of points with a ``y`` attribute.

    Returns:
        bool: True when landmark 4 has a smaller y than landmark 3 and each
        of the tips 8, 12, 16, 20 has a larger y than the joint two indices
        below it.
    """
    points = hand_landmarks.landmark

    if not points[4].y < points[3].y:
        return False

    return all(points[tip].y > points[tip - 2].y for tip in (8, 12, 16, 20))


def detectar_pulgar_abajo(hand_landmarks):
    """Return True for a thumbs-down: thumb tip below its joint, other fingers folded.

    Args:
        hand_landmarks: object exposing ``.landmark``, an indexable sequence
            of points with a ``y`` attribute.

    Returns:
        bool: True when landmark 4 has a larger y than landmark 3 and each
        of the tips 8, 12, 16, 20 has a larger y than the joint two indices
        below it.
    """
    points = hand_landmarks.landmark

    if not points[4].y > points[3].y:
        return False

    return all(points[tip].y > points[tip - 2].y for tip in (8, 12, 16, 20))


def detectar_gesto_paz(hand_landmarks):
    """Return True for the peace / V sign.

    The sign is index and middle raised with ring and pinky folded. A finger
    counts as raised when its tip has a smaller y than the joint two indices
    below it. The thumb is not inspected.

    Args:
        hand_landmarks: object exposing ``.landmark``, an indexable sequence
            of points with a ``y`` attribute (indices 6-20 are read).

    Returns:
        bool: whether the four fingers match the sign.
    """
    lm = hand_landmarks.landmark

    # ASCII-only local names: the previous accented identifier does not
    # survive a wrong file encoding.
    index_up = lm[8].y < lm[6].y
    middle_up = lm[12].y < lm[10].y
    ring_down = lm[16].y > lm[14].y
    pinky_down = lm[20].y > lm[18].y

    return index_up and middle_up and ring_down and pinky_down


def clasificar_gesto_manual(hand_landmarks):
    """Map one hand's pose to a shot key and a short label for that gesture.

    Checks run in priority order: rock-and-roll, thumbs-down, closed fist
    (zero fingers), peace sign, then the plain finger count. A count with no
    dedicated shot falls back to 'WIDE' and uses the count itself as label.

    Args:
        hand_landmarks: landmark object accepted by the detector helpers.

    Returns:
        tuple[str, str]: (key into PLANOS, gesture label).
    """
    dedos = contar_dedos(hand_landmarks)

    if detectar_rock_and_roll(hand_landmarks):
        return 'EXTREME_CLOSEUP', "ü§ò"
    if detectar_pulgar_abajo(hand_landmarks):
        return 'EXTREME_WIDE', "üëé"
    if dedos == 0:
        return 'WIDE', "‚úä"
    if detectar_gesto_paz(hand_landmarks):
        return 'CLOSEUP', "‚úåÔ∏è"

    # Remaining shots are selected purely by the number of raised fingers.
    por_conteo = {
        1: ('FULL', "‚òùÔ∏è"),
        3: ('MEDIUM', "ü§ü"),
        4: ('COWBOY', "üññ"),
        5: ('MEDIUM_CLOSEUP', "üñêÔ∏è"),
    }
    if dedos in por_conteo:
        return por_conteo[dedos]
    return 'WIDE', str(dedos)


# ==================== DETECCIÓN AUTOMÁTICA ====================
class BodyPositionDetector:
    """Choose a shot key automatically from the subject's pose landmarks.

    Each call classifies the current frame and then returns the most common
    classification among the last three, so a single noisy frame does not
    flip the shot.

    Attributes:
        history: the last three per-frame classifications.
        last_plano: last smoothed result, returned while no pose is available.
        debug_mode: when True, every classification refreshes ``debug_info``.
        debug_info: one-line summary of the latest measurements ('' until the
            first classification made with ``debug_mode`` on).
    """

    def __init__(self):
        self.history = deque(maxlen=3)
        self.last_plano = 'MEDIUM'
        self.debug_mode = True
        self.debug_info = ''

    def detect_framing(self, pose_landmarks, frame_height, frame_width):
        """Return the smoothed shot key for the current frame.

        Args:
            pose_landmarks: object exposing ``.landmark`` (points with
                ``x``, ``y``, ``z`` and ``visibility``), or a falsy value when
                no pose was detected.
            frame_height: unused; kept for interface compatibility.
            frame_width: unused; kept for interface compatibility.

        Returns:
            str: a shot key such as 'MEDIUM'. When ``pose_landmarks`` is
            falsy the previous result is returned unchanged.
        """
        if not pose_landmarks:
            return self.last_plano

        lm = pose_landmarks.landmark

        shoulder_width = abs(lm[11].x - lm[12].x)
        head_y = lm[0].y
        shoulder_y = (lm[11].y + lm[12].y) / 2

        # A pair of joints is only as visible as its least visible side.
        hips_vis = min(lm[23].visibility, lm[24].visibility)
        knees_vis = min(lm[25].visibility, lm[26].visibility)
        ankles_vis = min(lm[27].visibility, lm[28].visibility)

        center_x = np.mean([lm[0].x, lm[11].x, lm[12].x])
        center_y = np.mean([lm[0].y, lm[11].y, lm[12].y])

        # Vertical span from the head down to the lowest joint pair that is
        # visible enough, falling back to the shoulders.
        if ankles_vis > 0.3:
            ankle_y = (lm[27].y + lm[28].y) / 2
            body_span = abs(ankle_y - head_y)
        elif knees_vis > 0.3:
            knee_y = (lm[25].y + lm[26].y) / 2
            body_span = abs(knee_y - head_y)
        elif hips_vis > 0.4:
            hip_y = (lm[23].y + lm[24].y) / 2
            body_span = abs(hip_y - head_y)
        else:
            body_span = abs(shoulder_y - head_y)

        orientation = self._detect_orientation(lm)

        plano = self._classify_simple(
            shoulder_width, body_span, center_x, center_y,
            hips_vis, knees_vis, ankles_vis, orientation
        )

        if self.debug_mode:
            # Keep the summary on the instance; it used to be built and then
            # thrown away, which made debug_mode a no-op.
            self.debug_info = f"SW:{shoulder_width:.2f} BS:{body_span:.2f} H:{hips_vis:.1f} K:{knees_vis:.1f} A:{ankles_vis:.1f} ‚Üí {plano}"

        self.history.append(plano)
        plano_final = self._smooth()
        self.last_plano = plano_final

        return plano_final

    def _detect_orientation(self, lm):
        """Classify the subject as 'FRONTAL', 'ESPALDAS' or 'PERFIL'.

        Compares the nose depth with the mean shoulder depth using a 0.1
        dead band: nose z clearly smaller gives 'FRONTAL', clearly larger
        gives 'ESPALDAS', anything in between gives 'PERFIL'.
        """
        shoulders_z = (lm[11].z + lm[12].z) / 2
        nose_z = lm[0].z

        if nose_z < shoulders_z - 0.1:
            return 'FRONTAL'
        elif nose_z > shoulders_z + 0.1:
            return 'ESPALDAS'
        else:
            return 'PERFIL'

    def _classify_simple(self, shoulder_width, body_span, center_x, center_y,
                        hips_vis, knees_vis, ankles_vis, orientation):
        """Map one frame's measurements to a shot key.

        Priority: back-facing subject, then vertical position (top/bottom of
        the frame), then horizontal edge, then shoulder width refined by
        which lower-body joints are visible. ``body_span`` is accepted but
        not used by the rules.
        """
        near_edge = center_x < 0.15 or center_x > 0.85
        near_top = center_y < 0.2
        near_bottom = center_y > 0.8

        if orientation == 'ESPALDAS':
            return 'BACK_SHOT'

        if near_top:
            return 'HIGH_ANGLE'
        elif near_bottom:
            return 'LOW_ANGLE'

        if near_edge:
            return 'EXTREME_WIDE'

        # Wider shoulders in the frame are read as a tighter shot.
        if shoulder_width > 0.50:
            return 'EXTREME_CLOSEUP'
        elif shoulder_width > 0.38:
            return 'CLOSEUP'
        elif shoulder_width > 0.28:
            if hips_vis < 0.3:
                return 'MEDIUM_CLOSEUP'
            else:
                return 'MEDIUM'
        elif shoulder_width > 0.20:
            if knees_vis > 0.3:
                return 'COWBOY'
            else:
                return 'MEDIUM'
        elif shoulder_width > 0.15:
            if ankles_vis > 0.3:
                return 'FULL'
            else:
                return 'COWBOY'
        elif shoulder_width > 0.10:
            return 'WIDE'
        else:
            return 'EXTREME_WIDE'

    def _smooth(self):
        """Return the most frequent key in ``history`` ('MEDIUM' when empty).

        On a tie, the key that entered the history first wins.
        """
        if not self.history:
            return 'MEDIUM'
        from collections import Counter
        counts = Counter(self.history)
        return counts.most_common(1)[0][0]


# ==================== ORIENTACIÓN CORPORAL ====================
class BodyOrientation:
    """Estimate smoothed yaw / pitch / roll angles of the torso, in degrees.

    Attributes:
        history: the last ten per-frame estimates, averaged by ``_smooth``.
    """

    def __init__(self):
        self.history = deque(maxlen=10)

    def calculate(self, pose_landmarks):
        """Add the current frame's estimate and return the smoothed result.

        Args:
            pose_landmarks: object exposing ``.landmark`` (points with
                ``x``, ``y``, ``z`` and ``visibility``), or a falsy value when
                no pose was detected.

        Returns:
            dict | None: ``{'yaw', 'pitch', 'roll', 'visibility'}`` averaged
            over the history, or None when ``pose_landmarks`` is falsy.
        """
        if not pose_landmarks:
            return None

        lm = pose_landmarks.landmark

        left_shoulder = np.array([lm[11].x, lm[11].y, lm[11].z])
        right_shoulder = np.array([lm[12].x, lm[12].y, lm[12].z])
        left_hip = np.array([lm[23].x, lm[23].y, lm[23].z])
        right_hip = np.array([lm[24].x, lm[24].y, lm[24].z])

        # Yaw and roll come from the left-to-right shoulder vector:
        # yaw is its angle in the x/z plane, roll its angle in the x/y plane.
        shoulder_vec = right_shoulder - left_shoulder
        yaw = np.arctan2(shoulder_vec[2], shoulder_vec[0]) * 180 / np.pi
        roll = np.arctan2(shoulder_vec[1], shoulder_vec[0]) * 180 / np.pi

        # Pitch is the angle of the hip-to-shoulder vector in the y/z plane.
        torso_center = (left_shoulder + right_shoulder) / 2
        hip_center = (left_hip + right_hip) / 2
        torso_vec = torso_center - hip_center
        pitch = np.arctan2(torso_vec[2], torso_vec[1]) * 180 / np.pi

        orientation = {
            'yaw': yaw,
            'pitch': pitch,
            'roll': roll,
            'visibility': min(lm[11].visibility, lm[12].visibility)
        }

        self.history.append(orientation)
        return self._smooth()

    def _smooth(self):
        """Average the history; angles use a circular mean.

        arctan2 yields values in (-180, 180], and whenever the reference
        vector has a negative first component the estimates sit next to the
        +/-180 seam. A plain arithmetic mean of e.g. +179 and -179 would
        report 0, so the angles are averaged on the unit circle instead.
        """
        if not self.history:
            return None
        return {
            'yaw': self._circular_mean_deg([o['yaw'] for o in self.history]),
            'pitch': self._circular_mean_deg([o['pitch'] for o in self.history]),
            'roll': self._circular_mean_deg([o['roll'] for o in self.history]),
            'visibility': np.mean([o['visibility'] for o in self.history])
        }

    @staticmethod
    def _circular_mean_deg(angles_deg):
        """Return the mean direction of ``angles_deg`` in degrees, (-180, 180]."""
        radians = np.radians(angles_deg)
        mean_sin = np.mean(np.sin(radians))
        mean_cos = np.mean(np.cos(radians))
        return float(np.degrees(np.arctan2(mean_sin, mean_cos)))


# ==================== ENCUADRE ====================
@dataclass
class FrameTarget:
    """Centre point and zoom level of a virtual camera framing."""

    x: float = 0.5      # horizontal centre, as a fraction of the frame width
    y: float = 0.5      # vertical centre, as a fraction of the frame height
    zoom: float = 1.0   # zoom factor; the crop is frame size divided by this


class SmoothFramer:
    """Ease the current framing towards the most recent target.

    Attributes:
        current: framing actually applied; moved a little on every update.
        target: framing requested by the last update.
        smoothing: fraction of the remaining distance covered per update.
    """

    def __init__(self, smoothing=0.15):
        self.smoothing = smoothing
        self.target = FrameTarget()
        self.current = FrameTarget()

    def update(self, center_x, center_y, zoom, y_offset=0.0):
        """Set a new target and advance the current framing one step.

        Args:
            center_x: requested horizontal centre.
            center_y: requested vertical centre, before ``y_offset``.
            zoom: requested zoom factor.
            y_offset: amount added to ``center_y`` to form the target.

        Returns:
            The ``current`` object (mutated in place).
        """
        goal, state = self.target, self.current
        goal.x, goal.y, goal.zoom = center_x, center_y + y_offset, zoom

        for field in ('x', 'y', 'zoom'):
            now = getattr(state, field)
            setattr(state, field, now + (getattr(goal, field) - now) * self.smoothing)

        return state


def obtener_centro_seguimiento(pose_landmarks, face_result, w, h, plano_actual):
    """Return the (x, y) point the framing should centre on for this shot.

    Close-ups follow the face (midpoint of the nose and the mouth centre),
    medium shots follow the upper body (nose weighted once, shoulder midpoint
    twice), and every other shot follows the mean of nose, shoulders and hips.

    Args:
        pose_landmarks: object exposing ``.landmark`` (points with ``x`` and
            ``y``), or a falsy value when no pose was detected.
        face_result: unused; kept for interface compatibility.
        w: unused; kept for interface compatibility.
        h: unused; kept for interface compatibility.
        plano_actual: key of the active shot.

    Returns:
        tuple[float, float]: centre in the landmarks' own coordinates, or
        (0.5, 0.5) when there is no pose.
    """
    if not pose_landmarks:
        return (0.5, 0.5)

    lm = pose_landmarks.landmark

    def xy(index):
        point = lm[index]
        return np.array([point.x, point.y])

    if plano_actual in ('CLOSEUP', 'EXTREME_CLOSEUP'):
        mouth_mid = (xy(9) + xy(10)) / 2
        anchor = (xy(0) + mouth_mid) / 2
    elif plano_actual in ('MEDIUM', 'MEDIUM_CLOSEUP'):
        shoulder_mid = (xy(11) + xy(12)) / 2
        anchor = (xy(0) + shoulder_mid * 2) / 3
    else:
        body = [lm[i] for i in (0, 11, 12, 23, 24)]
        anchor = (np.mean([p.x for p in body]), np.mean([p.y for p in body]))

    return (float(anchor[0]), float(anchor[1]))


def aplicar_encuadre(frame, framer_state):
    """Crop ``frame`` around the framing centre and scale it back to full size.

    The crop is the frame size divided by the zoom factor, centred on the
    framing point and shifted as needed to stay inside the image. A crop
    larger than the frame (zoom below 1) ends up covering the whole frame.

    Args:
        frame: image array whose first two dimensions are height and width.
        framer_state: object with ``x`` and ``y`` (fractions of width and
            height) and ``zoom``.

    Returns:
        The resized crop, or ``frame`` itself when the crop is empty.
    """
    frame_h, frame_w = frame.shape[:2]

    def window(center_frac, full, span):
        # One axis of the crop: centre it, clip it to the image, and if the
        # far edge clipped it, slide it back to recover the requested width.
        start = max(0, int(center_frac * full) - span // 2)
        stop = min(full, start + span)
        if stop - start < span:
            start = max(0, stop - span)
        return start, stop

    scale = framer_state.zoom
    left, right = window(framer_state.x, frame_w, int(frame_w / scale))
    top, bottom = window(framer_state.y, frame_h, int(frame_h / scale))

    region = frame[top:bottom, left:right]
    if region.size == 0:
        return frame

    return cv2.resize(region, (frame_w, frame_h), interpolation=cv2.INTER_LINEAR)


# ==================== CAPTURA DE SCREENSHOTS ====================
def guardar_screenshot(frame, plano_actual):
    """Save ``frame`` as a timestamped PNG inside the ``screenshots`` folder.

    The file name combines the current local time and the display name of
    the active shot (spaces replaced by underscores).

    Args:
        frame: image to write.
        plano_actual: key into PLANOS for the active shot.

    Returns:
        str | None: path of the written file, or None when OpenCV reports
        that the image could not be written.
    """
    # exist_ok replaces the check-then-create sequence, which could fail if
    # the folder appeared between the two calls.
    os.makedirs('screenshots', exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    plano_nombre = PLANOS[plano_actual]['nombre'].replace(' ', '_')
    filename = f"screenshots/shot_{timestamp}_{plano_nombre}.png"

    # imwrite reports failure through its return value instead of raising;
    # ignoring it announced a screenshot that was never written.
    if not cv2.imwrite(filename, frame):
        print(f"No se pudo guardar el screenshot: {filename}")
        return None

    print(f"üì∏ Screenshot: {filename}")
    return filename


# ==================== VISUALIZACIÓN ====================
def dibujar_landmarks(frame, pose_results, hands_results):
    """Draw the detected pose skeleton and hand skeletons onto ``frame``.

    Args:
        frame: image to draw on (modified in place).
        pose_results: pose detector result with ``pose_landmarks``, or a
            falsy value.
        hands_results: hand detector result with ``multi_hand_landmarks``,
            or a falsy value.

    Returns:
        The same ``frame`` object.
    """
    body = pose_results.pose_landmarks if pose_results else None
    if body:
        mp_drawing.draw_landmarks(
            frame,
            body,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
        )

    detected_hands = hands_results.multi_hand_landmarks if hands_results else None
    for hand in detected_hands or ():
        mp_drawing.draw_landmarks(
            frame,
            hand,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style()
        )

    return frame


def crear_panel_control(modo_control, plano_actual, plano_auto, orientation, fps,
                       num_cameras, shoulder_width, hold_mode, show_grid):
    """Render the 400x1080 control panel image.

    Sections, top to bottom: title and stats, mode and active shot,
    orientation angles (only with shoulder visibility above 0.5), keyboard
    controls, hold/grid status, manual gestures and auto-mode hints.

    Args:
        modo_control: 'AUTO' or 'MANUAL'.
        plano_actual: key into PLANOS for the shot being applied.
        plano_auto: key into PLANOS for the shot the auto detector proposes.
        orientation: dict with 'yaw', 'pitch', 'roll', 'visibility', or a
            falsy value when unavailable.
        fps: frames per second to display.
        num_cameras: number of available cameras to display.
        shoulder_width: shoulder width measurement; shown only when > 0.
        hold_mode: whether the current shot is locked.
        show_grid: whether the result grid is enabled.

    Returns:
        numpy.ndarray: the panel as a (1080, 400, 3) uint8 image.
    """
    import unicodedata  # function-scope import: only this panel needs it

    panel = np.zeros((1080, 400, 3), dtype=np.uint8)
    font = cv2.FONT_HERSHEY_SIMPLEX
    heading = (100, 200, 255)
    body = (200, 200, 200)
    enabled, disabled = (0, 255, 0), (100, 100, 100)
    cursor_y = 30

    def ascii_only(text):
        # The Hershey fonts can only draw ASCII; anything else shows up as
        # '?'. Drop the accents (NFKD splits them off) and any other
        # character that cannot be drawn.
        decomposed = unicodedata.normalize('NFKD', str(text))
        return decomposed.encode('ascii', 'ignore').decode('ascii')

    def write(text, scale, color, thickness, advance):
        # Draw one line at the cursor, then move the cursor down.
        nonlocal cursor_y
        cv2.putText(panel, ascii_only(text), (10, cursor_y),
                    font, scale, color, thickness)
        cursor_y += advance

    def rule():
        # Horizontal separator between sections.
        nonlocal cursor_y
        cv2.line(panel, (10, cursor_y), (390, cursor_y), (50, 50, 50), 1)
        cursor_y += 25

    # Title and stats
    write("PANEL DE CONTROL", 0.9, (0, 255, 255), 2, 50)
    write(f"FPS: {fps:.1f}", 0.6, (0, 255, 0), 2, 30)
    write(f"Camaras: {num_cameras}", 0.6, (255, 200, 100), 2, 40)
    rule()

    # Mode, with a distinct colour while the shot is locked
    if hold_mode:
        write(f"MODO: {modo_control} LOCKED", 0.7, (0, 165, 255), 2, 40)
    else:
        color_modo = (0, 255, 255) if modo_control == 'AUTO' else (255, 100, 255)
        write(f"MODO: {modo_control}", 0.7, color_modo, 2, 40)

    # Active shot
    plano_info = PLANOS[plano_actual]
    write("PLANO ACTIVO:", 0.6, heading, 2, 28)
    write(plano_info['nombre'], 0.55, (255, 255, 255), 1, 22)
    write(f"({plano_info['descripcion']})", 0.45, (180, 180, 180), 1, 30)

    if plano_auto != plano_actual and not hold_mode:
        write(f"Auto detecta: {PLANOS[plano_auto]['nombre']}",
              0.45, (100, 150, 255), 1, 25)

    if shoulder_width > 0:
        write(f"Distancia: {shoulder_width:.2f}", 0.45, (150, 150, 150), 1, 25)
    rule()

    # Orientation; "deg" instead of the degree sign, which cannot be drawn
    if orientation and orientation['visibility'] > 0.5:
        write("ORIENTACION:", 0.6, heading, 2, 28)
        write(f"Yaw: {orientation['yaw']:.0f} deg", 0.5, body, 1, 24)
        write(f"Pitch: {orientation['pitch']:.0f} deg", 0.5, body, 1, 24)
        write(f"Roll: {orientation['roll']:.0f} deg", 0.5, body, 1, 35)
    rule()

    # Keyboard controls
    write("CONTROLES:", 0.6, heading, 2, 28)
    for ctrl in (
        "'m' - AUTO/MANUAL",
        "'h' - HOLD/LOCK plano",
        "'s' - Capturar screenshot",
        "'g' - Mostrar/Ocultar grid",
        "'r' - Reset",
        "'c' - Cambiar camara",
        "ESC - Salir",
    ):
        write(ctrl, 0.48, body, 1, 22)
    cursor_y += 10
    rule()

    # Feature status
    write("ESTADO:", 0.6, heading, 2, 28)
    write(f"Hold Lock: {'ON' if hold_mode else 'OFF'}", 0.5,
          enabled if hold_mode else disabled, 1, 24)
    write(f"Grid: {'ON' if show_grid else 'OFF'}", 0.5,
          enabled if show_grid else disabled, 1, 30)
    rule()

    # Manual gestures
    write("GESTOS MANUALES:", 0.6, heading, 2, 28)
    for gesto in (
        "Pulgar abajo - General Extremo",
        "Puno - General",
        "1 dedo - Entero",
        "4 dedos - Americano",
        "3 dedos - Medio",
        "5 dedos - Medio Corto",
        "Paz (V) - Primer Plano",
        "Rock & Roll - Extremo",
    ):
        write(gesto, 0.42, (180, 180, 180), 1, 20)
    cursor_y += 10
    rule()

    # Auto-mode hints
    write("MODO AUTO:", 0.6, heading, 2, 28)
    for tip in (
        "Acercate = Primer Plano",
        "Alejate = Plano General",
        "Borde del frame = Extremo",
        "Date la vuelta = Espaldas",
    ):
        write(tip, 0.45, (150, 150, 255), 1, 22)

    return panel


def dibujar_info_deteccion(frame, camera_name):
    """Stamp the detection-view caption onto ``frame`` and return it.

    Args:
        frame: image to draw on (modified in place).
        camera_name: label of the active camera, shown in the first line.

    Returns:
        The same ``frame`` object.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(frame, f"DETECCION: {camera_name}", (10, 30),
                font, 0.7, (255, 255, 0), 2)
    cv2.putText(frame, "Landmarks Pose + Manos", (10, 60),
                font, 0.6, (200, 200, 200), 1)
    return frame


def dibujar_info_resultado(frame, show_grid):
    """Overlay the rule-of-thirds grid on the result view, and nothing else.

    Args:
        frame: image to draw on (modified in place).
        show_grid: when falsy the frame is returned untouched.

    Returns:
        The same ``frame`` object.
    """
    height, width = frame.shape[:2]

    if not show_grid:
        return frame

    grey = (80, 80, 80)

    # Two vertical lines at one and two thirds of the width...
    for k in (1, 2):
        x = k * width // 3
        cv2.line(frame, (x, 0), (x, height), grey, 1)

    # ...and two horizontal lines at one and two thirds of the height.
    for k in (1, 2):
        y = k * height // 3
        cv2.line(frame, (0, y), (width, y), grey, 1)

    return frame


# ==================== MAIN ====================
def main():
    """Run the interactive auto-framing application.

    Opens up to two cameras (indices 0 and 1), then loops: read a frame,
    run the detectors, pick the shot (automatically or from hand gestures),
    apply the smoothed crop and show the three windows. The loop ends on
    ESC or when a frame cannot be read.

    Keys: 'm' AUTO/MANUAL, 'h' hold, 's' screenshot, 'g' grid, 'c' switch
    camera, 'r' reset, 'd' toggle detector debug mode, ESC quit.

    Cameras, windows and detectors are released even if the loop raises.
    """
    print_resumen_planos()

    # Probe the first two camera indices and keep the ones that open.
    cameras = []
    for index in (0, 1):
        cap = cv2.VideoCapture(index)
        if not cap.isOpened():
            cap.release()  # do not keep a handle we will never use
            continue
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        cameras.append((f'Camara {index + 1}', cap))
        print(f"‚úÖ C√°mara {index + 1} detectada")

    if not cameras:
        print("‚ùå No se detectaron c√°maras")
        return

    try:
        orientation_tracker = BodyOrientation()
        body_position_detector = BodyPositionDetector()
        framers = [SmoothFramer(smoothing=0.15) for _ in cameras]
        fps_history = deque(maxlen=30)

        plano_actual = 'MEDIUM'
        plano_auto = 'MEDIUM'
        gesto_emoji = None
        modo_control = 'AUTO'
        camera_activa = 0
        shoulder_width_debug = 0

        hold_mode = False
        show_grid = True

        print(f"üé¨ Sistema iniciado - {len(cameras)} c√°mara(s) disponible(s)")
        print("‚ñ∂Ô∏è  Listo\n")

        cv2.namedWindow('CONTROL', cv2.WINDOW_NORMAL)
        cv2.namedWindow('DETECCION', cv2.WINDOW_NORMAL)
        cv2.namedWindow('RESULTADO', cv2.WINDOW_NORMAL)

        cv2.resizeWindow('CONTROL', 400, 1080)
        cv2.moveWindow('CONTROL', 0, 0)

        cv2.resizeWindow('DETECCION', 1280, 540)
        cv2.moveWindow('DETECCION', 420, 0)

        cv2.resizeWindow('RESULTADO', 1280, 540)
        cv2.moveWindow('RESULTADO', 420, 540)

        while True:
            start_time = time.perf_counter()

            camera_name, cap = cameras[camera_activa]
            ret, frame = cap.read()

            if not ret:
                break

            # Mirror the image so on-screen movement matches the user's.
            frame = cv2.flip(frame, 1)
            h, w = frame.shape[:2]

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            pose_results = pose.process(rgb_frame)
            hands_results = hands.process(rgb_frame)
            face_results = face_detection.process(rgb_frame)

            frame_deteccion = frame.copy()
            frame_deteccion = dibujar_landmarks(frame_deteccion, pose_results, hands_results)

            plano_auto = body_position_detector.detect_framing(
                pose_results.pose_landmarks, h, w
            )

            if pose_results.pose_landmarks:
                lm = pose_results.pose_landmarks.landmark
                shoulder_width_debug = abs(lm[11].x - lm[12].x)

            # Gestures are always classified, but only change the shot in
            # MANUAL mode without hold. With several hands the last one wins.
            if hands_results and hands_results.multi_hand_landmarks:
                for hand_landmarks in hands_results.multi_hand_landmarks:
                    plano_manual, gesto_emoji = clasificar_gesto_manual(hand_landmarks)
                    if modo_control == 'MANUAL' and not hold_mode:
                        plano_actual = plano_manual
            else:
                gesto_emoji = None

            if modo_control == 'AUTO' and not hold_mode:
                plano_actual = plano_auto

            orientation = orientation_tracker.calculate(pose_results.pose_landmarks)

            centro = obtener_centro_seguimiento(
                pose_results.pose_landmarks, face_results, w, h, plano_actual
            )
            zoom_factor = PLANOS[plano_actual]['zoom']
            y_offset = PLANOS[plano_actual]['y_offset']
            framer_state = framers[camera_activa].update(
                centro[0], centro[1], zoom_factor, y_offset
            )

            frame_resultado = frame.copy()
            frame_resultado = aplicar_encuadre(frame_resultado, framer_state)

            # A zero-length measurement must not crash the loop; such a
            # sample is simply skipped.
            elapsed = time.perf_counter() - start_time
            if elapsed > 0:
                fps_history.append(1.0 / elapsed)
            avg_fps = np.mean(fps_history) if fps_history else 0.0

            frame_deteccion = dibujar_info_deteccion(frame_deteccion, camera_name)
            frame_resultado = dibujar_info_resultado(frame_resultado, show_grid)

            panel_control = crear_panel_control(
                modo_control, plano_actual, plano_auto, orientation,
                avg_fps, len(cameras), shoulder_width_debug, hold_mode, show_grid
            )

            cv2.imshow('CONTROL', panel_control)
            cv2.imshow('DETECCION', frame_deteccion)
            cv2.imshow('RESULTADO', frame_resultado)

            key = cv2.waitKey(1) & 0xFF
            if key == 27:
                print("\nüëã Cerrando sistema...")
                break
            elif key == ord('r'):
                framers = [SmoothFramer(smoothing=0.15) for _ in cameras]
                body_position_detector = BodyPositionDetector()
                orientation_tracker = BodyOrientation()
                hold_mode = False
                print("üîÑ Reset")
            elif key == ord('m'):
                modo_control = 'MANUAL' if modo_control == 'AUTO' else 'AUTO'
                print(f"üîÄ Modo: {modo_control}")
            elif key == ord('h'):
                hold_mode = not hold_mode
                if hold_mode:
                    print(f"üîí Hold: ON - {PLANOS[plano_actual]['nombre']}")
                else:
                    print(f"üîí Hold: OFF")
            elif key == ord('s'):
                guardar_screenshot(frame_resultado, plano_actual)
            elif key == ord('g'):
                show_grid = not show_grid
                print(f"üìê Grid: {'ON' if show_grid else 'OFF'}")
            elif key == ord('c') and len(cameras) > 1:
                camera_activa = (camera_activa + 1) % len(cameras)
                print(f"üìπ {cameras[camera_activa][0]}")
            elif key == ord('d'):
                body_position_detector.debug_mode = not body_position_detector.debug_mode
                print(f"üêõ Debug: {'ON' if body_position_detector.debug_mode else 'OFF'}")
    finally:
        # Always give the devices and windows back, including when a
        # detector or drawing call raised inside the loop.
        for _, cap in cameras:
            cap.release()
        cv2.destroyAllWindows()
        pose.close()
        hands.close()
        face_detection.close()

        print("‚úÖ Sistema cerrado")

# Entry point: start the application only when this code is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()



📋 SISTEMA DE CÁMARA INTELIGENTE

🎭 MODO MANUAL (Gestos de mano):
--------------------------------------------------------------------------------
  Plano General Extremo     → Pulgar abajo 👎
  Plano General             → Puño cerrado ✊
  Plano Entero              → 1 dedo ☝️
  Plano Americano           → 4 dedos 🖖
  Plano Medio               → 3 dedos 🤟
  Plano Medio Corto         → 5 dedos (mano abierta) 🖐️
  Primer Plano              → Paz (V) ✌️
  Primerísimo Plano         → Rock and roll 🤘

🤖 MODO AUTO (Detección automática):
--------------------------------------------------------------------------------
  • Acércate a la cámara → Primer Plano / Extremo
  • Aléjate de la cámara → Plano Entero / General
  • Muévete al borde → General Extremo
  • Gírate de espaldas → Plano de Espaldas

⌨️  CONTROLES:
--------------------------------------------------------------------------------
  'm' - Cambiar entr

### MULTI PERSONA CONTROL AUTO

In [9]:
import cv2
import mediapipe as mp
import numpy as np
from collections import deque
from dataclasses import dataclass
from enum import Enum
import time
from datetime import datetime
import os


# ==================== MEDIAPIPE INITIALISATION ====================
# Short aliases for the MediaPipe solution modules used throughout the file.
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands
mp_face = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles


# Module-level detectors shared by the whole script (video mode, not
# static images).
# NOTE(review): the original comment here said "configuration to detect
# multiple people", but the pose result is consumed as a single
# `pose_landmarks` object (MultiPersonTracker.update attaches it to the first
# face only), so multi-person detection appears to rely on face detection
# rather than on this Pose instance -- confirm.
pose = mp_pose.Pose(
    static_image_mode=False,
    model_complexity=1,
    smooth_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.7,
    enable_segmentation=False
)

hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=4,  # raised (the single-person cell uses 2) for several people
    min_detection_confidence=0.7,
    min_tracking_confidence=0.6
)

# Face detections are what MultiPersonTracker uses as a proxy for people.
face_detection = mp_face.FaceDetection(
    min_detection_confidence=0.5
)


# ==================== SISTEMA DE PLANOS ====================
class TipoPlano(Enum):
    """Shot types known to the system; each one is the 'tipo' of a PLANOS entry."""
    EXTREME_WIDE = 1      # extreme wide shot
    WIDE = 2              # wide shot
    FULL = 3              # full shot (head to feet)
    COWBOY = 4            # cowboy / "American" shot (from the knees)
    MEDIUM = 5            # medium shot (waist up)
    MEDIUM_CLOSEUP = 6    # medium close-up (chest up)
    CLOSEUP = 7           # close-up (face and shoulders)
    EXTREME_CLOSEUP = 8  # extreme close-up (face only)
    OVER_SHOULDER = 9  # over-the-shoulder shot
    BACK_SHOT = 10  # subject seen from behind
    LOW_ANGLE = 11  # low-angle shot
    HIGH_ANGLE = 12  # high-angle shot
    TWO_SHOT = 13         # new in the multi-person version: two people
    GROUP_SHOT = 14       # new in the multi-person version: three or more people


# Shot catalogue, keyed by shot name. Every entry carries:
#   'zoom'           - zoom factor handed to the framer for this shot
#   'nombre'         - display name (also used in screenshot file names)
#   'tipo'           - the matching TipoPlano member
#   'descripcion'    - short description shown in the control panel
#   'y_offset'       - vertical shift added to the tracked centre
#   'gesto_manual'   - hand gesture that selects the shot in MANUAL mode, or
#                      'N/A (solo AUTO)' when the shot is automatic only
#   'deteccion_auto' - human-readable description of the automatic trigger
# NOTE(review): several string values below contain mis-encoded (mojibake)
# characters where accents/emoji were intended -- confirm the file encoding.
PLANOS = {
    'EXTREME_WIDE': {
        'zoom': 0.7,
        'nombre': 'Plano General Extremo',
        'tipo': TipoPlano.EXTREME_WIDE,
        'descripcion': 'Contexto completo + entorno',
        'y_offset': 0.05,
        'gesto_manual': 'Pulgar abajo üëé',
        'deteccion_auto': 'Persona muy lejos o en borde del frame'
    },
    'WIDE': {
        'zoom': 0.9,
        'nombre': 'Plano General',
        'tipo': TipoPlano.WIDE,
        'descripcion': 'Cuerpo completo + entorno',
        'y_offset': 0.0,
        'gesto_manual': 'Pu√±o cerrado ‚úä',
        'deteccion_auto': 'Cuerpo completo visible con espacio'
    },
    'FULL': {
        'zoom': 1.0,
        'nombre': 'Plano Entero',
        'tipo': TipoPlano.FULL,
        'descripcion': 'De pies a cabeza',
        'y_offset': 0.0,
        'gesto_manual': '1 dedo ‚òùÔ∏è',
        'deteccion_auto': 'Pies y cabeza visibles ajustados'
    },
    'COWBOY': {
        'zoom': 1.3,
        'nombre': 'Plano Americano',
        'tipo': TipoPlano.COWBOY,
        'descripcion': 'Desde rodillas',
        'y_offset': -0.05,
        'gesto_manual': '4 dedos üññ',
        'deteccion_auto': 'Visible desde rodillas hacia arriba'
    },
    'MEDIUM': {
        'zoom': 1.5,
        'nombre': 'Plano Medio',
        'tipo': TipoPlano.MEDIUM,
        'descripcion': 'Cintura hacia arriba',
        'y_offset': -0.08,
        'gesto_manual': '3 dedos ü§ü',
        'deteccion_auto': 'Visible desde cintura/caderas'
    },
    'MEDIUM_CLOSEUP': {
        'zoom': 1.8,
        'nombre': 'Plano Medio Corto',
        'tipo': TipoPlano.MEDIUM_CLOSEUP,
        'descripcion': 'Pecho hacia arriba',
        'y_offset': -0.10,
        'gesto_manual': '5 dedos (mano abierta) üñêÔ∏è',
        'deteccion_auto': 'Solo torso superior visible'
    },
    'CLOSEUP': {
        'zoom': 2.2,
        'nombre': 'Primer Plano',
        'tipo': TipoPlano.CLOSEUP,
        'descripcion': 'Cara y hombros',
        'y_offset': -0.08,
        'gesto_manual': 'Paz (V) ‚úåÔ∏è',
        'deteccion_auto': 'Cara cercana, hombros anchos'
    },
    'EXTREME_CLOSEUP': {
        'zoom': 2.8,
        'nombre': 'Primer√≠simo Plano',
        'tipo': TipoPlano.EXTREME_CLOSEUP,
        'descripcion': 'Solo rostro',
        'y_offset': -0.05,
        'gesto_manual': 'Rock and roll ü§ò',
        'deteccion_auto': 'Cara muy cercana ocupando frame'
    },
    'OVER_SHOULDER': {
        'zoom': 1.6,
        'nombre': 'Sobre el Hombro',
        'tipo': TipoPlano.OVER_SHOULDER,
        'descripcion': 'Desde hombro lateral',
        'y_offset': -0.08,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Perfil con hombro prominente'
    },
    'BACK_SHOT': {
        'zoom': 1.2,
        'nombre': 'Plano de Espaldas',
        'tipo': TipoPlano.BACK_SHOT,
        'descripcion': 'Vista posterior completa',
        'y_offset': 0.0,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Persona de espaldas detectada'
    },
    'LOW_ANGLE': {
        'zoom': 1.3,
        'nombre': 'Contrapicado',
        'tipo': TipoPlano.LOW_ANGLE,
        'descripcion': 'Desde abajo mirando arriba',
        'y_offset': 0.1,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Persona en parte inferior del frame'
    },
    'HIGH_ANGLE': {
        'zoom': 1.3,
        'nombre': 'Picado',
        'tipo': TipoPlano.HIGH_ANGLE,
        'descripcion': 'Desde arriba mirando abajo',
        'y_offset': -0.15,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Persona en parte superior del frame'
    },
    # Multi-person shots: chosen from the number of tracked people.
    'TWO_SHOT': {
        'zoom': 1.2,
        'nombre': 'Dos Personas',
        'tipo': TipoPlano.TWO_SHOT,
        'descripcion': 'Encuadre para 2 personas',
        'y_offset': 0.0,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': '2 personas detectadas'
    },
    'GROUP_SHOT': {
        'zoom': 0.8,
        'nombre': 'Grupo',
        'tipo': TipoPlano.GROUP_SHOT,
        'descripcion': 'Encuadre para 3+ personas',
        'y_offset': 0.0,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': '3+ personas detectadas'
    }
}


def print_resumen_planos():
    """Print the start-up help banner to stdout.

    The banner lists, in order: the hand gesture of every PLANOS entry that
    has one, the automatic-mode rules, the multi-person keys and the general
    keyboard controls.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print("\n" + heavy_rule)
    print("üìã SISTEMA DE C√ÅMARA INTELIGENTE - MULTI-PERSONA")
    print(heavy_rule)

    print("\nüé≠ MODO MANUAL (Gestos de mano):")
    print(light_rule)
    # Shots flagged as automatic-only have no gesture and are left out.
    for shot in PLANOS.values():
        gesture = shot['gesto_manual']
        if gesture == 'N/A (solo AUTO)':
            continue
        print(f"  {shot['nombre']:25} ‚Üí {gesture}")

    # The remaining sections are static text: (title, lines).
    static_sections = (
        ("\nü§ñ MODO AUTO (Detecci√≥n autom√°tica):", (
            "  ‚Ä¢ 1 persona: Planos normales seg√∫n distancia",
            "  ‚Ä¢ 2 personas: Two-Shot o Over-Shoulder",
            "  ‚Ä¢ 3+ personas: Group Shot autom√°tico",
        )),
        ("\nüë• MULTI-PERSONA:", (
            "  '1/2/3/4' - Cambiar foco a persona 1/2/3/4",
            "  'a' - Auto: encuadrar todas las personas",
            "  'p' - Split screen (experimental)",
        )),
        ("\n‚å®Ô∏è  CONTROLES:", (
            "  'm' - Cambiar entre AUTO/MANUAL",
            "  'h' - HOLD: Congelar plano actual",
            "  's' - Capturar screenshot",
            "  'g' - Mostrar/ocultar grid",
            "  'c' - Cambiar c√°mara",
            "  'r' - Reset sistema",
            "  ESC - Salir",
        )),
    )
    for title, entries in static_sections:
        print(title)
        print(light_rule)
        for entry in entries:
            print(entry)

    print("\n" + heavy_rule + "\n")


# ==================== DETECCIÓN MULTI-PERSONA ====================
@dataclass
class Person:
    """One detected person, as recorded for a single frame.

    Instances are created by MultiPersonTracker.update from a face detection.
    """
    id: int  # value of the tracker's running counter when the record was made
    # Pose landmarks object attached to this person, or None. Annotated as
    # `object` because the previous annotation, `any`, is the builtin
    # function rather than a type.
    pose_landmarks: object
    face_box: tuple  # (x, y, w, h) of the face bounding box
    center: tuple  # (x, y) centre of face_box
    confidence: float  # detection score
    last_seen: float  # timestamp passed to the tracker for that frame


class MultiPersonTracker:
    """Keeps the list of people seen in the most recent frames.

    Face detections stand in for people: every detection in a frame becomes
    one ``Person`` record.

    NOTE(review): ``iou_threshold`` is stored but never read inside this
    class, and every call to ``update`` gives each detection a brand-new id,
    so ids do not identify the same person across frames.
    """

    def __init__(self, max_persons=4):
        self.persons = []
        self.next_id = 0
        self.max_persons = max_persons
        self.max_age = 1.0  # seconds a record survives without new detections
        self.iou_threshold = 0.3

    def update(self, face_detections, pose_landmarks, current_time):
        """Refresh the tracked list from this frame's detections.

        Args:
            face_detections: result object exposing ``.detections`` (may be
                None or empty).
            pose_landmarks: pose result for the frame; attached to the first
                detection only, the others get None.
            current_time: timestamp stored as ``last_seen``.

        Returns:
            The tracked list: this frame's records when there is at least one
            detection, otherwise the previous records younger than
            ``max_age``.
        """
        faces = []
        if face_detections and face_detections.detections:
            # Only the first max_persons detections are considered.
            faces = face_detections.detections[:self.max_persons]

        fresh = []
        for slot, face in enumerate(faces):
            box = face.location_data.relative_bounding_box
            left, top = box.xmin, box.ymin
            box_w, box_h = box.width, box.height
            fresh.append(Person(
                id=self.next_id,
                pose_landmarks=None if slot else pose_landmarks,
                face_box=(left, top, box_w, box_h),
                center=(left + box_w / 2, top + box_h / 2),
                confidence=face.score[0],
                last_seen=current_time,
            ))
            self.next_id += 1

        # Expire stale records, then let any new detections replace the list.
        still_alive = [
            p for p in self.persons
            if current_time - p.last_seen < self.max_age
        ]
        self.persons = fresh or still_alive
        return self.persons

    def get_person_count(self):
        """Return how many people are currently tracked."""
        return len(self.persons)

    def get_person(self, index):
        """Return the person at list position ``index``, or None if out of range."""
        in_range = 0 <= index < len(self.persons)
        return self.persons[index] if in_range else None

    def get_all_centers(self):
        """Return the ``center`` of every tracked person, in list order."""
        return [someone.center for someone in self.persons]

    def get_bounding_box_all(self):
        """Return ``(center_x, center_y, width, height)`` enclosing all face boxes.

        With nobody tracked the fixed value ``(0.5, 0.5, 0.5, 0.5)`` is returned.
        """
        if not self.persons:
            return (0.5, 0.5, 0.5, 0.5)

        boxes = [someone.face_box for someone in self.persons]
        left = min(b[0] for b in boxes)
        top = min(b[1] for b in boxes)
        right = max(b[0] + b[2] for b in boxes)
        bottom = max(b[1] + b[3] for b in boxes)

        return ((left + right) / 2, (top + bottom) / 2, right - left, bottom - top)


# ==================== GESTOS MANUALES ====================
def contar_dedos(hand_landmarks):
    """Count the fingers that look extended on one hand.

    Args:
        hand_landmarks: object whose ``.landmark`` sequence is indexable by
            landmark id and whose items expose ``.x`` and ``.y``.

    Returns:
        int between 0 and 5.
    """
    pts = hand_landmarks.landmark

    # Thumb: compared sideways, tip (4) against the joint just before it (3).
    # NOTE(review): this x-comparison presumably only holds for one hand /
    # one mirroring of the image -- confirm.
    thumb_out = pts[4].x < pts[3].x

    # Other fingers: the tip must have a smaller y than the joint two ids
    # below it.
    raised = sum(pts[tip].y < pts[tip - 2].y for tip in (8, 12, 16, 20))

    return int(thumb_out) + raised


def detectar_rock_and_roll(hand_landmarks):
    """Return True for the "rock and roll" sign: index and pinky up, middle and ring down.

    A finger counts as "up" when its tip has a smaller y than the joint two
    landmark ids below it, and "down" when the tip's y is larger.

    Args:
        hand_landmarks: object whose ``.landmark`` sequence exposes ``.y``
            for ids 6-20.
    """
    lm = hand_landmarks.landmark
    indice_up = lm[8].y < lm[6].y
    medio_down = lm[12].y > lm[10].y
    anular_down = lm[16].y > lm[14].y
    # ASCII name: the previous local contained mis-encoded characters that
    # are not valid in a Python identifier, so the function failed to compile.
    menique_up = lm[20].y < lm[18].y

    return indice_up and medio_down and anular_down and menique_up


def detectar_pulgar_abajo(hand_landmarks):
    """Return True for a thumbs-down: thumb tip below its joint, other fingers folded.

    Args:
        hand_landmarks: object whose ``.landmark`` sequence exposes ``.y``
            for ids 3-20.
    """
    pts = hand_landmarks.landmark

    # Thumb tip (4) has a larger y than the joint before it (3).
    thumb_points_down = pts[4].y > pts[3].y

    # Each remaining fingertip has a larger y than the joint two ids below it.
    fingers_folded = all(
        pts[tip].y > pts[tip - 2].y for tip in (8, 12, 16, 20)
    )
    return thumb_points_down and fingers_folded


def detectar_gesto_paz(hand_landmarks):
    """Return True for the peace / "V" sign: index and middle up, ring and pinky down.

    A finger counts as "up" when its tip has a smaller y than the joint two
    landmark ids below it, and "down" when the tip's y is larger.

    Args:
        hand_landmarks: object whose ``.landmark`` sequence exposes ``.y``
            for ids 6-20.
    """
    lm = hand_landmarks.landmark
    indice_up = lm[8].y < lm[6].y
    medio_up = lm[12].y < lm[10].y
    anular_down = lm[16].y > lm[14].y
    # ASCII name: the previous local contained mis-encoded characters that
    # are not valid in a Python identifier, so the function failed to compile.
    menique_down = lm[20].y > lm[18].y
    return indice_up and medio_up and anular_down and menique_down


def clasificar_gesto_manual(hand_landmarks):
    """Map a hand pose to a shot key and a short label.

    Named gestures are checked in a fixed order (rock-and-roll, thumbs-down,
    closed fist, peace sign) before falling back to the plain finger count.

    Returns:
        tuple ``(shot_key, label)``. A finger count with no shot of its own
        yields ``('WIDE', str(count))``.
    """
    finger_count = contar_dedos(hand_landmarks)

    if detectar_rock_and_roll(hand_landmarks):
        return 'EXTREME_CLOSEUP', "ü§ò"
    if detectar_pulgar_abajo(hand_landmarks):
        return 'EXTREME_WIDE', "üëé"
    if finger_count == 0:
        return 'WIDE', "‚úä"
    if detectar_gesto_paz(hand_landmarks):
        return 'CLOSEUP', "‚úåÔ∏è"

    # No named gesture matched: choose by the number of extended fingers.
    by_count = {
        1: ('FULL', "‚òùÔ∏è"),
        3: ('MEDIUM', "ü§ü"),
        4: ('COWBOY', "üññ"),
        5: ('MEDIUM_CLOSEUP', "üñêÔ∏è"),
    }
    if finger_count in by_count:
        return by_count[finger_count]
    return 'WIDE', str(finger_count)


# ==================== DETECCIÓN AUTOMÁTICA MULTI-PERSONA ====================
class BodyPositionDetector:
    """Chooses a shot key (one of the ``PLANOS`` keys) from pose landmarks.

    Single-person decisions are smoothed with a majority vote over the last
    three classifications; ``last_plano`` always holds the latest answer.
    """

    def __init__(self):
        self.last_plano = 'MEDIUM'
        self.history = deque(maxlen=3)
        self.debug_mode = True

    def detect_framing_multi(self, person_tracker, pose_landmarks, frame_height, frame_width):
        """Pick a shot taking the number of tracked people into account.

        Two people give 'TWO_SHOT' and three or more give 'GROUP_SHOT' (both
        bypass the history vote); otherwise ``detect_framing`` decides.
        """
        head_count = person_tracker.get_person_count()
        if head_count < 2:
            return self.detect_framing(pose_landmarks, frame_height, frame_width)

        group_plano = 'GROUP_SHOT' if head_count >= 3 else 'TWO_SHOT'
        self.last_plano = group_plano
        return group_plano

    def detect_framing(self, pose_landmarks, frame_height, frame_width):
        """Classify the framing of a single person.

        Args:
            pose_landmarks: object with a ``.landmark`` sequence whose items
                expose ``x``, ``y``, ``z`` and ``visibility``; when falsy the
                previous shot is returned unchanged.
            frame_height, frame_width: accepted but not used here.

        Returns:
            The smoothed shot key.
        """
        if not pose_landmarks:
            return self.last_plano

        pts = pose_landmarks.landmark
        nose, l_shoulder, r_shoulder = pts[0], pts[11], pts[12]

        shoulder_width = abs(l_shoulder.x - r_shoulder.x)
        head_y = nose.y
        shoulder_y = (l_shoulder.y + r_shoulder.y) / 2

        hips_vis = min(pts[23].visibility, pts[24].visibility)
        knees_vis = min(pts[25].visibility, pts[26].visibility)
        ankles_vis = min(pts[27].visibility, pts[28].visibility)

        center_x = np.mean([nose.x, l_shoulder.x, r_shoulder.x])
        center_y = np.mean([nose.y, l_shoulder.y, r_shoulder.y])

        # Measure from the head down to the first landmark pair that is
        # visible enough (ankles, then knees, then hips); shoulders otherwise.
        lowest_y = shoulder_y
        fallbacks = (
            ((27, 28), ankles_vis, 0.3),
            ((25, 26), knees_vis, 0.3),
            ((23, 24), hips_vis, 0.4),
        )
        for (first, second), vis, min_vis in fallbacks:
            if vis > min_vis:
                lowest_y = (pts[first].y + pts[second].y) / 2
                break
        body_span = abs(lowest_y - head_y)

        facing = self._detect_orientation(pts)
        candidate = self._classify_simple(
            shoulder_width, body_span, center_x, center_y,
            hips_vis, knees_vis, ankles_vis, facing
        )

        self.history.append(candidate)
        self.last_plano = self._smooth()
        return self.last_plano

    def _detect_orientation(self, lm):
        """Return 'FRONTAL', 'ESPALDAS' or 'PERFIL' from nose depth vs. shoulder depth."""
        shoulder_depth = (lm[11].z + lm[12].z) / 2
        nose_depth = lm[0].z

        if nose_depth < shoulder_depth - 0.1:
            return 'FRONTAL'
        if nose_depth > shoulder_depth + 0.1:
            return 'ESPALDAS'
        return 'PERFIL'

    def _classify_simple(self, shoulder_width, body_span, center_x, center_y,
                        hips_vis, knees_vis, ankles_vis, orientation):
        """Rule-based classification for one frame.

        Checked in order: back view, vertical position, horizontal edge, then
        shoulder width with landmark visibility as a tie-breaker.
        ``body_span`` is accepted but not used by the rules.
        """
        if orientation == 'ESPALDAS':
            return 'BACK_SHOT'

        if center_y < 0.2:
            return 'HIGH_ANGLE'
        if center_y > 0.8:
            return 'LOW_ANGLE'

        if center_x < 0.15 or center_x > 0.85:
            return 'EXTREME_WIDE'

        # The wider the shoulders appear, the tighter the shot.
        if shoulder_width > 0.50:
            return 'EXTREME_CLOSEUP'
        if shoulder_width > 0.38:
            return 'CLOSEUP'
        if shoulder_width > 0.28:
            return 'MEDIUM_CLOSEUP' if hips_vis < 0.3 else 'MEDIUM'
        if shoulder_width > 0.20:
            return 'COWBOY' if knees_vis > 0.3 else 'MEDIUM'
        if shoulder_width > 0.15:
            return 'FULL' if ankles_vis > 0.3 else 'COWBOY'
        if shoulder_width > 0.10:
            return 'WIDE'
        return 'EXTREME_WIDE'

    def _smooth(self):
        """Return the most frequent shot in ``history`` ('MEDIUM' when empty)."""
        if not self.history:
            return 'MEDIUM'
        from collections import Counter
        (winner, _votes), = Counter(self.history).most_common(1)
        return winner


# ==================== ORIENTACIÓN CORPORAL ====================
class BodyOrientation:
    """Estimates yaw / pitch / roll of the torso, averaged over recent frames."""

    def __init__(self):
        # Rolling window of the last ten per-frame estimates.
        self.history = deque(maxlen=10)

    def calculate(self, pose_landmarks):
        """Add this frame's estimate and return the running average.

        Args:
            pose_landmarks: object with a ``.landmark`` sequence whose items
                expose ``x``, ``y``, ``z`` and ``visibility``; falsy input
                returns None without touching the history.

        Returns:
            dict with keys 'yaw', 'pitch', 'roll' (degrees) and 'visibility',
            each averaged over the stored history.
        """
        if not pose_landmarks:
            return None

        pts = pose_landmarks.landmark

        def xyz(index):
            point = pts[index]
            return np.array([point.x, point.y, point.z])

        l_shoulder, r_shoulder = xyz(11), xyz(12)
        l_hip, r_hip = xyz(23), xyz(24)

        # Vector across the shoulders, and vector from hip centre to
        # shoulder centre.
        across = r_shoulder - l_shoulder
        spine = (l_shoulder + r_shoulder) / 2 - (l_hip + r_hip) / 2

        self.history.append({
            'yaw': np.arctan2(across[2], across[0]) * 180 / np.pi,
            'pitch': np.arctan2(spine[2], spine[1]) * 180 / np.pi,
            'roll': np.arctan2(across[1], across[0]) * 180 / np.pi,
            'visibility': min(pts[11].visibility, pts[12].visibility),
        })
        return self._smooth()

    def _smooth(self):
        """Return the per-key mean of ``history``, or None when it is empty."""
        if not self.history:
            return None
        return {
            field: np.mean([sample[field] for sample in self.history])
            for field in ('yaw', 'pitch', 'roll', 'visibility')
        }


# ==================== ENCUADRE ====================
@dataclass
class FrameTarget:
    """Framing state: where the crop is centred and how much it is zoomed."""
    x: float = 0.5  # horizontal centre; aplicar_encuadre multiplies it by the frame width
    y: float = 0.5  # vertical centre; aplicar_encuadre multiplies it by the frame height
    zoom: float = 1.0  # zoom factor; aplicar_encuadre divides the frame size by it


class SmoothFramer:
    """Eases the current framing toward a target, one step per call.

    ``current`` and ``target`` are FrameTarget objects; ``smoothing`` is the
    fraction of the remaining distance covered on every ``update``.
    """

    def __init__(self, smoothing=0.15):
        self.smoothing = smoothing
        self.current = FrameTarget()
        self.target = FrameTarget()

    def update(self, center_x, center_y, zoom, y_offset=0.0):
        """Set a new target and advance ``current`` one step toward it.

        Args:
            center_x, center_y: desired framing centre.
            zoom: desired zoom factor.
            y_offset: added to ``center_y`` before it is stored as the target.

        Returns:
            ``self.current`` (the same object on every call, mutated in place).
        """
        goal = self.target
        goal.x, goal.y, goal.zoom = center_x, center_y + y_offset, zoom

        state = self.current
        for axis in ('x', 'y', 'zoom'):
            now = getattr(state, axis)
            setattr(state, axis, now + (getattr(goal, axis) - now) * self.smoothing)

        return state


def obtener_centro_seguimiento_multi(person_tracker, focused_person_id, pose_landmarks, 
                                     face_result, w, h, plano_actual):
    """Return the ``(x, y)`` point the framer should follow.

    * No focus and more than one person: centre of the box enclosing everyone.
    * Focus set and that person exists: that person's centre.
    * Anything else: the single-person rule of ``obtener_centro_seguimiento``.

    ``focused_person_id`` is passed to ``person_tracker.get_person`` as-is.
    """
    head_count = person_tracker.get_person_count()

    if focused_person_id is None:
        if head_count > 1:
            group_box = person_tracker.get_bounding_box_all()
            return (group_box[0], group_box[1])
    else:
        chosen = person_tracker.get_person(focused_person_id)
        if chosen:
            return chosen.center

    # Single person, nobody, or a focus that no longer resolves.
    return obtener_centro_seguimiento(pose_landmarks, face_result, w, h, plano_actual)


def obtener_centro_seguimiento(pose_landmarks, face_result, w, h, plano_actual):
    """Return the ``(x, y)`` tracking centre for one person, as Python floats.

    The anchor depends on the active shot:
      * close-ups: midpoint between the nose and the centre of landmarks 9/10;
      * medium shots: nose and shoulder midpoint, shoulders weighted double;
      * everything else: mean of nose, both shoulders and both hips.

    Without pose landmarks the centre ``(0.5, 0.5)`` is returned.
    ``face_result``, ``w`` and ``h`` are accepted but not used.
    """
    if not pose_landmarks:
        return (0.5, 0.5)

    pts = pose_landmarks.landmark
    nose = pts[0]

    if plano_actual in ('CLOSEUP', 'EXTREME_CLOSEUP'):
        mouth_x = (pts[9].x + pts[10].x) / 2
        mouth_y = (pts[9].y + pts[10].y) / 2
        return (float((nose.x + mouth_x) / 2), float((nose.y + mouth_y) / 2))

    if plano_actual in ('MEDIUM', 'MEDIUM_CLOSEUP'):
        shoulders_x = (pts[11].x + pts[12].x) / 2
        shoulders_y = (pts[11].y + pts[12].y) / 2
        return (
            float((nose.x + shoulders_x * 2) / 3),
            float((nose.y + shoulders_y * 2) / 3),
        )

    anchors = [pts[index] for index in (0, 11, 12, 23, 24)]
    mean_x = np.mean([point.x for point in anchors])
    mean_y = np.mean([point.y for point in anchors])
    return (float(mean_x), float(mean_y))


def aplicar_encuadre(frame, framer_state):
    """Crop ``frame`` around the framing state and scale the crop back to full size.

    Args:
        frame: image array; only ``shape[:2]`` (height, width) is read.
        framer_state: object exposing ``x``, ``y`` and ``zoom``; ``x`` and
            ``y`` are multiplied by the frame width / height.

    Returns:
        The resized crop, or ``frame`` itself when the crop would be empty.
    """
    h, w = frame.shape[:2]
    zoom = framer_state.zoom

    def window(center_px, span, limit):
        # 1-D crop window of length `span` around `center_px`, pushed back
        # inside [0, limit] when it would overflow either edge.
        start = max(0, center_px - span // 2)
        stop = min(limit, start + span)
        if stop - start < span:
            start = max(0, stop - span)
        return start, stop

    x1, x2 = window(int(framer_state.x * w), int(w / zoom), w)
    y1, y2 = window(int(framer_state.y * h), int(h / zoom), h)

    cropped = frame[y1:y2, x1:x2]
    if cropped.size == 0:
        return frame

    return cv2.resize(cropped, (w, h), interpolation=cv2.INTER_LINEAR)


# ==================== SPLIT SCREEN ====================
def crear_split_screen(frame, person_tracker, framer):
    """Build a split-screen image with one tile per tracked person (up to four).

    Layout:
      * 0 or 1 person: ``frame`` is returned unchanged;
      * 2 people: two side-by-side tiles, zoom 1.5;
      * 3 or more: a 2x2 grid, zoom 1.8 (with three people the fourth
        quadrant stays black).

    Args:
        frame: source image; only ``shape[:2]`` is read. A black 3-channel
            uint8 canvas of the same height and width is created.
        person_tracker: provides ``get_person_count()`` and ``get_person(i)``.
        framer: accepted for interface compatibility; not used.

    Returns:
        The composed image, or ``frame`` itself for fewer than two people.
    """
    h, w = frame.shape[:2]
    num_persons = person_tracker.get_person_count()

    if num_persons < 2:
        return frame

    half_w = w // 2
    if num_persons == 2:
        tile_w, tile_h, zoom = half_w, h, 1.5
        origins = [(0, 0), (half_w, 0)]
    else:
        half_h = h // 2
        tile_w, tile_h, zoom = half_w, half_h, 1.8
        origins = [(0, 0), (half_w, 0), (0, half_h), (half_w, half_h)]

    split_frame = np.zeros((h, w, 3), dtype=np.uint8)

    for i, (x0, y0) in enumerate(origins[:num_persons]):
        person = person_tracker.get_person(i)
        if not person:
            continue

        # A throw-away framer only ever takes one step, so it must snap to
        # the target (smoothing=1.0). With the default 0.15 the crop moved
        # just 15% of the way from the frame centre toward the person and
        # the zoom stayed close to 1.0 instead of the requested value.
        snap_framer = SmoothFramer(smoothing=1.0)
        state = snap_framer.update(person.center[0], person.center[1], zoom, 0)

        # aplicar_encuadre does not modify its input, so no copy is needed.
        tile = aplicar_encuadre(frame, state)
        split_frame[y0:y0 + tile_h, x0:x0 + tile_w] = cv2.resize(tile, (tile_w, tile_h))

    # Separator lines between tiles.
    cv2.line(split_frame, (half_w, 0), (half_w, h), (255, 255, 255), 2)
    if num_persons > 2:
        cv2.line(split_frame, (0, h // 2), (w, h // 2), (255, 255, 255), 2)
    return split_frame


# ==================== CAPTURA DE SCREENSHOTS ====================
def guardar_screenshot(frame, plano_actual):
    """Save ``frame`` as a PNG under ``screenshots/`` and report the outcome.

    The file name combines a second-resolution timestamp with the display
    name of the active shot (spaces replaced by underscores).

    Args:
        frame: image to write.
        plano_actual: key into ``PLANOS``.

    Returns:
        The target path. It is returned even when the write fails, so the
        return type callers rely on is unchanged; the failure is only
        reported on stdout.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs('screenshots', exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    plano_nombre = PLANOS[plano_actual]['nombre'].replace(' ', '_')
    filename = f"screenshots/shot_{timestamp}_{plano_nombre}.png"

    # cv2.imwrite signals failure through its boolean result; previously the
    # success message was printed regardless.
    if cv2.imwrite(filename, frame):
        print(f"üì∏ Screenshot: {filename}")
    else:
        print(f"No se pudo guardar el screenshot: {filename}")

    return filename


# ==================== VISUALIZACIÓN ====================
def dibujar_landmarks_multi(frame, pose_results, hands_results, person_tracker):
    """Draw pose and hand landmarks plus a labelled box per tracked person.

    ``frame`` is drawn on in place and also returned. Tracked people get a
    rectangle from their ``face_box`` and a "P<n>" label, with the colour
    cycling through four fixed colours.
    """
    # Body pose skeleton
    if pose_results and pose_results.pose_landmarks:
        mp_drawing.draw_landmarks(
            frame,
            pose_results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
        )

    # Hands
    detected_hands = hands_results.multi_hand_landmarks if hands_results else None
    for one_hand in detected_hands or ():
        mp_drawing.draw_landmarks(
            frame,
            one_hand,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style()
        )

    # Face boxes: face_box values are scaled by the frame size to get pixels.
    palette = ((0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0))
    frame_h, frame_w = frame.shape[:2]
    for slot, person in enumerate(person_tracker.persons):
        bx, by, bw, bh = person.face_box
        left, top = int(bx * frame_w), int(by * frame_h)
        right, bottom = int((bx + bw) * frame_w), int((by + bh) * frame_h)

        color = palette[slot % 4]
        cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
        cv2.putText(frame, f"P{slot+1}", (left, top - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    return frame


def crear_panel_control(modo_control, plano_actual, plano_auto, orientation, fps, 
                       num_cameras, shoulder_width, hold_mode, show_grid, 
                       person_tracker, focused_person_id, split_screen_mode):
    """Render the control side panel as a 1080x400 BGR image.

    Top to bottom: title, FPS and camera count; multi-person status; mode and
    active shot; multi-person keys; general keys; hold / grid state.

    Args:
        modo_control: mode label shown after "MODO:"; 'AUTO' selects the
            AUTO colour.
        plano_actual: key into ``PLANOS`` for the active shot.
        plano_auto: key into ``PLANOS`` for the automatic suggestion; shown
            only when it differs from ``plano_actual`` and hold is off.
        orientation, shoulder_width: accepted but not drawn.
        fps: frames per second, shown with one decimal.
        num_cameras: number of cameras, shown as-is.
        hold_mode, show_grid, split_screen_mode: boolean status flags.
        person_tracker: provides ``get_person_count()``.
        focused_person_id: zero-based focus, or None for "all".

    Returns:
        numpy uint8 array of shape (1080, 400, 3).
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    heading_color = (100, 200, 255)
    body_color = (200, 200, 200)

    panel = np.zeros((1080, 400, 3), dtype=np.uint8)
    cursor = 30  # y coordinate of the next text baseline

    def write(text, scale, color, thickness, advance):
        # Draw one line of text at the cursor, then move the cursor down.
        nonlocal cursor
        cv2.putText(panel, text, (10, cursor), font, scale, color, thickness)
        cursor += advance

    def rule():
        # Thin horizontal separator followed by the standard gap.
        nonlocal cursor
        cv2.line(panel, (10, cursor), (390, cursor), (50, 50, 50), 1)
        cursor += 25

    # Header
    write("PANEL DE CONTROL", 0.9, (0, 255, 255), 2, 50)
    write(f"FPS: {fps:.1f}", 0.6, (0, 255, 0), 2, 30)
    write(f"Camaras: {num_cameras}", 0.6, (255, 200, 100), 2, 40)
    rule()

    # Multi-person status. The label is ASCII only: cv2.putText's Hershey
    # fonts cannot draw emoji and replace them with question marks.
    num_persons = person_tracker.get_person_count()
    write(f"PERSONAS: {num_persons}", 0.7, (255, 150, 0), 2, 30)

    if focused_person_id is not None:
        write(f"Foco: Persona {focused_person_id + 1}", 0.5, (255, 255, 0), 1, 25)
    else:
        write("Foco: Todas", 0.5, body_color, 1, 25)

    if split_screen_mode:
        write("SPLIT SCREEN: ON", 0.5, (0, 255, 0), 1, 0)
    cursor += 35  # the row is reserved whether or not the flag is shown
    rule()

    # Mode and hold status
    if hold_mode:
        modo_text = f"MODO: {modo_control} LOCKED"
        color_modo = (0, 165, 255)
    else:
        modo_text = f"MODO: {modo_control}"
        color_modo = (0, 255, 255) if modo_control == 'AUTO' else (255, 100, 255)
    write(modo_text, 0.7, color_modo, 2, 40)

    # Active shot
    plano_info = PLANOS[plano_actual]
    write("PLANO ACTIVO:", 0.6, heading_color, 2, 28)
    write(plano_info['nombre'], 0.55, (255, 255, 255), 1, 22)
    write(f"({plano_info['descripcion']})", 0.45, (180, 180, 180), 1, 30)

    if plano_auto != plano_actual and not hold_mode:
        write(f"Auto: {PLANOS[plano_auto]['nombre']}", 0.45, (100, 150, 255), 1, 25)
    rule()

    # Multi-person keys
    write("MULTI-PERSONA:", 0.6, heading_color, 2, 28)
    for ctrl in (
        "'1/2/3/4' - Foco persona",
        "'a' - Todas personas",
        "'p' - Split screen",
    ):
        write(ctrl, 0.48, body_color, 1, 22)
    cursor += 10
    rule()

    # General keys
    write("CONTROLES:", 0.6, heading_color, 2, 28)
    for ctrl in (
        "'m' - AUTO/MANUAL",
        "'h' - HOLD/LOCK",
        "'s' - Screenshot",
        "'g' - Grid",
        "'r' - Reset",
        "'c' - Cambiar camara",
        "ESC - Salir",
    ):
        write(ctrl, 0.48, body_color, 1, 22)
    cursor += 10
    rule()

    # State flags
    on_color, off_color = (0, 255, 0), (100, 100, 100)
    write("ESTADO:", 0.6, heading_color, 2, 28)
    write(f"Hold: {'ON' if hold_mode else 'OFF'}", 0.5,
          on_color if hold_mode else off_color, 1, 24)
    write(f"Grid: {'ON' if show_grid else 'OFF'}", 0.5,
          on_color if show_grid else off_color, 1, 0)

    return panel


def dibujar_info_deteccion(frame, camera_name):
    """Stamp the detection-view captions on ``frame`` (in place) and return it.

    Two lines are drawn at the top-left: the active camera name and a fixed
    "Multi-Persona Tracking" subtitle.
    """
    _h, _w = frame.shape[:2]
    font = cv2.FONT_HERSHEY_SIMPLEX

    # (text, origin, scale, colour, thickness)
    captions = (
        (f"DETECCION: {camera_name}", (10, 30), 0.7, (255, 255, 0), 2),
        ("Multi-Persona Tracking", (10, 60), 0.6, (200, 200, 200), 1),
    )
    for text, origin, scale, color, thickness in captions:
        cv2.putText(frame, text, origin, font, scale, color, thickness)
    return frame


def dibujar_info_resultado(frame, show_grid):
    """Optionally overlay a rule-of-thirds grid on the result view.

    The output is kept clean: nothing but the grid is ever drawn. ``frame``
    is modified in place and returned.
    """
    h, w = frame.shape[:2]

    if not show_grid:
        return frame

    color_grid = (80, 80, 80)
    # Two vertical lines at one third and two thirds of the width ...
    for k in (1, 2):
        x = k * w // 3
        cv2.line(frame, (x, 0), (x, h), color_grid, 1)
    # ... and two horizontal lines at one third and two thirds of the height.
    for k in (1, 2):
        y = k * h // 3
        cv2.line(frame, (0, y), (w, y), color_grid, 1)

    return frame


# ==================== MAIN ====================
def _detectar_camaras(max_camaras=3):
    """Probe camera indices ``0 .. max_camaras - 1`` and keep the ones that open.

    Every capture that opens is asked for 1280x720 frames; captures that do
    not open are released immediately.

    Returns:
        A list of ``(label, cv2.VideoCapture)`` tuples in index order.
    """
    cameras = []
    for i in range(max_camaras):
        cap = cv2.VideoCapture(i)
        if not cap.isOpened():
            cap.release()
            continue
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        cameras.append((f'Camara {i+1}', cap))
        print(f"‚úÖ C√°mara {i+1} detectada")
    return cameras


def _preparar_ventanas():
    """Create the CONTROL / DETECCION / RESULTADO windows and lay them out."""
    disposicion = (
        ('CONTROL', (400, 1080), (0, 0)),
        ('DETECCION', (1280, 540), (420, 0)),
        ('RESULTADO', (1280, 540), (420, 540)),
    )
    for nombre, _, _ in disposicion:
        cv2.namedWindow(nombre, cv2.WINDOW_NORMAL)
    for nombre, (ancho, alto), (x, y) in disposicion:
        cv2.resizeWindow(nombre, ancho, alto)
        cv2.moveWindow(nombre, x, y)


def main():
    """Run the interactive capture / tracking / reframing loop.

    Prints the shot summary, opens every available camera (indices 0-2) and
    then processes frames from the active camera until ESC is pressed or a
    frame cannot be read. Each frame is mirrored, run through the
    module-level MediaPipe ``pose``, ``hands`` and ``face_detection``
    objects, and shown in three windows: 'CONTROL' (status panel),
    'DETECCION' (annotated input) and 'RESULTADO' (reframed output).

    Keyboard controls:
        ESC   quit
        r     reset framers, detectors, hold, focus and split screen
        m     toggle AUTO / MANUAL shot selection
        h     toggle hold (the current shot stops being updated)
        s     save a screenshot of the result frame
        g     toggle the grid overlay
        c     cycle to the next camera (only when more than one was found)
        1-4   focus on person 1-4
        a     clear the focus (all persons)
        p     toggle split-screen mode

    Cameras, windows and the MediaPipe objects are released in a ``finally``
    block, so they are freed even when the loop raises. Note that the
    module-level MediaPipe objects are closed on exit.
    """
    print_resumen_planos()

    # Try to detect up to 3 cameras.
    cameras = _detectar_camaras(3)
    if not cameras:
        print("‚ùå No se detectaron c√°maras")
        return

    try:
        # Initialisation of per-run state (one framer per camera).
        person_tracker = MultiPersonTracker(max_persons=4)
        orientation_tracker = BodyOrientation()
        body_position_detector = BodyPositionDetector()
        framers = [SmoothFramer(smoothing=0.15) for _ in cameras]
        fps_history = deque(maxlen=30)

        plano_actual = 'MEDIUM'
        plano_auto = 'MEDIUM'
        modo_control = 'AUTO'
        camera_activa = 0
        shoulder_width_debug = 0

        hold_mode = False
        show_grid = True
        focused_person_id = None
        split_screen_mode = False

        print(f"üé¨ Sistema iniciado - {len(cameras)} c√°mara(s)")
        print("‚ñ∂Ô∏è  Listo\n")

        _preparar_ventanas()

        while True:
            start_time = time.time()

            camera_name, cap = cameras[camera_activa]
            ret, frame = cap.read()

            if not ret:
                break

            # Mirror the image so the preview behaves like a mirror.
            frame = cv2.flip(frame, 1)
            h, w = frame.shape[:2]

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            pose_results = pose.process(rgb_frame)
            hands_results = hands.process(rgb_frame)
            face_results = face_detection.process(rgb_frame)

            current_time = time.time()
            person_tracker.update(face_results, pose_results.pose_landmarks, current_time)

            frame_deteccion = dibujar_landmarks_multi(
                frame.copy(), pose_results, hands_results, person_tracker
            )

            plano_auto = body_position_detector.detect_framing_multi(
                person_tracker, pose_results.pose_landmarks, h, w
            )

            if pose_results.pose_landmarks:
                lm = pose_results.pose_landmarks.landmark
                shoulder_width_debug = abs(lm[11].x - lm[12].x)

            if hands_results and hands_results.multi_hand_landmarks:
                for hand_landmarks in hands_results.multi_hand_landmarks:
                    plano_manual, _ = clasificar_gesto_manual(hand_landmarks)
                    # Only accept results that are real PLANOS keys so the
                    # PLANOS[plano_actual] lookups below cannot raise KeyError.
                    # NOTE(review): clasificar_gesto_manual is defined elsewhere;
                    # confirm what it returns for an unrecognised gesture.
                    if (modo_control == 'MANUAL' and not hold_mode
                            and plano_manual in PLANOS):
                        plano_actual = plano_manual

            if modo_control == 'AUTO' and not hold_mode:
                plano_actual = plano_auto

            orientation = orientation_tracker.calculate(pose_results.pose_landmarks)

            centro = obtener_centro_seguimiento_multi(
                person_tracker, focused_person_id, pose_results.pose_landmarks,
                face_results, w, h, plano_actual
            )

            zoom_factor = PLANOS[plano_actual]['zoom']
            y_offset = PLANOS[plano_actual]['y_offset']
            framer_state = framers[camera_activa].update(
                centro[0], centro[1], zoom_factor, y_offset
            )

            if split_screen_mode and person_tracker.get_person_count() > 1:
                frame_resultado = crear_split_screen(
                    frame, person_tracker, framers[camera_activa]
                )
            else:
                # Copy only on this path; split screen builds its own image.
                frame_resultado = aplicar_encuadre(frame.copy(), framer_state)

            # A coarse system clock can report a zero interval; skip that
            # sample instead of dividing by zero.
            elapsed = time.time() - start_time
            if elapsed > 0:
                fps_history.append(1.0 / elapsed)
            avg_fps = np.mean(fps_history) if fps_history else 0.0

            frame_deteccion = dibujar_info_deteccion(frame_deteccion, camera_name)
            frame_resultado = dibujar_info_resultado(frame_resultado, show_grid)

            panel_control = crear_panel_control(
                modo_control, plano_actual, plano_auto, orientation,
                avg_fps, len(cameras), shoulder_width_debug, hold_mode, show_grid,
                person_tracker, focused_person_id, split_screen_mode
            )

            cv2.imshow('CONTROL', panel_control)
            cv2.imshow('DETECCION', frame_deteccion)
            cv2.imshow('RESULTADO', frame_resultado)

            key = cv2.waitKey(1) & 0xFF
            if key == 27:  # ESC
                print("\nüëã Cerrando...")
                break
            elif key == ord('r'):
                framers = [SmoothFramer(smoothing=0.15) for _ in cameras]
                body_position_detector = BodyPositionDetector()
                orientation_tracker = BodyOrientation()
                hold_mode = False
                focused_person_id = None
                split_screen_mode = False
                print("üîÑ Reset")
            elif key == ord('m'):
                modo_control = 'MANUAL' if modo_control == 'AUTO' else 'AUTO'
                print(f"üîÄ Modo: {modo_control}")
            elif key == ord('h'):
                hold_mode = not hold_mode
                status = "ON" if hold_mode else "OFF"
                print(f"üîí Hold: {status}")
            elif key == ord('s'):
                guardar_screenshot(frame_resultado, plano_actual)
            elif key == ord('g'):
                show_grid = not show_grid
                print(f"üìê Grid: {'ON' if show_grid else 'OFF'}")
            elif key == ord('c') and len(cameras) > 1:
                camera_activa = (camera_activa + 1) % len(cameras)
                print(f"üìπ {cameras[camera_activa][0]}")
            elif ord('1') <= key <= ord('4'):
                # Keys '1'..'4' map to zero-based person ids 0..3.
                focused_person_id = key - ord('1')
                print(f"üë§ Foco: Persona {focused_person_id + 1}")
            elif key == ord('a'):
                focused_person_id = None
                print(f"üë• Foco: Todas las personas")
            elif key == ord('p'):
                split_screen_mode = not split_screen_mode
                status = "ON" if split_screen_mode else "OFF"
                print(f"üì∫ Split Screen: {status}")
    finally:
        # Always give the devices and native resources back, even on error.
        for _, cap in cameras:
            cap.release()
        cv2.destroyAllWindows()
        pose.close()
        hands.close()
        face_detection.close()

    print("‚úÖ Sistema cerrado")


# Script entry point: start the interactive loop only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()



📋 SISTEMA DE CÁMARA INTELIGENTE - MULTI-PERSONA

🎭 MODO MANUAL (Gestos de mano):
--------------------------------------------------------------------------------
  Plano General Extremo     → Pulgar abajo 👎
  Plano General             → Puño cerrado ✊
  Plano Entero              → 1 dedo ☝️
  Plano Americano           → 4 dedos 🖖
  Plano Medio               → 3 dedos 🤟
  Plano Medio Corto         → 5 dedos (mano abierta) 🖐️
  Primer Plano              → Paz (V) ✌️
  Primerísimo Plano         → Rock and roll 🤘

🤖 MODO AUTO (Detección automática):
--------------------------------------------------------------------------------
  • 1 persona: Planos normales según distancia
  • 2 personas: Two-Shot o Over-Shoulder
  • 3+ personas: Group Shot automático

👥 MULTI-PERSONA:
--------------------------------------------------------------------------------
  '1/2/3/4' - Cambiar foco a persona 1/2/3/4
  'a' - Auto: encuadrar tod