In [1]:
import cv2
import mediapipe as mp
import numpy as np
from datetime import datetime

# Initialise the MediaPipe Pose and Hands solutions plus the drawing helpers.
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Configure the detectors.
pose = mp_pose.Pose(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
    model_complexity=1  # 0=light, 1=medium, 2=heavy
)

hands = mp_hands.Hands(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
    max_num_hands=2
)

# Video capture
cap = cv2.VideoCapture(0)  # 0 = default webcam

print("Presiona 'q' para salir")

# try/finally guarantees that the webcam, the window and both detectors are
# released even when the loop is aborted by an exception or a kernel interrupt.
try:
    # Tick-based timing so the overlay shows the measured loop rate rather than
    # the capture's CAP_PROP_FPS property.
    tick_frequency = cv2.getTickFrequency()
    previous_tick = cv2.getTickCount()

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert BGR to RGB for MediaPipe.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False

        # Detect pose and hands. The results stay at module level after the
        # loop ends; the extraction test further down reads the last ones.
        pose_results = pose.process(image)
        hands_results = hands.process(image)

        # Back to BGR for drawing.
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Draw pose landmarks.
        if pose_results.pose_landmarks:
            mp_drawing.draw_landmarks(
                image,
                pose_results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
            )

        # Draw hand landmarks.
        if hands_results.multi_hand_landmarks:
            for hand_landmarks in hands_results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    image,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style()
                )

        # Show the measured frames per second (time between loop iterations).
        current_tick = cv2.getTickCount()
        elapsed_seconds = (current_tick - previous_tick) / tick_frequency
        previous_tick = current_tick
        measured_fps = 1.0 / elapsed_seconds if elapsed_seconds > 0 else 0.0
        cv2.putText(image, f'FPS: {int(measured_fps)}',
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow('MediaPipe Pose + Hands', image)

        if cv2.waitKey(5) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    pose.close()
    hands.close()


Presiona 'q' para salir


In [2]:
def extract_pose_data(pose_landmarks):
    """Convert a pose landmark container into plain dictionaries.

    Args:
        pose_landmarks: Object exposing a ``landmark`` iterable whose items
            carry ``x``, ``y``, ``z`` and ``visibility`` attributes, or a
            falsy value when nothing was detected.

    Returns:
        A list with one ``{'x', 'y', 'z', 'visibility'}`` dict per landmark,
        or ``None`` when ``pose_landmarks`` is falsy.
    """
    if pose_landmarks:
        return [
            {
                'x': point.x,
                'y': point.y,
                'z': point.z,
                'visibility': point.visibility,
            }
            for point in pose_landmarks.landmark
        ]
    return None

def extract_hand_data(hands_results):
    """Collect handedness and landmark coordinates for every detected hand.

    Args:
        hands_results: Object exposing ``multi_hand_landmarks`` and
            ``multi_handedness``; entry ``i`` of the second describes hand
            ``i`` of the first.

    Returns:
        ``None`` when ``multi_hand_landmarks`` is falsy, otherwise a list of
        dicts with keys ``'handedness'``, ``'score'`` and ``'landmarks'``
        (a list of ``{'x', 'y', 'z'}`` dicts).
    """
    detected_hands = hands_results.multi_hand_landmarks
    if not detected_hands:
        return None

    summary = []
    for position, hand in enumerate(detected_hands):
        # First classification entry of the handedness record at the same index.
        top_guess = hands_results.multi_handedness[position].classification[0]
        summary.append({
            'handedness': top_guess.label,  # "Left" or "Right"
            'score': top_guess.score,
            'landmarks': [
                {'x': point.x, 'y': point.y, 'z': point.z}
                for point in hand.landmark
            ],
        })
    return summary

# Extraction smoke test. `pose_results` / `hands_results` are whatever the
# capture loop of the first cell left behind for its last processed frame.
pose_sample = extract_pose_data(pose_results.pose_landmarks)
# extract_pose_data returns None when no pose was found, so guard the slice.
print("Ejemplo de landmarks de pose:", pose_sample[:3] if pose_sample is not None else None)
print("Ejemplo de landmarks de manos:", extract_hand_data(hands_results))


Ejemplo de landmarks de pose: [{'x': 0.6356474757194519, 'y': 0.5357095003128052, 'z': -1.2878602743148804, 'visibility': 0.9997855424880981}, {'x': 0.653036892414093, 'y': 0.45773619413375854, 'z': -1.1983544826507568, 'visibility': 0.9996163845062256}, {'x': 0.6700222492218018, 'y': 0.45641347765922546, 'z': -1.1983556747436523, 'visibility': 0.9996852874755859}]
Ejemplo de landmarks de manos: None


In [2]:
import cv2
import mediapipe as mp
import numpy as np
from collections import deque
from dataclasses import dataclass
from enum import Enum
import time


# ==================== MEDIAPIPE SETUP ====================
# Short aliases for the MediaPipe solution modules used in this cell.
mp_drawing_styles = mp.solutions.drawing_styles
mp_drawing = mp.solutions.drawing_utils
mp_face = mp.solutions.face_detection
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose


# Detector configuration tuned for steadier tracking.
# These three objects are module-level: main() uses them and closes them on exit.
pose = mp_pose.Pose(
    min_tracking_confidence=0.7,  # raised for more stability
    min_detection_confidence=0.5,
    smooth_landmarks=True,
    enable_segmentation=False,
    model_complexity=1,
    static_image_mode=False
)

hands = mp_hands.Hands(
    min_tracking_confidence=0.6,   # raised
    min_detection_confidence=0.7,  # raised
    max_num_hands=2,
    static_image_mode=False
)

face_detection = mp_face.FaceDetection(min_detection_confidence=0.5)


# ==================== SISTEMA DE PLANOS ====================
class TipoPlano(Enum):
    """Identifiers for the supported shot types, numbered 1-12 in declaration order."""

    # Shots that PLANOS maps to a hand gesture, listed by increasing zoom.
    EXTREME_WIDE = 1
    WIDE = 2
    FULL = 3
    COWBOY = 4
    MEDIUM = 5
    MEDIUM_CLOSEUP = 6
    CLOSEUP = 7
    EXTREME_CLOSEUP = 8

    # Shots that PLANOS labels 'N/A (solo AUTO)', i.e. without a hand gesture.
    OVER_SHOULDER = 9
    BACK_SHOT = 10
    LOW_ANGLE = 11
    HIGH_ANGLE = 12


# Shot catalogue keyed by shot name (the same names as the TipoPlano members).
# Every entry carries:
#   zoom           - zoom factor; main() passes it to SmoothFramer.update and
#                    aplicar_encuadre divides the frame size by it
#   nombre         - display name drawn on the control panel and result overlay
#   tipo           - the matching TipoPlano member
#   descripcion    - short description drawn under the name
#   y_offset       - added to the tracked centre's y by SmoothFramer.update
#   gesto_manual   - gesture label listed by print_resumen_planos; the value
#                    'N/A (solo AUTO)' hides the entry from that listing
#   deteccion_auto - free-text note on the automatic trigger; not read by any
#                    code visible in this file
PLANOS = {
    'EXTREME_WIDE': {
        'zoom': 0.7,
        'nombre': 'Plano General Extremo',
        'tipo': TipoPlano.EXTREME_WIDE,
        'descripcion': 'Contexto completo + entorno',
        'y_offset': 0.05,
        'gesto_manual': 'Pulgar abajo üëé',
        'deteccion_auto': 'Persona muy lejos o en borde del frame'
    },
    'WIDE': {
        'zoom': 0.9,
        'nombre': 'Plano General',
        'tipo': TipoPlano.WIDE,
        'descripcion': 'Cuerpo completo + entorno',
        'y_offset': 0.0,
        'gesto_manual': 'Pu√±o cerrado ‚úä',
        'deteccion_auto': 'Cuerpo completo visible con espacio'
    },
    'FULL': {
        'zoom': 1.0,
        'nombre': 'Plano Entero',
        'tipo': TipoPlano.FULL,
        'descripcion': 'De pies a cabeza',
        'y_offset': 0.0,
        'gesto_manual': '1 dedo ‚òùÔ∏è',
        'deteccion_auto': 'Pies y cabeza visibles ajustados'
    },
    'COWBOY': {
        'zoom': 1.3,
        'nombre': 'Plano Americano',
        'tipo': TipoPlano.COWBOY,
        'descripcion': 'Desde rodillas',
        'y_offset': -0.05,
        'gesto_manual': '4 dedos üññ',
        'deteccion_auto': 'Visible desde rodillas hacia arriba'
    },
    'MEDIUM': {
        'zoom': 1.5,
        'nombre': 'Plano Medio',
        'tipo': TipoPlano.MEDIUM,
        'descripcion': 'Cintura hacia arriba',
        'y_offset': -0.08,
        'gesto_manual': '3 dedos ü§ü',
        'deteccion_auto': 'Visible desde cintura/caderas'
    },
    'MEDIUM_CLOSEUP': {
        'zoom': 1.8,
        'nombre': 'Plano Medio Corto',
        'tipo': TipoPlano.MEDIUM_CLOSEUP,
        'descripcion': 'Pecho hacia arriba',
        'y_offset': -0.10,
        'gesto_manual': '5 dedos (mano abierta) üñêÔ∏è',
        'deteccion_auto': 'Solo torso superior visible'
    },
    'CLOSEUP': {
        'zoom': 2.2,
        'nombre': 'Primer Plano',
        'tipo': TipoPlano.CLOSEUP,
        'descripcion': 'Cara y hombros',
        'y_offset': -0.08,
        'gesto_manual': 'Paz (V) ‚úåÔ∏è',
        'deteccion_auto': 'Cara cercana, hombros anchos'
    },
    'EXTREME_CLOSEUP': {
        'zoom': 2.8,
        'nombre': 'Primer√≠simo Plano',
        'tipo': TipoPlano.EXTREME_CLOSEUP,
        'descripcion': 'Solo rostro',
        'y_offset': -0.05,
        'gesto_manual': 'Pulgar arriba üëç',
        'deteccion_auto': 'Cara muy cercana ocupando frame'
    },
    'OVER_SHOULDER': {
        'zoom': 1.6,
        'nombre': 'Sobre el Hombro',
        'tipo': TipoPlano.OVER_SHOULDER,
        'descripcion': 'Desde hombro lateral',
        'y_offset': -0.08,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Perfil con hombro prominente'
    },
    'BACK_SHOT': {
        'zoom': 1.2,
        'nombre': 'Plano de Espaldas',
        'tipo': TipoPlano.BACK_SHOT,
        'descripcion': 'Vista posterior completa',
        'y_offset': 0.0,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Persona de espaldas detectada'
    },
    'LOW_ANGLE': {
        'zoom': 1.3,
        'nombre': 'Contrapicado',
        'tipo': TipoPlano.LOW_ANGLE,
        'descripcion': 'Desde abajo mirando arriba',
        'y_offset': 0.1,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Persona en parte inferior del frame'
    },
    'HIGH_ANGLE': {
        'zoom': 1.3,
        'nombre': 'Picado',
        'tipo': TipoPlano.HIGH_ANGLE,
        'descripcion': 'Desde arriba mirando abajo',
        'y_offset': -0.15,
        'gesto_manual': 'N/A (solo AUTO)',
        'deteccion_auto': 'Persona en parte superior del frame'
    }
}


def print_resumen_planos():
    """Print the console cheat sheet: gesture table, AUTO-mode hints and the camera key."""
    barra_doble = "=" * 80
    barra_simple = "-" * 80

    print("\n" + barra_doble)
    print("üìã RESUMEN DE PLANOS CINEMATOGR√ÅFICOS")
    print(barra_doble)

    print("\nüé≠ MODO MANUAL (Gestos de mano):")
    print(barra_simple)
    # Only shots that have a hand gesture are listed.
    for plano in PLANOS.values():
        if plano['gesto_manual'] == 'N/A (solo AUTO)':
            continue
        print(f"  {plano['nombre']:25} ‚Üí {plano['gesto_manual']}")

    print("\nü§ñ MODO AUTO (Detecci√≥n autom√°tica):")
    print(barra_simple)
    for pista in (
        "  ‚Ä¢ Ac√©rcate a la c√°mara ‚Üí Primer Plano / Extremo",
        "  ‚Ä¢ Al√©jate de la c√°mara ‚Üí Plano Entero / General",
        "  ‚Ä¢ Mu√©vete al borde ‚Üí General Extremo",
        "  ‚Ä¢ G√≠rate de espaldas ‚Üí Plano de Espaldas",
    ):
        print(pista)

    print("\nüìπ SISTEMA MULTI-C√ÅMARA:")
    print(barra_simple)
    print("  ‚Ä¢ Presiona 'c' para cambiar entre c√°maras disponibles")

    print("\n" + barra_doble + "\n")


# ==================== GESTOS MANUALES ====================
def contar_dedos(hand_landmarks):
    """Count how many of the five fingers look extended.

    Args:
        hand_landmarks: Object whose ``landmark`` sequence holds the hand
            points with ``x`` and ``y`` attributes (indices up to 20 are read).

    Returns:
        An int between 0 and 5.
    """
    puntos = hand_landmarks.landmark

    # Thumb: tip (4) against the previous point (3) along x. The comparison
    # direction is fixed; handedness is not consulted.
    total = 1 if puntos[4].x < puntos[3].x else 0

    # Remaining fingers: a tip counts when its y is smaller than the y of the
    # point two indices before it.
    total += sum(1 for punta in (8, 12, 16, 20) if puntos[punta].y < puntos[punta - 2].y)

    return total


def detectar_pulgar_arriba(hand_landmarks):
    """Return True when the thumb tip is above its previous point and the other four fingers are folded.

    "Above" means a smaller ``y``; a finger counts as folded when its tip
    (8, 12, 16, 20) has a larger ``y`` than the point two indices before it.
    """
    puntos = hand_landmarks.landmark
    dedos_cerrados = all(
        puntos[punta].y > puntos[punta - 2].y for punta in (8, 12, 16, 20)
    )
    return puntos[4].y < puntos[3].y and dedos_cerrados


def detectar_pulgar_abajo(hand_landmarks):
    """Return True when the thumb tip is below its previous point and the other four fingers are folded.

    "Below" means a larger ``y``; a finger counts as folded when its tip
    (8, 12, 16, 20) has a larger ``y`` than the point two indices before it.
    """
    puntos = hand_landmarks.landmark
    dedos_cerrados = all(
        puntos[punta].y > puntos[punta - 2].y for punta in (8, 12, 16, 20)
    )
    return puntos[4].y > puntos[3].y and dedos_cerrados


def detectar_gesto_paz(hand_landmarks):
    """Return True for a "V" sign: index and middle fingers raised, ring and pinky folded.

    A finger is raised when its tip has a smaller ``y`` than the point two
    indices before it, and folded when the tip's ``y`` is larger. The thumb
    is not inspected.

    The pinky flag uses an ASCII-only local name: the previous spelling
    contained characters that are not valid in a Python identifier.
    """
    puntos = hand_landmarks.landmark
    indice_up = puntos[8].y < puntos[6].y
    medio_up = puntos[12].y < puntos[10].y
    anular_down = puntos[16].y > puntos[14].y
    menique_down = puntos[20].y > puntos[18].y
    return indice_up and medio_up and anular_down and menique_down


def clasificar_gesto_manual(hand_landmarks):
    """Map a hand pose to a shot name plus the label shown for the gesture.

    Checks run in a fixed priority: thumb up, thumb down, zero fingers,
    "V" sign, then the raw finger count.

    Returns:
        A ``(plano, etiqueta)`` tuple. ``plano`` is a key of PLANOS; counts
        with no dedicated entry fall back to ``'WIDE'`` with the count itself
        (as a string) as label.

    NOTE(review): both thumb detectors require the same four folded fingers
    and differ only in the direction of the thumb comparison, so a closed
    fist is claimed by one of them unless some coordinates tie exactly; the
    zero-finger branch looks nearly unreachable - confirm this is intended.
    """
    dedos = contar_dedos(hand_landmarks)

    if detectar_pulgar_arriba(hand_landmarks):
        return 'EXTREME_CLOSEUP', "üëç"
    if detectar_pulgar_abajo(hand_landmarks):
        return 'EXTREME_WIDE', "üëé"
    if dedos == 0:
        return 'WIDE', "‚úä"
    if detectar_gesto_paz(hand_landmarks):
        return 'CLOSEUP', "‚úåÔ∏è"

    # Plain finger counts that own a shot; anything else falls back to WIDE.
    por_conteo = {
        1: ('FULL', "‚òùÔ∏è"),
        3: ('MEDIUM', "ü§ü"),
        4: ('COWBOY', "üññ"),
        5: ('MEDIUM_CLOSEUP', "üñêÔ∏è"),
    }
    return por_conteo.get(dedos, ('WIDE', str(dedos)))


# ==================== IMPROVED AUTOMATIC DETECTION ====================
class BodyPositionDetector:
    """Chooses a shot name (a key of PLANOS) automatically from pose landmarks.

    Attributes:
        history: The last three raw classifications, used for smoothing.
        last_plano: Most recent smoothed result; returned while no pose is visible.
        debug_mode: When True, every classification stores a metrics summary
            in ``last_debug_info``.
        last_debug_info: Summary string of the latest classification made
            with ``debug_mode`` on, or None if there has been none.
    """
    def __init__(self):
        self.history = deque(maxlen=3)  # deliberately short for a fast response
        self.last_plano = 'MEDIUM'
        self.debug_mode = True
        self.last_debug_info = None

    def detect_framing(self, pose_landmarks, frame_height, frame_width):
        """Classify the current pose and return the smoothed shot name.

        Args:
            pose_landmarks: Object with a ``landmark`` sequence whose items have
                ``x``, ``y``, ``z`` and ``visibility``; falsy when no pose was found.
            frame_height: Unused; kept for interface compatibility.
            frame_width: Unused; kept for interface compatibility.

        Returns:
            The most frequent classification in ``history``, or the previous
            result when ``pose_landmarks`` is falsy.
        """
        if not pose_landmarks:
            return self.last_plano

        lm = pose_landmarks.landmark

        # Shoulder width (11/12) is the main distance indicator.
        shoulder_width = abs(lm[11].x - lm[12].x)

        head_y = lm[0].y
        shoulder_y = (lm[11].y + lm[12].y) / 2

        # Worst-case visibility of each landmark pair.
        hips_vis = min(lm[23].visibility, lm[24].visibility)
        knees_vis = min(lm[25].visibility, lm[26].visibility)
        ankles_vis = min(lm[27].visibility, lm[28].visibility)

        # Position inside the frame, used for the edge checks.
        center_x = np.mean([lm[0].x, lm[11].x, lm[12].x])
        center_y = np.mean([lm[0].y, lm[11].y, lm[12].y])

        # Vertical span from the head to the lowest sufficiently visible pair.
        if ankles_vis > 0.3:
            lowest_y = (lm[27].y + lm[28].y) / 2
        elif knees_vis > 0.3:
            lowest_y = (lm[25].y + lm[26].y) / 2
        elif hips_vis > 0.4:
            lowest_y = (lm[23].y + lm[24].y) / 2
        else:
            lowest_y = shoulder_y
        body_span = abs(lowest_y - head_y)

        orientation = self._detect_orientation(lm)

        plano = self._classify_simple(
            shoulder_width, body_span, center_x, center_y,
            hips_vis, knees_vis, ankles_vis, orientation
        )

        # Keep the metrics summary instead of building and discarding it, so
        # that toggling debug_mode has an observable effect.
        if self.debug_mode:
            self.last_debug_info = f"SW:{shoulder_width:.2f} BS:{body_span:.2f} H:{hips_vis:.1f} K:{knees_vis:.1f} A:{ankles_vis:.1f} ‚Üí {plano}"

        self.history.append(plano)
        plano_final = self._smooth()
        self.last_plano = plano_final

        return plano_final

    def _detect_orientation(self, lm):
        """Return 'FRONTAL', 'ESPALDAS' or 'PERFIL' from nose versus shoulder depth.

        The nose ``z`` is compared with the mean shoulder ``z`` using a 0.1
        dead band: clearly smaller means facing the camera, clearly larger
        means facing away, anything in between is treated as a profile.
        """
        shoulders_z = (lm[11].z + lm[12].z) / 2
        nose_z = lm[0].z

        if nose_z < shoulders_z - 0.1:
            return 'FRONTAL'
        if nose_z > shoulders_z + 0.1:
            return 'ESPALDAS'
        return 'PERFIL'

    def _classify_simple(self, shoulder_width, body_span, center_x, center_y,
                        hips_vis, knees_vis, ankles_vis, orientation):
        """Pick a shot name, driven mainly by ``shoulder_width``.

        Special shots (back, top/bottom of frame, lateral edge) take priority
        over the distance ladder. ``body_span`` is accepted but not used.
        """
        # Special shots first.
        if orientation == 'ESPALDAS':
            return 'BACK_SHOT'
        if center_y < 0.2:
            return 'HIGH_ANGLE'
        if center_y > 0.8:
            return 'LOW_ANGLE'
        if center_x < 0.15 or center_x > 0.85:
            return 'EXTREME_WIDE'

        # Distance ladder: the closer the subject, the wider the shoulders appear.
        if shoulder_width > 0.50:  # very close
            return 'EXTREME_CLOSEUP'
        if shoulder_width > 0.38:  # close
            return 'CLOSEUP'
        if shoulder_width > 0.28:  # mid distance; depends on whether hips show
            return 'MEDIUM_CLOSEUP' if hips_vis < 0.3 else 'MEDIUM'
        if shoulder_width > 0.20:  # mid-long distance; depends on knees
            return 'COWBOY' if knees_vis > 0.3 else 'MEDIUM'
        if shoulder_width > 0.15:  # far; depends on ankles
            return 'FULL' if ankles_vis > 0.3 else 'COWBOY'
        if shoulder_width > 0.10:  # very far
            return 'WIDE'
        return 'EXTREME_WIDE'

    def _smooth(self):
        """Return the most common entry of ``history`` ('MEDIUM' when empty).

        Ties are resolved by ``Counter.most_common``, which keeps the entry
        encountered first.
        """
        if len(self.history) == 0:
            return 'MEDIUM'

        from collections import Counter
        return Counter(self.history).most_common(1)[0][0]


# ==================== BODY ORIENTATION ====================
class BodyOrientation:
    """Estimates yaw, pitch and roll of the torso, smoothed over the last 10 frames."""
    def __init__(self):
        self.history = deque(maxlen=10)  # longer window than the shot detector

    def calculate(self, pose_landmarks):
        """Add the current pose to the history and return the smoothed angles.

        Args:
            pose_landmarks: Object with a ``landmark`` sequence (indices 11, 12,
                23 and 24 are read); falsy when no pose was found.

        Returns:
            ``None`` when ``pose_landmarks`` is falsy, otherwise a dict with
            ``'yaw'``, ``'pitch'``, ``'roll'`` (degrees) and ``'visibility'``.
        """
        if not pose_landmarks:
            return None

        lm = pose_landmarks.landmark

        left_shoulder = np.array([lm[11].x, lm[11].y, lm[11].z])
        right_shoulder = np.array([lm[12].x, lm[12].y, lm[12].z])
        left_hip = np.array([lm[23].x, lm[23].y, lm[23].z])
        right_hip = np.array([lm[24].x, lm[24].y, lm[24].z])

        # Yaw and roll come from the shoulder line.
        shoulder_vec = right_shoulder - left_shoulder
        yaw = np.arctan2(shoulder_vec[2], shoulder_vec[0]) * 180 / np.pi
        roll = np.arctan2(shoulder_vec[1], shoulder_vec[0]) * 180 / np.pi

        # Pitch comes from the hip-centre to shoulder-centre vector.
        torso_center = (left_shoulder + right_shoulder) / 2
        hip_center = (left_hip + right_hip) / 2
        torso_vec = torso_center - hip_center
        pitch = np.arctan2(torso_vec[2], torso_vec[1]) * 180 / np.pi

        orientation = {
            'yaw': yaw,
            'pitch': pitch,
            'roll': roll,
            'visibility': min(lm[11].visibility, lm[12].visibility)
        }

        self.history.append(orientation)
        return self._smooth()

    @staticmethod
    def _circular_mean_deg(angles):
        """Mean of angles in degrees that respects the +/-180 wrap-around.

        A plain arithmetic mean of readings such as +179 and -179 gives 0,
        the opposite direction; averaging the unit vectors avoids that.
        """
        radians = np.radians(angles)
        return np.degrees(np.arctan2(np.mean(np.sin(radians)), np.mean(np.cos(radians))))

    def _smooth(self):
        """Average the stored samples: circular mean for angles, plain mean for visibility."""
        if not self.history:
            return None
        return {
            'yaw': self._circular_mean_deg([o['yaw'] for o in self.history]),
            'pitch': self._circular_mean_deg([o['pitch'] for o in self.history]),
            'roll': self._circular_mean_deg([o['roll'] for o in self.history]),
            'visibility': np.mean([o['visibility'] for o in self.history])
        }


# ==================== ENCUADRE ====================
@dataclass
class FrameTarget:
    """Crop description consumed by aplicar_encuadre."""
    # Horizontal centre as a fraction of the frame width.
    x: float = 0.5
    # Vertical centre as a fraction of the frame height.
    y: float = 0.5
    # Zoom factor; the frame size is divided by it to get the crop size.
    zoom: float = 1.0


class SmoothFramer:
    """Eases the current crop towards the latest target by a fixed fraction per update."""
    def __init__(self, smoothing=0.15):
        # Fraction of the remaining distance covered on every update.
        self.smoothing = smoothing
        self.target = FrameTarget()
        self.current = FrameTarget()

    def update(self, center_x, center_y, zoom, y_offset=0.0):
        """Set a new target and move the current state one step towards it.

        Args:
            center_x: Target horizontal centre.
            center_y: Target vertical centre before the offset is applied.
            zoom: Target zoom factor.
            y_offset: Value added to ``center_y`` to form the target ``y``.

        Returns:
            The ``current`` state object, mutated in place.
        """
        objetivo = self.target
        objetivo.x, objetivo.y, objetivo.zoom = center_x, center_y + y_offset, zoom

        actual = self.current
        for campo in ('x', 'y', 'zoom'):
            previo = getattr(actual, campo)
            paso = (getattr(objetivo, campo) - previo) * self.smoothing
            setattr(actual, campo, previo + paso)

        return actual


def obtener_centro_seguimiento(pose_landmarks, face_result, w, h, plano_actual):
    """Return the normalised ``(x, y)`` point the crop should follow for a shot.

    Args:
        pose_landmarks: Object with a ``landmark`` sequence; falsy when no
            pose was found.
        face_result: Not used by this function.
        w: Not used by this function.
        h: Not used by this function.
        plano_actual: Shot name that selects the anchor.

    Returns:
        ``(0.5, 0.5)`` without a pose. For close-ups, the midpoint between the
        nose (0) and the centre of points 9/10. For medium shots, a weighted
        mean of nose and shoulder centre (shoulders count twice). Otherwise
        the mean of nose, shoulders (11, 12) and hips (23, 24).
    """
    if not pose_landmarks:
        return (0.5, 0.5)

    lm = pose_landmarks.landmark

    def punto(indice):
        # 2-D position of one landmark as an array.
        return np.array([lm[indice].x, lm[indice].y])

    if plano_actual in ('CLOSEUP', 'EXTREME_CLOSEUP'):
        boca = (punto(9) + punto(10)) / 2
        centro = (punto(0) + boca) / 2
    elif plano_actual in ('MEDIUM', 'MEDIUM_CLOSEUP'):
        hombros = (punto(11) + punto(12)) / 2
        centro = (punto(0) + hombros * 2) / 3
    else:
        torso = [lm[indice] for indice in (0, 11, 12, 23, 24)]
        centro = (
            np.mean([p.x for p in torso]),
            np.mean([p.y for p in torso]),
        )

    return (float(centro[0]), float(centro[1]))


def aplicar_encuadre(frame, framer_state):
    """Crop ``frame`` around the framer state and scale the crop back to full size.

    Args:
        frame: Image array; ``shape[:2]`` is read as ``(height, width)``.
        framer_state: Object with ``x``, ``y`` (centre as a fraction of width
            and height) and ``zoom``.

    Returns:
        The resized crop, or ``frame`` itself when the crop would be empty.
        The window is clamped to the frame, so a crop larger than the frame
        (zoom below 1) ends up covering the whole frame.
    """
    alto, ancho = frame.shape[:2]

    def ventana(centro_norm, dimension):
        # Start/end of the crop along one axis, shifted back inside the frame
        # when it would run past the far edge.
        lado = int(dimension / framer_state.zoom)
        inicio = max(0, int(centro_norm * dimension) - lado // 2)
        fin = min(dimension, inicio + lado)
        if fin - inicio < lado:
            inicio = max(0, fin - lado)
        return inicio, fin

    x1, x2 = ventana(framer_state.x, ancho)
    y1, y2 = ventana(framer_state.y, alto)

    recorte = frame[y1:y2, x1:x2]
    if recorte.size == 0:
        return frame

    return cv2.resize(recorte, (ancho, alto), interpolation=cv2.INTER_LINEAR)


# ==================== VISUALIZATION ====================
def dibujar_landmarks(frame, pose_results, hands_results):
    """Draw pose and hand landmarks on ``frame`` (no face detection) and return it.

    Either results object may be falsy or carry no landmarks; that part is
    then skipped.
    """
    esqueleto = pose_results.pose_landmarks if pose_results else None
    if esqueleto:
        mp_drawing.draw_landmarks(
            frame,
            esqueleto,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
        )

    manos = hands_results.multi_hand_landmarks if hands_results else None
    for mano in manos or ():
        mp_drawing.draw_landmarks(
            frame,
            mano,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style()
        )

    return frame


def crear_panel_control(modo_control, plano_actual, plano_auto, gesto_emoji,
                       orientation, fps, num_cameras, shoulder_width=0):
    """Render the 400x1080 control panel image.

    Args:
        modo_control: Current mode string; 'AUTO' selects the first mode colour.
        plano_actual: Key of PLANOS for the active shot.
        plano_auto: Key of PLANOS for the automatically detected shot; drawn
            only when it differs from ``plano_actual``.
        gesto_emoji: Label of the detected gesture, or a falsy value.
        orientation: Dict with 'yaw', 'pitch', 'roll' and 'visibility', or a
            falsy value; drawn only when visibility exceeds 0.5.
        fps: Frames per second shown with one decimal.
        num_cameras: Number of cameras shown.
        shoulder_width: Debug value drawn as "Distancia" when greater than 0.

    Returns:
        A new ``uint8`` array of shape (1080, 400, 3).
    """
    panel = np.zeros((1080, 400, 3), dtype=np.uint8)
    fuente = cv2.FONT_HERSHEY_SIMPLEX
    color_titulo = (100, 200, 255)
    y = 30  # baseline of the next element

    def texto(contenido, escala, color, grosor, avance):
        # Draw one line at the cursor, then move the cursor down by `avance`.
        nonlocal y
        cv2.putText(panel, contenido, (10, y), fuente, escala, color, grosor)
        y += avance

    def separador():
        # Horizontal rule followed by the standard 25 px gap.
        nonlocal y
        cv2.line(panel, (10, y), (390, y), (50, 50, 50), 1)
        y += 25

    # Header
    texto("PANEL DE CONTROL", 0.9, (0, 255, 255), 2, 50)
    texto(f"FPS: {fps:.1f}", 0.6, (0, 255, 0), 2, 30)
    texto(f"Camaras: {num_cameras}", 0.6, (255, 200, 100), 2, 40)
    separador()

    # Mode and active shot
    color_modo = (0, 255, 255) if modo_control == 'AUTO' else (255, 100, 255)
    texto(f"MODO: {modo_control}", 0.75, color_modo, 2, 40)

    texto("PLANO ACTIVO:", 0.6, color_titulo, 2, 28)
    plano_info = PLANOS[plano_actual]
    texto(plano_info['nombre'], 0.55, (255, 255, 255), 1, 22)
    texto(f"({plano_info['descripcion']})", 0.45, (180, 180, 180), 1, 35)

    # Automatically detected shot, when it differs from the active one.
    if plano_auto != plano_actual:
        texto(f"Auto detecta: {PLANOS[plano_auto]['nombre']}", 0.45, (100, 150, 255), 1, 25)

    # Debug: shoulder width
    if shoulder_width > 0:
        texto(f"Distancia: {shoulder_width:.2f}", 0.45, (150, 150, 150), 1, 25)

    if gesto_emoji:
        texto(f"Gesto: {gesto_emoji}", 0.6, (0, 255, 0), 2, 35)

    separador()

    # Orientation
    if orientation and orientation['visibility'] > 0.5:
        gris_claro = (200, 200, 200)
        texto("ORIENTACION:", 0.6, color_titulo, 2, 28)
        texto(f"Yaw: {orientation['yaw']:.0f}¬∞", 0.5, gris_claro, 1, 24)
        texto(f"Pitch: {orientation['pitch']:.0f}¬∞", 0.5, gris_claro, 1, 24)
        texto(f"Roll: {orientation['roll']:.0f}¬∞", 0.5, gris_claro, 1, 35)

    separador()

    # Keyboard controls
    texto("CONTROLES:", 0.6, color_titulo, 2, 28)
    for ctrl in (
        "'m' - AUTO/MANUAL",
        "'r' - Reset",
        "'c' - Cambiar camara",
        "'d' - Debug ON/OFF",
        "ESC - Salir",
    ):
        texto(ctrl, 0.5, (200, 200, 200), 1, 24)

    y += 10
    separador()

    # Manual gestures
    texto("GESTOS MANUALES:", 0.6, color_titulo, 2, 28)
    for gesto in (
        "Pulgar abajo - General Extremo",
        "Puno - General",
        "1 dedo - Entero",
        "4 dedos - Americano",
        "3 dedos - Medio",
        "5 dedos - Medio Corto",
        "Paz (V) - Primer Plano",
        "Pulgar arriba - Extremo",
    ):
        texto(gesto, 0.45, (180, 180, 180), 1, 22)

    y += 10
    separador()

    # AUTO mode hints
    texto("MODO AUTO:", 0.6, color_titulo, 2, 28)
    for tip in (
        "Acercate = Primer Plano",
        "Alejate = Plano General",
        "Borde del frame = Extremo",
        "Date la vuelta = Espaldas",
    ):
        texto(tip, 0.45, (150, 150, 255), 1, 22)

    return panel


def dibujar_info_deteccion(frame, camera_name):
    """Stamp the detection-view caption on ``frame`` and return it.

    Args:
        frame: Image drawn on in place.
        camera_name: Name inserted after "DETECCION: ".

    Returns:
        The same ``frame`` object.
    """
    # The unused height/width lookup was dropped; both captions sit at fixed
    # coordinates.
    leyendas = (
        (f"DETECCION: {camera_name}", (10, 30), 0.7, (255, 255, 0), 2),
        ("Landmarks Pose + Manos", (10, 60), 0.6, (200, 200, 200), 1),
    )
    for contenido, posicion, escala, color, grosor in leyendas:
        cv2.putText(frame, contenido, posicion,
                    cv2.FONT_HERSHEY_SIMPLEX, escala, color, grosor)
    return frame


def dibujar_info_resultado(frame, plano, fps, camera_name):
    """Overlay shot name, description, FPS and a rule-of-thirds grid on ``frame``.

    Args:
        frame: Image drawn on in place; ``shape[:2]`` is read as (height, width).
        plano: Key of PLANOS for the shot being shown.
        fps: Frames per second shown with one decimal.
        camera_name: Name shown in brackets after the shot name.

    Returns:
        The same ``frame`` object.
    """
    h, w = frame.shape[:2]
    fuente = cv2.FONT_HERSHEY_SIMPLEX
    info = PLANOS[plano]

    # Shot name and description at the bottom left, FPS at the top right.
    cv2.putText(frame, f"{info['nombre']} [{camera_name}]", (10, h - 50),
                fuente, 1.0, (255, 255, 255), 2)
    cv2.putText(frame, info['descripcion'], (10, h - 20),
                fuente, 0.6, (200, 200, 200), 2)
    cv2.putText(frame, f"FPS: {fps:.1f}", (w - 150, 30),
                fuente, 0.7, (0, 255, 0), 2)

    # Rule of thirds: two vertical lines, then two horizontal lines.
    color_grid = (80, 80, 80)
    for columna in (w // 3, 2 * w // 3):
        cv2.line(frame, (columna, 0), (columna, h), color_grid, 1)
    for fila in (h // 3, 2 * h // 3):
        cv2.line(frame, (0, fila), (w, fila), color_grid, 1)

    return frame


# ==================== MAIN ====================
def main():
    """Run the interactive auto-framing session until ESC or a failed frame read.

    Opens camera indices 0 and 1, shows three windows (CONTROL, DETECCION,
    RESULTADO) and handles these keys: ESC quit, 'r' reset trackers, 'm'
    toggle AUTO/MANUAL, 'c' next camera, 'd' toggle detector debug mode.

    Cleanup (camera release, window teardown, closing the module-level
    ``pose``, ``hands`` and ``face_detection`` objects) runs in a ``finally``
    block, so it also happens when the loop is aborted by an exception or a
    kernel interrupt.

    NOTE(review): because the module-level detectors are closed on exit, a
    second call presumably needs the setup code to be executed again first -
    confirm against MediaPipe's behaviour.
    """
    print_resumen_planos()

    # Cameras: probe indices 0 and 1, keep the ones that open and release the rest.
    cameras = []
    for indice in (0, 1):
        cap = cv2.VideoCapture(indice)
        if not cap.isOpened():
            cap.release()
            continue
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        cameras.append((f'Camara {indice + 1}', cap))
        print(f"‚úÖ C√°mara {indice + 1} detectada")

    if not cameras:
        print("‚ùå No se detectaron c√°maras")
        return

    # Trackers and per-camera framers
    orientation_tracker = BodyOrientation()
    body_position_detector = BodyPositionDetector()
    framers = [SmoothFramer(smoothing=0.15) for _ in cameras]
    fps_history = deque(maxlen=30)

    # Session state
    plano_actual = 'MEDIUM'
    plano_auto = 'MEDIUM'
    gesto_emoji = None
    modo_control = 'AUTO'  # start in AUTO
    camera_activa = 0
    shoulder_width_debug = 0

    print(f"\nüé¨ Sistema iniciado con {len(cameras)} c√°mara(s)")
    print("ü§ñ Modo AUTO activado - mu√©vete para cambiar planos")
    print("üìè Landmarks con tracking mejorado")

    try:
        # Windows
        cv2.namedWindow('CONTROL', cv2.WINDOW_NORMAL)
        cv2.namedWindow('DETECCION', cv2.WINDOW_NORMAL)
        cv2.namedWindow('RESULTADO', cv2.WINDOW_NORMAL)

        cv2.resizeWindow('CONTROL', 400, 1080)
        cv2.moveWindow('CONTROL', 0, 0)

        cv2.resizeWindow('DETECCION', 1280, 540)
        cv2.moveWindow('DETECCION', 420, 0)

        cv2.resizeWindow('RESULTADO', 1280, 540)
        cv2.moveWindow('RESULTADO', 420, 540)

        print("\n‚ñ∂Ô∏è  Sistema listo. Ac√©rcate/al√©jate para probar el modo AUTO\n")

        while True:
            # perf_counter is monotonic and high resolution, unlike time.time().
            start_time = time.perf_counter()

            camera_name, cap = cameras[camera_activa]
            ret, frame = cap.read()

            if not ret:
                break

            # Mirror the image so it behaves like a mirror for the user.
            frame = cv2.flip(frame, 1)
            h, w = frame.shape[:2]

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Run the detectors. face_results is forwarded below although
            # obtener_centro_seguimiento does not read it.
            pose_results = pose.process(rgb_frame)
            hands_results = hands.process(rgb_frame)
            face_results = face_detection.process(rgb_frame)

            # Detection view
            frame_deteccion = frame.copy()
            frame_deteccion = dibujar_landmarks(frame_deteccion, pose_results, hands_results)

            # Automatic shot detection
            plano_auto = body_position_detector.detect_framing(
                pose_results.pose_landmarks, h, w
            )

            # Keep the shoulder width for the debug read-out.
            if pose_results.pose_landmarks:
                lm = pose_results.pose_landmarks.landmark
                shoulder_width_debug = abs(lm[11].x - lm[12].x)

            # Manual gestures: with several hands, the last one wins.
            if hands_results and hands_results.multi_hand_landmarks:
                for hand_landmarks in hands_results.multi_hand_landmarks:
                    plano_manual, gesto_emoji = clasificar_gesto_manual(hand_landmarks)
                    if modo_control == 'MANUAL':
                        plano_actual = plano_manual
            else:
                gesto_emoji = None

            # In AUTO mode the detected shot overrides everything.
            if modo_control == 'AUTO':
                plano_actual = plano_auto

            # Orientation
            orientation = orientation_tracker.calculate(pose_results.pose_landmarks)

            # Framing
            centro = obtener_centro_seguimiento(
                pose_results.pose_landmarks, face_results, w, h, plano_actual
            )
            zoom_factor = PLANOS[plano_actual]['zoom']
            y_offset = PLANOS[plano_actual]['y_offset']
            framer_state = framers[camera_activa].update(centro[0], centro[1], zoom_factor, y_offset)

            # Result view (clean image)
            frame_resultado = frame.copy()
            frame_resultado = aplicar_encuadre(frame_resultado, framer_state)

            # FPS: skip zero-length intervals instead of dividing by zero.
            elapsed = time.perf_counter() - start_time
            if elapsed > 0:
                fps_history.append(1.0 / elapsed)
            avg_fps = np.mean(fps_history) if fps_history else 0.0

            # Overlays
            frame_deteccion = dibujar_info_deteccion(frame_deteccion, camera_name)
            frame_resultado = dibujar_info_resultado(frame_resultado, plano_actual, avg_fps, camera_name)

            # Control panel
            panel_control = crear_panel_control(
                modo_control, plano_actual, plano_auto, gesto_emoji,
                orientation, avg_fps, len(cameras), shoulder_width_debug
            )

            # Show
            cv2.imshow('CONTROL', panel_control)
            cv2.imshow('DETECCION', frame_deteccion)
            cv2.imshow('RESULTADO', frame_resultado)

            # Keyboard controls
            key = cv2.waitKey(1) & 0xFF
            if key == 27:  # ESC
                print("\nüëã Cerrando...")
                break
            elif key == ord('r'):
                framers = [SmoothFramer(smoothing=0.15) for _ in cameras]
                body_position_detector = BodyPositionDetector()
                orientation_tracker = BodyOrientation()
                print("üîÑ Reset")
            elif key == ord('m'):
                modo_control = 'MANUAL' if modo_control == 'AUTO' else 'AUTO'
                print(f"üîÄ Modo: {modo_control}")
            elif key == ord('c') and len(cameras) > 1:
                camera_activa = (camera_activa + 1) % len(cameras)
                print(f"üìπ {cameras[camera_activa][0]}")
            elif key == ord('d'):
                body_position_detector.debug_mode = not body_position_detector.debug_mode
                print(f"üêõ Debug: {'ON' if body_position_detector.debug_mode else 'OFF'}")
    finally:
        # Cleanup always runs, including on errors and interrupts.
        for _, cap in cameras:
            cap.release()
        cv2.destroyAllWindows()
        pose.close()
        hands.close()
        face_detection.close()

        print("‚úÖ Cerrado")


# Entry point: start the interactive session when this code runs as the main
# module. NOTE(review): the captured output below this cell shows the summary
# printed by main(), so executing the cell in the notebook starts the session
# immediately - confirm that is intended.
if __name__ == "__main__":
    main()



üìã RESUMEN DE PLANOS CINEMATOGR√ÅFICOS

üé≠ MODO MANUAL (Gestos de mano):
--------------------------------------------------------------------------------
  Plano General Extremo     ‚Üí Pulgar abajo üëé
  Plano General             ‚Üí Pu√±o cerrado ‚úä
  Plano Entero              ‚Üí 1 dedo ‚òùÔ∏è
  Plano Americano           ‚Üí 4 dedos üññ
  Plano Medio               ‚Üí 3 dedos ü§ü
  Plano Medio Corto         ‚Üí 5 dedos (mano abierta) üñêÔ∏è
  Primer Plano              ‚Üí Paz (V) ‚úåÔ∏è
  Primer√≠simo Plano         ‚Üí Pulgar arriba üëç

ü§ñ MODO AUTO (Detecci√≥n autom√°tica):
--------------------------------------------------------------------------------
  ‚Ä¢ Ac√©rcate a la c√°mara ‚Üí Primer Plano / Extremo
  ‚Ä¢ Al√©jate de la c√°mara ‚Üí Plano Entero / General
  ‚Ä¢ Mu√©vete al borde ‚Üí General Extremo
  ‚Ä¢ G√≠rate de espaldas ‚Üí Plano de Espaldas

üìπ SISTEMA MULTI-C√ÅMARA:
--------------------------------------------------------------------------------
  ‚Ä¢ 