In [125]:
image_path = "test_Im.png"
grimace_2_path = "grimace_2.png"
grimace_3_path = "grimace_3.png"
not_a_face_path = "not_a_face.png"
without_face_path = "without_face.png"
without_right_hand_path = "without_right_hand.png"
without_left_hand_path = "without_left_hand.png"
hand_model_path = "hand_landmarker.task"
face_model_path = "face_landmarker.task"
video_path = "youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right.mp4"

[1, 2, 3, 4, 5, 9, 10, 11, 12, 21, 22,23, 24, 25, 26, 27, 33, 39, 42, 43, 44, 45, 46, 47, 50, 51]

In [134]:
import os
import logging

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # 0=all, 1=INFO, 2=WARNING, 3=ERROR
logging.getLogger("mediapipe").setLevel(logging.ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow logging
os.environ['GLOG_minloglevel'] = '3'      # Suppress Google logging (used by MediaPipe)
os.environ['MEDIAPIPE_DISABLE_GPU'] = '1'  # Optional: Disable GPU logging messages
logging.getLogger("mediapipe").setLevel(logging.ERROR)
logging.getLogger("absl").setLevel(logging.ERROR)
logging.getLogger("tensorflow").setLevel(logging.ERROR)

import cv2
import mediapipe as mp
import numpy as np
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.framework.formats import landmark_pb2
import time
from pathlib import Path
import tempfile
import json
from datetime import datetime

def detect(image_path, hand_model_path, face_model_path, min_hand_detection_confidence=0.5, min_hand_presence_confidence=0.5, min_face_detection_confidence=0.5, min_face_presence_confidence=0.5, num_hands=2, dominand_hand='Right', visualize=False, output_face_blendshapes=True, adaptive_threshold=True, max_attempts=3, threshold_reduction_factor=0.7, min_threshold=0.2):
    """
    Detects hands and face in an image, extracts hand landmark coordinates and face blendshapes.
    
    Args:
        image_path (str): Path to the image file
        min_hand_detection_confidence (float): Confidence threshold for hand detection (0.0-1.0)
        min_hand_presence_confidence (float): Confidence threshold for hand presence (0.0-1.0)
        num_hands (int): Maximum number of hands to detect
        dominand_hand (str): Dominant hand preference ('Left' or 'Right')
        visualize (bool): Whether to visualize the results
        output_face_blendshapes (bool): Whether to detect and extract face blendshapes
        
    Returns:
        tuple: (dom_landmarks, non_dom_landmarks, wrists, confidence_scores, detection_status, 
                blendshape_scores, face_landmark_5, face_detected)
               - dom_landmarks: NumPy array of shape [20, 3] with coordinates of dominant hand landmarks
               - non_dom_landmarks: NumPy array of shape [20, 3] with coordinates of non-dominant hand landmarks
               - wrists: NumPy array of shape [2, 2] with coordinates of both wrists [x, y]
               - confidence_scores: NumPy array of shape [2] with confidence scores [dominant_hand, non_dominant_hand]
               - detection_status: NumPy array of shape [2] with binary detection status [dominant_hand, non_dominant_hand]
               - blendshape_scores: NumPy array of shape [26] with selected face blendshape scores
               - face_landmark_5: NumPy array of shape [2] with coordinates of the 5th face landmark [x, y]
               - face_detected: Binary value (1 if face detected, 0 if not)
    """
    # Initialize output arrays for face detection
    blendshape_scores = np.zeros(52)
    nose_landmark = np.zeros(2)
    left_eye_landmark = np.zeros(2)
    right_eye_landmark = np.zeros(2)
    face_detected = 0
    
    # PART 1: HAND LANDMARK DETECTION
    # 1.1: Configure the hand landmarker
    hand_base_options = python.BaseOptions(
        model_asset_path=hand_model_path
    )

    VisionRunningMode = mp.tasks.vision.RunningMode
    # Configure detection options
    hand_options = vision.HandLandmarkerOptions(
        base_options=hand_base_options,
        num_hands=num_hands,                             
        min_hand_detection_confidence=min_hand_detection_confidence,       
        min_hand_presence_confidence=min_hand_presence_confidence,        
        min_tracking_confidence=0.5,             
        running_mode=VisionRunningMode.IMAGE
    )

    # Create the hand detector
    hand_detector = vision.HandLandmarker.create_from_options(hand_options)

    # 1.2: Load the input image
    image = mp.Image.create_from_file(image_path)

    # 1.3: Detect hand landmarks
    hand_detection_result = hand_detector.detect(image)
    
    # Initialize hand output arrays with zeros
    dom_landmarks = np.zeros((20, 3))       # 20 landmarks (excluding wrist), [x,y,z]
    non_dom_landmarks = np.zeros((20, 3))   # 20 landmarks (excluding wrist), [x,y,z]
    wrists = np.zeros((2, 2))               # 2 wrists, [x,y]
    confidence_scores = np.zeros(2)         # Confidence scores for [dominant, non-dominant]
    interpolation_scores = np.zeros(2) #Interpolation scores for [dominant, non-dominant]. Used later.
    detection_status = np.zeros(2, dtype=np.int32)  # Binary detection status [dominant, non-dominant]
    nose_to_wrist_dist = np.zeros((2, 2))
    
    # 1.4: Process hand landmarks if hands are detected
    if hand_detection_result.hand_landmarks and hand_detection_result.handedness:
        dom_hand_found = False
        non_dom_hand_found = False
        
        # First, find the dominant and non-dominant hands in detection results
        for idx, handedness in enumerate(hand_detection_result.handedness):
            hand_type = handedness[0].category_name  # 'Left' or 'Right'
            hand_score = handedness[0].score  # Confidence score for the handedness classification
            
            if hand_type == dominand_hand:
                # This is the dominant hand
                dom_hand_found = True
                detection_status[0] = 1  # Set detection status to 1 (detected)
                confidence_scores[0] = hand_score  # Store confidence score
                interpolation_scores[0] = 1
                
                # Store dominant hand wrist coordinates [x,y]
                dom_hand_landmarks = hand_detection_result.hand_landmarks[idx]
                wrists[0, 0] = dom_hand_landmarks[0].x
                wrists[0, 1] = dom_hand_landmarks[0].y
                
                # Store all other dominant hand landmarks (excluding wrist)
                for i in range(1, 21):  # Landmarks 1-20 (skipping wrist which is index 0)
                    dom_landmarks[i-1, 0] = dom_hand_landmarks[i].x
                    dom_landmarks[i-1, 1] = dom_hand_landmarks[i].y
                    dom_landmarks[i-1, 2] = dom_hand_landmarks[i].z
                    
            elif hand_type != dominand_hand:
                # This is the non-dominant hand
                non_dom_hand_found = True
                detection_status[1] = 1  # Set detection status to 1 (detected)
                confidence_scores[1] = hand_score  # Store confidence score
                interpolation_scores[1] = 1
                
                # Store non-dominant hand wrist coordinates [x,y]
                non_dom_hand_landmarks = hand_detection_result.hand_landmarks[idx]
                wrists[1, 0] = non_dom_hand_landmarks[0].x
                wrists[1, 1] = non_dom_hand_landmarks[0].y
                
                # Store all other non-dominant hand landmarks (excluding wrist)
                for i in range(1, 21):  # Landmarks 1-20 (skipping wrist)
                    non_dom_landmarks[i-1, 0] = non_dom_hand_landmarks[i].x
                    non_dom_landmarks[i-1, 1] = non_dom_hand_landmarks[i].y
                    non_dom_landmarks[i-1, 2] = non_dom_hand_landmarks[i].z
                    
        # Log information about which hands were found
        print(f"Dominant hand ({dominand_hand}) detected: {dom_hand_found}")
        print(f"Non-dominant hand detected: {non_dom_hand_found}")
    

   # PART 2: FACE LANDMARK DETECTION (If requested)
    if output_face_blendshapes:
        try:
            # 2.1: Configure the face landmarker
            face_base_options = python.BaseOptions(
                model_asset_path=face_model_path
            )
            
            # Configure face detection options
            face_options = vision.FaceLandmarkerOptions(
                base_options=face_base_options,
                min_face_detection_confidence=min_face_detection_confidence,
                min_face_presence_confidence=min_face_presence_confidence,
                output_face_blendshapes=True,
                num_faces=1,
                running_mode=VisionRunningMode.IMAGE
            )
            
            # Create the face detector
            face_detector = vision.FaceLandmarker.create_from_options(face_options)
            
            # 2.2: Detect face landmarks (reuse the same image)
            face_detection_result = face_detector.detect(image)
            
            # 2.3: Process face blendshapes if face is detected
            if (face_detection_result.face_blendshapes and len(face_detection_result.face_blendshapes) > 0 and
                face_detection_result.face_landmarks and len(face_detection_result.face_landmarks) > 0):
                
                # Set face detected flag to 1
                face_detected = 1
                
                # Get all blendshapes from the first face
                all_blendshapes = face_detection_result.face_blendshapes[0]
                
                # Initialize blendshape_scores with the correct size to hold all blendshapes
                # Assuming MediaPipe returns all 52 blendshapes
                blendshape_scores = np.zeros(len(all_blendshapes))
                
                # Fill the blendshape_scores array with ALL scores
                for i in range(len(all_blendshapes)):
                    blendshape_scores[i] = all_blendshapes[i].score
                
                # Get nose coordinates
                nose = face_detection_result.face_landmarks[0][4]
                nose_landmark[0] = nose.x
                nose_landmark[1] = nose.y
    
                # Get eye coordinates
                left_eye = face_detection_result.face_landmarks[0][473]
                left_eye_landmark[0] = left_eye.x
                left_eye_landmark[1] = left_eye.y
    
                right_eye = face_detection_result.face_landmarks[0][468]
                right_eye_landmark[0] = right_eye.x
                right_eye_landmark[1] = right_eye.y
            
        except Exception as e:
            print(f"Error during face detection: {e}")
            # Keep default zero values for face outputs if detection fails
    
    
    
    # PART 3: VISUALIZATION
    if visualize:
        # Load the image with OpenCV for visualization
        img_cv = cv2.imread(image_path)
        img_height, img_width, _ = img_cv.shape

        # 3.1: Draw hand landmarks if hands are detected
        if hand_detection_result.hand_landmarks:
            print(f"Visualizing {len(hand_detection_result.hand_landmarks)} hands")
            
            # Define connections between landmarks for hand skeleton
            connections = [
                # Thumb connections
                (0, 1), (1, 2), (2, 3), (3, 4),
                # Index finger connections
                (0, 5), (5, 6), (6, 7), (7, 8),
                # Middle finger connections
                (0, 9), (9, 10), (10, 11), (11, 12),
                # Ring finger connections
                (0, 13), (13, 14), (14, 15), (15, 16),
                # Pinky finger connections
                (0, 17), (17, 18), (18, 19), (19, 20),
                # Palm connections
                (0, 5), (5, 9), (9, 13), (13, 17)
            ]
            
            for idx, hand_landmarks in enumerate(hand_detection_result.hand_landmarks):
                # Determine if this is the dominant hand
                is_dominant = False
                if hand_detection_result.handedness:
                    hand_type = hand_detection_result.handedness[idx][0].category_name
                    is_dominant = (hand_type == dominand_hand)
                
                # Use different colors for dominant vs non-dominant hand
                hand_color = (0, 0, 255) if is_dominant else (255, 0, 0)  # Blue for dominant, Red for non-dominant
                
                # Draw all landmark points
                for landmark in hand_landmarks:
                    # Convert normalized coordinates to pixel coordinates
                    x = int(landmark.x * img_width)
                    y = int(landmark.y * img_height)
                    
                    # Draw the landmark point
                    cv2.circle(img_cv, (x, y), 5, hand_color, -1)
                
                # Draw connections between landmarks (hand skeleton)
                for connection in connections:
                    start_idx, end_idx = connection
                    
                    if start_idx < len(hand_landmarks) and end_idx < len(hand_landmarks):
                        start_point = hand_landmarks[start_idx]
                        end_point = hand_landmarks[end_idx]
                        
                        # Convert normalized coordinates to pixel coordinates
                        start_x = int(start_point.x * img_width)
                        start_y = int(start_point.y * img_height)
                        end_x = int(end_point.x * img_width)
                        end_y = int(end_point.y * img_height)
                        
                        # Draw the connection line
                        cv2.line(img_cv, (start_x, start_y), (end_x, end_y), hand_color, 2)
                
                # Add hand type label (Left/Right, Dominant/Non-dominant)
                if hand_detection_result.handedness:
                    handedness = hand_detection_result.handedness[idx]
                    hand_type = handedness[0].category_name  # 'Left' or 'Right'
                    hand_score = handedness[0].score
                    dom_status = "Dominant" if hand_type == dominand_hand else "Non-dominant"
                    cv2.putText(img_cv, f"{hand_type} Hand - {dom_status} ({hand_score:.2f})", 
                            (10, 30 + idx * 30), cv2.FONT_HERSHEY_SIMPLEX, 
                            0.8, hand_color, 2)
                    
                    # Calculate and draw a bounding box
                    x_coords = [landmark.x for landmark in hand_landmarks]
                    y_coords = [landmark.y for landmark in hand_landmarks]
                    min_x, max_x = min(x_coords), max(x_coords)
                    min_y, max_y = min(y_coords), max(y_coords)
                    
                    # Convert to pixel coordinates
                    min_x, max_x = int(min_x * img_width), int(max_x * img_width)
                    min_y, max_y = int(min_y * img_height), int(max_y * img_height)
                    
                    # Draw bounding box
                    cv2.rectangle(img_cv, (min_x, min_y), (max_x, max_y), hand_color, 2)

        # 3.2: Draw Nose if face was detected
        if face_detected == 1:
            # Convert normalized coordinates to pixel coordinates
            face_x = int(nose_landmark[0] * img_width)
            face_y = int(nose_landmark[1] * img_height)
            
            # Draw the Nose with a distinctive color and size
            cv2.circle(img_cv, (face_x, face_y), 8, (0, 255, 255), -1)  # Yellow circle
            cv2.putText(img_cv, "Nose", (face_x + 10, face_y), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)

            # Draw eyes
            left_eye_x = int(left_eye_landmark[0] * img_width)
            left_eye_y = int(left_eye_landmark[1] * img_height)
            right_eye_x = int(right_eye_landmark[0] * img_width)
            right_eye_y = int(right_eye_landmark[1] * img_height)
            
            cv2.circle(img_cv, (left_eye_x, left_eye_y), 6, (255, 255, 0), -1)  # Cyan circle
            cv2.circle(img_cv, (right_eye_x, right_eye_y), 6, (255, 255, 0), -1)  # Cyan circle
            cv2.line(img_cv, (left_eye_x, left_eye_y), (right_eye_x, right_eye_y), (255, 255, 0), 2)
        # 3.3: Add detection status information to visualization
        y_pos = img_height - 80
        hand_status_text = f"Hand Detection: Dom={detection_status[0]}, Non-Dom={detection_status[1]}"
        hand_conf_text = f"Hand Confidence: Dom={confidence_scores[0]:.2f}, Non-Dom={confidence_scores[1]:.2f}"
        face_status_text = f"Face Detection: {face_detected}"
        
        cv2.putText(img_cv, hand_status_text, (10, y_pos), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        cv2.putText(img_cv, hand_conf_text, (10, y_pos + 30), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        cv2.putText(img_cv, face_status_text, (10, y_pos + 60), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        # 3.4: Display the result
        cv2.imshow('Hand and Face Landmarks', img_cv)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


    if face_detected==1:
        #Calculate distance between the eyes
        eyes_diff = right_eye_landmark-left_eye_landmark
        eyes_distance = np.sqrt(eyes_diff.dot(eyes_diff))
        if detection_status[0]==1 and detection_status[1]==1:
            nose_to_wrist_dist = (wrists-nose_landmark) / eyes_distance
            #Make every hand's landmark potision relative to the wrist, and scaled by the eye's distance
            dom_landmarks[:, 0:2] = (dom_landmarks[:, 0:2] - wrists[0, :]) / eyes_distance
            non_dom_landmarks[:, 0:2] = (non_dom_landmarks[:, 0:2] - wrists[1, :]) / eyes_distance
        elif detection_status[0]==1 and detection_status[1]==0:
            nose_to_wrist_dist[0, :] = (wrists[0, :]-nose_landmark) / eyes_distance
            #Make every hand's landmark potision relative to the wrist, and scaled by the eye's distance
            dom_landmarks[:, 0:2] = (dom_landmarks[:, 0:2] - wrists[0, :]) / eyes_distance
        elif detection_status[0]==0 and detection_status[1]==1:
            nose_to_wrist_dist[1,:] = (wrists[1,:]-nose_landmark) / eyes_distance
            #Make every hand's landmark potision relative to the wrist, and scaled by the eye's distance
            non_dom_landmarks[:, 0:2] = (non_dom_landmarks[:, 0:2] - wrists[0, :]) / eyes_distance
        
    elif face_detected==0 and detection_status[0]==1:
        #Calculate palm width distance as fallback scaling factor
        palm_width_diff = dom_landmarks[5, :]- dom_landmarks[17, :]
        palm_width_dist = np.sqrt(palm_width_diff.dot(palm_width_diff))
        if detection_status[1]==1:
            nose_to_wrist_dist = (wrists-nose_landmark) / palm_width_dist
            #Make every hand's landmark potision relative to the wrist, and scaled by the palm width 
            dom_landmarks[:, 0:2] = (dom_landmarks[:, 0:2] - wrists[0, :]) / palm_width_dist
            non_dom_landmarks[:, 0:2] = (non_dom_landmarks[:, 0:2] - wrists[1, :]) / palm_width_dist
        elif detection_status[1]==0:
            nose_to_wrist_dist[0,:] = (wrists[0,:]-nose_landmark) / palm_width_dist
            #Make every hand's landmark potision relative to the wrist, and scaled by the palm width 
            dom_landmarks[:, 0:2] = (dom_landmarks[:, 0:2] - wrists[0, :]) / palm_width_dist
    elif face_detected==0 and detection_status[0]==0 and detection_status[1]==1:
        #Calculate palm width distance as fallback scaling factor
        palm_width_diff = non_dom_landmarks[5, :]- non_dom_landmarks[17, :]
        palm_width_dist = np.sqrt(palm_width_diff.dot(palm_width_diff))
        nose_to_wrist_dist[1,:] = (wrists[1,:]-nose_landmark) / palm_width_dist
        #Make every hand's landmark potision relative to the wrist, and scaled by the palm width 
        non_dom_landmarks[:, 0:2] = (non_dom_landmarks[:, 0:2] - wrists[1, :]) / palm_width_dist
    

    
    # Return all requested outputs
    return dom_landmarks, non_dom_landmarks, confidence_scores, interpolation_scores, detection_status, blendshape_scores, face_detected, nose_to_wrist_dist

In [135]:
lol = detect(grimace_2_path, hand_model_path, face_model_path, min_hand_detection_confidence=0.5, min_hand_presence_confidence=0.5, num_hands=2, dominand_hand='Left', visualize=True)

I0000 00:00:1742824825.617509    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742824825.620415  282728 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742824825.856196  282729 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742824825.881208  282730 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742824825.948322    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742824825.952441  282744 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Left) detected: True
Non-dominant hand detected: True
Visualizing 2 hands


lol[-1]

In [136]:
lol

(array([[-0.22157899, -0.47131054, -0.03073101],
        [-0.17301513, -0.94633667, -0.04778747],
        [ 0.09016798, -1.17645313, -0.06066544],
        [ 0.44656346, -1.2285489 , -0.07052706],
        [ 0.75239831, -1.00154878, -0.03440262],
        [ 0.74546371, -0.32252427, -0.05854694],
        [ 0.52926255,  0.01389155, -0.07399104],
        [ 0.3284734 ,  0.18489803, -0.08165359],
        [ 0.96142811, -0.83551715, -0.02785412],
        [ 0.733497  , -0.12437514, -0.05066952],
        [ 0.38909197,  0.19143662, -0.05764463],
        [ 0.1054417 ,  0.3583336 , -0.05927849],
        [ 1.04701376, -0.64099421, -0.02452194],
        [ 0.80774103, -0.04213003, -0.04881671],
        [ 0.46820677,  0.20524576, -0.04839684],
        [ 0.19587368,  0.31181122, -0.04229698],
        [ 1.07308434, -0.42637636, -0.02360797],
        [ 1.01975656, -0.01941465, -0.04110365],
        [ 0.78549988,  0.19136538, -0.03911975],
        [ 0.56846131,  0.24917491, -0.03242829]]),
 array([[ 0.434435

In [139]:
def adaptive_detect(image_path, hand_model_path, face_model_path, min_hand_detection_confidence=0.5, min_hand_presence_confidence=0.5, 
                   min_face_detection_confidence=0.5, min_face_presence_confidence=0.5, 
                   num_hands=2, dominand_hand='Right', visualize=False, output_face_blendshapes=True,
                   max_attempts=3, threshold_reduction_factor=0.7, min_threshold=0.2):
    """
    Adaptively detects hands and face by progressively lowering detection thresholds
    for undetected body parts.
    
    Args:
        image_path (str): Path to the image file
        min_hand_detection_confidence (float): Initial confidence threshold for hand detection
        min_hand_presence_confidence (float): Initial confidence threshold for hand presence
        min_face_detection_confidence (float): Initial confidence threshold for face detection
        min_face_presence_confidence (float): Initial confidence threshold for face presence
        num_hands (int): Maximum number of hands to detect
        dominand_hand (str): Dominant hand preference ('Left' or 'Right')
        visualize (bool): Whether to visualize the final results
        output_face_blendshapes (bool): Whether to detect and extract face blendshapes
        max_attempts (int): Maximum number of detection attempts with lowered thresholds
        threshold_reduction_factor (float): Factor to multiply thresholds by on each attempt (0-1)
        min_threshold (float): Minimum threshold to prevent excessive lowering
        
    Returns:
        Same output as the detect() function
    """
    # Import the original detect function
    #from your_module import detect  # Replace with actual module name
    
    # Store original thresholds
    orig_hand_detection_conf = min_hand_detection_confidence
    orig_hand_presence_conf = min_hand_presence_confidence
    orig_face_detection_conf = min_face_detection_confidence
    orig_face_presence_conf = min_face_presence_confidence
    
    # Initialize best results and detection status
    best_results = None
    best_detection_status = [0, 0]  # [dom_hand, non_dom_hand]
    best_face_detected = 0
    

    
    # Try detection with progressively lower thresholds
    for attempt in range(max_attempts):
        print(f"\n--- Attempt {attempt+1}/{max_attempts} ---")
        
        # Calculate current thresholds
        if attempt > 0:
            # Only lower thresholds for undetected parts
            # For hands
            if best_detection_status[0] == 0:  # Dominant hand not detected
                hand_detection_conf_dom = max(orig_hand_detection_conf * (threshold_reduction_factor ** attempt), min_threshold)
                hand_presence_conf_dom = max(orig_hand_presence_conf * (threshold_reduction_factor ** attempt), min_threshold)
                print(f"Lowering dominant hand thresholds: {hand_detection_conf_dom:.3f}, {hand_presence_conf_dom:.3f}")
            else:
                hand_detection_conf_dom = orig_hand_detection_conf
                hand_presence_conf_dom = orig_hand_presence_conf
                
            if best_detection_status[1] == 0:  # Non-dominant hand not detected
                hand_detection_conf_non_dom = max(orig_hand_detection_conf * (threshold_reduction_factor ** attempt), min_threshold)
                hand_presence_conf_non_dom = max(orig_hand_presence_conf * (threshold_reduction_factor ** attempt), min_threshold)
                print(f"Lowering non-dominant hand thresholds: {hand_detection_conf_non_dom:.3f}, {hand_presence_conf_non_dom:.3f}")
            else:
                hand_detection_conf_non_dom = orig_hand_detection_conf
                hand_presence_conf_non_dom = orig_hand_presence_conf
            
            # Use the minimum of the two calculated thresholds (MediaPipe doesn't support per-hand thresholds)
            current_hand_detection_conf = min(hand_detection_conf_dom, hand_detection_conf_non_dom)
            current_hand_presence_conf = min(hand_presence_conf_dom, hand_presence_conf_non_dom)
            
            # For face
            if output_face_blendshapes and best_face_detected == 0:  # Face not detected
                current_face_detection_conf = max(orig_face_detection_conf * (threshold_reduction_factor ** attempt), min_threshold)
                current_face_presence_conf = max(orig_face_presence_conf * (threshold_reduction_factor ** attempt), min_threshold)
                print(f"Lowering face thresholds: {current_face_detection_conf:.3f}, {current_face_presence_conf:.3f}")
            else:
                current_face_detection_conf = orig_face_detection_conf
                current_face_presence_conf = orig_face_presence_conf
        else:
            # Use original thresholds for first attempt
            current_hand_detection_conf = orig_hand_detection_conf
            current_hand_presence_conf = orig_hand_presence_conf
            current_face_detection_conf = orig_face_detection_conf
            current_face_presence_conf = orig_face_presence_conf
            print(f"Using original thresholds: hands={current_hand_detection_conf}, face={current_face_detection_conf}")
        
        # Call detect with current thresholds (don't visualize intermediate attempts)
        results = detect(image_path,  hand_model_path=hand_model_path, face_model_path=face_model_path,
                        min_hand_detection_confidence=current_hand_detection_conf,
                        min_hand_presence_confidence=current_hand_presence_conf,
                        min_face_detection_confidence=current_face_detection_conf,
                        min_face_presence_confidence=current_face_presence_conf,
                        num_hands=num_hands,
                        dominand_hand=dominand_hand,
                        visualize=False,
                        output_face_blendshapes=output_face_blendshapes)
        
        # Unpack results
        dom_landmarks, non_dom_landmarks, confidence_scores, interpolation_scores, detection_status, blendshape_scores, face_detected, nose_to_wrist_dist = results
        
        # Compare with best results so far
        current_detection_count = detection_status[0] + detection_status[1] + face_detected
        best_detection_count = best_detection_status[0] + best_detection_status[1] + best_face_detected
        
        if best_results is None or current_detection_count > best_detection_count:
            best_results = results
            best_detection_status = [detection_status[0], detection_status[1]]
            best_face_detected = face_detected
            
            print(f"New best detection: dominant hand={detection_status[0]}, "
                  f"non-dominant hand={detection_status[1]}, face={face_detected}")
            
            # If everything is detected, we can stop early
            if detection_status[0] == 1 and detection_status[1] == 1 and (face_detected == 1 or not output_face_blendshapes):
                print("All body parts detected. Stopping early.")
                break
        else:
            print("No improvement in detection. Continuing to next attempt.")
    
    # Run final detection with visualization if requested
    if visualize:
        print("\n--- Visualizing final results ---")
        # Call detect one more time with the parameters that gave best results, but with visualize=True
        # For simplicity, we'll just use the best thresholds we found
        # This is slightly inefficient (one extra detection) but keeps the code clean
        
        # Determine which thresholds gave the best results
        if best_detection_status[0] == 0:  # If dominant hand not detected in best result
            hand_detection_conf = min_threshold
            hand_presence_conf = min_threshold
        else:
            hand_detection_conf = orig_hand_detection_conf
            hand_presence_conf = orig_hand_presence_conf
            
        if output_face_blendshapes and best_face_detected == 0:  # If face not detected in best result
            face_detection_conf = min_threshold
            face_presence_conf = min_threshold
        else:
            face_detection_conf = orig_face_detection_conf
            face_presence_conf = orig_face_presence_conf
        
        # Run final detection with visualization
        final_results = detect(image_path, hand_model_path=hand_model_path, face_model_path=face_model_path,
                              min_hand_detection_confidence=hand_detection_conf,
                              min_hand_presence_confidence=hand_presence_conf, 
                              min_face_detection_confidence=face_detection_conf,
                              min_face_presence_confidence=face_presence_conf,
                              num_hands=num_hands,
                              dominand_hand=dominand_hand,
                              visualize=True,
                              output_face_blendshapes=output_face_blendshapes)
        
        # Use these results if they're better than our best so far
        dom_landmarks, non_dom_landmarks, confidence_scores, interpolation_scores, detection_status, blendshape_scores, face_detected, nose_to_wrist_dist = final_results
        current_detection_count = detection_status[0] + detection_status[1] + face_detected
        best_detection_count = best_detection_status[0] + best_detection_status[1] + best_face_detected
        
        if current_detection_count > best_detection_count:
            best_results = final_results
    
    # Print final detection summary
    print("\n=== Detection Summary ===")
    dom_landmarks, non_dom_landmarks, confidence_scores, interpolation_scores, detection_status, blendshape_scores, face_detected, nose_to_wrist_dist = best_results
    print(f"Dominant hand detected: {detection_status[0] == 1} (confidence: {confidence_scores[0]:.3f})")
    print(f"Non-dominant hand detected: {detection_status[1] == 1} (confidence: {confidence_scores[1]:.3f})")
    if output_face_blendshapes:
        print(f"Face detected: {face_detected == 1}")
    print(f"Total detection attempts: {attempt+1}")
    return best_results

In [140]:
best_results = adaptive_detect(without_left_hand_path, hand_model_path=hand_model_path, face_model_path=face_model_path,  min_hand_detection_confidence=0.5, min_hand_presence_confidence=0.5, num_hands=2, dominand_hand='Left', visualize=True,output_face_blendshapes=True,max_attempts=3, threshold_reduction_factor=0.7, min_threshold=0.2)


--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Left) detected: False
Non-dominant hand detected: True
New best detection: dominant hand=0, non-dominant hand=1, face=1

--- Attempt 2/3 ---
Lowering dominant hand thresholds: 0.350, 0.350


I0000 00:00:1742824986.936870    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742824986.940818  284930 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742824987.039888  284934 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742824987.056971  284940 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742824987.133708    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742824987.135590  284946 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Left) detected: False
Non-dominant hand detected: True
No improvement in detection. Continuing to next attempt.

--- Attempt 3/3 ---
Lowering dominant hand thresholds: 0.245, 0.245


I0000 00:00:1742824987.304140    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742824987.307504  284978 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742824987.308167    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742824987.314875  284980 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742824987.507001  284979 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742824987.543464    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742824987.547

Dominant hand (Left) detected: False
Non-dominant hand detected: True
No improvement in detection. Continuing to next attempt.

--- Visualizing final results ---
Dominant hand (Left) detected: False
Non-dominant hand detected: True


I0000 00:00:1742824987.738748    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742824987.741043  285010 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742824987.741756    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742824987.750388  285011 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742824987.779803  285022 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742824987.819273    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742824987.822

Visualizing 1 hands

=== Detection Summary ===
Dominant hand detected: False (confidence: 0.000)
Non-dominant hand detected: True (confidence: 0.948)
Face detected: True
Total detection attempts: 3


dom_landmarks, non_dom_landmarks, confidence_scores, detection_status, blendshape_scores, face_detected, nose_to_wrist_dist

In [141]:
best_results

(array([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]),
 array([[ 4.4302598 ,  5.79258936, -0.01529764],
        [ 4.97843194,  5.60716573, -0.03023903],
        [ 5.4227688 ,  5.474167  , -0.04531926],
        [ 5.6396736 ,  5.29133956, -0.06161126],
        [ 5.46952226,  5.95203357, -0.02546122],
        [ 6.14746077,  5.96317265, -0.03837759],
        [ 6.5033938 ,  5.98504281, -0.04724246],
        [ 6.77242064,  5.99687042, -0.05398038],
        [ 5.45199885,  6.19957023, -0.02953801],
        [ 6.20066676,  6.22105431, -0.03755009],
        [ 6.60851154,  6.23570749, -0.04424638],
        [ 6.9013028

In [142]:


def process_video(video_path, adaptive_detect_func, hand_model_path=hand_model_path, face_model_path=face_model_path,
                 min_hand_detection_confidence=0.5, min_hand_presence_confidence=0.5,
                 min_face_detection_confidence=0.5, min_face_presence_confidence=0.5,
                 num_hands=2, output_face_blendshapes=True,
                 max_attempts=3, threshold_reduction_factor=0.7, min_threshold=0.2, 
                 frame_step=1, start_time_seconds=0, end_time_seconds=None,
                 save_failure_screenshots=False):
    """
    Process a video frame-by-frame using the adaptive_detect function and save results.
    
    Args:
        video_path (str): Path to the video file
        adaptive_detect_func: The adaptive detection function to use
        min_hand_detection_confidence (float): Initial confidence threshold for hand detection
        min_hand_presence_confidence (float): Initial confidence threshold for hand presence
        min_face_detection_confidence (float): Initial confidence threshold for face detection
        min_face_presence_confidence (float): Initial confidence threshold for face presence
        num_hands (int): Maximum number of hands to detect
        dominand_hand (str): Dominant hand preference ('Left' or 'Right')
        output_face_blendshapes (bool): Whether to detect face blendshapes
        max_attempts (int): Maximum detection attempts for adaptive detection
        threshold_reduction_factor (float): Factor to reduce thresholds by
        min_threshold (float): Minimum threshold limit
        frame_step (int): Process every Nth frame (1 = all frames)
        start_time_seconds (float): Time in seconds to start processing from
        end_time_seconds (float): Time in seconds to end processing (None = process until end)
        save_failure_screenshots (bool): Save screenshots for all frames with any detection failures
        
    Returns:
        str: Path to the directory containing saved frame results
    """
    # Extract video name for directory creation
    video_path = Path(video_path)
    video_name = video_path.stem  # Get filename without extension
    
    # Extract dominant hand information from filename
    if video_name.endswith("_Right"):
        extracted_dominant_hand = "Right"
    elif video_name.endswith("_Left"):
        extracted_dominant_hand = "Left"
    else:
        # Default if not specified in filename
        extracted_dominant_hand = "Right"
        print(f"Warning: Could not determine dominant hand from filename, using default: {dominand_hand}")

    # Use the extracted dominant hand instead of the parameter
    dominand_hand = extracted_dominant_hand
    print(f"Detected dominant hand from filename: {dominand_hand}")

    # Create output directory
    output_dir = Path(f"{video_name}_landmarks")
    output_dir.mkdir(exist_ok=True)
    
    # Create screenshots directory if screenshot option is enabled
    screenshots_dir = None
    if save_failure_screenshots:
        screenshots_dir = output_dir / "failure_screenshots"
        screenshots_dir.mkdir(exist_ok=True)
    
    # Create a log file to track processing
    log_file = output_dir / "processing_log.txt"
    
    # Create a detailed statistics file
    stats_file = output_dir / "detection_statistics.json"
    
    # Initialize statistics tracking
    stats = {
        "video_info": {
            "name": video_name,
            "path": str(video_path),
            "total_frames": 0,
            "processed_frames": 0,
            "fps": 0,
            "duration_seconds": 0,
            "start_time": start_time_seconds,
            "end_time": end_time_seconds,
            "dominant_hand": dominand_hand,
            "processing_started": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "processing_completed": None
        },
        "detection_rates": {
            "dominant_hand": {
                "detected": 0,
                "failed": 0,
                "detection_rate": 0
            },
            "non_dominant_hand": {
                "detected": 0,
                "failed": 0,
                "detection_rate": 0
            },
            "face": {
                "detected": 0,
                "failed": 0,
                "detection_rate": 0
            },
            "overall": {
                "all_detected": 0,
                "partial_detections": 0,
                "no_detections": 0,
                "success_rate": 0
            }
        },
        "failed_frames": {
            "dominant_hand_failures": [],
            "non_dominant_hand_failures": [],
            "face_failures": [],
            "all_failures": []
        },
        "processing_performance": {
            "average_processing_time_ms": 0,
            "total_processing_time_seconds": 0
        }
    }
    
    with open(log_file, "w") as log:
        log.write(f"Processing video: {video_path}\n")
        log.write(f"Started at: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        log.write(f"Parameters:\n")
        log.write(f"  - frame_step: {frame_step}\n")
        log.write(f"  - start_time: {start_time_seconds} seconds\n")
        if end_time_seconds is not None:
            log.write(f"  - end_time: {end_time_seconds} seconds\n")
        log.write(f"  - dominand_hand: {dominand_hand}\n")
        log.write(f"  - num_hands: {num_hands}\n")
        log.write(f"  - detection confidence thresholds: {min_hand_detection_confidence}, {min_face_detection_confidence}\n")
        log.write("\n--- Frame processing log ---\n")
    
    # Open the video file
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise ValueError(f"Could not open video file: {video_path}")
    
    # Get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    duration_seconds = total_frames / fps if fps > 0 else 0
    
    # Update stats with video info
    stats["video_info"]["total_frames"] = total_frames
    stats["video_info"]["fps"] = fps
    stats["video_info"]["duration_seconds"] = duration_seconds
    
    # Convert time to frame indices
    start_frame = int(max(0, start_time_seconds * fps))
    
    # Set end frame if specified
    if end_time_seconds is not None:
        end_frame = min(total_frames, int(end_time_seconds * fps))
    else:
        end_frame = total_frames
    
    print(f"Video: {video_name}")
    print(f"Total frames: {total_frames}")
    print(f"FPS: {fps}")
    print(f"Duration: {duration_seconds:.2f} seconds")
    print(f"Processing frames {start_frame} to {end_frame} (time {start_time_seconds:.2f}s to {end_time_seconds if end_time_seconds is not None else duration_seconds:.2f}s)")
    print(f"Output directory: {output_dir}")
    
    # Process frames
    frame_idx = 0
    processed_count = 0
    total_processing_time = 0
    
    # Skip to start_frame
    if start_frame > 0:
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        frame_idx = start_frame
    
    with tempfile.TemporaryDirectory() as temp_dir:
        while frame_idx < end_frame:
            # Read the next frame
            ret, frame = cap.read()
            if not ret:
                break  # End of video
            
            # Only process every frame_step frames
            if (frame_idx - start_frame) % frame_step != 0:
                frame_idx += 1
                continue
                
            # Get timestamp in milliseconds
            timestamp_ms = int(frame_idx * 1000 / fps)
            timestamp_formatted = f"{timestamp_ms//60000:02d}m{(timestamp_ms//1000)%60:02d}s{timestamp_ms%1000:03d}ms"
            
            # Temporary frame path
            temp_frame_path = Path(temp_dir) / f"temp_frame_{frame_idx}.jpg"
            
            # Save the current frame as an image
            cv2.imwrite(str(temp_frame_path), frame)
            
            # Process the frame with adaptive_detect
            print(f"Processing frame {frame_idx}/{total_frames} (timestamp: {timestamp_formatted})")
            
            start_time = time.time()
            try:
                # Use adaptive_detect on the frame
                results = adaptive_detect_func(
                    str(temp_frame_path), hand_model_path, face_model_path,
                    min_hand_detection_confidence=min_hand_detection_confidence,
                    min_hand_presence_confidence=min_hand_presence_confidence,
                    min_face_detection_confidence=min_face_detection_confidence,
                    min_face_presence_confidence=min_face_presence_confidence,
                    num_hands=num_hands,
                    dominand_hand=dominand_hand,
                    visualize=False,
                    output_face_blendshapes=output_face_blendshapes,
                    max_attempts=max_attempts,
                    threshold_reduction_factor=threshold_reduction_factor,
                    min_threshold=min_threshold
                )
                
                # Calculate processing time
                proc_time = time.time() - start_time
                total_processing_time += proc_time
                
                # Unpack results
                dom_landmarks, non_dom_landmarks, confidence_scores, interpolation_scores, detection_status, blendshape_scores, face_detected, nose_to_wrist_dist = results
                
                # Update detection statistics
                dom_hand_detected = detection_status[0] == 1
                non_dom_hand_detected = detection_status[1] == 1
                face_was_detected = face_detected == 1
                
                if dom_hand_detected:
                    stats["detection_rates"]["dominant_hand"]["detected"] += 1
                else:
                    stats["detection_rates"]["dominant_hand"]["failed"] += 1
                    stats["failed_frames"]["dominant_hand_failures"].append({
                        "frame": frame_idx,
                        "timestamp_ms": timestamp_ms,
                        "file": f"{video_name}_frame{frame_idx:06d}_{timestamp_formatted}.npz"
                    })
                
                if non_dom_hand_detected:
                    stats["detection_rates"]["non_dominant_hand"]["detected"] += 1
                else:
                    stats["detection_rates"]["non_dominant_hand"]["failed"] += 1
                    stats["failed_frames"]["non_dominant_hand_failures"].append({
                        "frame": frame_idx,
                        "timestamp_ms": timestamp_ms,
                        "file": f"{video_name}_frame{frame_idx:06d}_{timestamp_formatted}.npz"
                    })
                
                if face_was_detected:
                    stats["detection_rates"]["face"]["detected"] += 1
                else:
                    stats["detection_rates"]["face"]["failed"] += 1
                    stats["failed_frames"]["face_failures"].append({
                        "frame": frame_idx,
                        "timestamp_ms": timestamp_ms,
                        "file": f"{video_name}_frame{frame_idx:06d}_{timestamp_formatted}.npz"
                    })
                
                # Track combined detection status
                detection_count = dom_hand_detected + non_dom_hand_detected + face_was_detected
                
                if detection_count == 3:
                    stats["detection_rates"]["overall"]["all_detected"] += 1
                elif detection_count == 0:
                    stats["detection_rates"]["overall"]["no_detections"] += 1
                    stats["failed_frames"]["all_failures"].append({
                        "frame": frame_idx,
                        "timestamp_ms": timestamp_ms,
                        "file": f"{video_name}_frame{frame_idx:06d}_{timestamp_formatted}.npz"
                    })
                else:
                    stats["detection_rates"]["overall"]["partial_detections"] += 1
                
                # Save screenshot if any detection failed and screenshots are enabled
                if save_failure_screenshots and (not dom_hand_detected or not non_dom_hand_detected or not face_was_detected):
                    # Create a detailed failure type description for the filename
                    failure_type = []
                    if not dom_hand_detected:
                        failure_type.append("DomHand")
                    if not non_dom_hand_detected:
                        failure_type.append("NonDomHand")
                    if not face_was_detected:
                        failure_type.append("Face")
                    
                    failure_str = "_".join(failure_type)
                    screenshot_filename = f"{video_name}_frame{frame_idx:06d}_{timestamp_formatted}_missing_{failure_str}.jpg"
                    screenshot_path = screenshots_dir / screenshot_filename
                    
                    # Copy the frame to the screenshots directory
                    cv2.imwrite(str(screenshot_path), frame)
                    print(f"Saved failure screenshot: {screenshot_filename}")
                
                # Create output filename with frame info
                output_filename = f"{video_name}_frame{frame_idx:06d}_{timestamp_formatted}.npz"
                output_path = output_dir / output_filename
                
                # Save all results in a single .npz file
                np.savez(
                    output_path,
                    dom_landmarks=dom_landmarks,
                    non_dom_landmarks=non_dom_landmarks,
                    confidence_scores=confidence_scores,
                    interpolation_scores=interpolation_scores,
                    detection_status=detection_status,
                    blendshape_scores=blendshape_scores,
                    face_detected=face_detected,
                    nose_to_wrist_dist=nose_to_wrist_dist,
                    frame_idx=np.array([frame_idx]),
                    timestamp_ms=np.array([timestamp_ms])
                )
                
                # Update processing log
                detection_summary = f"Dom: {detection_status[0]}, Non-dom: {detection_status[1]}, Face: {face_detected}"
                log_entry = f"Frame {frame_idx}: {detection_summary} (proc time: {proc_time:.2f}s)\n"
                
                with open(log_file, "a") as log:
                    log.write(log_entry)
                
                processed_count += 1
                
            except Exception as e:
                print(f"Error processing frame {frame_idx}: {e}")
                with open(log_file, "a") as log:
                    log.write(f"Error on frame {frame_idx}: {str(e)}\n")
            
            # Clean up temporary frame file
            if temp_frame_path.exists():
                temp_frame_path.unlink()
                
            frame_idx += 1
    
    # Close the video file
    cap.release()
    
    # Update final statistics
    stats["video_info"]["processed_frames"] = processed_count
    stats["video_info"]["processing_completed"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    
    # Calculate detection rates
    if processed_count > 0:
        stats["detection_rates"]["dominant_hand"]["detection_rate"] = (
            stats["detection_rates"]["dominant_hand"]["detected"] / processed_count * 100
        )
        stats["detection_rates"]["non_dominant_hand"]["detection_rate"] = (
            stats["detection_rates"]["non_dominant_hand"]["detected"] / processed_count * 100
        )
        stats["detection_rates"]["face"]["detection_rate"] = (
            stats["detection_rates"]["face"]["detected"] / processed_count * 100
        )
        stats["detection_rates"]["overall"]["success_rate"] = (
            stats["detection_rates"]["overall"]["all_detected"] / processed_count * 100
        )
    
    # Calculate processing performance
    if processed_count > 0:
        stats["processing_performance"]["average_processing_time_ms"] = (
            total_processing_time / processed_count * 1000
        )
    stats["processing_performance"]["total_processing_time_seconds"] = total_processing_time
    
    # Save statistics to JSON file
    with open(stats_file, "w") as f:
        json.dump(stats, f, indent=2)
    
    # Add summary statistics to log file
    with open(log_file, "a") as log:
        log.write(f"\n\n===== PROCESSING SUMMARY =====\n")
        log.write(f"Completed at: {stats['video_info']['processing_completed']}\n")
        log.write(f"Frames processed: {processed_count} from {start_frame} to {min(end_frame, frame_idx-1)}\n\n")
        
        log.write("DETECTION RATES:\n")
        log.write(f"  Dominant hand ({dominand_hand}): {stats['detection_rates']['dominant_hand']['detection_rate']:.1f}%\n")
        log.write(f"  Non-dominant hand: {stats['detection_rates']['non_dominant_hand']['detection_rate']:.1f}%\n")
        log.write(f"  Face: {stats['detection_rates']['face']['detection_rate']:.1f}%\n")
        log.write(f"  All parts detected: {stats['detection_rates']['overall']['success_rate']:.1f}%\n\n")
        
        log.write("DETECTION FAILURES:\n")
        log.write(f"  Frames with dominant hand failures: {len(stats['failed_frames']['dominant_hand_failures'])}\n")
        log.write(f"  Frames with non-dominant hand failures: {len(stats['failed_frames']['non_dominant_hand_failures'])}\n")
        log.write(f"  Frames with face failures: {len(stats['failed_frames']['face_failures'])}\n")
        log.write(f"  Frames with all parts missing: {len(stats['failed_frames']['all_failures'])}\n\n")
        
        log.write("PERFORMANCE:\n")
        log.write(f"  Average processing time per frame: {stats['processing_performance']['average_processing_time_ms']:.2f} ms\n")
        log.write(f"  Total processing time: {stats['processing_performance']['total_processing_time_seconds']:.2f} seconds\n")
    
    print(f"\n===== PROCESSING SUMMARY =====")
    print(f"Processed {processed_count} frames")
    print(f"Detection rates: Dom hand: {stats['detection_rates']['dominant_hand']['detection_rate']:.1f}%, " +
          f"Non-dom hand: {stats['detection_rates']['non_dominant_hand']['detection_rate']:.1f}%, " +
          f"Face: {stats['detection_rates']['face']['detection_rate']:.1f}%")
    print(f"All parts detected in {stats['detection_rates']['overall']['success_rate']:.1f}% of frames")
    print(f"Full statistics saved to: {stats_file}")
    print(f"Results saved to: {output_dir}")
    
    return str(output_dir)




In [143]:
def load_frame_data(npz_path):
    """
    Load saved frame data from an NPZ file.
    
    Args:
        npz_path (str): Path to the saved .npz file
        
    Returns:
        tuple: All the detection results for the frame
    """
    data = np.load(npz_path)
    
    # Extract all arrays from the npz file
    dom_landmarks = data['dom_landmarks']
    non_dom_landmarks = data['non_dom_landmarks']
    confidence_scores = data['confidence_scores']
    interpolation_scores = data['interpolation_scores']
    detection_status = data['detection_status']
    blendshape_scores = data['blendshape_scores']
    face_detected = data['face_detected'].item()  # Convert 0-d array to scalar
    nose_to_wrist_dist = data['nose_to_wrist_dist']
    frame_idx = data['frame_idx'].item()
    timestamp_ms = data['timestamp_ms'].item()
    
    return (dom_landmarks, non_dom_landmarks, confidence_scores, interpolation_scores,
            detection_status, blendshape_scores, face_detected, 
            nose_to_wrist_dist, frame_idx, timestamp_ms)


In [144]:
process_video(video_path=video_path, adaptive_detect_func=adaptive_detect, hand_model_path=hand_model_path, face_model_path=face_model_path,
                 min_hand_detection_confidence=0.5, min_hand_presence_confidence=0.5,
                 min_face_detection_confidence=0.5, min_face_presence_confidence=0.5,
                 num_hands=2, output_face_blendshapes=True,
                 max_attempts=3, threshold_reduction_factor=0.7, min_threshold=0.2, 
                 frame_step=1, start_time_seconds=30.2, end_time_seconds=60.4,
                 save_failure_screenshots=True)

Detected dominant hand from filename: Right
Video: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right
Total frames: 65
FPS: 1.0
Duration: 65.00 seconds
Processing frames 30 to 60 (time 30.20s to 60.40s)
Output directory: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks
Processing frame 30/65 (timestamp: 00m30s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: False
New best detection: dominant hand=1, non-dominant hand=0, face=1

--- Attempt 2/3 ---
Lowering non-dominant hand thresholds: 0.350, 0.350


I0000 00:00:1742825052.746069    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825052.748711  285922 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825052.905120  285926 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825052.920770  285929 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825052.974667    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825052.977903  285963 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.974)
Non-dominant hand detected: True (confidence: 0.996)
Face detected: True
Total detection attempts: 2
Processing frame 31/65 (timestamp: 00m31s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825053.155234    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825053.157598  285995 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825053.158565    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825053.167968  285998 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825053.195131  286002 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825053.260700    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825053.263

New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.980)
Non-dominant hand detected: True (confidence: 0.998)
Face detected: True
Total detection attempts: 1
Processing frame 32/65 (timestamp: 00m32s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.966)
Non-dominant hand detected: True (confidence: 0.970)
Face detected: True
Total detection attempts: 1


I0000 00:00:1742825053.457154    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825053.461117  286043 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825053.481965  286045 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825053.502345  286047 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825053.566839    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825053.568952  286059 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Processing frame 33/65 (timestamp: 00m33s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=0

--- Attempt 2/3 ---
Lowering face thresholds: 0.350, 0.350


I0000 00:00:1742825053.662359    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825053.665386  286075 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825053.687249  286079 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825053.720949  286078 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825053.787464    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825053.790880  286091 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Right) detected: True
Non-dominant hand detected: True
No improvement in detection. Continuing to next attempt.

--- Attempt 3/3 ---
Lowering face thresholds: 0.245, 0.245


I0000 00:00:1742825054.057932    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825054.065068  286123 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825054.066133    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825054.074088  286126 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825054.094653  286125 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825054.120179    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825054.122

Dominant hand (Right) detected: True
Non-dominant hand detected: True
No improvement in detection. Continuing to next attempt.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.997)
Non-dominant hand detected: True (confidence: 0.997)
Face detected: False
Total detection attempts: 3
Saved failure screenshot: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000033_00m33s000ms_missing_Face.jpg
Processing frame 34/65 (timestamp: 00m34s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5


I0000 00:00:1742825054.333483    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825054.337156  286155 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825054.337746    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825054.344548  286158 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825054.366307  286162 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825054.437329    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825054.442

Dominant hand (Right) detected: False
Non-dominant hand detected: True
New best detection: dominant hand=0, non-dominant hand=1, face=1

--- Attempt 2/3 ---
Lowering dominant hand thresholds: 0.350, 0.350
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825054.973827    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825054.978387  286240 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825054.978935    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825054.984255  286250 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825055.003434  286242 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825055.066137    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5


New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.949)
Non-dominant hand detected: True (confidence: 0.993)
Face detected: True
Total detection attempts: 2
Processing frame 35/65 (timestamp: 00m35s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5


I0000 00:00:1742825055.069305  286258 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825055.163885  286264 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825055.182700  286265 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825055.299712    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825055.302991  286274 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W00

Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.905)
Non-dominant hand detected: True (confidence: 0.993)
Face detected: True
Total detection attempts: 1
Processing frame 36/65 (timestamp: 00m36s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: False
New best detection: dominant hand=1, non-dominant hand=0, face=1

--- Attempt 2/3 ---
Lowering non-dominant hand thresholds: 0.350, 0.350
Dominant hand (Right) detected: True
Non-dominant hand detected: False


I0000 00:00:1742825055.524229    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825055.528899  286306 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825055.529512    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825055.535332  286309 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825055.557954  286307 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825055.595251    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825055.598

No improvement in detection. Continuing to next attempt.

--- Attempt 3/3 ---
Lowering non-dominant hand thresholds: 0.245, 0.245
Dominant hand (Right) detected: True
Non-dominant hand detected: False
No improvement in detection. Continuing to next attempt.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.968)
Non-dominant hand detected: False (confidence: 0.000)
Face detected: True
Total detection attempts: 3
Saved failure screenshot: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000036_00m36s000ms_missing_NonDomHand.jpg


I0000 00:00:1742825055.769985    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825055.773674  286354 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825055.796141  286356 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825055.808954  286361 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825055.913043    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825055.916314  286370 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Processing frame 37/65 (timestamp: 00m37s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=0

--- Attempt 2/3 ---
Lowering face thresholds: 0.350, 0.350


I0000 00:00:1742825056.013985    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825056.017461  286386 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825056.054586  286393 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825056.071209  286388 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825056.147362    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825056.150670  286402 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.949)
Non-dominant hand detected: True (confidence: 0.998)
Face detected: True
Total detection attempts: 2
Processing frame 38/65 (timestamp: 00m38s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825056.342296    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825056.344839  286434 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825056.345421    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825056.349651  286438 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825056.375765  286443 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825056.429841    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825056.432

New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.953)
Non-dominant hand detected: True (confidence: 0.997)
Face detected: True
Total detection attempts: 1
Processing frame 39/65 (timestamp: 00m39s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5


I0000 00:00:1742825056.630942    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825056.634580  286482 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825056.869609  286485 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825056.886141  286484 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825056.966339    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825056.969617  286498 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Right) detected: True
Non-dominant hand detected: False
New best detection: dominant hand=1, non-dominant hand=0, face=1

--- Attempt 2/3 ---
Lowering non-dominant hand thresholds: 0.350, 0.350
Dominant hand (Right) detected: True
Non-dominant hand detected: False


I0000 00:00:1742825057.154297    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825057.156455  286558 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825057.157239    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825057.162417  286561 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825057.180986  286559 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825057.214391    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825057.217

No improvement in detection. Continuing to next attempt.

--- Attempt 3/3 ---
Lowering non-dominant hand thresholds: 0.245, 0.245
Dominant hand (Right) detected: True
Non-dominant hand detected: False
No improvement in detection. Continuing to next attempt.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.993)
Non-dominant hand detected: False (confidence: 0.000)
Face detected: True
Total detection attempts: 3
Saved failure screenshot: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000039_00m39s000ms_missing_NonDomHand.jpg
Processing frame 40/65 (timestamp: 00m40s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5


I0000 00:00:1742825057.403543    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825057.407054  286606 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825057.431896  286613 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825057.447929  286607 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825057.538476    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825057.542056  286622 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Right) detected: True
Non-dominant hand detected: False
New best detection: dominant hand=1, non-dominant hand=0, face=1

--- Attempt 2/3 ---
Lowering non-dominant hand thresholds: 0.350, 0.350
Dominant hand (Right) detected: True
Non-dominant hand detected: False


I0000 00:00:1742825057.725913    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825057.728471  286654 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825057.729349    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825057.734006  286657 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825057.749987  286655 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825057.781068    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825057.783

No improvement in detection. Continuing to next attempt.

--- Attempt 3/3 ---
Lowering non-dominant hand thresholds: 0.245, 0.245
Dominant hand (Right) detected: True
Non-dominant hand detected: False
No improvement in detection. Continuing to next attempt.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.928)
Non-dominant hand detected: False (confidence: 0.000)
Face detected: True
Total detection attempts: 3
Saved failure screenshot: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000040_00m40s000ms_missing_NonDomHand.jpg
Processing frame 41/65 (timestamp: 00m41s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5


I0000 00:00:1742825057.955870    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825057.958690  286688 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825057.959331    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825057.964042  286689 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825057.979228  286691 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825058.036143    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825058.039

Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.992)
Non-dominant hand detected: True (confidence: 0.997)
Face detected: True
Total detection attempts: 1
Processing frame 42/65 (timestamp: 00m42s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825058.246934    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825058.250282  286736 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825058.274573  286738 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825058.287531  286743 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825058.363081    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825058.366434  286752 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.991)
Non-dominant hand detected: True (confidence: 0.990)
Face detected: True
Total detection attempts: 1
Processing frame 43/65 (timestamp: 00m43s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825058.577789    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825058.579847  286784 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825058.580459    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825058.585595  286788 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825058.602220  286791 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.990)
Non-dominant hand detected: True (confidence: 0.992)
Face detected: True
Total detection attempts: 1
Processing frame 44/65 (timestamp: 00m44s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5


I0000 00:00:1742825058.653962    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825058.658841  286800 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825058.688171  286808 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825058.845580  286805 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825058.929897    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825058.933733  286816 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.988)
Non-dominant hand detected: True (confidence: 0.988)
Face detected: True
Total detection attempts: 1
Processing frame 45/65 (timestamp: 00m45s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825059.023598  286832 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825059.044596  286834 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825059.064032  286835 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825059.153523    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825059.157401  286858 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W00

New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.992)
Non-dominant hand detected: True (confidence: 0.990)
Face detected: True
Total detection attempts: 1
Processing frame 46/65 (timestamp: 00m46s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.988)
Non-dominant hand detected: True (confidence: 0.985)
Face detected: True
Total detection attempts: 1


I0000 00:00:1742825059.339292    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825059.342449  286907 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825059.343147    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825059.348777  286911 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825059.364177  286919 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825059.422118    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825059.426

Processing frame 47/65 (timestamp: 00m47s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=0

--- Attempt 2/3 ---
Lowering face thresholds: 0.350, 0.350


I0000 00:00:1742825059.587978    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825059.593389  286939 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825059.594409    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825059.600327  286941 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825059.639542  286946 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825059.667441    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825059.671

Dominant hand (Right) detected: True
Non-dominant hand detected: True
No improvement in detection. Continuing to next attempt.

--- Attempt 3/3 ---
Lowering face thresholds: 0.245, 0.245
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825059.863260    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825059.866336  286987 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825059.886584  286995 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825059.902265  286999 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825059.986809    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825059.990534  287003 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

No improvement in detection. Continuing to next attempt.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.978)
Non-dominant hand detected: True (confidence: 0.975)
Face detected: False
Total detection attempts: 3
Saved failure screenshot: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000047_00m47s000ms_missing_Face.jpg
Processing frame 48/65 (timestamp: 00m48s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825060.208631    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825060.211819  287035 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825060.212313    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825060.217318  287039 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825060.235988  287041 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825060.287768    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825060.290

New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.966)
Non-dominant hand detected: True (confidence: 0.993)
Face detected: True
Total detection attempts: 1
Processing frame 49/65 (timestamp: 00m49s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825060.417789    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825060.421599  287067 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825060.422657    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825060.430162  287071 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825060.445569  287069 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825060.496085    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825060.499

New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.970)
Non-dominant hand detected: True (confidence: 0.994)
Face detected: True
Total detection attempts: 1
Processing frame 50/65 (timestamp: 00m50s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.962)
Non-dominant hand detected: True (confidence: 0.996)
Face detected: True
Total detection attempts: 1
Processing frame 51/65 (timestamp: 00m51s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: False
New best detection: dominant hand=1, non-dominant hand=0, fac

I0000 00:00:1742825060.692582    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825060.695545  287115 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825060.720297  287118 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825060.739717  287117 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825060.832222    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825060.836061  287131 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Right) detected: True
Non-dominant hand detected: False
No improvement in detection. Continuing to next attempt.

--- Attempt 3/3 ---
Lowering non-dominant hand thresholds: 0.245, 0.245
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825061.049395    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825061.051395  287163 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825061.051904    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825061.061456  287164 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825061.078009  287170 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825061.114608    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825061.118

New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.988)
Non-dominant hand detected: True (confidence: 0.556)
Face detected: True
Total detection attempts: 3
Processing frame 52/65 (timestamp: 00m52s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: False
New best detection: dominant hand=1, non-dominant hand=0, face=1

--- Attempt 2/3 ---
Lowering non-dominant hand thresholds: 0.350, 0.350


I0000 00:00:1742825061.351060    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825061.354418  287238 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825061.378016  287243 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825061.393709  287248 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825061.441881    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825061.444695  287254 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.909)
Non-dominant hand detected: True (confidence: 0.983)
Face detected: True
Total detection attempts: 2
Processing frame 53/65 (timestamp: 00m53s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825061.632487    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825061.634836  287286 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825061.635320    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825061.640775  287289 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825061.665220  287287 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825061.720292    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825061.723

New best detection: dominant hand=1, non-dominant hand=1, face=0

--- Attempt 2/3 ---
Lowering face thresholds: 0.350, 0.350
Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.980)
Non-dominant hand detected: True (confidence: 0.992)
Face detected: True
Total detection attempts: 2
Processing frame 54/65 (timestamp: 00m54s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5


I0000 00:00:1742825062.084162    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825062.087679  287350 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825062.088192    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825062.095172  287352 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825062.111596  287356 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825062.163710    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825062.166

Dominant hand (Right) detected: True
Non-dominant hand detected: False
New best detection: dominant hand=1, non-dominant hand=0, face=0

--- Attempt 2/3 ---
Lowering non-dominant hand thresholds: 0.350, 0.350
Lowering face thresholds: 0.350, 0.350
Dominant hand (Right) detected: True
Non-dominant hand detected: False


I0000 00:00:1742825062.361683    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825062.364260  287398 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825062.394221  287400 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825062.411820  287404 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825062.481787    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825062.483850  287414 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

No improvement in detection. Continuing to next attempt.

--- Attempt 3/3 ---
Lowering non-dominant hand thresholds: 0.245, 0.245
Lowering face thresholds: 0.245, 0.245
Dominant hand (Right) detected: True
Non-dominant hand detected: False


I0000 00:00:1742825062.581055    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825062.585991  287430 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825062.629586  287438 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825062.650439  287434 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825062.734926    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825062.739518  287446 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

No improvement in detection. Continuing to next attempt.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.967)
Non-dominant hand detected: False (confidence: 0.000)
Face detected: False
Total detection attempts: 3
Saved failure screenshot: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000054_00m54s000ms_missing_NonDomHand_Face.jpg
Processing frame 55/65 (timestamp: 00m55s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825062.847161    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825062.851309  287462 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825062.879209  287464 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825062.895742  287465 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825062.975950    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825062.978931  287478 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.934)
Non-dominant hand detected: True (confidence: 0.984)
Face detected: True
Total detection attempts: 1
Processing frame 56/65 (timestamp: 00m56s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825063.050091  287494 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825063.073890  287498 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825063.088094  287501 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825063.183553    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825063.185874  287510 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W00

New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.949)
Non-dominant hand detected: True (confidence: 0.983)
Face detected: True
Total detection attempts: 1
Processing frame 57/65 (timestamp: 00m57s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.955)
Non-dominant hand detected: True (confidence: 0.972)
Face detected: True
Total detection attempts: 1


I0000 00:00:1742825063.387805    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825063.392395  287547 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825063.393567    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825063.398016  287548 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825063.417266  287549 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825063.460876    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825063.463

Processing frame 58/65 (timestamp: 00m58s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=0

--- Attempt 2/3 ---
Lowering face thresholds: 0.350, 0.350


I0000 00:00:1742825063.599401  287602 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825063.599870    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825063.603851  287605 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825063.620698  287604 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825063.655245    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825063.660258  287618 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76a

Dominant hand (Right) detected: True
Non-dominant hand detected: True
No improvement in detection. Continuing to next attempt.

--- Attempt 3/3 ---
Lowering face thresholds: 0.245, 0.245
Dominant hand (Right) detected: True
Non-dominant hand detected: True


I0000 00:00:1742825063.848924    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825063.852334  287650 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825063.874700  287654 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825063.890546  287652 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825063.985541    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825063.987831  287666 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

No improvement in detection. Continuing to next attempt.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.947)
Non-dominant hand detected: True (confidence: 0.712)
Face detected: False
Total detection attempts: 3
Saved failure screenshot: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000058_00m58s000ms_missing_Face.jpg
Processing frame 59/65 (timestamp: 00m59s000ms)

--- Attempt 1/3 ---
Using original thresholds: hands=0.5, face=0.5
Dominant hand (Right) detected: False
Non-dominant hand detected: True
New best detection: dominant hand=0, non-dominant hand=1, face=1

--- Attempt 2/3 ---
Lowering dominant hand thresholds: 0.350, 0.350


I0000 00:00:1742825064.067506    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825064.070320  287682 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825064.097727  287684 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825064.113356  287688 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1742825064.178247    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825064.180623  287698 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy

Dominant hand (Right) detected: True
Non-dominant hand detected: True
New best detection: dominant hand=1, non-dominant hand=1, face=1
All body parts detected. Stopping early.

=== Detection Summary ===
Dominant hand detected: True (confidence: 0.979)
Non-dominant hand detected: True (confidence: 0.992)
Face detected: True
Total detection attempts: 2

===== PROCESSING SUMMARY =====
Processed 30 frames
Detection rates: Dom hand: 100.0%, Non-dom hand: 86.7%, Face: 86.7%
All parts detected in 0.0% of frames
Full statistics saved to: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks/detection_statistics.json
Results saved to: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks


I0000 00:00:1742825064.376159    4103 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742825064.378534  287730 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.3~git2407250600.76ae27~oibaf~j (git-76ae27e 2024-07-25 jammy-oibaf-ppa)), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 15.0.7, DRM 3.42, 5.15.0-131-generic)
W0000 00:00:1742825064.379035    4103 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1742825064.383217  287731 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742825064.400715  287739 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


'youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks'

In [145]:
lol = load_frame_data("youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks/youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000036_00m36s000ms.npz")

In [146]:
lol

(array([[ 3.78225894e-01, -1.12051611e+00,  5.50700526e-04],
        [ 9.23391210e-01, -1.69236467e+00,  2.34009814e-03],
        [ 1.46009249e+00, -1.71132881e+00,  4.30999184e-03],
        [ 1.74048902e+00, -1.47164944e+00,  7.10542547e-03],
        [ 1.49123479e+00, -8.30840850e-01,  2.45598587e-03],
        [ 2.06346240e+00, -1.09711661e+00,  6.16572937e-03],
        [ 2.29248242e+00, -1.23134490e+00,  1.00170448e-02],
        [ 2.45311100e+00, -1.34790413e+00,  1.21544152e-02],
        [ 1.50366696e+00, -9.97311730e-02,  2.38017133e-03],
        [ 2.11294367e+00, -5.01373676e-01,  5.33142872e-03],
        [ 2.35172676e+00, -7.40329178e-01,  7.30238575e-03],
        [ 2.48626698e+00, -9.21343310e-01,  8.04091245e-03],
        [ 1.42495309e+00,  4.93358436e-01,  2.41103559e-03],
        [ 2.01319802e+00,  1.53102934e-01,  3.15418793e-03],
        [ 2.29482777e+00, -9.26280119e-02,  3.09186196e-03],
        [ 2.46820044e+00, -3.51395239e-01,  2.83384765e-03],
        [ 1.27861771e+00

In [147]:
def find_interpolation_frames(x, nums_list):
    """
    Returns integers in the range [x-5, x+5] that are not equal to x
    and are not in nums_list.
    
    Args:
        x (int): The reference integer
        nums_list (list): A list of integers
        
    Returns:
        list: Integers in [x-5, x+5] excluding x and elements in nums_list
    """
    # Create the set of all integers in the range [x-5, x+5]
    all_range = set(range(x-5, x+6))  # +6 because range is exclusive at upper bound
    
    # Remove x itself
    all_range.discard(x)
    
    # Remove numbers that are in the input list
    result = all_range - set(nums_list)
    
    # Convert back to a list and return
    return sorted(list(result))

In [148]:
import glob


def find_file_with_partial_name(partial_name, search_dir='.', recursive=False):
    """
    Find files that start with the given partial name.
    
    Args:
        partial_name (str): Partial file name to match
        search_dir (str): Directory to search in (default: current directory)
        recursive (bool): Whether to search in subdirectories
        
    Returns:
        list: Complete paths of all matching files
    """
    # Create a search pattern for files starting with the partial name
    search_pattern = os.path.join(search_dir, f"{partial_name}*")
    
    # Use recursive glob if requested
    if recursive:
        matches = []
        for root, _, _ in os.walk(search_dir):
            matches.extend(glob.glob(os.path.join(root, f"{os.path.basename(partial_name)}*")))
        return matches
    else:
        return glob.glob(search_pattern)

In [149]:
def has_numbers_on_both_sides(x, missing_numbers):
    """
    Checks if the list of missing numbers has at least one number smaller than x
    AND at least one number larger than x.
    
    Args:
        x (int): The reference integer
        missing_numbers (list): Output from find_missing_numbers(x, nums_list)
        
    Returns:
        bool: False if all numbers are either all smaller or all larger than x.
              True if there's at least one smaller and one larger number.
    """
    has_smaller = False
    has_larger = False
    
    for num in missing_numbers:
        if num < x:
            has_smaller = True
        elif num > x:
            has_larger = True
            
        # Early exit if we found both smaller and larger numbers
        if has_smaller and has_larger:
            return True
    
    # If we get here, we didn't find both smaller and larger numbers
    return False

In [150]:


def modify_npz_file(file_path, modifications):
    """
    Load a .npz file, modify specific arrays, and save it back.
    
    Args:
        file_path (str): Path to the .npz file
        modifications (dict): Dictionary with keys as array names and values as new arrays
                             or functions that take the original array and return a modified version
    
    Example:
        modify_npz_file('data.npz', {
            'array1': np.array([1, 2, 3]),  # Replace completely
            'array2': lambda arr: arr * 2    # Modify using a function
        })
    """
    # Load the npz file
    with np.load(file_path) as data:
        # Create a copy of all arrays
        arrays = {name: data[name] for name in data.files}
    
    # Apply modifications
    for name, modification in modifications.items():
        if name in arrays:
            if callable(modification):
                # If the modification is a function, apply it to the original array
                arrays[name] = modification(arrays[name])
            else:
                # Otherwise, replace the array
                arrays[name] = modification
        else:
            print(f"Warning: Array '{name}' not found in the original file")
    
    # Save back to the file with same format
    np.savez(file_path, **arrays)
    
    print(f"Successfully modified and saved {len(modifications)} arrays in {file_path}")

In [None]:
def interpolate_undetected_hand_landmarks(directory_path):  
    """
    Interpolate landmarks for frames where hand detection failed.
    """
    print(f"Starting interpolation for directory: {directory_path}")
    
    # Load detection statistics JSON
    with open(os.path.join(directory_path, 'detection_statistics.json')) as f:
        data = json.load(f)
    
    first_frame_number = round(data['video_info']['fps'] * data['video_info']['start_time'])
    final_frame_number = round(data['video_info']['fps'] * data['video_info']['end_time'])
    
    print(f"Processing frames range: {first_frame_number} to {final_frame_number}")
    
    # Maximum possible sum of weights for normalization (when all 10 frames are available)
    MAX_WEIGHT_SUM = 2.92722222
    
    # Process non-dominant hand failures
    print("Processing non-dominant hand failures...")
    missing_non_dominant_frame_list = [frame['frame'] for frame in data['failed_frames']['non_dominant_hand_failures']]
    
    non_dom_interpolated_count = 0
    
    for missing_frame in data['failed_frames']['non_dominant_hand_failures']:
        frame_number = missing_frame['frame']
        filepath = missing_frame['file']
        
        # Only interpolate frames not at the edges of the video
        if (frame_number - 5) <= first_frame_number or (frame_number + 5) >= final_frame_number:
            print(f"Skipping frame {frame_number} - too close to video boundary")
            continue
        
        # Find frames with valid detections for interpolation
        interpolation_frames = find_interpolation_frames(frame_number, missing_non_dominant_frame_list)
        
        if not interpolation_frames:
            print(f"No valid frames found for interpolating frame {frame_number}")
            continue
        
        # Calculate interpolated landmarks
        interpolation_weights_sum = 0
        interpolated_coordinates = np.zeros(shape=(20, 3))
        interpolated_wrist_to_nost = np.zeros(shape=(1, 2))
        
        for interp_frame in interpolation_frames:
            weight = 1 / ((frame_number - interp_frame) ** 2)
            interpolation_weights_sum += weight
            
            # Find and load the reference frame
            interp_partial_filename = data['video_info']['name'] + f"_frame{interp_frame:06d}"
            try:
                interp_files = find_file_with_partial_name(
                    interp_partial_filename, 
                    search_dir=directory_path, 
                    recursive=False
                )
                
                if not interp_files:
                    print(f"Warning: Could not find file for frame {interp_frame}")
                    continue
                    
                interp_filepath = interp_files[0]
                
                # Load the frame data - index 1 for non-dominant hand landmarks
                frame_data = load_frame_data(interp_filepath)
                non_dom_landmarks = frame_data[1]  # Correct index for non-dominant hand
                
                
                # Add weighted contribution
                interpolated_coordinates += weight * non_dom_landmarks
                
            except Exception as e:
                print(f"Error processing frame {interp_frame}: {e}")
                continue
        
        # Normalize by sum of weights (crucial step!)
        if interpolation_weights_sum > 0:
            interpolated_coordinates /= interpolation_weights_sum
            
            # Calculate confidence based on weights and frame distribution
            has_frames_on_both_sides = has_numbers_on_both_sides(frame_number, interpolation_frames)
            
            if has_frames_on_both_sides:
                interpolation_confidence = interpolation_weights_sum / MAX_WEIGHT_SUM
            else:
                interpolation_confidence = (interpolation_weights_sum / MAX_WEIGHT_SUM) * 0.8
                
            print(f"Frame {frame_number}: Interpolated with confidence {interpolation_confidence:.2f}")
            
            # Update the file with interpolated data
            def update_interp_scores(arr):
                new_arr = arr.copy()
                new_arr[1] = interpolation_confidence  # Index 1 for non-dominant hand
                return new_arr
                
            modifications = {
                'non_dom_landmarks': interpolated_coordinates,
                'interpolation_scores': update_interp_scores
            }
            
            modify_npz_file(
                file_path=os.path.join(directory_path, filepath),
                modifications=modifications
            )
            
            non_dom_interpolated_count += 1
    
    # Process dominant hand failures
    print(f"Interpolated {non_dom_interpolated_count} non-dominant hand frames")
    print("Processing dominant hand failures...")
    
    missing_dominant_frame_list = [frame['frame'] for frame in data['failed_frames']['dominant_hand_failures']]
    
    dom_interpolated_count = 0
    
    for missing_frame in data['failed_frames']['dominant_hand_failures']:
        frame_number = missing_frame['frame']
        filepath = missing_frame['file']
        
        # Only interpolate frames not at the edges of the video
        if (frame_number - 5) <= first_frame_number or (frame_number + 5) >= final_frame_number:
            continue
        
        # Find frames with valid detections for interpolation
        interpolation_frames = find_interpolation_frames(frame_number, missing_dominant_frame_list)
        
        if not interpolation_frames:
            continue
        
        # Calculate interpolated landmarks
        interpolation_weights_sum = 0
        interpolated_coordinates = np.zeros(shape=(20, 3))
        
        for interp_frame in interpolation_frames:
            weight = 1 / ((frame_number - interp_frame) ** 2)
            interpolation_weights_sum += weight
            
            # Find and load the reference frame
            interp_partial_filename = data['video_info']['name'] + f"_frame{interp_frame:06d}"
            try:
                interp_files = find_file_with_partial_name(
                    interp_partial_filename, 
                    search_dir=directory_path, 
                    recursive=False
                )
                
                if not interp_files:
                    continue
                    
                interp_filepath = interp_files[0]
                
                # Load the frame data - index 0 for dominant hand landmarks
                frame_data = load_frame_data(interp_filepath)
                dom_landmarks = frame_data[0]  # Correct index for dominant hand
                
                # Add weighted contribution
                interpolated_coordinates += weight * dom_landmarks
                
            except Exception as e:
                print(f"Error processing frame {interp_frame}: {e}")
                continue
        
        # Normalize by sum of weights
        if interpolation_weights_sum > 0:
            interpolated_coordinates /= interpolation_weights_sum
            
            # Calculate confidence based on weights and frame distribution
            has_frames_on_both_sides = has_numbers_on_both_sides(frame_number, interpolation_frames)
            
            if has_frames_on_both_sides:
                interpolation_confidence = interpolation_weights_sum / MAX_WEIGHT_SUM
            else:
                interpolation_confidence = (interpolation_weights_sum / MAX_WEIGHT_SUM) * 0.8
            
            # Update the file with interpolated data
            def update_interp_scores(arr):
                new_arr = arr.copy()
                new_arr[0] = interpolation_confidence  # Index 0 for dominant hand
                return new_arr
                
            modifications = {
                'dom_landmarks': interpolated_coordinates,
                'interpolation_scores': update_interp_scores
            }
            
            modify_npz_file(
                file_path=os.path.join(directory_path, filepath),
                modifications=modifications
            )
            
            dom_interpolated_count += 1
    
    print(f"Interpolated {dom_interpolated_count} dominant hand frames")
    print(f"Total interpolated: {non_dom_interpolated_count + dom_interpolated_count} frames")
    
    return non_dom_interpolated_count + dom_interpolated_count

In [152]:
interpolate_undetected_hand_landmarks(directory_path="youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks")

Starting interpolation for directory: youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks
Processing frames range: 30 to 60
Processing non-dominant hand failures...
Frame 36: Interpolated with confidence 0.94
Successfully modified and saved 2 arrays in youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks/youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000036_00m36s000ms.npz
Frame 39: Interpolated with confidence 0.62
Successfully modified and saved 2 arrays in youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks/youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000039_00m39s000ms.npz
Frame 40: Interpolated with confidence 0.64
Successfully modified and saved 2 arrays in youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks/youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000040_00m40s000ms.npz
Frame 54: Interpolated with confidence 1.00
Successfully modified and saved 2 arrays in youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_R

4

In [156]:
lol = load_frame_data("youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_landmarks/youtube_DNViaspA8hM_1920x1080_h264_fps10_fps1_Right_frame000036_00m36s000ms.npz")

dom_landmarks, non_dom_landmarks, confidence_scores, interpolation_scores, detection_status, blendshape_scores, face_detected, nose_to_wrist_dist