python UI for make dataset

In [1]:
import tkinter as tk
from tkinter import ttk
import cv2
import numpy as np
from PIL import Image, ImageTk
import pyautogui
from ultralytics import YOLO
from huggingface_hub import hf_hub_download
import random
import os
from datetime import datetime
from gaze_tracking import GazeTracking
import csv
from cvzone.FaceMeshModule import FaceMeshDetector
import screeninfo


class YoloDistanceApp:
    def __init__(self, root):
        self.root = root
        self.root.title("YOLO Face Detection with Distance Measurement")

        # Get screen resolution before initializing other components
        screen_size = pyautogui.size()
        self.screen_width = screen_size[0]
        self.screen_height = screen_size[1]

        # Initialize trackers and detectors
        self.gaze = GazeTracking()
        self.fm = FaceMeshDetector(maxFaces=1)

        # Define eye landmark points
        self.rightEye = {
            'upper': [463, 414, 286, 258, 257, 259, 260],
            'lower': [467, 359, 255, 339, 254, 253, 252, 256, 341]
        }

        self.leftEye = {
            'upper': [130, 247, 30, 29, 27, 28, 56],
            'lower': [190, 243, 112, 26, 22, 23, 24, 110, 25]
        }

        self.output_dir = "output_from_capture"
        os.makedirs(self.output_dir, exist_ok=True)
        self.file_counter = 1
        self.update_next_available_number()

        # Initialize YOLO model
        model_path = hf_hub_download(
            repo_id="arnabdhar/YOLOv8-Face-Detection",
            filename="model.pt"
        )
        self.model_face = YOLO(model_path)
        self.model_face.conf = 0.25
        self.model_face.iou = 0.45

        # Initialize parameters
        self.current_parameters = {
            'left_eye_position': None,
            'right_eye_position': None,
            'gaze_direction': None,
            'distance': None,
            'posture': None,
            'dot_position_x': None,
            'dot_position_y': None,
            'webcam_resolution_width': None,
            'webcam_resolution_height': None,
            'screen_resolution_width': self.screen_width,
            'screen_resolution_height': self.screen_height,
            'preview_width': None,
            'preview_height': None,
            'canvas_width': None,
            'canvas_height': None,
            'face_size_width': None,
            'face_size_hight': None,
            'face_min_position': None,
            'face_max_position': None,
            'face_center_position': None,
            'right_eye_bbox': None,
            'left_eye_bbox': None
        }

        self.original_canvas_width = 1728
        self.original_canvas_height = 1117
        self.switch_parameterOnScreen = False
        self.position_left_label = None
        self.position_right_label = None
        self.gaze_label = None
        self.canvas_normal_state = True
        self.controls_visible = True
        self.focal_length = 1000
        self.FACE_WIDTH_CM = 14
        self.preview_active = False
        self.webcam_width = 640
        self.webcam_height = 360
        self.current_dot = None
        self.capture_counter = 0

        self.setup_gui()

        # Initialize webcam
        self.cap = cv2.VideoCapture(1)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

        # Start preview update
        self.update_preview()

    def setup_gui(self):
        # Top bar with dark background for better visibility
        self.top_bar = tk.Frame(self.root, bg='#2C3E50')
        self.top_bar.pack(fill='x')

        # Controls frame
        self.controls_frame = tk.Frame(self.top_bar, bg='#2C3E50')
        self.controls_frame.pack(fill='x', padx=5, pady=5)

        # Create a single hideable container for all elements
        self.all_hideable_content = tk.Frame(self.controls_frame, bg='#2C3E50')
        self.all_hideable_content.pack(side=tk.LEFT, fill='x', expand=True)

        # Buttons section with input
        self.buttons_frame = tk.Frame(self.all_hideable_content, bg='#2C3E50')
        self.buttons_frame.pack(fill='x', padx=5)

        # Random times input
        self.random_times_frame = tk.Frame(self.buttons_frame, bg='#2C3E50')
        self.random_times_frame.pack(side=tk.LEFT, padx=5)

        self.random_times_label = tk.Label(
            self.random_times_frame,
            text="Times:",
            font=('Arial', 12),
            bg='#2C3E50',
            fg='white'
        )
        self.random_times_label.pack(side=tk.LEFT, padx=2)

        self.random_times_entry = ttk.Entry(
            self.random_times_frame,
            width=5
        )
        self.random_times_entry.insert(0, "1")
        self.random_times_entry.pack(side=tk.LEFT, padx=2)

        # Delay input
        self.delay_frame = tk.Frame(self.buttons_frame, bg='#2C3E50')
        self.delay_frame.pack(side=tk.LEFT, padx=5)

        self.delay_label = tk.Label(
            self.delay_frame,
            text="Delay(s):",
            font=('Arial', 12),
            bg='#2C3E50',
            fg='white'
        )
        self.delay_label.pack(side=tk.LEFT, padx=2)

        self.delay_entry = ttk.Entry(
            self.delay_frame,
            width=5
        )
        self.delay_entry.insert(0, "3")
        self.delay_entry.pack(side=tk.LEFT, padx=2)

        # Screen selection
        self.selected_screen = tk.StringVar()
        self.screens = screeninfo.get_monitors()

        self.screen_selection_frame = tk.Frame(
            self.buttons_frame, bg='#2C3E50')
        self.screen_selection_frame.pack(side=tk.LEFT, padx=5)

        self.screen_label = tk.Label(
            self.screen_selection_frame,
            text="Screen:",
            font=('Arial', 12),
            bg='#2C3E50',
            fg='white'
        )
        self.screen_label.pack(side=tk.LEFT, padx=2)

        self.screen_dropdown = ttk.Combobox(
            self.screen_selection_frame,
            textvariable=self.selected_screen,
            width=15
        )
        self.screen_dropdown['values'] = [f"Screen {i+1}: {s.width}x{s.height}"
                                          for i, s in enumerate(self.screens)]
        if self.screen_dropdown['values']:
            self.screen_dropdown.set(self.screen_dropdown['values'][0])
        self.screen_dropdown.pack(side=tk.LEFT, padx=2)

        # Action Buttons
        self.set_random_button = ttk.Button(
            self.buttons_frame,
            text="Set Random",
            command=self.set_random_parameters
        )
        self.set_random_button.pack(side=tk.LEFT, padx=5)

        self.random_button = ttk.Button(
            self.buttons_frame,
            text="Random Dot",
            command=self.draw_random_dot_and_capture
        )
        self.random_button.pack(side=tk.LEFT, padx=5)

        self.clear_button = ttk.Button(
            self.buttons_frame,
            text="Clear All",
            command=self.clear_all
        )
        self.clear_button.pack(side=tk.LEFT, padx=5)

        # Show/Hide Preview switch
        self.preview_var = tk.BooleanVar(value=False)
        self.preview_switch = ttk.Checkbutton(
            self.buttons_frame,
            text="Show Preview",
            variable=self.preview_var,
            command=self.toggle_preview
        )
        self.preview_switch.pack(side=tk.LEFT, padx=5)

        # Parameters toggle switch
        self.parameter_var = tk.BooleanVar(value=False)
        self.parameter_switch = ttk.Checkbutton(
            self.buttons_frame,
            text="Show Parameters",
            variable=self.parameter_var,
            command=self.toggle_parameters
        )
        self.parameter_switch.pack(side=tk.LEFT, padx=5)

        # Status section (in the same hideable container)
        self.status_frame = tk.Frame(self.all_hideable_content, bg='#2C3E50')
        self.status_frame.pack(fill='x', padx=5, pady=5)

        # Status Row 1
        self.status_row1 = tk.Frame(self.status_frame, bg='#2C3E50')
        self.status_row1.pack(fill='x', pady=2)

        # self.countdown_label = tk.Label(
        #     self.status_row1,
        #     text="",
        #     font=('Arial', 26, 'bold'),
        #     bg='#2C3E50',
        #     fg='white'
        # )
        # self.countdown_label.pack(side=tk.LEFT, padx=5)

        # Top bar with dark background for better visibility
        self.top_bar = tk.Frame(self.root, bg='#2C3E50')
        self.top_bar.pack(fill='x')

        self.distance_label = tk.Label(
            self.status_row1,
            text="Distance: -- cm",
            font=('Arial', 12),
            bg='#2C3E50',
            fg='white'
        )
        self.distance_label.pack(side=tk.LEFT, padx=5)

        self.posture_label = tk.Label(
            self.status_row1,
            text="Posture: --",
            font=('Arial', 12),
            bg='#2C3E50',
            fg='white'
        )
        self.posture_label.pack(side=tk.LEFT, padx=5)

        self.countdown_label = tk.Label(
            self.status_row1,
            text="",
            font=('Arial', 36, 'bold'),
            bg='#2C3E50',
            fg='white'
        )
        self.countdown_label.pack(side=tk.LEFT, padx=5)

        # Status Row 2
        self.status_row2 = tk.Frame(self.status_frame, bg='#2C3E50')
        self.status_row2.pack(fill='x', pady=2)

        self.position_left_label = tk.Label(
            self.status_row2,
            text="left: (--, --)",
            font=('Arial', 12),
            bg='#2C3E50',
            fg='white'
        )
        self.position_left_label.pack(side=tk.LEFT, padx=5)

        self.position_right_label = tk.Label(
            self.status_row2,
            text="right: (--, --)",
            font=('Arial', 12),
            bg='#2C3E50',
            fg='white'
        )
        self.position_right_label.pack(side=tk.LEFT, padx=5)

        self.gaze_label = tk.Label(
            self.status_row2,
            text="Gaze: --",
            font=('Arial', 12),
            bg='#2C3E50',
            fg='white'
        )
        self.gaze_label.pack(side=tk.LEFT, padx=5)

        self.signal_label = tk.Label(
            self.status_row1,
            text="",
            font=('Arial', 36, 'bold'),
            bg='#2C3E50',
            fg='white'
        )
        self.signal_label.pack(side=tk.RIGHT, padx=5)

        # Toggle controls button (outside hideable container)
        self.toggle_controls_btn = ttk.Button(
            self.controls_frame,
            text="≡",
            width=3,
            command=self.toggle_controls
        )
        self.toggle_controls_btn.pack(side=tk.RIGHT, padx=5)

        # Main container with padding
        self.main_container = tk.Frame(self.root)
        self.main_container.pack(expand=True, fill='both', padx=10, pady=10)

        # Canvas section
        self.canvas_frame = tk.Frame(self.main_container)
        self.canvas_frame.pack(expand=True, fill='both')

        self.canvas = tk.Canvas(
            self.canvas_frame,
            width=self.original_canvas_width,
            height=self.original_canvas_height,
            bg='white'
        )
        self.canvas.pack(expand=True, fill='both')

        # label oin the reg dot
        self.dot_countdown_label = tk.Label(
            self.canvas,
            text="",
            font=('Arial', 32, 'bold'),
            fg='red',
            bg='white'
        )
        # Preview frame
        self.preview_frame = tk.Frame(self.main_container)

        # Preview labels
        self.webcam_label = tk.Label(self.preview_frame)
        self.screen_label = tk.Label(self.preview_frame)

    def toggle_preview(self):
        """Toggle visibility of the preview panel"""
        if self.preview_var.get():
            # Show preview
            # self.switch_parameterOnScreen = True
            self.canvas_normal_state = False
            self.preview_frame.place(
                in_=self.canvas_frame,  # Place relative to canvas frame
                relx=0.5,              # Center horizontally
                rely=0.5,              # Center vertically
                anchor='center'        # Center anchor point
            )
            # self.preview_frame.place(
            #     relx=1.0,  # Right align
            #     rely=0,    # Top align
            #     anchor='ne',  # North-east anchor
            #     y=10,      # Small padding from top
            #     x=-10      # Small padding from right
            # )
            self.webcam_label.pack(side=tk.LEFT, padx=5)
            self.screen_label.pack(side=tk.LEFT, padx=5)
        else:
            # Hide preview
            self.parameter_var.set(False)
            self.switch_parameterOnScreen = False

            self.canvas_normal_state = True
            self.preview_frame.place_forget()
            self.webcam_label.pack_forget()
            self.screen_label.pack_forget()

    def save_capture(self, frame, screen, timestamp):
        cv2.imwrite(
            f"{self.output_dir}/webcam_{timestamp}.jpg",
            cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        )
        screen.save(f"{self.output_dir}/screen_{timestamp}.jpg")
        print(f"Images saved with timestamp: {timestamp}")

    def toggle_controls(self, force_hide=False):
        if self.controls_visible or force_hide:
            self.all_hideable_content.pack_forget()
            self.toggle_controls_btn.configure(text="≡")
            self.controls_visible = False
        else:
            self.all_hideable_content.pack(side=tk.LEFT, fill='x', expand=True)
            self.toggle_controls_btn.configure(text="×")
            self.controls_visible = True

    def get_eye_bbox(self, landmarks, eye_points):
        point_array = np.array([landmarks[idx]
                               for group in eye_points.values() for idx in group])
        x_min = int(np.min(point_array[:, 0])) - 10
        x_max = int(np.max(point_array[:, 0])) + 10
        y_min = int(np.min(point_array[:, 1])) - 10
        y_max = int(np.max(point_array[:, 1])) + 10
        return (x_min, y_min), (x_max, y_max)

    def measure_distance(self, face_width_pixels):
        return (self.FACE_WIDTH_CM * self.focal_length) / face_width_pixels

    def update_y_position(self, y_position):
        y_position += 54
        return y_position

    def process_frame(self, frame):
        # Update gaze tracking
        self.gaze.refresh(frame)

        # YOLO face detection
        results = self.model_face.predict(
            frame,
            conf=0.25,
            iou=0.45,
            max_det=1,
            classes=[0],
            verbose=False
        )

        # Face mesh detection
        _, faces = self.fm.findFaceMesh(frame, draw=0)

        # Get clean frame for non-parameter view
        clean_frame = frame.copy()

        # Process gaze direction
        gaze_text = "Gaze not detected"
        if self.gaze.is_right():
            gaze_text = "Looking right"
        elif self.gaze.is_left():
            gaze_text = "Looking left"
        elif self.gaze.is_center():
            gaze_text = "Looking center"

        # Update GUI labels
        self.gaze_label.config(text=f"Gaze: {gaze_text}")

        # Get and update pupil positions
        left_pupil = self.gaze.pupil_left_coords()
        right_pupil = self.gaze.pupil_right_coords()

        self.position_left_label.config(text=f"left: {left_pupil}")
        self.position_right_label.config(text=f"right: {right_pupil}")

        # Update parameters
        self.current_parameters['gaze_direction'] = gaze_text
        self.current_parameters['left_eye_position'] = str(left_pupil)
        self.current_parameters['right_eye_position'] = str(right_pupil)

        # Process face detection results
        if len(results[0].boxes) > 0:
            box = results[0].boxes[0]
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            face_width = x2 - x1
            face_height = y2 - y1
            face_center_x = (x1 + x2) // 2
            face_center_y = (y1 + y2) // 2

            # Calculate distance without smoothing
            distance = self.measure_distance(face_width)

            # Update distance in parameters and GUI
            self.current_parameters['distance'] = f"{distance}"
            self.distance_label.config(text=f"Distance: {distance:.1f} cm")

            if self.switch_parameterOnScreen:
                frame = self.gaze.annotated_frame()
                y_position = 40

                # YOLO detection confidence
                cv2.putText(frame, f"YOLO Confidence: {float(box.conf[0]):.3f}",
                            (10, y_position),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.4, (0, 255, 0), 3)
                y_position = self.update_y_position(y_position)

                # Face size
                cv2.putText(frame, f"Face Size: {round(face_width, 2)} x {round(face_height, 2)}",
                            (10, y_position),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.4, (255, 255, 255), 3)
                self.current_parameters['face_size_width'] = round(
                    face_width, 2)
                self.current_parameters['face_size_hight'] = round(
                    face_height, 2)
                y_position = self.update_y_position(y_position)

                # Face box position
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                face_min_position = (x1, y1)
                face_max_position = (x2, y2)
                cv2.putText(frame, f"Face -> min: {face_min_position}, max:{face_max_position}", (10, y_position),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.4, (255, 255, 255), 3)
                self.current_parameters['face_min_position'] = face_min_position
                self.current_parameters['face_max_position'] = face_max_position
                y_position = self.update_y_position(y_position)

                # Face center
                position_center_face = (face_center_x, face_center_y)
                cv2.circle(frame, position_center_face,
                           3, (0, 0, 255), -1)
                cv2.putText(frame, f"Face Center: {position_center_face}",
                            (10, y_position),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.4, (255, 255, 255), 3)
                self.current_parameters['face_center_position'] = position_center_face
                y_position = self.update_y_position(y_position)

                # Pupil positions
                if right_pupil:
                    cv2.putText(frame, f"Right Pupil: {right_pupil}",
                                (10, y_position),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.4, (255, 255, 255), 3)
                    y_position = self.update_y_position(y_position)
                if left_pupil:
                    cv2.putText(frame, f"Left Pupil: {left_pupil}",
                                (10, y_position),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.4, (255, 255, 255), 3)
                    y_position = self.update_y_position(y_position)
                # Process eye landmarks if face mesh is detected
                if faces:
                    face_landmarks = faces[0]
                    for eye_name, eye_points in [("Right Eye", self.rightEye), ("Left Eye", self.leftEye)]:
                        (ex1, ey1), (ex2, ey2) = self.get_eye_bbox(
                            face_landmarks, eye_points)
                        cv2.rectangle(frame, (ex1, ey1),
                                      (ex2, ey2), (255, 0, 0), 2)
                        cv2.putText(frame, f"{eye_name}-> min:({ex1},{ey1}), max:({ex2},{ey2})", (10, y_position),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1.4, (255, 255, 255), 3)
                        if eye_name == "Right Eye":
                            self.current_parameters['right_eye_bbox'] = [
                                ex1, ey1, ex2, ey2]
                        else:
                            self.current_parameters['left_eye_bbox'] = [
                                ex1, ey1, ex2, ey2]
                        y_position = self.update_y_position(y_position)

                        # Draw eye landmarks
                        for i in eye_points['upper']:
                            cv2.circle(
                                frame, face_landmarks[i], 2, (0, 255, 255), -1)
                        for i in eye_points['lower']:
                            cv2.circle(
                                frame, face_landmarks[i], 2, (255, 255, 0), -1)

                # Draw distance measurement
                cv2.putText(frame, f"Distance: {distance:.1f} cm",
                            (10, y_position),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.4, (255, 255, 255), 3)
                y_position = self.update_y_position(y_position)

                # Draw gaze direction
                cv2.putText(frame, f"Gaze: {gaze_text}",
                            (10, y_position),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.4, (255, 255, 255), 3)

            # Calculate posture
            aspect_ratio = face_width / face_height
            posture = "Forward"
            if aspect_ratio > 1.1:
                posture = "Turned Left/Right"
            elif aspect_ratio < 0.9:
                if face_center_y < frame.shape[0] // 2:
                    posture = "Looking Up"
                else:
                    posture = "Looking Down"

            self.posture_label.config(text=f"Posture: {posture}")
            self.current_parameters['posture'] = posture

        return frame if self.switch_parameterOnScreen else clean_frame

    def update_preview(self):
        ret, frame = self.cap.read()
        if ret:
            # Update preview dimensions
            self.current_parameters['webcam_resolution_width'] = int(
                frame.shape[1])  # width is at index 1
            self.current_parameters['webcam_resolution_height'] = int(
                frame.shape[0])  # height is at index 0
            self.current_parameters['preview_width'] = self.webcam_width
            self.current_parameters['preview_height'] = self.webcam_height

            frame_processed = self.process_frame(frame.copy())
            frame_resized = cv2.resize(
                frame_processed,
                (self.webcam_width, self.webcam_height)
            )
            frame_rgb = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)
            photo = ImageTk.PhotoImage(image=Image.fromarray(frame_rgb))
            self.webcam_label.configure(image=photo)
            self.webcam_label.image = photo

        self.root.after(10, self.update_preview)

    def set_random_parameters(self):
        """Set random parameters for multiple captures"""
        try:
            num_times = int(self.random_times_entry.get())
            if num_times <= 0:
                print("Please enter a positive number")
                return

            # Disable buttons during capture sequence
            self.set_random_button.config(state=tk.DISABLED)
            self.random_button.config(state=tk.DISABLED)

            # Schedule multiple captures
            self.schedule_multiple_captures(num_times)

        except ValueError:
            print("Please enter a valid number")

    # Remove/delete this entire method:
    def update_delay_label(self, *args):
        value = int(self.delay_scale.get())
        self.delay_value_label.config(text=f"{value}s")

    # Modify schedule_multiple_captures to use the scale value
    def schedule_multiple_captures(self, remaining_times):
        if remaining_times > 0:
            try:
                self.signal_label.config(
                    text=f"{remaining_times} Processing.....")
                # Get delay from entry in milliseconds
                delay_s = float(self.delay_entry.get())
                if delay_s <= 0:
                    print("Please enter a positive delay time")
                    return

                delay_ms = int(delay_s * 1000)

                def capture_with_delay():
                    self.draw_random_dot_and_capture()
                    # Schedule next capture after current one completes
                    self.root.after(delay_ms,
                                    lambda: self.schedule_multiple_captures(remaining_times - 1))

                self.root.after(5000, capture_with_delay)
            except ValueError:
                print("Please enter a valid delay time")
                self.signal_label.config(text=f"Error.....")
                self.set_random_button.config(state=tk.NORMAL)
                self.random_button.config(state=tk.NORMAL)
        else:
            self.signal_label.config(text=f"- Done -")
            self.root.after(1000, self.signal_label.config(text=f""))
            self.set_random_button.config(state=tk.NORMAL)
            self.random_button.config(state=tk.NORMAL)

    def update_dot(self, random_position=False):
        # Clear previous dots
        if hasattr(self, 'dot_id'):
            self.canvas.delete(self.dot_id)
        if hasattr(self, 'current_dot'):
            self.canvas.delete(self.current_dot)

        # Generate new position if random is requested
        if random_position:
            self.dot_x = random.randint(1, self.canvas.winfo_width()-1)
            self.dot_y = random.randint(1, self.canvas.winfo_height()-1)

        # Update parameters
        self.current_parameters['dot_position_x'] = self.dot_x
        self.current_parameters['dot_position_y'] = self.dot_y

        # Draw new dot
        dot_radius = 5
        self.dot_id = self.canvas.create_oval(
            self.dot_x - dot_radius,
            self.dot_y - dot_radius,
            self.dot_x + dot_radius,
            self.dot_y + dot_radius,
            fill='red',
            outline='red'
        )
        self.current_dot = self.dot_id  # Keep both references synchronized

        # Update label position if it exists
        if hasattr(self, 'dot_countdown_label'):
            self.update_dot_label_position()

    def draw_random_dot_and_capture(self):
        # Hide any existing label
        if hasattr(self, 'dot_countdown_label'):
            self.dot_countdown_label.place_forget()
        if self.current_dot:
            self.canvas.delete(self.current_dot)
            self.dot_countdown_label.place_forget()

        self.capture_in_progress = True

        # Draw new random dot
        self.update_dot(random_position=True)

        # Reset UI state
        self.preview_var.set(False)
        self.canvas_normal_state = True
        self.preview_frame.place_forget()
        self.webcam_label.pack_forget()
        self.screen_label.pack_forget()
        self.switch_parameterOnScreen = False
        self.toggle_controls(force_hide=True)

        # Start countdown
        self.root.after(100, lambda: self.countdown(4))

    def toggle_parameters(self):
        """Toggle visibility of the parameters on screen"""
        self.switch_parameterOnScreen = self.parameter_var.get()

    def save_parameters_to_csv(self, filename_base):
        csv_path = os.path.join(
            self.output_dir, f"parameters_{filename_base}.csv")

        self.current_parameters['canvas_width'] = self.canvas.winfo_width()
        self.current_parameters['canvas_height'] = self.canvas.winfo_height()
        try:
            # Safely handle eye positions
            right_eye_pos = eval(self.current_parameters['right_eye_position']
                                 ) if self.current_parameters['right_eye_position'] != 'None' else (None, None)
            left_eye_pos = eval(self.current_parameters['left_eye_position']
                                ) if self.current_parameters['left_eye_position'] != 'None' else (None, None)

            # Safely handle face and eye positions - make sure to handle None case
            right_eye_box = self.current_parameters.get('right_eye_bbox')
            if right_eye_box is None or right_eye_box == 'None':
                right_eye_box = [None, None, None, None]

            left_eye_box = self.current_parameters.get('left_eye_bbox')
            if left_eye_box is None or left_eye_box == 'None':
                left_eye_box = [None, None, None, None]

            face_min_position = self.current_parameters.get(
                'face_min_position')
            if face_min_position is None or face_min_position == 'None':
                face_min_position = [None, None]

            face_max_position = self.current_parameters.get(
                'face_max_position')
            if face_max_position is None or face_max_position == 'None':
                face_max_position = [None, None]

            face_center_position = self.current_parameters.get(
                'face_center_position')
            if face_center_position is None or face_center_position == 'None':
                face_center_position = [None, None]

        except Exception as e:
            print(f"Error parsing positions: {e}")
            right_eye_pos = [None, None]
            left_eye_pos = [None, None]
            right_eye_box = [None, None, None, None]
            left_eye_box = [None, None, None, None]
            face_min_position = [None, None]
            face_max_position = [None, None]
            face_center_position = [None, None]

        # Prepare data for CSV with safe value extraction
        data = {
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'file_number': filename_base,
            # Resolution information
            'screen_resolution_width': self.current_parameters.get('screen_resolution_width', 'N/A'),
            'screen_resolution_height': self.current_parameters.get('screen_resolution_height', 'N/A'),
            'webcam_resolution_width': self.current_parameters.get('webcam_resolution_width', 'N/A'),
            'webcam_resolution_height': self.current_parameters.get('webcam_resolution_height', 'N/A'),
            'preview_width': self.current_parameters.get('preview_width', 'N/A'),
            'preview_height': self.current_parameters.get('preview_height', 'N/A'),
            'canvas_width': self.current_parameters.get('canvas_width', 'N/A'),
            'canvas_height': self.current_parameters.get('canvas_height', 'N/A'),
            'gaze_direction': self.current_parameters.get('gaze_direction', 'N/A'),
            'posture': self.current_parameters.get('posture', 'N/A'),
            'dot_position_x': self.current_parameters.get('dot_position_x', 'N/A'),
            'dot_position_y': self.current_parameters.get('dot_position_y', 'N/A'),
            'distance_cm': self.current_parameters.get('distance', 'N/A'),
            # face
            'face_size_width': self.current_parameters.get('face_size_width', 'N/A'),
            'face_size_hight': self.current_parameters.get('face_size_hight', 'N/A'),
            'face_min_position_x': 'N/A' if face_min_position[0] is None else face_min_position[0],
            'face_min_position_y': 'N/A' if face_min_position[1] is None else face_min_position[1],
            'face_max_position_x': 'N/A' if face_max_position[0] is None else face_max_position[0],
            'face_max_position_y': 'N/A' if face_max_position[1] is None else face_max_position[1],
            'face_center_position_x': 'N/A' if face_center_position[0] is None else face_center_position[0],
            'face_center_position_y': 'N/A' if face_center_position[1] is None else face_center_position[1],
            # eye Pupil
            'left_eye_pupil_x': 'N/A' if left_eye_pos[0] is None else left_eye_pos[0],
            'left_eye_pupil_y': 'N/A' if left_eye_pos[1] is None else left_eye_pos[1],
            'right_eye_pupil_x': 'N/A' if right_eye_pos[0] is None else right_eye_pos[0],
            'right_eye_pupil_y': 'N/A' if right_eye_pos[1] is None else right_eye_pos[1],
            # eye box
            'right_eye_bbox_min_x': 'N/A' if right_eye_box[0] is None else right_eye_box[0],
            'right_eye_bbox_min_y': 'N/A' if right_eye_box[1] is None else right_eye_box[1],
            'right_eye_bbox_max_x': 'N/A' if right_eye_box[2] is None else right_eye_box[2],
            'right_eye_bbox_max_y': 'N/A' if right_eye_box[3] is None else right_eye_box[3],
            'left_eye_bbox_min_x': 'N/A' if left_eye_box[0] is None else left_eye_box[0],
            'left_eye_bbox_min_y': 'N/A' if left_eye_box[1] is None else left_eye_box[1],
            'left_eye_bbox_max_x': 'N/A' if left_eye_box[2] is None else left_eye_box[2],
            'left_eye_bbox_max_y': 'N/A' if left_eye_box[3] is None else left_eye_box[3],
        }

        try:
            with open(csv_path, 'w', newline='') as f:
                for key, value in data.items():
                    if value is None:
                        value = "N/A"
                    f.write(f"{key}: {value}\n")

            print(f"Successfully saved parameters to {csv_path}")

            # Debug print to verify data
            print("\nSaved parameters:")
            for key, value in data.items():
                print(f"{key}: {value}")

        except Exception as e:
            print(f"Error saving parameters: {e}")

    def update_dot_label_position(self):
        # Get label dimensions
        label_width = self.dot_countdown_label.winfo_reqwidth()
        label_height = self.dot_countdown_label.winfo_reqheight()

        # Get screen/canvas bounds
        canvas_width = self.canvas.winfo_width()
        canvas_height = self.canvas.winfo_height()

        # Default position (centered above dot)
        x = self.dot_x - label_width//2
        y = self.dot_y - 50

        # Check horizontal bounds
        if x < 0:  # Too far left
            x = self.dot_x + 20  # Place to right of dot
        elif x + label_width > canvas_width:  # Too far right
            x = self.dot_x - label_width - 20  # Place to left of dot

        # Check vertical bounds
        if y < 0:  # Too high
            y = self.dot_y + 20  # Place below dot
        elif y + label_height > canvas_height:  # Too low
            y = self.dot_y - label_height - 20  # Place above dot

        # Handle corner cases
        if x < 0 and y < 0:  # Top-left corner
            x = self.dot_x + 20
            y = self.dot_y + 20
        elif x + label_width > canvas_width and y < 0:  # Top-right corner
            x = self.dot_x - label_width - 20
            y = self.dot_y + 20
        elif x < 0 and y + label_height > canvas_height:  # Bottom-left corner
            x = self.dot_x + 20
            y = self.dot_y - label_height - 20
        elif x + label_width > canvas_width and y + label_height > canvas_height:  # Bottom-right corner
            x = self.dot_x - label_width - 20
            y = self.dot_y - label_height - 20
        return x, y

    def countdown(self, count):
        if count > 0:
            countdown_text = str(count)
            self.countdown_label.config(text=countdown_text)
            self.dot_countdown_label.config(text=countdown_text)
            self.dot_countdown_label.update_idletasks()  # This is important!
            x_label, y_label = self.update_dot_label_position()
            # Place label above dot
            self.dot_countdown_label.place(
                x=x_label,
                y=y_label
            )
            self.dot_countdown_label.lift()
            # self.random_button.config(state=tk.DISABLED)
            self.root.after(1000, lambda: self.countdown(count - 1))
        else:
            self.countdown_label.config(text="Capturing...")
            self.dot_countdown_label.place_forget()

            # self.random_button.config(state=tk.NORMAL)
            self.root.after(500, self.capture_sequence)

    def update_next_available_number(self):
        """Find the next available file number by checking existing files"""
        while True:
            webcam_file = os.path.join(
                self.output_dir,
                f"webcam_{self.file_counter:03d}.jpg"
            )
            screen_file = os.path.join(
                self.output_dir,
                f"screen_{self.file_counter:03d}.jpg"
            )

            if not os.path.exists(webcam_file) and not os.path.exists(screen_file):
                break
            self.file_counter += 1

    def capture_sequence(self):
        # Generate filename with current counter
        try:
            self.switch_parameterOnScreen = False
            filename_base = f"{self.file_counter:03d}"
            webcam_path = os.path.join(
                self.output_dir, f"webcam_{filename_base}.jpg")
            screen_path = os.path.join(
                self.output_dir, f"screen_{filename_base}.jpg")

            # Remove this condition since we want preview to show always
            # if not self.preview_var.get():
            #     self.preview_var.set(True)
            #     self.toggle_preview()

            ret, frame = self.cap.read()
            if ret:
                try:
                    # Process webcam frame
                    frame_processed = self.process_frame(frame.copy())

                    # selected_idx = self.screen_dropdown.current()
                    # if selected_idx >= 0 and selected_idx < len(self.screens):
                    #     selected_monitor = self.screens[selected_idx]
                    #     # Capture specific screen
                    #     screen = pyautogui.screenshot(
                    #         region=(
                    #             selected_monitor.x,
                    #             selected_monitor.y,
                    #             selected_monitor.width,
                    #             selected_monitor.height
                    #         )
                    #     )
                    # else:
                    #     # Fallback to full screenshot if no screen selected
                    #     screen = pyautogui.screenshot()
                    # screen = pyautogui.screenshot()

                    canvas_x = self.canvas.winfo_rootx()
                    canvas_y = self.canvas.winfo_rooty()
                    canvas_width = self.canvas.winfo_width()
                    canvas_height = self.canvas.winfo_height()

                    # Capture only the canvas area
                    canvas_screenshot = pyautogui.screenshot(
                        region=(
                            canvas_x,
                            canvas_y,
                            canvas_width,
                            canvas_height
                        )
                    )

                    screen_np = np.array(canvas_screenshot)
                    screen_bgr = cv2.cvtColor(screen_np, cv2.COLOR_RGB2BGR)
                    cv2.imwrite(screen_path, screen_bgr)
                    cv2.imwrite(webcam_path, frame_processed)

                    self.save_parameters_to_csv(filename_base)

                    # Update preview displays
                    frame_rgb = cv2.cvtColor(
                        frame_processed, cv2.COLOR_BGR2RGB)
                    frame_resized = cv2.resize(
                        frame_rgb, (self.webcam_width, self.webcam_height))
                    screen_resized = cv2.resize(
                        screen_np, (self.webcam_width, self.webcam_height))

                    webcam_pil = Image.fromarray(frame_resized)
                    screen_pil = Image.fromarray(screen_resized)

                    photo = ImageTk.PhotoImage(image=webcam_pil)
                    screen_photo = ImageTk.PhotoImage(image=screen_pil)

                    self.webcam_label.configure(image=photo)
                    self.webcam_label.image = photo
                    self.screen_label.configure(image=screen_photo)
                    self.screen_label.image = screen_photo

                    print(f"Images saved successfully:")
                    print(f"- Webcam: {webcam_path}")
                    print(f"- Canvas: {screen_path}")

                    # Increment counter for next capture
                    self.file_counter += 1
                    self.update_next_available_number()

                except Exception as e:
                    print(f"Error during capture: {e}")
                    import traceback
                    traceback.print_exc()

                # Always show preview after capture
                self.canvas_normal_state = False
                self.canvas.configure(
                    width=self.original_canvas_width,
                    height=self.original_canvas_height * 0.7
                )
                self.preview_frame.place(
                    in_=self.canvas_frame,
                    # relx=0.2,
                    # rely=0.5,
                    # anchor='left'
                )
                self.preview_frame.configure(bg='white')
                self.webcam_label.pack(side=tk.LEFT, padx=5)
                self.screen_label.pack(side=tk.LEFT, padx=5)

                # Hide preview after 2 seconds
                self.root.after(
                    2000, lambda: self.hide_preview_after_capture(True))

            # Reset UI state
            self.countdown_label.config(text="")
            self.random_button.config(state=tk.NORMAL)
        finally:
            # Always reset the capture flag when done
            self.capture_in_progress = False

        return self.capture_in_progress

    def hide_preview_after_capture(self, was_hidden):
        if was_hidden:
            self.canvas_normal_state = True
            self.webcam_label.pack_forget()
            self.screen_label.pack_forget()
            self.preview_frame.place_forget()
            self.toggle_controls(force_hide=False)

            self.canvas.configure(
                width=self.original_canvas_width,
                height=self.original_canvas_height
            )

    def clear_all(self):
        self.canvas.delete("all")
        self.current_dot = None
        self.screen_label.configure(image='')
        self.countdown_label.config(text="")
        self.dot_countdown_label.place_forget()
        self.distance_label.config(text="Distance: -- cm")
        self.posture_label.config(text="Posture: --")

        # Ensure random button is enabled
        self.random_button.config(state=tk.NORMAL)

    def __del__(self):
        if self.cap.isOpened():
            self.cap.release()


def main():
    root = tk.Tk()
    app = YoloDistanceApp(root)
    root.mainloop()


if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm
objc[1951]: Class CaptureDelegate is implemented in both /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/cv2/cv2.abi3.so (0x17bada520) and /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x306f4c860). One of the two will be used. Which one is undefined.
objc[1951]: Class CVWindow is implemented in both /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/cv2/cv2.abi3.so (0x17bada570) and /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x3031f0a68). One of the two will be used. Which one is undefined.
objc[1951]: Class CVView is implemented in both /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/cv2/cv2.abi3.so (0x17bada598) and /Users/porchportal2/miniforge3/envs/Video_captioning/l

Successfully saved parameters to output_from_capture/parameters_004.csv

Saved parameters:
timestamp: 2024-12-24 14:41:15
file_number: 004
screen_resolution_width: 1728
screen_resolution_height: 1117
webcam_resolution_width: 1280
webcam_resolution_height: 720
preview_width: 640
preview_height: 360
canvas_width: 1708
canvas_height: 920
gaze_direction: Looking left
posture: Looking Down
dot_position_x: 321
dot_position_y: 733
distance_cm: 56.0
face_size_width: 251
face_size_hight: 338
face_min_position_x: 504
face_min_position_y: 230
face_max_position_x: 755
face_max_position_y: 568
face_center_position_x: 629
face_center_position_y: 399
left_eye_pupil_x: 577
left_eye_pupil_y: 351
right_eye_pupil_x: 692
right_eye_pupil_y: 353
right_eye_bbox_min_x: 653
right_eye_bbox_min_y: 328
right_eye_bbox_max_x: 732
right_eye_bbox_max_y: 378
left_eye_bbox_min_x: 531
left_eye_bbox_min_y: 325
left_eye_bbox_max_x: 613
left_eye_bbox_max_y: 376
Images saved successfully:
- Webcam: output_from_capture/webca

In [1]:
import numpy as np
import cv2
import mediapipe as mp
import time

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    min_detection_confidence=0.5, min_tracking_confidence=0.5)

mp_drawing = mp.solutions.drawing_utils

drawing_spec = mp_drawing.DrawingSpec(
    color=(128, 0, 128), thickness=2, circle_radius=1)


def draw_pose_vectors(image, p1, angles, size=100):
    """Draw 3D pose vectors with RGB colors for each axis with enhanced movement"""
    x, y, z = angles

    # Add offset correction to improve accuracy
    x_offset = -4  # Adjust these offset values as needed

    # Calculate angles with offset correction
    pitch = -(x + x_offset) * np.pi / 90
    yaw = y * np.pi / 90
    roll = z * np.pi / 90

    # Calculate rotation matrices
    Rz = np.array([[np.cos(roll), -np.sin(roll), 0],
                   [np.sin(roll), np.cos(roll), 0],
                   [0, 0, 1]])

    Rx = np.array([[1, 0, 0],
                   [0, np.cos(pitch), -np.sin(pitch)],
                   [0, np.sin(pitch), np.cos(pitch)]])

    Ry = np.array([[np.cos(yaw), 0, np.sin(yaw)],
                   [0, 1, 0],
                   [-np.sin(yaw), 0, np.cos(yaw)]])

    # Combine rotations
    R = Rz @ Ry @ Rx

    # Initial vectors
    v1 = np.array([size * 1.5, 0, 0])    # X axis, made longer
    v2 = np.array([0, -size * 1.5, 0])   # Y axis, made longer
    v3 = np.array([0, 0, -size * 1.5])

    # Rotate vectors
    v1_rotated = R @ v1
    v2_rotated = R @ v2
    v3_rotated = R @ v3

    # Project to 2D and draw
    # X axis - Red
    p2 = (int(p1[0] + v1_rotated[0]), int(p1[1] + v1_rotated[1]))
    cv2.line(image, p1, p2, (0, 0, 255), 4)

    # Y axis - Green
    p2 = (int(p1[0] + v2_rotated[0]), int(p1[1] + v2_rotated[1]))
    cv2.line(image, p1, p2, (0, 255, 0), 4)

    # Z axis - Blue
    p2 = (int(p1[0] + v3_rotated[0]), int(p1[1] + v3_rotated[1]))
    cv2.line(image, p1, p2, (255, 0, 0), 4)


cap = cv2.VideoCapture(1)

while cap.isOpened():
    success, image = cap.read()

    start = time.time()

    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = face_mesh.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    img_h, img_w, img_c = image.shape
    face_2d = []
    face_3d = []

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            for idx, lm in enumerate(face_landmarks.landmark):
                if idx == 33 or idx == 263 or idx == 1 or idx == 61 or idx == 291 or idx == 199:
                    if idx == 1:
                        nose_2d = (lm.x * img_w, lm.y * img_h)
                        nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 3000)
                    x, y = int(lm.x * img_w), int(lm.y * img_h)

                    face_2d.append([x, y])
                    face_3d.append([x, y, lm.z])

            face_2d = np.array(face_2d, dtype=np.float64)
            face_3d = np.array(face_3d, dtype=np.float64)

            focal_length = 1 * img_w
            cam_matrix = np.array([
                [focal_length, 0, img_h/2],
                [0, focal_length, img_w/2],
                [0, 0, 1]
            ])
            distortion_matrix = np.zeros((4, 1), dtype=np.float64)

            success, rotation_vec, translation_vec = cv2.solvePnP(
                face_3d, face_2d, cam_matrix, distortion_matrix)

            rmat, jac = cv2.Rodrigues(rotation_vec)
            angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)

            x = angles[0] * 360
            y = angles[1] * 360
            z = angles[2] * 360

            # Draw pose vectors on the left side
            positionOfVector_visualization = (150, 300)

            # Amplify the angles for more visible movement
            speed_up = 3.5
            amplified_angles = (x * speed_up, y * speed_up, z * speed_up)
            draw_pose_vectors(
                image, positionOfVector_visualization, amplified_angles)

            # Text for angles
            cv2.putText(image, "x: " + str(np.round(x, 2)), (500, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(image, "y: " + str(np.round(y, 2)), (500, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(image, "z: " + str(np.round(z, 2)), (500, 150),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            x_angle = np.arctan2(Qx[2][1], Qx[2][2])
            y_angle = np.arctan2(-Qy[2][0], np.sqrt((Qy[2]
                                 [1] * Qy[2][1]) + (Qy[2][2] * Qy[2][2])))
            z_angle = np.arctan2(Qz[0][0], Qz[1][0])

            cv2.putText(image, "x_angle: " + str(np.round(x_angle, 5)), (500, 200),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(image, "y_angle: " + str(np.round(y_angle, 5)), (500, 250),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(image, "z_angle: " + str(np.round(z_angle, 5)), (500, 300),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # print("\nCalculated angles (radians):")
            # print(f"x_angle: {x_angle:.4f}")
            # print(f"y_angle: {y_angle:.4f}")
            # print(f"z_angle: {z_angle:.4f}")

            mp_drawing.draw_landmarks(
                image=image,
                landmark_list=face_landmarks,
                connections=mp_face_mesh.FACEMESH_CONTOURS,
                landmark_drawing_spec=drawing_spec,
                connection_drawing_spec=drawing_spec)

        end = time.time()
        totalTime = end - start
        fps = 1 / totalTime
        cv2.putText(image, f'FPS: {int(fps)}', (20, 450),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)

    cv2.imshow('Head Pose Detection', image)
    if cv2.waitKey(5) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()

objc[10054]: Class CaptureDelegate is implemented in both /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/cv2/cv2.abi3.so (0x17dc1e520) and /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x137970860). One of the two will be used. Which one is undefined.
objc[10054]: Class CVWindow is implemented in both /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/cv2/cv2.abi3.so (0x17dc1e570) and /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x120dd4a68). One of the two will be used. Which one is undefined.
objc[10054]: Class CVView is implemented in both /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/cv2/cv2.abi3.so (0x17dc1e598) and /Users/porchportal2/miniforge3/envs/Video_captioning/lib/python3.9/site-packages/mediapipe/.dylibs/li

In [3]:
import cv2 as cv
from cvzone.FaceMeshModule import FaceMeshDetector
import numpy as np
from ultralytics import YOLO
from huggingface_hub import hf_hub_download
from gaze_tracking import GazeTracking

# Initialize all trackers
gaze = GazeTracking()
# Download the updated model
model_path = hf_hub_download(
    repo_id="arnabdhar/YOLOv8-Face-Detection", filename="model.pt")
# Initialize YOLO with specific parameters
model_face = YOLO(model_path)
model_face.conf = 0.25  # Lower confidence threshold for detection
model_face.iou = 0.45   # IOU threshold for NMS

cap = cv.VideoCapture(0)
fm = FaceMeshDetector(maxFaces=1)  # Limit to one face for better accuracy

# Define eye landmark points as dictionaries
rightEye = {
    'upper': [463, 414, 286, 258, 257, 259, 260],
    'lower': [467, 359, 255, 339, 254, 253, 252, 256, 341]
}

leftEye = {
    'upper': [130, 247, 30, 29, 27, 28, 56],
    'lower': [190, 243, 112, 26, 22, 23, 24, 110, 25]
}

# Initialize parameters
focal_length = 1000
known_distance = 70
known_width = 14
switch_ShowParameter = True


def get_eye_bbox(landmarks, eye_points):
    point_array = np.array([landmarks[idx]
                           for group in eye_points.values() for idx in group])
    x_min = int(np.min(point_array[:, 0])) - 10
    x_max = int(np.max(point_array[:, 0])) + 10
    y_min = int(np.min(point_array[:, 1])) - 10
    y_max = int(np.max(point_array[:, 1])) + 10
    return (x_min, y_min), (x_max, y_max)


def measure_distance(face_width_pixels):
    return (known_width * focal_length) / face_width_pixels


def draw_position_text(frame, text, y_pos, color=(255, 255, 255)):
    """Draw single line of position text"""
    cv.putText(frame, text, (10, y_pos),
               cv.FONT_HERSHEY_SIMPLEX, 1.4, color, 3)
    return y_pos + 54


while True:
    success, frame = cap.read()
    if not success:
        print("Failed to grab frame")
        break

    # Update gaze tracking
    gaze.refresh(frame)

    # YOLO face detection with improved parameters
    results = model_face.predict(
        frame,
        conf=0.25,          # Lower confidence threshold
        iou=0.45,           # IOU threshold
        max_det=1,          # Maximum number of detections
        classes=[0],        # Only detect faces
        verbose=False
    )

    # Face mesh detection
    frame_mesh, faces = fm.findFaceMesh(frame, draw=0)

    # Check if any faces were detected by both YOLO and face mesh
    if len(results[0].boxes) > 0:
        box = results[0].boxes[0]
        conf = float(box.conf[0])

        # Only process if confidence is good enough
        if conf >= 0.25:
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # Calculate face measurements
            face_width = x2 - x1
            face_height = y2 - y1
            face_center_x = (x1 + x2) // 2
            face_center_y = (y1 + y2) // 2

            # Calculate distance
            distance = measure_distance(face_width)

            if switch_ShowParameter:
                frame = gaze.annotated_frame()
                y_position = 40

                # Debug info for YOLO detection
                cv.putText(frame, f"YOLO Confidence: {conf:.3f}", (10, y_position),
                           cv.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 3)
                y_position += 54

                cv.putText(frame, f"Face Size: {round(face_width, 2)} x {round(face_height, 2)}",
                           (10, y_position), cv.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 3)
                y_position += 54

                # Draw pupil positions
                right_pupil = gaze.pupil_right_coords()
                left_pupil = gaze.pupil_left_coords()
                if right_pupil:
                    cv.putText(frame, f"Right Pupil: {right_pupil}", (10, y_position),
                               cv.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 3)
                    y_position += 54
                if left_pupil:
                    cv.putText(frame, f"Left Pupil: {left_pupil}", (10, y_position),
                               cv.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 3)
                    y_position += 54

                # Draw face box and position
                cv.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv.putText(frame, f"Face -> min:({x1} ,{y1}), max:({x2} ,{y2})", (10, y_position),
                           cv.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 3)
                y_position += 54

                # Draw face center
                cv.circle(frame, (face_center_x, face_center_y),
                          3, (0, 255, 0), -1)
                cv.putText(frame, f"Face Center: ({face_center_x} ,{face_center_y})",
                           (10, y_position), cv.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 3)
                y_position += 54

                # Process eye landmarks if face mesh is detected
                if faces:
                    face_landmarks = faces[0]
                    for eye_name, eye_points in [("Right Eye", rightEye), ("Left Eye", leftEye)]:
                        (ex1, ey1), (ex2, ey2) = get_eye_bbox(
                            face_landmarks, eye_points)
                        cv.rectangle(frame, (ex1, ey1),
                                     (ex2, ey2), (0, 255, 0), 2)
                        cv.putText(frame, f"{eye_name}-> min:({ex1} ,{ey1}), max:({ex2} ,{ey2})", (10, y_position),
                                   cv.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 3)
                        y_position += 54
                        # Draw eye landmarks
                        for i in eye_points['upper']:
                            cv.circle(
                                frame, face_landmarks[i], 2, (0, 255, 255), -1)
                        for i in eye_points['lower']:
                            cv.circle(
                                frame, face_landmarks[i], 2, (255, 255, 0), -1)
                # Draw measurements
                cv.putText(frame, f"Distance: {distance:.1f} cm",
                           (10, y_position), cv.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 3)
                y_position += 54

                # Draw gaze direction
                if gaze.is_right():
                    gaze_text = "Looking right"
                elif gaze.is_left():
                    gaze_text = "Looking left"
                elif gaze.is_center():
                    gaze_text = "Looking center"
                else:
                    gaze_text = "Gaze not detected"
                cv.putText(frame, f"Gaze: {gaze_text}", (10, y_position),
                           cv.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 3)

    # Display the frame
    cv.imshow("Demo", frame)

    if cv.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()

2024-12-24 11:04:11.330 python[10054:795396] +[IMKClient subclass]: chose IMKClient_Modern
2024-12-24 11:04:11.330 python[10054:795396] +[IMKInputSession subclass]: chose IMKInputSession_Modern


KeyboardInterrupt: 