In [None]:
# Python version 3.12.0 --> create a matching environment before running
import cv2
import numpy as np
from ultralytics import YOLO
import supervision as sv

# Load the YOLO model from the 'yolov8s-world.pt' weights file and hand it
# the list of class names it should report. set_classes() must run after the
# model is constructed and before the first inference call.
model = YOLO('yolov8s-world.pt')
classes = ['Person','Mobile','Index Finger','Helmet','Safety Shoes']
model.set_classes(classes)

# Shared supervision annotators, created once and reused for every frame:
# one draws the detection boxes, the other draws the text labels (black text).
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(thickness=2)
LABEL_ANNOTATOR = sv.LabelAnnotator(text_thickness=2, text_scale=1, text_color=sv.Color.BLACK)

# Calibration constants for the face-distance estimate.
# KNOWN_DISTANCE is passed to focal_length() as the measured distance for the
# reference image; KNOWN_WIDTH is passed as the real face width, both there
# and in distance_finder(). Both values are in inches.
KNOWN_DISTANCE = 30  # Inches
KNOWN_WIDTH = 5.7  # Inches
# Passed to face_data() as distance_level; face_data() raises it to at least
# 10 and uses it as the pixel length of the call-out bar above a face.
DISTANCE_LEVEL = 0

# Colors as 3-tuples handed to OpenCV drawing calls. The channel order is
# (B, G, R): e.g. RED is (0, 0, 255). Only GREEN is referenced in the code
# visible here.
GREEN = (0, 255, 0)
RED = (0, 0, 255)
BLACK = (0, 0, 0)
YELLOW = (0, 255, 255)
WHITE = (255, 255, 255)
CYAN = (255, 255, 0)
MAGENTA = (255, 0, 242)
GOLDEN = (32, 218, 165)
LIGHT_BLUE = (255, 9, 2)
PURPLE = (128, 0, 128)
CHOCOLATE = (30, 105, 210)
PINK = (147, 20, 255)
ORANGE = (0, 69, 255)

# OpenCV font identifiers. Only `fonts` is used by the code visible here
# (for the distance text drawn in forward()).
font = cv2.FONT_HERSHEY_PLAIN
fonts = cv2.FONT_HERSHEY_COMPLEX
fonts2 = cv2.FONT_HERSHEY_SCRIPT_SIMPLEX
fonts3 = cv2.FONT_HERSHEY_COMPLEX_SMALL
fonts4 = cv2.FONT_HERSHEY_TRIPLEX

# Load the frontal-face Haar cascade that ships with OpenCV (looked up under
# cv2.data.haarcascades). face_data() uses this single shared classifier for
# both the reference image and every video frame.
face_detector = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

def focal_length(measured_distance, real_width, width_in_rf_image):
    """Return the focal length implied by one reference measurement.

    Args:
        measured_distance: Distance between camera and object when the
            reference image was taken.
        real_width: Real-world width of the object, in the same unit as
            ``measured_distance``.
        width_in_rf_image: Width of the object in the reference image,
            in pixels.

    Returns:
        ``(width_in_rf_image * measured_distance) / real_width``.
    """
    pixel_width_times_distance = width_in_rf_image * measured_distance
    return pixel_width_times_distance / real_width

def distance_finder(focal_length, real_face_width, face_width_in_frame):
    """Estimate the camera-to-face distance for one detected face.

    Args:
        focal_length: Focal length value, as produced by the module-level
            ``focal_length()`` function.
        real_face_width: Real-world width of the face.
        face_width_in_frame: Width of the detected face in the frame, in
            pixels. Must be non-zero; callers are expected to check this.

    Returns:
        ``(real_face_width * focal_length) / face_width_in_frame``, in the
        unit of ``real_face_width``.
    """
    scaled_width = real_face_width * focal_length
    return scaled_width / face_width_in_frame

def face_data(image, call_out, distance_level):
    """Detect faces in ``image``, draw a marker around each, and report them.

    The image is converted to grayscale and run through the module-level
    ``face_detector``. For every detected face a set of green guide lines is
    drawn directly onto ``image`` (the input is modified in place).

    Args:
        image: BGR image (as returned by ``cv2.imread`` / ``VideoCapture.read``).
        call_out: When true, also draw a thick green bar just above each face.
        distance_level: Pixel length of the call-out bar; values below 10 are
            raised to 10.

    Returns:
        A tuple ``(face_width, faces, face_center_x, face_center_y)`` where
        ``faces`` is the raw ``detectMultiScale`` result and the other three
        values describe the LAST face in ``faces`` (all 0 when no face was
        found).

    Raises:
        ValueError: If ``image`` is None.
    """
    if image is None:
        raise ValueError("Input image is None. Ensure the image is loaded properly.")

    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_detector.detectMultiScale(gray_image, 1.3, 5)

    face_width = 0
    face_center_x = 0
    face_center_y = 0

    line_thickness = 2
    # The clamp does not depend on the face, so apply it once up front.
    bar_length = max(distance_level, 10)

    # detectMultiScale yields rectangles as (x, y, width, height). The
    # previous code unpacked them as (x, y, h, w), which swapped the two and
    # made the reported "width" the face height.
    for (x, y, w, h) in faces:
        LLV = int(h * 0.12)  # length of the corner ticks / top inset
        top = y + LLV
        bottom = y + h
        right = x + w

        # Marker: a top line inset by LLV, a bottom line, and a short vertical
        # tick at each of the four corners.
        segments = (
            ((x, top), (right, top)),
            ((x, bottom), (right, bottom)),
            ((x, top), (x, top + LLV)),
            ((right, top), (right, top + LLV)),
            ((x, bottom), (x, bottom - LLV)),
            ((right, bottom), (right, bottom - LLV)),
        )
        for start, end in segments:
            cv2.line(image, start, end, GREEN, line_thickness)

        face_width = w
        face_center_x = int(w / 2) + x
        face_center_y = int(h / 2) + y

        if call_out:
            cv2.line(image, (x, y - 11), (x + bar_length, y - 11), GREEN, 18)

    return face_width, faces, face_center_x, face_center_y

# Load reference image and calibrate the focal length from it.
# focal_length_found stays None whenever calibration is impossible (image
# missing, or no face detected in it). forward() checks for None and simply
# skips the distance overlay instead of failing on an undefined name or
# printing a meaningless distance of 0.
focal_length_found = None
ref_image = cv2.imread("lena (1).png")
if ref_image is None:
    print("Error: Reference image not loaded. Check the file path.")
else:
    ref_image_face_width, _, _, _ = face_data(ref_image, False, DISTANCE_LEVEL)
    if ref_image_face_width == 0:
        print("Error: No face detected in the reference image. "
              "Distance estimation is disabled.")
    else:
        focal_length_found = focal_length(KNOWN_DISTANCE, KNOWN_WIDTH, ref_image_face_width)
        print(f"Focal Length Found: {focal_length_found}")


def forward(image):
    """Run detection on one frame, annotate it, and show it in a window.

    Steps:
      1. Run the YOLO model on ``image`` and wrap the result in
         ``sv.Detections``.
      2. Run ``face_data`` on the same frame (this draws face markers onto
         ``image`` in place).
      3. For every detected face, write the estimated distance next to it,
         provided the focal length calibration succeeded.
      4. Draw boxes and labels for the YOLO detections on a copy of the frame
         and display it with ``cv2.imshow`` in the 'Processed Frame' window.

    Args:
        image: BGR frame to process; it is drawn on in place.

    Returns:
        None. The result is only displayed.
    """
    results = model(image, conf=0.05)[0]  # Using YOLO model

    # Extract bounding boxes, class IDs, and confidences
    boxes = results.boxes.xyxy.cpu().numpy()  # Bounding box coordinates (x1, y1, x2, y2)
    class_ids = results.boxes.cls.cpu().numpy().astype(int)  # Class IDs
    confidences = results.boxes.conf.cpu().numpy()  # Confidence scores

    # Create detections manually
    detections = sv.Detections(
        xyxy=boxes,
        class_id=class_ids,
        confidence=confidences
    )

    _, faces, _, _ = face_data(image, True, DISTANCE_LEVEL)

    labels = [
        f"{model.names[class_id]} {confidence:0.3f}"
        for class_id, confidence in zip(detections.class_id, detections.confidence)
    ]

    # Without a calibrated focal length there is nothing meaningful to print.
    if focal_length_found is not None:
        for (face_x, face_y, face_w, _face_h) in faces:
            # Use this face's own width. Previously every face was labelled
            # with the width of the last detected face.
            if face_w == 0:
                continue
            distance = distance_finder(focal_length_found, KNOWN_WIDTH, face_w)
            distance = round(distance, 2)
            cv2.putText(image, f"Distance {distance} Inches", (face_x - 6, face_y - 6),
                        fonts, 1, GREEN, 2)

    annotated_image = image.copy()
    annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)
    annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections, labels=labels)
    cv2.imshow('Processed Frame', annotated_image)
# Video capture setup: read frames from camera 0 and process each one with
# forward() until a frame cannot be read or the user presses 'q'.
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        print("Error: Unable to access the camera.")
    else:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            forward(frame)

            # waitKey also lets the imshow window refresh; 'q' quits.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the windows, even if forward() raises
    # or the loop is interrupted; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()
