In [3]:

# --------------------------------------------------
# File Name: object_detection.py
# --------------------------------------------------
# Date Completed: 08-27-2023
# --------------------------------------------------
# Description:
# This is a simple object detection program that
# uses the YOLO (You Only Look Once) model to detect
# and identify objects in a video feed (webcam or
# recorded clip), and to flag luggage (backpack,
# handbag, suitcase) that has no person nearby.
# --------------------------------------------------

# Import Dependencies
from ultralytics import YOLO
import cv2
import math

def euclidean_distance_boxes(box1, box2):
    """
    Return the straight-line distance between the midpoints of two boxes.

    Each box is indexed as (x1, y1, x2, y2): positions 0 and 1 hold one
    corner, positions 2 and 3 the opposite corner. The midpoint of a box
    is the average of its two corners.

    Args:
    box1 (tuple): Four numbers giving the top-left and bottom-right corners of the first box.
    box2 (tuple): Four numbers giving the top-left and bottom-right corners of the second box.

    Returns:
    float: The Euclidean distance between the two midpoints.
    """
    # Midpoint of each box as an (x, y) pair
    center_a = ((box1[0] + box1[2]) / 2, (box1[1] + box1[3]) / 2)
    center_b = ((box2[0] + box2[2]) / 2, (box2[1] + box2[3]) / 2)

    # Per-axis offset from the first midpoint to the second
    dx = center_b[0] - center_a[0]
    dy = center_b[1] - center_a[1]

    return math.sqrt(dx**2 + dy**2)

# Open the video source (here a recorded clip on disk, not a live camera)
cap = cv2.VideoCapture(
    r"c:\Users\jithu\Desktop\WhatsApp Video 2024-05-09 at 20.50.15_21200491.mp4"
)
# Request a 1080x520 frame size.
# NOTE(review): this request may have no effect for a file-backed capture - confirm.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1080)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 520)

# Load the YOLOv8 nano weights from the local "yolo-Weights" directory
model = YOLO("yolo-Weights/yolov8n.pt")

# Display names for the detector's classes; the position in the list is the
# class id used to look a name up (0 = Person, 24 = Backpack, 26 = Handbag,
# 28 = Suitcase). Ten entries per row so an index is easy to locate.
classNames = [
    # 0-9
    "Person", "Bicycle", "Car", "Motorbike", "Aeroplane",
    "Bus", "Train", "Truck", "Boat", "Traffic Light",
    # 10-19
    "Fire Hydrant", "Stop Sign", "Parking Meter", "Bench", "Bird",
    "Cat", "Dog", "Horse", "Sheep", "Cow",
    # 20-29
    "Elephant", "Bear", "Zebra", "Giraffe", "Backpack",
    "Umbrella", "Handbag", "Tie", "Suitcase", "Frisbee",
    # 30-39
    "Skis", "Snowboard", "Sports Ball", "Kite", "Baseball Bat",
    "Baseball Glove", "Skateboard", "Surfboard", "Tennis Racket", "Bottle",
    # 40-49
    "Wine Glass", "Cup", "Fork", "Knife", "Spoon",
    "Bowl", "Banana", "Apple", "Sandwich", "Orange",
    # 50-59
    "Broccoli", "Carrot", "Hot dog", "Pizza", "Donut",
    "Cake", "Chair", "Sofa", "Potted Plant", "Bed",
    # 60-69
    "Dining Table", "Toilet", "TV Monitor", "Laptop", "Mouse",
    "Remote", "Keyboard", "Mobile Phone", "Microwave", "Oven",
    # 70-79
    "Toaster", "Sink", "Refrigerator", "Book", "Clock",
    "Vase", "Scissors", "Teddy Bear", "Hair Dryer", "Toothbrush",
]



# block start
#
# Main loop: run the detector on every frame, draw corner-style boxes and
# labels for people and luggage, and keep a streak counter of luggage
# detections that have no person close by.

# Class ids are positions in classNames.
PERSON_CLASS_ID = 0
LUGGAGE_CLASS_IDS = (24, 26, 28)  # Backpack, Handbag, Suitcase

NEAR_DISTANCE_PX = 250        # a person closer than this (centre to centre) counts as "nearby"
WARN_AFTER_FRAMES = 5         # luggage box is drawn red once the streak exceeds this
ABANDONED_AFTER_FRAMES = 20   # the "abandoned" tag is drawn once the streak exceeds this
CORNER_LEN = 15               # length in pixels of each corner tick of a box

# Colours are BGR tuples, as in the cv2 drawing calls below.
RED = (0, 0, 255)
GREEN = (0, 255, 0)
BLUE = (255, 0, 0)


def _draw_corner_box(img, x1, y1, x2, y2, color):
    """Draw the four corners of the box (x1, y1)-(x2, y2) as short ticks on img."""
    for corner_x, step_x in ((x1, CORNER_LEN), (x2, -CORNER_LEN)):
        for corner_y, step_y in ((y1, CORNER_LEN), (y2, -CORNER_LEN)):
            # One horizontal and one vertical tick, both pointing into the box
            cv2.line(img, (corner_x, corner_y), (corner_x + step_x, corner_y), color, 2)
            cv2.line(img, (corner_x, corner_y), (corner_x, corner_y + step_y), color, 2)


data = []  # not read anywhere in this section; name kept for compatibility

y = 0      # most recent luggage-to-person distance; printed after the loop
z = []     # distances from the luggage box being processed to every person in the frame
t = 0      # number of entries of z below NEAR_DISTANCE_PX (reset after every box)
clk = 0    # streak of luggage detections with nobody nearby; reset when someone is near
           # NOTE(review): one counter is shared by all luggage boxes and advances
           # per box, not per frame - confirm this is intended.
l = 0      # frame counter; also used as the file name of the saved frame

try:
    while True:
        l += 1
        success, img = cap.read()
        if not success:
            # No frame returned: stop processing
            break

        img = cv2.flip(img, 1)  # Mirror the frame horizontally
        results = model(img, stream=True)
        for r in results:
            boxes = r.boxes
            for box in boxes:
                class_index = int(box.cls[0])
                is_luggage = class_index in LUGGAGE_CLASS_IDS
                # Only people and luggage are drawn; every other class is skipped
                if not is_luggage and class_index != PERSON_CLASS_ID:
                    continue

                x1, y1, x2, y2 = box.xyxy[0]
                if is_luggage:
                    # Measure from this luggage box to every person in the frame
                    z = []
                    for other in boxes:
                        if int(other.cls[0]) == PERSON_CLASS_ID:
                            xx1, yy1, xx2, yy2 = other.xyxy[0]
                            y = euclidean_distance_boxes((x1, y1, x2, y2), (xx1, yy1, xx2, yy2))
                            z.append(y)
                    t = sum(1 for distance in z if distance < NEAR_DISTANCE_PX)
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                # Red applies only to luggage whose streak has passed the warning
                # threshold. The original condition lacked parentheses, so `and`
                # bound tighter than `or` and backpacks/handbags were always red.
                if is_luggage and clk > WARN_AFTER_FRAMES:
                    box_color = RED
                else:
                    box_color = GREEN
                _draw_corner_box(img, x1, y1, x2, y2, box_color)

                # Class name and confidence label
                confidence = round(float(box.conf[0]) * 100, 2)
                class_name = classNames[class_index]
                text = f"{class_name}: {confidence}%"
                org = (x1, y1 - 10)  # Place text slightly above the bounding box
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 0.5
                label_color = BLUE

                if is_luggage:
                    if t == 0:
                        # Nobody nearby: extend the streak and show its length
                        clk += 1
                        cv2.putText(img, 'abandoned frames:' + str(clk), (x1, y1 - 20),
                                    font, font_scale, BLUE, thickness=2)
                    else:
                        # Someone is nearby: the streak starts over
                        clk = 0
                    if clk > ABANDONED_AFTER_FRAMES:
                        cv2.putText(img, "abandoned ", (x1, y1 - 40),
                                    font, font_scale, RED, thickness=2)
                        # The class label is drawn red as well once luggage is abandoned
                        label_color = RED
                t = 0
                cv2.putText(img, text, org, font, font_scale, label_color, 1)

        cv2.imshow("Object Detection", img)
        cv2.imwrite(fr'c:\Users\jithu\Desktop\New folder\{l}.png', img)
        if cv2.waitKey(5) & 0xFF == 27:  # Press 'ESC' to exit
            break
finally:
    # Release the capture and close the window even if a frame raised an error
    cap.release()
    cv2.destroyAllWindows()
print(y)


0: 384x640 (no detections), 56.0ms
Speed: 10.0ms preprocess, 56.0ms inference, 738.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 35.5ms
Speed: 3.0ms preprocess, 35.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 37.0ms
Speed: 2.0ms preprocess, 37.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 34.0ms
Speed: 2.0ms preprocess, 34.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 36.5ms
Speed: 2.0ms preprocess, 36.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 35.0ms
Speed: 1.0ms preprocess, 35.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 31.5ms
Speed: 1.0ms preprocess, 31.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 43.5ms
Speed: 8.0ms preprocess, 43.5m