In [1]:
from ultralytics import YOLO  # Import YOLO model from Ultralytics

import cv2                   # Import OpenCV library
import math                  # Import math module for mathematical operations

# Start webcam
cap = cv2.VideoCapture(0)    # Open default camera (index 0)
if not cap.isOpened():
    # Fail here with a clear message rather than later, when reading frames
    # from an unopened device produces a much less obvious error.
    raise RuntimeError("Could not open the default camera (index 0)")

# Named property ids instead of the bare numbers 3 and 4
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)     # Request a frame width of 640 pixels
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)    # Request a frame height of 480 pixels

# Load the YOLO model
# (the recorded cell output shows the weights being downloaded to this path on first use)
model = YOLO("yolo-Weights/yolov8n.pt")  # Load YOLOv8 model with pre-trained weights

# Labels the detector can report, listed in class-id order.
# Order matters: a detection's integer class id is used as the index into this list.
classNames = [
    # people
    "person",
    # vehicles
    "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
    # street furniture
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    # bags and accessories
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # sports gear
    "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket",
    # tableware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # food
    "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    # furniture and fixtures
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
    # electronics
    "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # kitchen appliances
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # miscellaneous household items
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]


# Capture and annotate frames until 'q' is pressed or the camera stops delivering frames.
# The try/finally guarantees the camera and the window are released even if
# detection or drawing raises part-way through.
try:
    while True:
        # Read a frame from the camera
        success, img = cap.read()
        if not success:
            # No frame was delivered (camera unplugged, busy, or stream ended);
            # img is not usable in that case, so stop instead of passing it on.
            print("Failed to read a frame from the camera; stopping.")
            break

        # Perform object detection using the YOLO model on the captured frame
        results = model(img, stream=True)

        # Text style shared by every label drawn on this frame
        font = cv2.FONT_HERSHEY_SIMPLEX
        fontScale = 1
        color = (255, 0, 0)
        thickness = 2

        # Iterate through the results of object detection
        for r in results:
            # Iterate through each bounding box of the detected objects
            for box in r.boxes:
                # Corner coordinates of the bounding box, converted to integer pixels
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # Draw the bounding box on the frame
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

                # Round the confidence to the nearest hundredth (rounding up
                # with ceil would systematically overstate it) and print it
                confidence = round(float(box.conf[0]), 2)
                print("Confidence --->", confidence)

                # Determine and print the class name of the detected object
                cls = int(box.cls[0])
                label = classNames[cls]
                print("Class name -->", label)

                # Draw the class name at the box's top-left corner; keep the text
                # baseline at least one text-height below the top edge so labels of
                # boxes touching the top of the frame are not drawn off-image.
                (_, text_height), _ = cv2.getTextSize(label, font, fontScale, thickness)
                org = (x1, max(y1, text_height))
                cv2.putText(img, label, org, font, fontScale, color, thickness)

        # Display the frame with detected objects in a window named "Cam"
        cv2.imshow('Cam', img)

        # Exit on 'q'; mask to the low byte because some platforms return
        # extra high bits from waitKey
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera
    cap.release()

    # Close all OpenCV windows
    cv2.destroyAllWindows()

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt to 'yolo-Weights/yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 11.0MB/s]



0: 480x640 1 person, 44.1ms
Confidence ---> 0.9
Class name --> person
Speed: 14.0ms preprocess, 44.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 32.7ms
Confidence ---> 0.91
Class name --> person
Speed: 0.9ms preprocess, 32.7ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 29.8ms
Confidence ---> 0.92
Class name --> person
Speed: 0.9ms preprocess, 29.8ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 28.8ms
Confidence ---> 0.93
Class name --> person
Speed: 0.7ms preprocess, 28.8ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 30.4ms
Confidence ---> 0.92
Class name --> person
Speed: 0.8ms preprocess, 30.4ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 30.1ms
Confidence ---> 0.95
Class name --> person
Speed: 0.9ms preprocess, 30.1ms inference, 0.8ms postprocess per image at shape (1,

In [4]:
from ultralytics import YOLO  # Import YOLO model from Ultralytics

import cv2                   # Import OpenCV library
import math                  # Import math module for mathematical operations

# Load the YOLO model
# NOTE(review): same weights path as the earlier webcam cell; loading it again here
# presumably lets this cell run without executing the webcam loop first — confirm.
model = YOLO("yolo-Weights/yolov8n.pt")  # YOLOv8 model built from the pre-trained weights file

# Class labels in class-id order, ten per row.
# detect_cars indexes this list with each detection's integer class id.
classNames = [
    # ids 0-9
    "person", "bicycle", "car", "motorbike", "aeroplane",
    "bus", "train", "truck", "boat", "traffic light",
    # ids 10-19
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    # ids 20-29
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    # ids 30-39
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    # ids 40-49
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    # ids 50-59
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "sofa", "pottedplant", "bed",
    # ids 60-69
    "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    # ids 70-79
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]

def detect_cars(image):
    """Run the YOLO model on ``image`` and return an annotated copy.

    Despite the name, every detection the model returns is drawn, not only
    cars: each one gets a bounding box and a ``"<class name>:<confidence>"``
    label, with the class name looked up in ``classNames``.

    Args:
        image: Image array providing ``copy()`` that OpenCV can draw on.
            NOTE(review): Ultralytics documents NumPy inputs as HWC arrays in
            BGR channel order — confirm callers pass that layout.

    Returns:
        A copy of ``image`` with boxes and labels drawn on it. The input
        array itself is left unmodified.
    """
    # Work on a copy so the caller's array is never drawn on
    annotated = image.copy()
    result = model(annotated, stream=False)[0]

    # Text style shared by every label
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    thickness = 2

    # Iterate through the bounding boxes of the detected objects
    for box in result.boxes:
        # Corner coordinates of the bounding box, converted to integer pixels
        x1, y1, x2, y2 = map(int, box.xyxy[0])

        # Draw the bounding box on the copy
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (255, 0, 255), 3)

        # Class name plus confidence rounded to the nearest hundredth
        # (rounding up with ceil would systematically overstate the score)
        cls = int(box.cls[0])
        text = f"{classNames[cls]}:{float(box.conf[0]):.2f}"

        # Keep the text baseline at least one text-height below the top edge so
        # labels of boxes touching the top of the image are not drawn off-image
        (_, text_height), _ = cv2.getTextSize(text, font_face, font_scale, thickness)

        cv2.putText(
            img=annotated,
            text=text,
            org=(x1, max(y1, text_height)),
            fontFace=font_face,
            fontScale=font_scale,
            color=(255, 0, 0),
            thickness=thickness,
        )

    return annotated

In [5]:
from PIL import Image
import numpy as np

# NOTE(review): absolute, machine-specific path — this cell only runs on the
# author's machine; consider a path relative to the repository instead.
image_path = "/home/eonrrfe/Documents/Repos/Others/CarDetection/test_images/cars_0.png"

# Force 3-channel RGB so RGBA, palette or grayscale PNGs all yield an HxWx3 array
image = Image.open(image_path).convert("RGB")
image.show()
image_array = np.asarray(image)

# PIL yields RGB, but Ultralytics documents NumPy inputs as BGR (OpenCV order).
# Reverse the channel axis for inference so the model does not see swapped colours.
image_array_bgr = np.ascontiguousarray(image_array[:, :, ::-1])

# detect_cars returns an annotated BGR array; flip it back to RGB for PIL.
# Drawn colours are therefore interpreted as BGR, matching the OpenCV webcam cell.
inferred_image_array = np.ascontiguousarray(detect_cars(image_array_bgr)[:, :, ::-1])
inferred_image = Image.fromarray(inferred_image_array)

inferred_image.show()


0: 640x480 5 cars, 1 suitcase, 31.9ms
Speed: 1.9ms preprocess, 31.9ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 480)
<class 'ultralytics.engine.results.Boxes'>
<class 'ultralytics.engine.results.Boxes'>
<class 'ultralytics.engine.results.Boxes'>
<class 'ultralytics.engine.results.Boxes'>
<class 'ultralytics.engine.results.Boxes'>
<class 'ultralytics.engine.results.Boxes'>
