In [1]:
from ultralytics import YOLO

In [2]:
# Build the detector from the 'yolov8n.pt' weights file; every later cell
# runs inference through this single `model` object.
model = YOLO(model='yolov8n.pt')

In [3]:
# Class labels indexed by the integer class id the detector emits
# (classNames[cls] is used to label each box). Order matters: position in the
# list IS the class id, so entries must never be reordered or removed.
classNames = [
    # 0: people
    'person',
    # 1-8: vehicles
    'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    # 9-13: street furniture
    'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    # 14-23: animals
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
    'giraffe',
    # 24-28: accessories
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
    # 29-38: sports equipment
    'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    # 39-45: tableware
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    # 46-55: food
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
    'pizza', 'donut', 'cake',
    # 56-61: furniture
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
    # 62-67: electronics
    'TV', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    # 68-72: appliances
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
    # 73-79: household items
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush',
]

In [7]:
import cv2
import cvzone
import math
# Open the capture device at index 0 as the live video source.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail here with a clear message instead of letting a missing or busy
    # camera surface later as an unusable frame inside the detection loop.
    raise RuntimeError("Could not open the camera at index 0")

In [9]:
# Request a 1280x720 capture size using the named OpenCV property ids
# (CAP_PROP_FRAME_WIDTH == 3, CAP_PROP_FRAME_HEIGHT == 4) rather than bare
# integers. NOTE: a True return only means the backend supports the property;
# per the OpenCV docs it does not guarantee the device accepted the value.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

True

In [11]:
# Tally of detections for the current frame: class label -> number of boxes.
# The detection loop clears and refills this mapping on every frame.
classes_detected = dict()

In [13]:
import time
# Reference timestamp for throttling announcements: the detection loop only
# reports detections once at least 4 seconds have elapsed since this value,
# and then resets it to the time of that report.
last_update_time = time.time()

In [15]:
import pyttsx3
# Text-to-speech engine; the detection loop uses it to speak each
# "Detected ..." message aloud.
engine = pyttsx3.init()

In [None]:
# Live detection loop.
#
# For every camera frame: run the detector, draw a box and a
# "<label> <confidence>%" caption for each detection, and tally detections per
# class in `classes_detected`. At most once every 4 seconds the current tally
# is printed and spoken. Press 'q' in the preview window to stop.
#
# The try/finally guarantees the camera and the preview window are released
# even when the loop ends through an exception or a kernel interrupt;
# otherwise the device stays locked until the kernel is restarted.
try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame was delivered (camera unplugged, busy, or already
            # released). `img` is not a usable image in that case, so stop
            # rather than hand it to the model and cv2.imshow.
            print("Failed to read a frame from the camera; stopping.")
            break

        results = model(img, stream=True)
        # The tally describes the current frame only.
        classes_detected.clear()
        for r in results:
            for box in r.boxes:
                # Corner coordinates -> integer pixels, then to the
                # (x, y, width, height) form that cornerRect expects.
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                w, h = x2 - x1, y2 - y1
                cvzone.cornerRect(img, (x1, y1, w, h))

                # Confidence as a whole-number percentage, rounded up.
                conf = math.ceil((box.conf[0] * 100))
                cls = int(box.cls[0])
                class_name = classNames[cls]
                # Clamp the caption position so it stays inside the frame.
                cvzone.putTextRect(img, f'{class_name} {conf}%', (max(0, x1), max(35, y1)))
                classes_detected[class_name] = classes_detected.get(class_name, 0) + 1

        current_time = time.time()
        if current_time - last_update_time >= 4:
            for key, value in classes_detected.items():
                message = (f"Detected {key} {value} time")
                print(message)
                engine.say(message)
                # NOTE: runAndWait() blocks until the message has been spoken,
                # so the preview freezes while announcements are read out.
                engine.runAndWait()
            last_update_time = current_time

        cv2.imshow("image", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print(classes_detected)
finally:
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 1 person, 1 bed, 15.5ms
Speed: 2.0ms preprocess, 15.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Detected person 1 time
Detected bed 1 time

0: 384x640 1 person, 1 bed, 14.0ms
Speed: 2.0ms preprocess, 14.0ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)
Detected person 1 time
Detected bed 1 time

0: 384x640 1 person, 1 chair, 1 bed, 12.0ms
Speed: 2.0ms preprocess, 12.0ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 chair, 1 bed, 13.0ms
Speed: 2.0ms preprocess, 13.0ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 chair, 1 bed, 20.0ms
Speed: 2.0ms preprocess, 20.0ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)
Detected person 1 time
Detected bed 1 time
Detected chair 1 time

0: 384x640 2 persons, 1 bed, 15.0ms
Speed: 2.0ms preprocess, 15.0ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)
Detected person 2 time


In [4]:
# Still image used for the single-image inference check; the path is relative
# to the notebook's working directory.
image_path = (
    "Dataset/test2017/test2017/"  # dataset folder
    "000000018016.jpg"            # image file
)

In [11]:
# Run the detector on the still image and pop up the annotated result
# (show=True). Calling the model object directly is shorthand for predict().
model.predict(source=image_path, show=True)


image 1/1 C:\Users\Shashwat\ModelDirectory\Dataset\test2017\test2017\000000018016.jpg: 448x640 15 persons, 1 baseball bat, 1 baseball glove, 13.5ms
Speed: 2.0ms preprocess, 13.5ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted p