### Realtime Object Detection

Detects objects in a real-time video stream captured from a webcam.

#### Import libraries for preprocessing and annotation

In [1]:
import numpy as np
import cv2
from pathlib import Path
from IPython.display import Image
import torch

#### Load the pretrained YOLOv5 model

In [2]:
# Fetch the small YOLOv5 variant ('yolov5s') with pretrained weights through
# torch.hub; the hub repository is cached locally after the first download.
model = torch.hub.load(
    'ultralytics/yolov5',
    'yolov5s',
    pretrained=True,
)

Using cache found in C:\Users\SYED WALI/.cache\torch\hub\ultralytics_yolov5_master


[31m[1mrequirements:[0m Ultralytics requirements ['numpy>=1.23.5', 'pillow>=10.3.0', 'requests>=2.32.0', 'tqdm>=4.64.0'] not found, attempting AutoUpdate...
Collecting numpy>=1.23.5
  Downloading numpy-2.0.0-cp39-cp39-win_amd64.whl.metadata (60 kB)
     ---------------------------------------- 60.9/60.9 kB ? eta 0:00:00
Collecting pillow>=10.3.0
  Downloading pillow-10.3.0-cp39-cp39-win_amd64.whl.metadata (9.4 kB)
Collecting requests>=2.32.0
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting tqdm>=4.64.0
  Downloading tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)
     ---------------------------------------- 57.6/57.6 kB ? eta 0:00:00
Downloading numpy-2.0.0-cp39-cp39-win_amd64.whl (16.5 MB)
   ---------------------------------------- 16.5/16.5 MB 9.2 MB/s eta 0:00:00
Downloading pillow-10.3.0-cp39-cp39-win_amd64.whl (2.5 MB)
   ---------------------------------------- 2.5/2.5 MB 9.0 MB/s eta 0:00:001
Downloading requests-2.32.3-py3-none-any.whl (64 kB)
   -

YOLOv5  2024-5-28 Python-3.9.8 torch-1.8.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


#### Run object detection on example image

In [3]:
# Example image(s) to run the detector on; the first one is previewed below.
imgs = [
    'https://ultralytics.com/images/zidane.jpg',
]
Image(url=imgs[0])

In [4]:
# Run inference on the example image(s) and print a per-image summary.
results = model(imgs)
results.print()
# Pass the output directory by keyword: in current YOLOv5 releases the first
# positional parameter of Detections.save() is `labels`, so save(".") would
# write to the default runs/detect/exp* directory instead of the working
# directory. exist_ok=True keeps "." from being auto-incremented to a new name.
# NOTE(review): the AttributeError about PIL.ImageFile recorded under this cell
# presumably comes from Pillow being upgraded while the kernel was running;
# restarting the kernel should clear it - confirm.
results.save(save_dir=".", exist_ok=True)

AttributeError: module 'PIL.ImageFile' has no attribute 'PyEncoder'

In [None]:
# Display zidane.jpg from the current working directory - presumably the
# annotated copy written by results.save() in the previous cell.
Image(filename='zidane.jpg') 

#### Run object detection on realtime video via webcam

In [None]:
# Live detection: read frames from the default webcam, run YOLOv5 on each one,
# draw the boxes and labels, and show the annotated frame until q is pressed.
print("Press q to exit the object detection window!")
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, image_np = cap.read()
        if not ret:
            # No frame was delivered (camera missing, busy, or unplugged):
            # stop instead of handing None to the model.
            print("Could not read a frame from the webcam; stopping.")
            break

        # OpenCV delivers BGR frames, while the YOLOv5 hub model expects RGB
        # for numpy input, so convert a copy for inference and keep drawing on
        # the original BGR frame.
        results = model(cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB))
        detections = results.pandas().xyxy[0].to_dict('records')

        for det in detections:
            top_left = (int(det["xmin"]), int(det["ymin"]))
            bottom_right = (int(det["xmax"]), int(det["ymax"]))
            cv2.rectangle(image_np, pt1=top_left, pt2=bottom_right,
                          color=(255, 0, 0), thickness=2)
            # Caption sits 10 px above the box's top-left corner.
            cv2.putText(image_np, f"{det['name']}: {round(det['confidence'], 2)}",
                        (top_left[0], top_left[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        cv2.imshow('Object Detector', image_np)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("The window has been exited!")
            break
finally:
    # Always free the camera and close the window, including when the loop is
    # left through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [5]:
pip install pyttsx3

Note: you may need to restart the kernel to use updated packages.


In [None]:
import numpy as np
import cv2
from pathlib import Path
import pyttsx3
import torch

# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Initialize pyttsx3
engine = pyttsx3.init()

print("Press q to exit the object detection window!")

cap = cv2.VideoCapture(0)
# Labels seen in the previous frame; only labels that newly appear are spoken,
# so the blocking speech call does not run for every detection on every frame.
previous_labels = set()
try:
    while True:
        ret, image_np = cap.read()
        if not ret:
            # No frame was delivered (camera missing, busy, or unplugged):
            # stop instead of handing None to the model.
            print("Could not read a frame from the webcam; stopping.")
            break

        # OpenCV delivers BGR frames, while the YOLOv5 hub model expects RGB
        # for numpy input, so convert a copy for inference and keep drawing on
        # the original BGR frame.
        results = model(cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB))
        detections = results.pandas().xyxy[0].to_dict('records')

        current_labels = set()
        for det in detections:
            top_left = (int(det["xmin"]), int(det["ymin"]))
            bottom_right = (int(det["xmax"]), int(det["ymax"]))
            cv2.rectangle(image_np, pt1=top_left, pt2=bottom_right,
                          color=(255, 0, 0), thickness=2)
            # Caption sits 10 px above the box's top-left corner.
            cv2.putText(image_np, f"{det['name']}: {round(det['confidence'], 2)}",
                        (top_left[0], top_left[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
            current_labels.add(det["name"])

        cv2.imshow('Object Detector', image_np)

        # waitKey also lets the window repaint, so the frame is visible before
        # any speech blocks the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("The window has been exited!")
            break

        # Speak out objects that were not present in the previous frame.
        new_labels = sorted(current_labels - previous_labels)
        for label in new_labels:
            engine.say(f"I see {label}")
        if new_labels:
            engine.runAndWait()
        previous_labels = current_labels
finally:
    # Always free the camera and close the window, including when the loop is
    # left through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()


Using cache found in C:\Users\SYED WALI/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-5-28 Python-3.9.8 torch-1.8.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Press q to exit the object detection window!
