In [2]:
# define the distance estimation function
def estimate_distance(bbox, known_width=2.0, focal_length=800.0):
    """Estimate the distance to an object from the width of its bounding box.

    Uses the similar-triangles relation::

        distance = known_width * focal_length / pixel_width

    Args:
        bbox: Indexable box ``(x1, y1, x2, y2)``. Only ``bbox[0]`` and
            ``bbox[2]`` are read; elements may be plain numbers or 0-dim
            tensors/arrays that support subtraction and ``float()``.
        known_width: Reference length multiplied into the numerator.
            Defaults to 2.0, the value this function previously hard-coded
            (under the local name ``known_distance``). In this formula it
            acts as the object's real-world width, so the result is in the
            same unit (the caller labels it as meters).
        focal_length: Camera focal length, presumably in pixels. The default
            of 800.0 is an uncalibrated placeholder and should be adjusted
            for the actual camera.

    Returns:
        float: The estimated distance, or ``float("inf")`` when the box has
        no positive width (degenerate or inverted box), since no finite
        estimate exists in that case.
    """
    # Normalize to a Python float so tensors and plain numbers behave alike.
    pixel_width = float(bbox[2] - bbox[0])

    # A zero-width box would divide by zero; a negative width would produce
    # a meaningless negative distance.
    if pixel_width <= 0:
        return float("inf")

    return (known_width * focal_length) / pixel_width

In [3]:
import cv2
import numpy as np
import torch
from torchvision.ops.boxes import batched_nms


# Detection confidence threshold; detections scoring below this are dropped
# by the model's built-in post-processing once it is applied below.
conf_thresh = 0.25

# Load the pretrained YOLOv5-small model from PyTorch Hub
# (the hub checkout is cached locally after the first download).
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
# To run custom-trained weights instead:
# model = torch.hub.load('ultralytics/yolov5', 'custom', path='best.pt')

# Actually apply the threshold: previously `conf_thresh` was defined but never
# handed to the model, so changing it had no effect on the detections.
model.conf = conf_thresh


Using cache found in C:\Users\Alif/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-4-2 Python-3.10.10 torch-2.0.0+cu117 CUDA:0 (NVIDIA GeForce RTX 3050, 8192MiB)

Fusing layers... 
Model summary: 224 layers, 7266973 parameters, 0 gradients
Adding AutoShape... 


In [4]:
# Drawing / post-processing settings used by the loop below.
BOX_COLOR = (0, 255, 0)
TEXT_FONT = cv2.FONT_HERSHEY_SIMPLEX
TEXT_SCALE = 0.5
LINE_THICKNESS = 2
NMS_IOU_THRESHOLD = 0.45

# Initialize video stream (device index 0)
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open video capture device 0")

    while True:
        # Read a frame from the video stream; stop cleanly if the camera
        # stops delivering frames instead of passing None to the model.
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV delivers BGR frames, while the YOLOv5 hub model expects RGB
        # for array inputs. Drawing still happens on the original BGR frame.
        results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Each detection row is (x1, y1, x2, y2, confidence, class).
        detections = results.xyxy[0]

        # Class-aware non-maximum suppression: scores are the confidences
        # (column 4) and the grouping ids are the classes (column 5).
        # NOTE(review): the hub model's AutoShape wrapper is documented to run
        # NMS already, so this is a second pass kept from the original code.
        keep_indices = batched_nms(
            detections[:, :4],
            detections[:, 4],
            detections[:, 5],
            iou_threshold=NMS_IOU_THRESHOLD,
        )
        detections = detections[keep_indices]

        # Iterate over the detected objects (no-op when nothing was detected)
        for det in detections:
            # Box and class are read from the same row so they can never get
            # out of sync after filtering.
            bbox = det[:4].int()
            x1, y1, x2, y2 = bbox.tolist()
            class_label = model.names[int(det[5])]

            # Draw the bounding box and class label on the frame
            cv2.rectangle(frame, (x1, y1), (x2, y2), BOX_COLOR, LINE_THICKNESS)
            cv2.putText(frame, class_label, (x1, y1 - 10), TEXT_FONT, TEXT_SCALE, BOX_COLOR, LINE_THICKNESS)

            # Estimate the distance to the object; float() accepts both a
            # plain number and a 0-dim tensor result.
            distance = float(estimate_distance(bbox))

            # Draw the estimated distance just below the box
            cv2.putText(frame, f"{distance:.2f} meters", (x1, y2 + 20), TEXT_FONT, TEXT_SCALE, BOX_COLOR, LINE_THICKNESS)

        # Display the frame
        cv2.imshow('frame', frame)

        # Exit if the 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if inference or
    # drawing raised, so the device is not left locked by the kernel.
    cap.release()
    cv2.destroyAllWindows()