In [None]:
! pip install torch torchvision opencv-python



In [None]:
import torch
import cv2
import numpy as np
from scipy.spatial import distance

In [None]:
# Load the pre-trained YOLOv5 "small" model from the Ultralytics hub repository
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Load an image
image_path = '/content/1 (6).jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports failure by returning None instead of raising,
    # so fail here with a clear message rather than deep inside the model.
    raise FileNotFoundError(f"Could not read image (missing or unreadable): {image_path}")

# Perform inference on the image.
# OpenCV decodes to BGR, while the hub model expects RGB for ndarray input,
# so inference runs on a converted copy; `image` itself stays BGR for drawing.
results = model(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

# Rows are [x1, y1, x2, y2, confidence, class]; keep only class 0 ('person')
all_detections = results.xyxy[0].cpu().numpy()
person_detections = [det for det in all_detections if det[5] == 0]

# Assume real-world height of a person (e.g., average person height)
REAL_WORLD_HEIGHT = 1.7  # in meters

# Parallel lists: centroids[i] and real_world_distances[i] describe the same person.
centroids = []
real_world_distances = []  # per-person scale in meters per pixel

for det in person_detections:
    x1, y1, x2, y2 = (float(v) for v in det[:4])

    # Draw the bounding box for every person detection
    cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)

    # Pixel height of the person (height of the bounding box)
    pixel_height = y2 - y1
    if pixel_height <= 0:
        # A degenerate box has no usable scale; skipping it avoids a division by zero
        # and keeps the two lists aligned.
        continue

    centroid_x = (x1 + x2) / 2
    centroid_y = (y1 + y2) / 2
    centroids.append((centroid_x, centroid_y))

    # Scale: assumed real-world height divided by pixel height (meters per pixel)
    real_world_distances.append(REAL_WORLD_HEIGHT / pixel_height)

    # Mark the centroid
    cv2.circle(image, (int(centroid_x), int(centroid_y)), 5, (0, 0, 255), -1)

# Check if we have at least two usable persons detected
if len(centroids) >= 2:
    # Euclidean distance between the first two persons, in pixels
    person1, person2 = centroids[0], centroids[1]
    pixel_distance = distance.euclidean(person1, person2)

    # Average the scales of the two persons being measured instead of trusting only
    # the first one: if they stand at different depths their meters-per-pixel differ.
    conversion_factor = (real_world_distances[0] + real_world_distances[1]) / 2

    # Convert pixel distance to real-world distance
    real_world_distance = pixel_distance * conversion_factor
    print(f"Pixel distance: {pixel_distance} pixels")
    print(f"Real-world distance: {real_world_distance:.2f} meters")
else:
    print("Less than two persons detected.")

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-9-6 Python-3.10.12 torch-2.4.0+cu121 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):


Pixel distance: 1025.8120857399017 pixels
Real-world distance: 0.88 meters


In [None]:
import torch
import cv2
import numpy as np
from scipy.spatial import distance

# Constants
KNOWN_HEIGHT = 1.7  # Average person height in meters (as a reference)

# Load the pre-trained YOLOv5 "small" model for detecting people
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Load the image
image_path = "/content/2 (2).jpg"  # Replace with your image path
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports failure by returning None instead of raising,
    # so fail here with a clear message rather than deep inside the model.
    raise FileNotFoundError(f"Could not read image (missing or unreadable): {image_path}")

# YOLO object detection.
# OpenCV decodes to BGR, while the hub model expects RGB for ndarray input,
# so inference runs on a converted copy; `image` itself stays BGR for drawing.
results = model(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

# Rows are [x1, y1, x2, y2, confidence, class]; keep only class 0 ('person')
all_detections = results.xyxy[0].cpu().numpy()
person_detections = [det for det in all_detections if det[5] == 0]

# Assuming camera parameters
focal_length = 800  # Assumed focal length in pixels (needs calibration)

# Function to estimate real-world distance based on bounding box height
def estimate_distance_from_height(bbox_height, known_height=None, focal_length_px=None):
    """Estimate the camera-to-subject distance from a bounding-box height.

    Computes ``known_height * focal_length_px / bbox_height``.

    Args:
        bbox_height: Height of the bounding box in pixels.
        known_height: Assumed real-world height of the subject. When omitted,
            the notebook-level ``KNOWN_HEIGHT`` is used.
        focal_length_px: Focal length in pixels. When omitted, the
            notebook-level ``focal_length`` is used.

    Returns:
        The estimated distance, in the same unit as ``known_height``, or
        ``None`` when ``bbox_height`` is not strictly positive.
    """
    if not bbox_height > 0:
        # A non-positive height cannot be scaled; callers must handle None.
        return None

    # Fall back to the notebook globals only when no explicit value is given,
    # so existing one-argument calls behave exactly as before.
    if known_height is None:
        known_height = KNOWN_HEIGHT
    if focal_length_px is None:
        focal_length_px = focal_length

    return (known_height * focal_length_px) / bbox_height

# Parallel lists: index k in each list refers to the same (usable) person.
centroids = []             # (cx, cy) bounding-box centres in integer pixel coordinates
real_world_distances = []  # estimated camera-to-person distance in meters
points_3d = []             # (X, Y, Z) camera-frame positions in meters

# Back-projection needs a principal point; without calibration data it is
# assumed to sit at the image centre.
img_h, img_w = image.shape[:2]
principal_x, principal_y = img_w / 2, img_h / 2

# Process detected persons
for det in person_detections:
    x1, y1, x2, y2 = map(int, det[:4])

    # Centroid of the bounding box
    cx = (x1 + x2) // 2
    cy = (y1 + y2) // 2

    # Height of the bounding box in pixels
    bbox_height = y2 - y1

    # Estimate camera-to-person distance from the bounding box height
    real_distance = estimate_distance_from_height(bbox_height)
    if real_distance is None:
        # The estimator returns None for a non-positive height. Skipping the detection
        # avoids formatting/adding None below and keeps the lists aligned.
        continue

    centroids.append((cx, cy))
    real_world_distances.append(real_distance)

    # Pinhole back-projection: a pixel offset from the principal point scales to
    # meters by depth / focal_length; the depth itself is the Z coordinate.
    points_3d.append((
        (cx - principal_x) * real_distance / focal_length,
        (cy - principal_y) * real_distance / focal_length,
        real_distance,
    ))

    # Print the real-world distance for each person
    print(f"Person at ({cx}, {cy}) has an estimated distance of {real_distance:.2f} meters")

    # Draw the bounding box and centroid on the image
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.circle(image, (cx, cy), 5, (0, 0, 255), -1)

# Calculate distances between each pair of persons
num_persons = len(centroids)

for i in range(num_persons):
    for j in range(i + 1, num_persons):
        # Distance between the two persons' estimated 3D positions. Averaging their
        # camera distances would describe how far they are from the camera,
        # not how far they are from each other.
        pair_distance = distance.euclidean(points_3d[i], points_3d[j])

        # Print the real-world distance between the two persons
        print(f"Distance between Person {i+1} and Person {j+1}: {pair_distance:.2f} meters")

        # Draw the connecting line and label it at its midpoint
        midpoint = ((centroids[i][0] + centroids[j][0]) // 2, (centroids[i][1] + centroids[j][1]) // 2)
        cv2.line(image, centroids[i], centroids[j], (255, 0, 0), 2)
        cv2.putText(image, f"{pair_distance:.2f}m", midpoint, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)




Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-9-9 Python-3.10.12 torch-2.4.0+cu121 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
