In [4]:
import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Path to the image
image_path = '/content/74530.png'
output_image_path = '/content/74530_with_boxes.png'  # Path to save the output image

# Initialize the YOLOv5 model (using a larger model 'yolov5x')
model = torch.hub.load('ultralytics/yolov5', 'yolov5x')

# Set lower confidence threshold and NMS IoU threshold
model.conf = 0.25  # Confidence threshold
model.iou = 0.45   # NMS IoU threshold

# Read the image
img = cv2.imread(image_path)

# Check if the image was loaded successfully
if img is not None:
    # Get original image dimensions
    orig_height, orig_width, _ = img.shape

    # Convert the image from BGR to RGB format
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Optionally resize the image to 640x640 for inference
    img_resized = cv2.resize(img_rgb, (640, 640))

    # Perform object detection
    results = model(img_resized)

    # Print the results for debugging
    print(results)  # Check the structure of results

    # Extract detected object names and bounding boxes
    detections = results.pandas().xyxy[0]
    if not detections.empty:
        object_names = detections['name'].unique()
        print("Detected objects:", object_names)

        # Scaling factor between resized image (640x640) and original image
        x_scale = orig_width / 640
        y_scale = orig_height / 640

        # Draw bounding boxes on the original image (RGB format)
        for i in range(len(detections)):
            x_min, y_min, x_max, y_max = detections.iloc[i][['xmin', 'ymin', 'xmax', 'ymax']]
            confidence = detections.iloc[i]['confidence']
            class_id = detections.iloc[i]['class']
            label = results.names[int(class_id)]

            # Scale bounding boxes to match original image size
            x_min = int(x_min * x_scale)
            y_min = int(y_min * y_scale)
            x_max = int(x_max * x_scale)
            y_max = int(y_max * y_scale)

            # Draw rectangle (bounding box) on the original image
            cv2.rectangle(img_rgb, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

            # Adjust the size of the label text
            font_scale = 0.5  # Adjust this to change the text size (0.5 is smaller, increase for larger)
            font_thickness = 1  # You can also adjust the thickness of the text

            # Get the size of the text
            text = f'{label} {confidence:.2f}'
            (text_width, text_height), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)

            # Draw a filled rectangle (red) as a background for the text
            cv2.rectangle(img_rgb, (x_min, y_min - text_height - 5),
                          (x_min + text_width, y_min), (0, 0, 255), -1)

            # Put label text (white color) on top of the red rectangle
            cv2.putText(img_rgb, text, (x_min, y_min - 5), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, (255, 255, 255), font_thickness)

        # Save the original image with bounding boxes (converted back to BGR)
        img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
        cv2.imwrite(output_image_path, img_bgr)
        print(f"Image saved to {output_image_path}")

        # Display the original image with bounding boxes (in RGB)
        plt.figure(figsize=(10, 10))
        plt.imshow(img_rgb)
        plt.axis('off')
        plt.show()
    else:
        print("No objects detected.")
else:
    print(f"Failed to load image: {image_path}")


Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-9-27 Python-3.10.12 torch-2.4.1+cu121 CPU

Fusing layers... 
YOLOv5x summary: 444 layers, 86705005 parameters, 0 gradients, 205.5 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):


image 1/1: 640x640 1 person, 1 tie, 2 forks, 1 knife, 1 bowl, 1 pizza, 4 chairs
Speed: 13.3ms pre-process, 4004.3ms inference, 1.4ms NMS per image at shape (1, 3, 640, 640)
Detected objects: ['knife' 'pizza' 'person' 'fork' 'tie' 'bowl' 'chair']
Image saved to /content/74530_with_boxes.png
