In [2]:
import cv2
import numpy as np
from ultralytics import YOLO

In [None]:
def resize_roi_to_fixed_size(roi, target_size=(640, 640)):
    h, w = roi.shape[:2]
    target_w, target_h = target_size

    # Calculate scaling factor
    scale = min(target_w / w, target_h / h)
    new_w = int(w * scale)
    new_h = int(h * scale)

    # Resize ROI
    resized_roi = cv2.resize(roi, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

    # Create a blank canvas with the target size
    canvas = np.zeros((target_h, target_w, 3), dtype=np.uint8)

    # Center the resized ROI on the canvas
    top = (target_h - new_h) // 2
    left = (target_w - new_w) // 2
    canvas[top:top+new_h, left:left+new_w] = resized_roi

    return canvas

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load YOLO model
model = YOLO("./best.pt")

# Helper function: Resize ROI to fixed size
def resize_roi_to_fixed_size(roi, target_size=(640, 640)):
    h, w = roi.shape[:2]
    target_w, target_h = target_size

    # Calculate scaling factor
    scale = min(target_w / w, target_h / h)
    new_w = int(w * scale)
    new_h = int(h * scale)

    # Resize ROI
    resized_roi = cv2.resize(roi, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

    # Create a blank canvas with the target size
    canvas = np.zeros((target_h, target_w, 3), dtype=np.uint8)

    # Center the resized ROI on the canvas
    top = (target_h - new_h) // 2
    left = (target_w - new_w) // 2
    canvas[top:top+new_h, left:left+new_w] = resized_roi

    return canvas

# Helper function: Map bounding box from resized ROI to original frame
def map_bbox_to_original_frame(bbox, roi_coords, original_size, target_size=(640, 640)):
    x_roi, y_roi, w_roi, h_roi = roi_coords
    orig_w, orig_h = original_size
    target_w, target_h = target_size

    scale_x = orig_w / target_w
    scale_y = orig_h / target_h

    x1, y1, x2, y2 = bbox
    x1 = int(x1 * scale_x + x_roi)
    y1 = int(y1 * scale_y + y_roi)
    x2 = int(x2 * scale_x + x_roi)
    y2 = int(y2 * scale_y + y_roi)

    return x1, y1, x2, y2

# Shape detection and real-time video processing
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Resize the frame for faster processing
    scale_percent = 50
    width = int(frame.shape[1] * scale_percent / 100)
    height = int(frame.shape[0] * scale_percent / 100)
    frame_resized = cv2.resize(frame, (width, height))

    # Convert to grayscale and apply Gaussian blur
    gray = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)
    edges = cv2.Canny(blurred, 50, 150)

    # Find contours
    contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    for i, contour in enumerate(contours):
        if hierarchy[0][i][3] != -1:  # Ignore nested contours
            continue

        area = cv2.contourArea(contour)
        if area < 500:  # Ignore small contours
            continue

        # Approximate the shape
        epsilon = 0.04 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)

        # Detect shapes and ROI
        x, y, w, h = cv2.boundingRect(approx)
        shape = None
        if len(approx) == 3:
            shape = "Triangle"
        elif len(approx) == 4:
            shape = "Rectangle"
        elif len(approx) > 4:
            (cx, cy), radius = cv2.minEnclosingCircle(contour)
            if radius > 10:
                shape = "Circle"

        if shape:
            roi = frame_resized[y:y+h, x:x+w]  # Extract the ROI
            resized_roi = resize_roi_to_fixed_size(roi, target_size=(640, 640))  # Resize ROI

            # YOLO inference
            results = model.predict(resized_roi, conf=0.5, imgsz=640)  # YOLO inference

            for result in results[0].boxes:
                conf = result.conf.item()
                cls = result.cls.item()
                label = results[0].names[int(cls)]

                if conf > 0.5:  # Adjust confidence threshold
                    # Map bbox back to original frame
                    x1, y1, x2, y2 = map_bbox_to_original_frame(
                        result.xyxy[0].tolist(), (x, y, w, h), (width, height)
                    )

                    # Draw detections on the original frame
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f"{label} ({conf:.2f})", 
                                (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("Shape and Sign Detection", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 640x640 (no detections), 823.9ms
Speed: 10.3ms preprocess, 823.9ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 793.6ms
Speed: 3.1ms preprocess, 793.6ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Yield, 902.1ms
Speed: 5.3ms preprocess, 902.1ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 835.8ms
Speed: 3.5ms preprocess, 835.8ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Speed_Limit_20, 767.9ms
Speed: 9.5ms preprocess, 767.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Speed_Limit_30, 862.6ms
Speed: 3.6ms preprocess, 862.6ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 798.2ms
Speed: 10.8ms preprocess, 798.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Speed_Limit_20, 736.1ms
Speed: 15.3ms preproc

: 