In [1]:
from ultralytics import YOLO
import numpy as np
import cv2
from sklearn.cluster import KMeans

# Calculate the average color of an image region
def get_average_color(image):
    """Return the per-channel mean of ``image`` as an integer array.

    The mean is taken over the first two axes (rows and columns), leaving
    one value per remaining channel. The integer cast drops the fractional
    part instead of rounding.
    """
    channel_means = np.mean(image, axis=(0, 1))
    return channel_means.astype(int)

def get_color_name(rgb_color):
    """Map a 3-channel color to a coarse color name.

    Each entry of the table is ``(name, lower bound, upper bound)`` with
    inclusive per-channel limits. The ranges overlap, so entries are tried
    in the listed order and the first one containing all three channels
    wins. ``"Unknown"`` is returned when no entry matches.
    """
    named_ranges = (
        ('Red', [100, 0, 0], [255, 150, 100]),
        ('Green', [0, 150, 0], [150, 255, 255]),
        ('Black', [0, 0, 0], [150, 150, 150]),
        ('Blue', [0, 0, 200], [50, 50, 255]),
        ('Yellow', [150, 50, 0], [255, 255, 100]),
        ('White', [150, 150, 200], [255, 255, 255]),
        ('Gray', [100, 100, 100], [200, 200, 200]),
    )

    for name, low, high in named_ranges:
        for channel in range(3):
            if not (low[channel] <= rgb_color[channel] <= high[channel]):
                # One channel out of range rules this name out.
                break
        else:
            # Loop finished without a break: every channel is in range.
            return name
    return "Unknown"

# Draw borders and color labels
def box_label_with_color(image, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255), dominant_color_name=""):
    """Draw a bounding box and an optional text tag on ``image`` in place.

    Args:
        image: Image array passed to the OpenCV drawing calls; its shape
            also determines the line width and font scale.
        box: Indexable whose first four items are x1, y1, x2, y2.
        label: Tag text. When empty, only the box outline is drawn.
        color: Color of the outline and of the filled tag background.
        txt_color: Color of the tag text.
        dominant_color_name: Color name appended to the tag as
            ``"<label> | <name>"``. When empty, the tag is just ``label``
            (no dangling separator).
    """
    # Line width scales with the image size, never thinner than 2 px.
    lw = max(round(sum(image.shape) / 2 * 0.003), 2)
    top_left = (int(box[0]), int(box[1]))
    bottom_right = (int(box[2]), int(box[3]))
    cv2.rectangle(image, top_left, bottom_right, color, thickness=lw, lineType=cv2.LINE_AA)

    if not label:
        return

    if dominant_color_name:
        label = f'{label} | {dominant_color_name}'  # Add color tags

    tf = max(lw - 1, 1)  # Font thickness
    w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0]
    # Put the tag above the box when there is room, otherwise inside it.
    outside = top_left[1] - h >= 3
    if outside:
        tag_corner = (top_left[0] + w, top_left[1] - h - 3)
        text_origin = (top_left[0], top_left[1] - 2)
    else:
        tag_corner = (top_left[0] + w, top_left[1] + h + 3)
        text_origin = (top_left[0], top_left[1] + h + 2)

    cv2.rectangle(image, top_left, tag_corner, color, -1, cv2.LINE_AA)  # Fill Rectangle
    cv2.putText(image, label, text_origin,
                0, lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)

# Video processing and color detection main loop
def process_video_with_color_detection(video_path, output_video_path):
    """Annotate vehicles in a video with their type and a coarse color name.

    Every frame of ``video_path`` is run through a YOLOv8n model. For each
    detected motorcycle, car, or truck, the mean color of the central part
    of its bounding box is classified with ``get_color_name`` and drawn on
    the frame with ``box_label_with_color``. All frames (annotated or not)
    are written to ``output_video_path`` using the ``mp4v`` codec.

    Args:
        video_path: Path of the input video.
        output_video_path: Path of the annotated video to write.
    """
    # Detector class name -> label drawn on the frame. Looking classes up by
    # name avoids depending on hard-coded numeric class ids.
    vehicle_labels = {"motorcycle": "Motorcycle", "car": "Car", "truck": "Truck"}

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        model = YOLO("yolov8n.pt")

        # Get the video frame width and height
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the frame rate as reported: truncating e.g. 29.97 to 29 would
        # change the playback speed of the output.
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Initialize the video writer
        out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            # Vehicle detection
            results = model.predict(frame)
            class_names = results[0].names
            for box in results[0].boxes.data:
                # The last column of each detection row is the class id.
                vehicle_type = vehicle_labels.get(class_names[int(box[-1])])
                if vehicle_type is None:
                    continue

                x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])

                # Select the middle 50% of the vehicle bounding box
                vehicle_roi_center = frame[y1 + (y2 - y1) // 4 : y1 + 3 * (y2 - y1) // 4, x1 + (x2 - x1) // 4 : x1 + 3 * (x2 - x1) // 4]

                if vehicle_roi_center.size == 0:
                    # Boxes narrower or shorter than 2 px leave nothing to
                    # sample; use the same fallback name as get_color_name.
                    dominant_color_name = "Unknown"
                else:
                    vehicle_roi_center = cv2.GaussianBlur(vehicle_roi_center, (5, 5), 0)  # Reduce noise
                    vehicle_roi_center = cv2.convertScaleAbs(vehicle_roi_center, alpha=1.2, beta=0)  # Enhance contrast
                    # OpenCV frames are in BGR channel order, while
                    # get_color_name expects RGB, so reverse the channels.
                    dominant_color = get_average_color(vehicle_roi_center)[::-1]
                    dominant_color_name = get_color_name(dominant_color)

                # Draw borders and color labels on images
                box_label_with_color(frame, box, label=vehicle_type, dominant_color_name=dominant_color_name)

            # Write the current frame to the output video
            out.write(frame)
    finally:
        # Release the capture and writer even if detection or drawing fails.
        cap.release()
        if out is not None:
            out.release()

# Annotate the sample traffic video and write the result next to the notebook.
process_video_with_color_detection(
    video_path="../data/I94-US20-35.1.mp4",
    output_video_path="output_vehicle_video.mp4",
)



0: 384x640 2 cars, 1 bench, 60.3ms
Speed: 6.2ms preprocess, 60.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 42.6ms
Speed: 0.0ms preprocess, 42.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 1 bench, 35.6ms
Speed: 1.1ms preprocess, 35.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 54.6ms
Speed: 0.0ms preprocess, 54.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 40.7ms
Speed: 2.5ms preprocess, 40.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 41.4ms
Speed: 0.0ms preprocess, 41.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 46.7ms
Speed: 1.5ms preprocess, 46.7ms inference, 6.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 60.3ms
Speed: 0.0ms preprocess, 60.3ms inference, 0.0ms postprocess per imag