In [8]:
from ultralytics import YOLO
import numpy as np
import cv2
from sklearn.cluster import KMeans
import webcolors

# Calculate the average color of an image region
def get_average_color(image):
    """Return the per-channel mean of an image region as integers.

    The mean is taken over the first two axes of ``image`` (axes 0 and 1),
    leaving one value per remaining channel. The result is cast with
    ``astype(int)``, which truncates any fractional part.
    """
    channel_means = np.mean(image, axis=(0, 1))
    return channel_means.astype(int)

# Find the closest HTML color name using webcolors
def get_color_name(rgb_color):
    """Return a color name for an RGB triplet.

    The triplet is first looked up with ``webcolors.rgb_to_name``. If that
    lookup raises ``ValueError``, the name chosen by
    ``get_closest_color_name`` is returned instead.
    """
    triplet = tuple(rgb_color)
    try:
        exact_name = webcolors.rgb_to_name(triplet)
    except ValueError:
        # No direct match: fall back to the nearest named color.
        return get_closest_color_name(triplet)
    else:
        return exact_name

def get_closest_color_name(rgb_color):
    """Return the CSS3 color name nearest to ``rgb_color``.

    Distance is the sum of squared per-channel differences between
    ``rgb_color`` and each name's RGB value from ``webcolors.name_to_rgb``.
    When several names share the smallest distance, the one that appears
    last in ``webcolors.names("css3")`` is returned.
    """
    target_r, target_g, target_b = rgb_color[0], rgb_color[1], rgb_color[2]

    def squared_distance(name):
        r_c, g_c, b_c = webcolors.name_to_rgb(name)
        return (r_c - target_r) ** 2 + (g_c - target_g) ** 2 + (b_c - target_b) ** 2

    candidates = list(webcolors.names("css3"))
    # min() keeps the first minimal item it sees, so scanning in reverse makes
    # the last-listed name win on ties.
    return min(reversed(candidates), key=squared_distance)

# Draw borders and color labels
def box_label_with_color(image, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255), dominant_color_name=""):
    """Draw a bounding box on ``image`` and, if ``label`` is set, a caption.

    Args:
        image: Array drawn on in place via OpenCV.
        box: Indexable whose first four items are x1, y1, x2, y2.
        label: Caption text; when empty only the box outline is drawn.
        color: Color of the outline and of the caption background.
        txt_color: Color of the caption text.
        dominant_color_name: Appended to the label as ``' | <name>'``.
    """
    # Line width is derived from the image dimensions, with a minimum of 2.
    line_width = max(round(sum(image.shape) / 2 * 0.003), 2)
    top_left = (int(box[0]), int(box[1]))
    bottom_right = (int(box[2]), int(box[3]))
    cv2.rectangle(image, top_left, bottom_right, color, thickness=line_width, lineType=cv2.LINE_AA)

    if not label:
        return

    caption = label + f' | {dominant_color_name}'
    font_thickness = max(line_width - 1, 1)
    font_scale = line_width / 3
    text_w, text_h = cv2.getTextSize(caption, 0, fontScale=font_scale, thickness=font_thickness)[0]

    # Put the caption above the box when there is room, otherwise inside it.
    fits_above = top_left[1] - text_h >= 3
    if fits_above:
        background_corner = (top_left[0] + text_w, top_left[1] - text_h - 3)
        text_origin = (top_left[0], top_left[1] - 2)
    else:
        background_corner = (top_left[0] + text_w, top_left[1] + text_h + 3)
        text_origin = (top_left[0], top_left[1] + text_h + 2)

    # Filled rectangle behind the caption text.
    cv2.rectangle(image, top_left, background_corner, color, -1, cv2.LINE_AA)
    cv2.putText(image, caption, text_origin,
                0, font_scale, txt_color, thickness=font_thickness, lineType=cv2.LINE_AA)

# Video processing and color detection main loop
def process_video_with_color_detection(video_path, output_video_path):
    """Detect vehicles in a video, label each with its color, and save the result.

    Every frame is run through a YOLOv8 model. For each detected car,
    motorcycle, or truck, the mean color of the central half of its bounding
    box is mapped to a color name; the box and a ``"<type> | <color>"`` caption
    are drawn on the frame, which is then written to the output video. A
    per-color tally of detections (counted per frame, not per unique vehicle)
    is printed once processing finishes.

    Args:
        video_path: Path of the input video.
        output_video_path: Path of the annotated ``mp4v`` video to write.
    """
    # Ultralytics COCO class ids are zero-based: 2 = car, 3 = motorcycle,
    # 7 = truck. Detections of any other class are ignored.
    vehicle_types = {2: "Car", 3: "Motorcycle", 7: "Truck"}

    # Tally of detections per color name.
    color_counts = {}

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        model = YOLO("yolov8n.pt")

        # Mirror the source geometry and frame rate in the output. The frame
        # rate is kept as a float so rates such as 29.97 are not truncated.
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            # Vehicle detection
            results = model.predict(frame)
            for box in results[0].boxes.data:
                # The last element of each detection row is the class id.
                vehicle_type = vehicle_types.get(int(box[-1]))
                if vehicle_type is None:
                    continue

                x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])

                # Use only the middle 50% of the box in each dimension.
                vehicle_roi_center = frame[
                    y1 + (y2 - y1) // 4 : y1 + 3 * (y2 - y1) // 4,
                    x1 + (x2 - x1) // 4 : x1 + 3 * (x2 - x1) // 4,
                ]
                if vehicle_roi_center.size == 0:
                    # Degenerate box: an empty crop would make GaussianBlur
                    # fail, so this detection is skipped.
                    continue

                vehicle_roi_center = cv2.GaussianBlur(vehicle_roi_center, (5, 5), 0)  # Reduce noise
                vehicle_roi_center = cv2.convertScaleAbs(vehicle_roi_center, alpha=1.2, beta=0)  # Enhance contrast

                # OpenCV frames are BGR; reverse the channel order so the
                # name lookup receives an RGB triplet.
                dominant_color = get_average_color(vehicle_roi_center)[::-1]
                dominant_color_name = get_color_name(dominant_color)

                color_counts[dominant_color_name] = color_counts.get(dominant_color_name, 0) + 1

                # Draw borders and color labels on images
                box_label_with_color(frame, box, label=vehicle_type, dominant_color_name=dominant_color_name)

            # Write the current frame to the output video
            out.write(frame)
    finally:
        # Release the capture and the writer even if processing fails midway.
        cap.release()
        if out is not None:
            out.release()

    # Print color counts
    for color, count in color_counts.items():
        print(f"{color}: {count}")

# Run the pipeline on the sample clip and write the annotated video.
process_video_with_color_detection(
    video_path="../data/I94-US20-35.1.mp4",
    output_video_path="output_vehicle_video.mp4",
)



0: 384x640 2 cars, 1 bench, 55.7ms
Speed: 0.0ms preprocess, 55.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 48.2ms
Speed: 1.6ms preprocess, 48.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 1 bench, 32.9ms
Speed: 1.6ms preprocess, 32.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 30.2ms
Speed: 6.0ms preprocess, 30.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 36.2ms
Speed: 0.8ms preprocess, 36.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 43.4ms
Speed: 5.1ms preprocess, 43.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 42.4ms
Speed: 0.0ms preprocess, 42.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 34.2ms
Speed: 1.3ms preprocess, 34.2ms inference, 0.0ms postprocess per imag