In [31]:
from PIL import Image
from collections import Counter
from typing import Tuple, List
from matplotlib import cm


class DominantColor:

    resize_value: int = 16
    minimum_percent_difference_of_rgb: int = 10

    # def __init__(self, image_path: str) -> None:
    #     self.image_path = image_path
    #     self.image = Image.open(self.image_path)
    #     self.dominant_color: str = ""
    #     self.r: int = 0
    #     self.g: int = 0
    #     self.b: int = 0
    #     self.l: int = 0
    #     self.resized_image = self.image.resize(
    #         (DominantColor.resize_value, DominantColor.resize_value), Image.ANTIALIAS
    #     ).convert("RGBA")
    #     self.image.close()
    #     self.image_data = self.resized_image.getdata()
    #     self.generate_dominant_color_of_pixels_of_image_array()
    #     self.resized_image.close()
    #     self.counter = Counter(self.dominant_color_of_pixels_of_image_array)
    #     self.set_rgbl_value_of_image()
    #     self.set_dominat_color_of_image()
    #     self.rgb = (self.r, self.g, self.b)
    #     self.rgbl = (self.r, self.g, self.b, self.l)

    def __init__(self, image_arr) -> None:
        self.image = Image.fromarray(image_arr.astype('uint8'), 'RGB')
        self.dominant_color: str = ""
        self.r: int = 0
        self.g: int = 0
        self.b: int = 0
        self.l: int = 0
        self.resized_image = self.image.resize(
            (DominantColor.resize_value, DominantColor.resize_value), Image.Resampling.LANCZOS
        ).convert("RGBA")
        self.image.close()
        self.image_data = self.resized_image.getdata()
        self.generate_dominant_color_of_pixels_of_image_array()
        self.resized_image.close()
        self.counter = Counter(self.dominant_color_of_pixels_of_image_array)
        self.set_rgbl_value_of_image()
        self.set_dominat_color_of_image()
        self.rgb = (self.r, self.g, self.b)
        self.rgbl = (self.r, self.g, self.b, self.l)

    def __repr__(self) -> str:
        return (
            "DominantColor(r:%s g:%s b:%s l:%s; dominant_color:%s; resize_value:%s; minimum_percent_difference_of_rgb:%s)"
            % (
                self.r,
                self.g,
                self.b,
                self.l,
                self.dominant_color,
                str(self.resize_value),
                str(self.minimum_percent_difference_of_rgb),
            )
        )

    def __str__(self) -> str:
        return self.dominant_color

    def set_dominat_color_of_image(self) -> None:
        self.mpd = int(
            self.total_pixels * (DominantColor.minimum_percent_difference_of_rgb / 100)
        )

        if (
            max(
                set(self.dominant_color_of_pixels_of_image_array),
                key=self.dominant_color_of_pixels_of_image_array.count,
            )
            == "l"
        ):
            self.dominant_color = "l"
            return

        if (self.r - self.mpd) > self.g and (self.r - self.mpd) > self.b:
            self.dominant_color = "r"
            return
        if (self.g - self.mpd) > self.b and (self.g - self.mpd) > self.r:
            self.dominant_color = "g"
            return
        if (self.b - self.mpd) > self.r and (self.b - self.mpd) > self.g:
            self.dominant_color = "b"
            return
        self.dominant_color = "n"

    def set_rgbl_value_of_image(self) -> None:
        """
        Sets the value for attribute r,g,b and l.

        Note that these attributes indicates the number of
        pixels which have dominating r,g,b and l values repectively.

        The sum of r,g,b and l should be equal to total_pixels attribute.
        """
        self.r = self.counter.get("r", 0)
        self.g = self.counter.get("g", 0)
        self.b = self.counter.get("b", 0)
        self.l = self.counter.get("l", 0)

    def generate_dominant_color_of_pixels_of_image_array(self) -> None:

        self.total_pixels: int = 0
        self.dominant_color_of_pixels_of_image_array: List = []

        for i in range(DominantColor.resize_value):

            for j in range(DominantColor.resize_value):

                self.dominant_color_of_pixels_of_image_array.append(
                    self.dominant_color_of_pixel(self.image_data.getpixel((i, j)))
                )

                self.total_pixels += 1

    def dominant_color_of_pixel(self, pixel: Tuple[int, int, int, int]) -> str:

        r, g, b = pixel[0], pixel[1], pixel[2]

        if r > g and r > b:
            return "r"

        if g > b and g > r:
            return "g"

        if b > r and b > g:
            return "b"

        return "l"

In [51]:
import cv2
import webcolors
import numpy as np
import supervision as sv
from ultralytics import YOLO

detect_model = YOLO("yolo11s.pt")
clothes_model = YOLO("best.pt")

tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator()

def closest_color(requested_color):
    min_distance = float('inf')
    closest_name = None

    # Iterate through all known colors in webcolors
    for hex_code, color_name in webcolors._definitions._CSS3_HEX_TO_NAMES.items():
        r, g, b = webcolors.hex_to_rgb(hex_code)
        
        # Calculate Euclidean distance
        distance = ((r - requested_color[0]) ** 2 +
                    (g - requested_color[1]) ** 2 +
                    (b - requested_color[2]) ** 2) ** 0.5

        # Update the closest name if this color is closer
        if distance < min_distance:
            min_distance = distance
            closest_name = color_name

    return closest_name

def classify_color(cropped_frame):
    # Calculate the dominant color
    dominant_color = DominantColor(cropped_frame)

    print(dominant_color.rgb)
    
    # Find the closest named color
    closest_name = closest_color(dominant_color.rgb)
    
    return closest_name

# Example usage:
# res_color = classify_color(cropped_frame)
# print("Closest named color:", res_color)
    

def mid_split_top_bottom(cropped_frame: np.ndarray):
    # Get the height of the frame
    height = cropped_frame.shape[0]
    
    # Calculate the midpoint
    midpoint = height // 2
    
    # Split the frame into top and bottom halves
    top_cropped_frame = cropped_frame[:midpoint, :]
    bottom_cropped_frame = cropped_frame[midpoint:, :]

    return f"{classify_color(top_cropped_frame)} unknown top, and {classify_color(bottom_cropped_frame)} unknown bottom."
    

def detect_clothes(cropped_frame: np.ndarray) -> np.ndarray:
    res = clothes_model(cropped_frame)[0]
    detections = sv.Detections.from_ultralytics(res)
    if len(detections.data['class_name']) >= 1:
        clothes_str = ""
        for xyxy, mask, confidence, class_id, tracker_id, data in detections:
            print(confidence)
            # Convert mask to uint8 if it is boolean
            mask_uint8 = mask.astype(np.uint8) * 255

            # Find contours of the solid mask annotation
            contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            
            if contours:
                # Get the bounding box of the largest contour
                x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
                
                # Center of the bounding box
                center_x, center_y = x + w // 2, y + h // 2

                # Reduce bounding box dimensions by half
                reduced_side_length = max(w, h) // 2
                half_reduced_side = reduced_side_length // 2

                # Calculate the smaller square crop coordinates
                square_x1 = max(center_x - half_reduced_side, 0)
                square_y1 = max(center_y - half_reduced_side, 0)
                square_x2 = min(center_x + half_reduced_side, cropped_frame.shape[1])
                square_y2 = min(center_y + half_reduced_side, cropped_frame.shape[0])

                # Crop the image to this smaller square region
                cropped_img = cropped_frame[square_y1:square_y2, square_x1:square_x2]

                with sv.ImageSink(target_dir_path="resu2lt") as sink:
                    sink.save_image(image=cropped_img)

                
            clothes_str += f"{classify_color(cropped_img)} {data['class_name']}, "

            
        return clothes_str
    else:
        return mid_split_top_bottom(cropped_frame)

def detect_and_track_people(frame: np.ndarray) -> np.ndarray:
    detect_res = detect_model(frame)[0]
    detections = sv.Detections.from_ultralytics(detect_res)
    detections = detections[detections.class_id == 0]
    detections = detections[detections.confidence > 0.5]
    detections = tracker.update_with_detections(detections)

    clothes = {}
    #Crop the detected images and save
    for xyxy, mask, confidence, class_id, tracker_id, data in detections:
        #Segment each of the cropped image
        cropped_img = sv.crop_image(image=frame, xyxy=xyxy)
        
        clothes[tracker_id] = detect_clothes(cropped_img)


    labels = [
        f"{detect_res.names[class_id]} #{tracker_id} {clothes[tracker_id]}"
        for class_id, tracker_id
        in zip(detections.class_id, detections.tracker_id)
    ]

    print(labels, detections.xyxy)

    annotated_frame = box_annotator.annotate(
        frame.copy(), detections=detections)
    
    return label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels)

def pre_process_frame(frame: np.ndarray):
    #Make it brighter
    frame = cv2.convertScaleAbs(frame, alpha=1.2, beta=0)
    return frame

image = cv2.imread("test333.png")
image = pre_process_frame(image)
result = detect_and_track_people(image)

with sv.ImageSink(target_dir_path="result") as sink:
    # result = pre_process_frame(result)
    sink.save_image(image=result)

# video_info = sv.VideoInfo.from_video_path("test_3.mp4")
# frames_generator = sv.get_video_frames_generator("test_3.mp4")

# with sv.VideoSink(target_path="result_3.mp4", video_info=video_info) as sink:
#     for i, frame in enumerate(frames_generator):
#         if i % 5 == 0:
#             # frame = pre_process_frame(frame)
#             frame = detect_and_track_people(frame)
#             sink.write_frame(frame=frame)
            
    



0: 640x384 4 persons, 52.1ms
Speed: 2.2ms preprocess, 52.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x192 1 long sleeve outwear, 1 long sleeve top, 1 trousers, 19.8ms
Speed: 0.7ms preprocess, 19.8ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 192)
0.45435002
(0, 1, 252)
0.4506395
(0, 1, 255)
0.35052428
(0, 1, 252)

0: 640x224 1 long sleeve top, 1 trousers, 20.8ms
Speed: 0.9ms preprocess, 20.8ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 224)
0.7872292
(205, 27, 18)
0.7156097
(0, 0, 256)

0: 640x256 1 long sleeve top, 1 short sleeve top, 1 trousers, 21.7ms
Speed: 5.2ms preprocess, 21.7ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 256)
0.8220574
(0, 137, 102)
0.40657592
(0, 22, 226)
0.38579988
(0, 22, 226)

0: 640x96 (no detections), 14.1ms
Speed: 2.1ms preprocess, 14.1ms inference, 0.1ms postprocess per image at shape (1, 3, 640, 96)
(23, 81, 139)
(58, 151, 34)
['person #512 blue long sleeve top, blue tro