In [2]:
from ultralytics import YOLO
from PIL import Image
import os
import supervision as sv

In [3]:
# Directory prefix for the trained weights file loaded by YOLO(...) below.
MODEL_PATH = "models/"
# Directory that is scanned for the input images (.png/.jpg/.jpeg/.bmp).
IMAGE_PATH = "validate_images"
# Version tag embedded in the weights file name: yolo11_<MODEL_VERSION>.pt
MODEL_VERSION = "v1"

### Load model and predict

In [4]:
# Build the weights path with os.path.join so it resolves correctly whether or
# not MODEL_PATH ends with a path separator.
model = YOLO(os.path.join(MODEL_PATH, f"yolo11_{MODEL_VERSION}.pt"))

# File extensions (compared case-insensitively) that count as input images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp')

# os.listdir returns entries in arbitrary order; sort the names so the image
# order (and anything numbered from it later on) is identical on every run.
image_files = [
    os.path.join(IMAGE_PATH, name)
    for name in sorted(os.listdir(IMAGE_PATH))
    if name.lower().endswith(IMAGE_EXTENSIONS)
]

In [5]:
# Class ids produced by the model: 0 = bottom, 1 = coat, 2 = top
CONF_THRESHOLD = 0.25  # detections scoring below this confidence are dropped


def _load_rgb(path):
    """Open the image at `path` and return it as an in-memory RGB image.

    Image.open is lazy and keeps the underlying file handle open until the
    pixel data is read. The context manager releases the handle right away,
    and convert("RGB") returns a fully loaded image that no longer depends on
    the file. Normalizing to RGB also keeps palette/alpha/grayscale inputs
    (e.g. some PNGs) from tripping up later steps that expect 3-channel data.
    """
    with Image.open(path) as opened:
        return opened.convert("RGB")


images = [_load_rgb(path) for path in image_files]

# predict() returns a list of results; a single image is passed per call, so
# keep the only element.
results = [model.predict(image, conf=CONF_THRESHOLD)[0] for image in images]

# To inspect one prediction: results[5].boxes.xyxy, results[5].boxes.conf, results[5].boxes.cls


0: 640x544 1 bottom, 1 coat, 72.6ms
Speed: 2.0ms preprocess, 72.6ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 544)

0: 640x544 1 coat, 3 tops, 72.7ms
Speed: 1.6ms preprocess, 72.7ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 544)

0: 640x640 1 bottom, 1 top, 81.8ms
Speed: 1.8ms preprocess, 81.8ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 top, 87.1ms
Speed: 1.8ms preprocess, 87.1ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x544 1 coat, 77.4ms
Speed: 1.8ms preprocess, 77.4ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 544)

0: 640x544 1 bottom, 2 tops, 75.1ms
Speed: 1.6ms preprocess, 75.1ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 544)

0: 640x544 1 coat, 2 tops, 75.9ms
Speed: 1.7ms preprocess, 75.9ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 544)

0: 640x640 1 coat, 3 tops, 79.5ms
Speed: 1.6ms preprocess, 79.5ms inference, 0.3ms post

In [6]:
# Convert every Ultralytics result into a supervision Detections object,
# preserving the order of `results`.
detections = list(map(sv.Detections.from_ultralytics, results))
detections

[Detections(xyxy=array([[     225.57,      443.05,      449.67,      837.37],
        [     195.16,      110.13,      529.06,       654.6]], dtype=float32), mask=None, confidence=array([    0.91143,     0.80526], dtype=float32), class_id=array([0, 1]), tracker_id=None, data={'class_name': array(['bottom', 'coat'], dtype='<U6')}, metadata={}),
 Detections(xyxy=array([[     7.4828,      500.35,      338.68,      797.41],
        [     177.92,      61.884,      520.85,      801.92],
        [     361.63,       494.1,      690.31,       796.1],
        [     362.14,      499.46,      690.35,      797.54]], dtype=float32), mask=None, confidence=array([    0.69579,     0.53981,     0.39541,     0.34049], dtype=float32), class_id=array([2, 2, 1, 2]), tracker_id=None, data={'class_name': array(['top', 'top', 'coat', 'top'], dtype='<U4')}, metadata={}),
 Detections(xyxy=array([[     174.24,      539.07,      466.71,         700],
        [     145.15,      210.52,      563.44,      630.35]], dt

In [7]:
bounding_box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

# Pair each source image with its detections and draw boxes, then labels.
# Drawing happens on a copy so the entries of `images` are left untouched.
annotated_images = []
for source_image, image_detections in zip(images, detections):
    canvas = source_image.copy()
    canvas = bounding_box_annotator.annotate(canvas, image_detections)
    canvas = label_annotator.annotate(canvas, image_detections)
    annotated_images.append(canvas)


In [8]:
# Optional preview: uncomment to display every annotated image inline.
# for img in annotated_images:
#     sv.plot_image(img, size=(5, 5))

### Output images and corresponding labels

In [10]:
labels_dir = "../color_analysis_V2/data/label"
imgs_dir = "../color_analysis_V2/data/image"

os.makedirs(labels_dir, exist_ok=True)
os.makedirs(imgs_dir, exist_ok=True)


def _yolo_label_lines(boxes, img_width, img_height):
    """Return YOLO-format label lines, keeping a single box per class.

    Args:
        boxes: iterable of (class_index, confidence, (x_min, y_min, x_max, y_max))
            tuples, with corner coordinates in pixels.
        img_width: image width in pixels.
        img_height: image height in pixels.

    Returns:
        A list of newline-terminated strings "class cx cy w h", where the box
        centre and size are normalized by the image dimensions and written
        with six decimals. When a class occurs more than once only its
        highest-confidence box is kept (ties keep the earliest one). Lines
        follow the order in which each class first appears in `boxes`.
    """
    best_per_class = {}
    for class_index, confidence, corners in boxes:
        kept = best_per_class.get(class_index)
        # Choose by confidence explicitly instead of relying on the boxes
        # already being sorted from most to least confident.
        if kept is None or confidence > kept[0]:
            best_per_class[class_index] = (confidence, corners)

    lines = []
    for class_index, (_, (x_min, y_min, x_max, y_max)) in best_per_class.items():
        # Normalize bounding box coordinates
        center_x = (x_min + x_max) / 2 / img_width
        center_y = (y_min + y_max) / 2 / img_height
        width = (x_max - x_min) / img_width
        height = (y_max - y_min) / img_height
        # YOLO format
        lines.append(f"{class_index} {center_x:.6f} {center_y:.6f} {width:.6f} {height:.6f}\n")
    return lines


for idx, (img, res) in enumerate(zip(images, results)):
    # save img
    img_file = os.path.join(imgs_dir, f"image_{idx}.jpg")
    # JPEG cannot store alpha or palette modes (e.g. RGBA/P images read from
    # PNG files), so convert anything that is not already RGB before saving.
    export_img = img if img.mode == "RGB" else img.convert("RGB")
    export_img.save(img_file)

    # The box coordinates are in pixels of the image that was predicted on.
    img_width, img_height = img.size
    boxes = [
        (int(box.cls[0]), float(box.conf[0]), box.xyxy[0].tolist())
        for box in res.boxes
    ]

    # save boundaries
    label_file = os.path.join(labels_dir, f"image_{idx}.txt")
    with open(label_file, "w") as f:
        f.writelines(_yolo_label_lines(boxes, img_width, img_height))