In [1]:
import os
import cv2
import numpy as np
import math
import json

In [2]:
def rotate(image, angle):
    """
    Rotates an OpenCV 2 / NumPy image about it's centre by the given angle
    (in degrees). The returned image will be large enough to hold the entire
    new image, with a black background

    Source: http://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders
    """
    # Get the image size
    # No that's not an error - NumPy stores image matricies backwards
    image_size = (image.shape[1], image.shape[0])
    image_center = tuple(np.array(image_size) / 2)

    # Convert the OpenCV 3x2 rotation matrix to 3x3
    rot_mat = np.vstack(
        [cv2.getRotationMatrix2D(image_center, angle, 1.0), [0, 0, 1]]
    )

    rot_mat_notranslate = np.matrix(rot_mat[0:2, 0:2])

    # Shorthand for below calcs
    image_w2 = image_size[0] * 0.5
    image_h2 = image_size[1] * 0.5

    # Obtain the rotated coordinates of the image corners
    rotated_coords = [
        (np.array([-image_w2,  image_h2]) * rot_mat_notranslate).A[0],
        (np.array([ image_w2,  image_h2]) * rot_mat_notranslate).A[0],
        (np.array([-image_w2, -image_h2]) * rot_mat_notranslate).A[0],
        (np.array([ image_w2, -image_h2]) * rot_mat_notranslate).A[0]
    ]

    # Find the size of the new image
    x_coords = [pt[0] for pt in rotated_coords]
    x_pos = [x for x in x_coords if x > 0]
    x_neg = [x for x in x_coords if x < 0]

    y_coords = [pt[1] for pt in rotated_coords]
    y_pos = [y for y in y_coords if y > 0]
    y_neg = [y for y in y_coords if y < 0]

    right_bound = max(x_pos)
    left_bound = min(x_neg)
    top_bound = max(y_pos)
    bot_bound = min(y_neg)

    new_w = int(abs(right_bound - left_bound))
    new_h = int(abs(top_bound - bot_bound))

    # We require a translation matrix to keep the image centred
    trans_mat = np.matrix([
        [1, 0, int(new_w * 0.5 - image_w2)],
        [0, 1, int(new_h * 0.5 - image_h2)],
        [0, 0, 1]
    ])

    # Compute the tranform for the combined rotation and translation
    affine_mat = (np.matrix(trans_mat) * np.matrix(rot_mat))[0:2, :]

    # Apply the transform
    result = cv2.warpAffine(
        image,
        affine_mat,
        (new_w, new_h),
        flags=cv2.INTER_LINEAR
    )

    return result

def crop_around_center(image, width, height):
    """
    Given a NumPy / OpenCV 2 image, crops it to the given width and height,
    around it's centre point

    Source: http://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders
    """

    image_size = (image.shape[1], image.shape[0])
    image_center = (int(image_size[0] * 0.5), int(image_size[1] * 0.5))

    if(width > image_size[0]):
        width = image_size[0]

    if(height > image_size[1]):
        height = image_size[1]

    x1 = int(image_center[0] - width * 0.5)
    x2 = int(image_center[0] + width * 0.5)
    y1 = int(image_center[1] - height * 0.5)
    y2 = int(image_center[1] + height * 0.5)

    return image[y1:y2, x1:x2]

def largest_rotated_rect(w, h, angle):
    """
    Given a rectangle of size wxh that has been rotated by 'angle' (in
    radians), computes the width and height of the largest possible
    axis-aligned rectangle within the rotated rectangle.

    Original JS code by 'Andri' and Magnus Hoff from Stack Overflow

    Converted to Python by Aaron Snoswell

    Source: http://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders
    """

    quadrant = int(math.floor(angle / (math.pi / 2))) & 3
    sign_alpha = angle if ((quadrant & 1) == 0) else math.pi - angle
    alpha = (sign_alpha % math.pi + math.pi) % math.pi

    bb_w = w * math.cos(alpha) + h * math.sin(alpha)
    bb_h = w * math.sin(alpha) + h * math.cos(alpha)

    gamma = math.atan2(bb_w, bb_w) if (w < h) else math.atan2(bb_w, bb_w)

    delta = math.pi - alpha - gamma

    length = h if (w < h) else w

    d = length * math.cos(alpha)
    a = d * math.sin(alpha) / math.sin(delta)

    y = a * math.cos(gamma)
    x = y * math.tan(gamma)

    return (
        bb_w - 2 * x,
        bb_h - 2 * y
    )

def crop_largest_rectangle(image, angle, height, width):
    """
    Crop around the center the largest possible rectangle
    found with largest_rotated_rect.
    """
    return crop_around_center(
        image,
        *largest_rotated_rect(
            width,
            height,
            math.radians(angle)
        )
    )

def generate_rotated_image(image, angle, size=None, crop_center=False,
                           crop_largest_rect=False):
    """
    Generate a valid rotated image for the RotNetDataGenerator. If the
    image is rectangular, the crop_center option should be used to make
    it square. To crop out the black borders after rotation, use the
    crop_largest_rect option. To resize the final image, use the size
    option.
    """
    height, width = image.shape[:2]
    if crop_center:
        if width < height:
            height = width
        else:
            width = height

    image = rotate(image, angle)

    if crop_largest_rect:
        image = crop_largest_rectangle(image, angle, height, width)

    if size:
        image = cv2.resize(image, size)

    return image

In [None]:
input_path = 'data/'
output_path = 'src\pytorch-template-master\data\SceneClassify\outdoor'
if not os.path.exists(output_path):
    os.mkdir(output_path)
labels = {}
for img_path in os.listdir(input_path):
    try:
        print(f"Processing in {img_path}")
        img = cv2.imread(os.path.join(input_path, img_path))
        # Create a list random rotations include 10 rotations from -45 to 45
        rotations = np.random.randint(-15, 15, 1)
        for i,angle in enumerate(rotations):
            rotated_img = generate_rotated_image(img, angle, crop_largest_rect=True)
            cv2.imwrite(f"{output_path}/{img_path[:-4]}_{angle}.jpg", rotated_img)
            labels[f"{output_path}/{img_path[:-4]}_{angle}.jpg"] = int(angle)
    except:
        continue

# with open('data/img_rotation/labels.json', 'w') as fp:
#     json.dump(labels, fp)






Processing in 0.png
Processing in 1.png
Processing in 10.png
Processing in 100.png
Processing in 1000.png
Processing in 1001.png
Processing in 1002.png
Processing in 1003.png
Processing in 1004.png
Processing in 1005.png
Processing in 1006.png
Processing in 1007.png
Processing in 1008.png
Processing in 1009.png
Processing in 101.png
Processing in 1010.png
Processing in 1011.png
Processing in 1012.png
Processing in 1013.png
Processing in 1014.png
Processing in 1015.png
Processing in 1016.png
Processing in 1017.png
Processing in 1018.png
Processing in 1019.png
Processing in 102.png
Processing in 1020.png
Processing in 1021.png
Processing in 1022.png
Processing in 1023.png
Processing in 1024.png
Processing in 1025.png
Processing in 1026.png
Processing in 1027.png
Processing in 1028.png
Processing in 1029.png
Processing in 103.png
Processing in 1030.png
Processing in 1031.png
Processing in 1032.png
Processing in 1033.png
Processing in 1034.png
Processing in 1035.png
Processing in 1036.png


In [17]:
from ultralytics import YOLO
from sklearn.cluster import KMeans
import os
import json
import cv2
import numpy as np

In [2]:
model = YOLO(r'runs\detect\train9\weights\best.pt')

In [3]:
def iou(box1, box2):
    """
    Calculate the Intersection over Union (IoU) of two bounding boxes.
    Each box is represented as a list of [x_min, y_min, x_max, y_max].
    """
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
    
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    
    union_area = box1_area + box2_area - inter_area
    
    if union_area == 0:
        return 0
    
    return inter_area / union_area

def merge_bboxes(bboxes, iou_threshold=0):
    """
    Merge bounding boxes of the same class that intersect with each other.
    
    Parameters:
    - bboxes: List of tuples/lists with each entry formatted as 
              [x_min, y_min, x_max, y_max, class_id].
    - iou_threshold: IoU threshold above which boxes will be merged.
    
    Returns:
    - merged_bboxes: List of merged bounding boxes with the same format.
    """
    merged_bboxes = []
    used_indices = set()

    bboxes = list(map(lambda x: x.xyxy[0].tolist() + x.cls.tolist(), bboxes))

    for i in range(len(bboxes)):
        if i in used_indices:
            continue
            
        box1 = bboxes[i]
        merged_box = box1[:4]
        print(merged_box)
        class_id = box1[4]
        print(class_id)
        
        for j in range(i + 1, len(bboxes)):
            box2 = bboxes[j]
            
            if box2[4] == class_id and j not in used_indices and iou(merged_box, box2) > iou_threshold:
                # Expand the merged box to include box2
                merged_box = [
                    min(merged_box[0], box2[0]),  # x_min
                    min(merged_box[1], box2[1]),  # y_min
                    max(merged_box[2], box2[2]),  # x_max
                    max(merged_box[3], box2[3])   # y_max
                ]
                used_indices.add(j)
        
        merged_bboxes.append(merged_box)
        used_indices.add(i)
    
    return merged_bboxes

In [4]:
def find_binary_img_by_kmean(img):
    # Cover img to hsv
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    img_2d = img.reshape((-1,3))
    kmeans = KMeans(n_clusters=2).fit(img_2d)
    labels = kmeans.labels_
    cluster_img = labels.reshape(img.shape[:2])

    return cluster_img

In [28]:
input_path = 'data/indoor_dataset/total'
output_path = 'data/img_rotation_crop/img'
output_plot_path = 'data/img_rotation_crop/img_plot'
labels = {}

for img_path in os.listdir(input_path):
    try:
        print(f"Processing in {img_path}")
        img = cv2.imread(os.path.join(input_path, img_path))
        # Create a list random rotations include 10 rotations from -45 to 45
        rotations = np.random.randint(-45, 46, 10)
        for i,angle in enumerate(rotations):
            rotated_img = generate_rotated_image(img, angle, crop_largest_rect=True)
            result = model(rotated_img, conf = 0.5)
            result_plot = result[0].plot()
            bboxes = result[0].boxes
            bboxes = merge_bboxes(bboxes)
            for j,box in enumerate(bboxes):
                x1, y1, x2, y2 = box
                crop = img[int(y1):int(y2), int(x1):int(x2)]
                crop_binary = find_binary_img_by_kmean(crop)
                zero_count = cv2.countNonZero(crop_binary)
                one_count = cv2.countNonZero(1-crop_binary)
                rate = min(zero_count/(one_count + 0.0001), one_count/(zero_count + 0.0001))
                if rate > 0.25:
                    cv2.imwrite(f"{output_path}/{img_path[:-4]}_{i}_crop_{j}_{angle}.jpg", crop)
                    labels[f"{output_path}/{img_path[:-4]}_{i}_crop_{j}_{angle}.jpg"] = int(angle)
            # cv2.imwrite(f"{output_plot_path}/{img_path[:-4]}_{i}.jpg", result_plot)
    except Exception as e:
        print(e)
        continue

with open('data/img_rotation_crop/labels.json', 'w') as fp:
    json.dump(labels, fp)

Processing in 0001cb734adac2ee.jpg

0: 576x640 (no detections), 347.9ms
Speed: 6.0ms preprocess, 347.9ms inference, 0.0ms postprocess per image at shape (1, 3, 576, 640)

0: 480x640 (no detections), 17.0ms
Speed: 2.0ms preprocess, 17.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 512x640 3 cabinetDoors, 19.0ms
Speed: 1.9ms preprocess, 19.0ms inference, 1.0ms postprocess per image at shape (1, 3, 512, 640)
[1.0505714416503906, 130.21209716796875, 221.92176818847656, 375.23760986328125]
1.0

0: 512x640 (no detections), 18.0ms
Speed: 1.0ms preprocess, 18.0ms inference, 1.0ms postprocess per image at shape (1, 3, 512, 640)

0: 544x640 (no detections), 20.0ms
Speed: 2.6ms preprocess, 20.0ms inference, 0.0ms postprocess per image at shape (1, 3, 544, 640)

0: 608x640 2 cabinetDoors, 22.4ms
Speed: 3.0ms preprocess, 22.4ms inference, 1.0ms postprocess per image at shape (1, 3, 608, 640)
[0.1894029676914215, 61.111839294433594, 190.3397674560547, 296.80670166015625]
1.

  return fit_method(estimator, *args, **kwargs)



0: 640x640 6 cabinetDoors, 24.9ms
Speed: 3.0ms preprocess, 24.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
[163.84024047851562, 169.24107360839844, 372.0353088378906, 326.2892761230469]
1.0
[584.77978515625, 168.4383544921875, 702.1256713867188, 265.5672607421875]
1.0

0: 640x640 6 cabinetDoors, 25.8ms
Speed: 2.0ms preprocess, 25.8ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
[738.6383666992188, 285.929443359375, 847.785400390625, 358.9586486816406]
1.0
[328.1829833984375, 431.5469665527344, 518.1707153320312, 522.5947875976562]
1.0
[723.3473510742188, 415.82464599609375, 831.6346435546875, 484.7095947265625]
1.0
[335.6756896972656, 255.4014434814453, 530.4920654296875, 350.5064697265625]
1.0
Processing in 03dad3345ad61f8a.jpg

0: 640x512 (no detections), 21.0ms
Speed: 1.0ms preprocess, 21.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 (no detections), 20.0ms
Speed: 1.0ms preprocess, 20.0ms inference, 0.0m

  return fit_method(estimator, *args, **kwargs)


[267.80242919921875, 13.098376274108887, 525.1218872070312, 495.0]
2.0
[489.3845520019531, 0.0, 619.908447265625, 147.57151794433594]
1.0
[521.9255981445312, 337.5914306640625, 670.8115844726562, 495.0]
1.0
[91.2070083618164, 0.0, 254.55859375, 57.127647399902344]
1.0

0: 416x640 3 cabinetDoors, 2 refrigeratorDoors, 24.0ms
Speed: 1.0ms preprocess, 24.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)
[0.18350324034690857, 36.03919219970703, 281.355224609375, 419.0]
2.0
[374.9486389160156, 0.0, 508.2826843261719, 96.45140838623047]
1.0
[456.5860595703125, 254.66024780273438, 621.444580078125, 419.0]
1.0
[0.03485994413495064, 0.0, 125.80005645751953, 100.9440689086914]
1.0

0: 384x640 4 cabinetDoors, 3 refrigeratorDoors, 15.0ms
Speed: 1.0ms preprocess, 15.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
[370.61199951171875, 67.10679626464844, 565.3726196289062, 580.7303466796875]
2.0
[599.7346801757812, 0.2945709228515625, 728.9512939453125, 210.3075

  return fit_method(estimator, *args, **kwargs)


Speed: 2.0ms preprocess, 19.0ms inference, 1.0ms postprocess per image at shape (1, 3, 544, 640)
[0.0, 89.23967742919922, 228.5980987548828, 403.0]
2.0
[0.7011328339576721, 0.0, 59.05303955078125, 54.99386215209961]
1.0

0: 544x640 2 refrigeratorDoors, 19.0ms
Speed: 2.0ms preprocess, 19.0ms inference, 1.0ms postprocess per image at shape (1, 3, 544, 640)
[0.0, 96.07511138916016, 224.27978515625, 405.0]
2.0
Processing in 06b7db73a7e952fc.jpg

0: 640x544 1 door, 10 cabinetDoors, 1 refrigeratorDoor, 20.8ms
Speed: 2.0ms preprocess, 20.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 544)
[251.3754119873047, 354.1404113769531, 293.5873107910156, 401.93756103515625]
1.0
[404.66070556640625, 386.0874938964844, 536.8753051757812, 604.2950439453125]
1.0
[101.60942077636719, 0.0, 170.76141357421875, 77.14350128173828]
1.0
[0.4632602334022522, 178.2429962158203, 115.01911926269531, 339.4347839355469]
2.0
[140.662353515625, 79.2989501953125, 250.5668182373047, 261.697021484375]
0.0


  return fit_method(estimator, *args, **kwargs)


0: 640x512 1 door, 8 cabinetDoors, 1 refrigeratorDoor, 20.0ms
Speed: 2.0ms preprocess, 20.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 512)
[11.869723320007324, 175.35719299316406, 117.53363037109375, 343.6972351074219]
2.0
[279.66156005859375, 366.5254821777344, 369.3495178222656, 492.5013427734375]
1.0
[404.4888000488281, 413.940673828125, 545.9303588867188, 637.497802734375]
1.0
[320.3981628417969, 0.0, 519.243896484375, 219.24124145507812]
1.0
[127.2323226928711, 0.0, 196.33413696289062, 86.9498062133789]
1.0
[166.5806884765625, 92.64070892333984, 262.5121154785156, 276.8515319824219]
0.0

0: 640x512 1 door, 9 cabinetDoors, 1 refrigeratorDoor, 19.2ms
Speed: 1.4ms preprocess, 19.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 512)
[300.7490539550781, 532.868408203125, 427.0036315917969, 752.2621459960938]
1.0
[146.02835083007812, 106.63563537597656, 212.48443603515625, 280.6876525878906]
2.0
[389.9693298339844, 0.1318253129720688, 469.1795959472656

  return fit_method(estimator, *args, **kwargs)


Speed: 2.0ms preprocess, 17.4ms inference, 0.0ms postprocess per image at shape (1, 3, 512, 640)

0: 448x640 (no detections), 18.0ms
Speed: 2.0ms preprocess, 18.0ms inference, 0.0ms postprocess per image at shape (1, 3, 448, 640)

0: 512x640 (no detections), 17.3ms
Speed: 1.0ms preprocess, 17.3ms inference, 0.0ms postprocess per image at shape (1, 3, 512, 640)

0: 480x640 (no detections), 17.0ms
Speed: 1.0ms preprocess, 17.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)
Processing in 0c66ca8a2c7536b0.jpg

0: 512x640 11 cabinetDoors, 2 refrigeratorDoors, 1 window, 1 table, 18.3ms
Speed: 1.0ms preprocess, 18.3ms inference, 1.0ms postprocess per image at shape (1, 3, 512, 640)
[475.3374328613281, 300.30902099609375, 646.3302001953125, 552.842041015625]
2.0
[205.36961364746094, 93.46329498291016, 321.3085632324219, 263.2148132324219]
1.0
[456.2301940917969, 171.78268432617188, 534.5697021484375, 253.29786682128906]
1.0
[29.50813865661621, 57.07183837890625, 139.4452972

  return fit_method(estimator, *args, **kwargs)


Speed: 2.0ms preprocess, 24.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 608)
[228.1811981201172, 280.86895751953125, 349.9064025878906, 419.2701110839844]
3.0
[155.745361328125, 170.4951171875, 247.3097686767578, 264.95745849609375]
3.0
[344.4989318847656, 453.20233154296875, 423.8428649902344, 530.0652465820312]
3.0
[263.6961975097656, 506.2576599121094, 313.39385986328125, 571.2366943359375]
3.0
[2.9424734115600586, 279.9597473144531, 114.38885498046875, 430.86798095703125]
3.0
[47.831050872802734, 167.3802490234375, 110.62623596191406, 244.6379852294922]
3.0
[369.5675048828125, 32.32945251464844, 467.4495849609375, 131.33555603027344]
3.0
[483.772216796875, 201.29116821289062, 523.0, 244.40618896484375]
3.0
[111.92607879638672, 65.16072082519531, 144.0645294189453, 101.07785034179688]
3.0

0: 640x512 21 windows, 1 openedDoor, 22.1ms
Speed: 3.0ms preprocess, 22.1ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 512)
[201.97393798828125, 362.3776245117

  return fit_method(estimator, *args, **kwargs)


[179.40927124023438, 10.30904483795166, 263.3258361816406, 68.50801086425781]
1.0
[398.6921691894531, 135.64572143554688, 513.86962890625, 219.56488037109375]
1.0

0: 256x640 11 cabinetDoors, 1 cabinet, 24.0ms
Speed: 1.0ms preprocess, 24.0ms inference, 3.5ms postprocess per image at shape (1, 3, 256, 640)
[182.61216735839844, 86.64471435546875, 267.53619384765625, 143.3247528076172]
1.0
[0.0, 0.0, 141.91842651367188, 74.54782104492188]
1.0
[309.8204345703125, 16.788997650146484, 404.5432434082031, 69.6677474975586]
1.0
[0.286440372467041, 0.0, 193.0062255859375, 219.2959747314453]
6.0
[422.03802490234375, 7.957542896270752, 551.8076782226562, 80.03872680664062]
1.0

0: 384x640 7 cabinetDoors, 21.5ms
Speed: 1.0ms preprocess, 21.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
[146.28173828125, 7.975407123565674, 225.614013671875, 71.73737335205078]
1.0
[264.3229675292969, 84.88142395019531, 355.9317321777344, 153.1179962158203]
1.0
[347.2892761230469, 158.61961364746

  return fit_method(estimator, *args, **kwargs)


0: 480x640 2 cabinetDoors, 2 refrigeratorDoors, 26.2ms
Speed: 1.0ms preprocess, 26.2ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)
[448.13092041015625, 16.31731605529785, 639.7941284179688, 259.2101135253906]
2.0
[6.50516414642334, 187.63589477539062, 186.42892456054688, 405.4964904785156]
1.0

0: 448x640 3 cabinetDoors, 1 refrigeratorDoor, 24.0ms
Speed: 2.0ms preprocess, 24.0ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)
[0.0, 5.525511264801025, 108.02075958251953, 201.0262451171875]
1.0
[449.7186584472656, 142.34735107421875, 695.509765625, 497.5497131347656]
2.0
[140.5323944091797, 327.54425048828125, 309.72076416015625, 503.0]
1.0

0: 640x640 1 refrigeratorDoor, 37.1ms
Speed: 2.0ms preprocess, 37.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
[0.0, 12.820995330810547, 151.08363342285156, 349.3572998046875]
2.0

0: 608x640 1 refrigeratorDoor, 34.9ms
Speed: 2.1ms preprocess, 34.9ms inference, 1.0ms postprocess per image 

  return fit_method(estimator, *args, **kwargs)



0: 640x576 7 cabinetDoors, 20.8ms
Speed: 3.0ms preprocess, 20.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 576)
[1.4620015621185303, 0.09234581142663956, 455.76031494140625, 295.7757263183594]
1.0

0: 640x576 8 cabinetDoors, 20.0ms
Speed: 2.0ms preprocess, 20.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 576)
[1.337900996208191, 0.14429901540279388, 462.6405944824219, 296.025146484375]
1.0

0: 640x608 7 cabinetDoors, 1 cabinet, 22.1ms
Speed: 2.0ms preprocess, 22.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 608)
[222.76600646972656, 8.496885299682617, 522.422119140625, 331.6036376953125]
1.0
[59.42818069458008, 0.0, 527.0, 478.4656677246094]
6.0

0: 640x512 6 cabinetDoors, 1 cabinet, 18.6ms
Speed: 1.0ms preprocess, 18.6ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 512)
[2.7281346321105957, 172.29248046875, 133.5909881591797, 338.2236328125]
1.0
[150.8354949951172, 680.2999267578125, 584.9681396484375, 822.61859130