In [None]:
import cv2
import matplotlib.pyplot as plt
from doclayout_yolo import YOLOv10
from ultralytics import YOLO
import matplotlib.pyplot as plt


def display_image(image, figsize=(10, 10), label="Image"):
    """Show ``image`` in a new matplotlib figure with the axes hidden.

    Args:
        image: Image data handed to ``plt.imshow``.
        figsize: Size of the new figure, forwarded to ``plt.figure``.
        label: Text used as the title above the image.
    """
    plt.figure(figsize=figsize)
    plt.imshow(image)
    # Title and axis visibility are both set on the axes imshow just drew on.
    axes = plt.gca()
    axes.set_title(label)
    axes.set_axis_off()
    plt.show()




# NOTE: The checkpoint has to be loaded through ultralytics' YOLO class first.
# Without this line, loading it with doclayout_yolo's YOLOv10 class in the next
# statement raises an error; the underlying cause has not been identified.
model = YOLO("doclayout_yolo_docstructbench_imgsz1024.pt")

# Load the pre-trained model using doclayout_yolo's YOLOv10 class. This rebinds
# `model`; the YOLO instance created above exists only for the workaround.
model = YOLOv10("doclayout_yolo_docstructbench_imgsz1024.pt")


# Load the original image from file
image_file_location = "C:/Users/53344/Downloads/Screenshot 2025-02-04 165449.png"
original_image = cv2.imread(filename=image_file_location)
# cv2.imread signals a missing or undecodable file by returning None rather
# than raising, so fail here with a clear message instead of letting cvtColor
# raise an opaque OpenCV assertion on the next line.
if original_image is None:
    raise FileNotFoundError(f"Could not read image file: {image_file_location}")
original_image_rgb = cv2.cvtColor(src=original_image, code=cv2.COLOR_BGR2RGB)

# Perform prediction
det_res = model.predict(
    image_file_location,  # Image to predict
    # imgsz=1024,        # Prediction image size
    # conf=0.2,          # Confidence threshold
    device="cpu",  # Device to use (e.g., 'cuda:0' or 'cpu')
)


# plot() hands back the annotated frame in OpenCV's BGR channel order (the
# doclayout_yolo README writes it with cv2.imwrite), whereas matplotlib
# interprets arrays as RGB. Convert only for display; `annotated_frame` itself
# is left as returned.
annotated_frame = det_res[0].plot(pil=True, line_width=2, font_size=10)
display_image(cv2.cvtColor(src=annotated_frame, code=cv2.COLOR_BGR2RGB))

# Extract the bounding box coordinates from the detection result.
# Assuming that det_res[0].boxes.data is a tensor or array where each row is:
# [x1, y1, x2, y2, confidence, class]
box_data = det_res[0].boxes.data
# Move to host memory when the container supports it (tensor-like objects).
if hasattr(box_data, "cpu"):
    box_data = box_data.cpu()
# Convert to NumPy when a converter exists; an object without `.numpy()` is
# used as-is (a plain ndarray has no such method).
if hasattr(box_data, "numpy"):
    box_data = box_data.numpy()
boxes_array = box_data

# Iterate over each detected box, crop the region, and display it.

def display_each_segment(boxes_array, original_image_rgb, names=None):
    """Crop every detected box out of the image and display each crop.

    Args:
        boxes_array: Iterable of detection rows. Each row is read by
            position as ``[x1, y1, x2, y2, confidence, class]``.
        original_image_rgb: Image array that is sliced as
            ``[y1:y2, x1:x2]`` to produce each crop.
        names: Optional mapping from integer class index to class name.
            When omitted, ``det_res[0].names`` from module scope is used,
            which matches the previous behaviour.
    """
    if names is None:
        names = det_res[0].names

    for box in boxes_array:
        x1, y1, x2, y2 = (int(value) for value in box[:4])
        confidence_score = box[4]
        # The class index is stored alongside float-like values in the row;
        # cast it so the lookup uses a plain integer key.
        class_name = names[int(box[5])]

        cropped_box = original_image_rgb[y1:y2, x1:x2]
        # A degenerate box produces an empty crop; there is nothing to show.
        if cropped_box.size == 0:
            continue
        display_image(cropped_box, label=f"{class_name}: {confidence_score:.2%}")


# Draw on a copy so the RGB original stays untouched for cropping elsewhere.
annotated_image = original_image_rgb.copy()

# Text settings shared by the size measurement and the actual drawing.
label_font = cv2.FONT_HERSHEY_SIMPLEX
label_scale = 0.5
label_thickness = 1

for idx, box in enumerate(boxes_array):
    # Cast the class index so the name lookup uses a plain integer key.
    segment_class = det_res[0].names[int(box[5])]
    if segment_class == 'abandon':
        # Skipped boxes still consume a number, so the labels continue to
        # match each box's position in boxes_array.
        continue

    x1, y1, x2, y2 = (int(value) for value in box[:4])

    # Draw the rectangle on the image (green, since the image is RGB)
    cv2.rectangle(img=annotated_image, pt1=(x1, y1), pt2=(x2, y2), color=(0, 255, 0), thickness=2)

    # Annotate the box with a number (red). putText anchors the text at its
    # bottom-left corner, so keep that corner inside the image: boxes that
    # touch the left or top edge would otherwise get a clipped or invisible
    # label.
    label_text = str(idx + 1)
    text_size, _baseline = cv2.getTextSize(label_text, label_font, label_scale, label_thickness)
    label_origin = (max(x1 - 10, 0), max(y1, text_size[1]))
    cv2.putText(img=annotated_image,
                text=label_text,
                org=label_origin,
                fontFace=label_font,
                fontScale=label_scale,
                color=(255, 0, 0),
                thickness=label_thickness)

# Display the annotated image
display_image(image=annotated_image, label="Annotated Image with Box Numbers", figsize=(15,15))
