In [None]:
# Here I am loading the required packages, to run the code
import cv2
import numpy as np
from ultralytics import YOLO
from collections import defaultdict

In [None]:
# Path to the best weights from our main model, YOLOv11.
# NOTE: this is an absolute, machine-specific path; adjust it when running on another computer.
model_path = r"C:\Users\rasmn\Desktop\Speciale\YOLO\runs_YOLO11\train\weights\best.pt"

# Path to the first test image, chosen because the model performed well on the unaltered version.
image_path = "test_image1.jpg"

# Background colours. cv2.imread / cv2.imwrite store the channels as blue, green, red (BGR),
# so every colour written into the image has to be given in BGR order.
white_bgr = [255, 255, 255]
# The background of the original image was measured with an online tool as RGB (99, 123, 113).
green_rgb = [99, 123, 113]
# Reverse the channel order so the green is correct once it is placed in the BGR image.
bg_color = green_rgb[::-1]

# All three test variations are generated in one run, so each output file always contains the
# configuration its name describes: output file -> (background colour in BGR, add a vertical flip?)
variants = {
    "whiteout_mirrored_output.jpg": (white_bgr, False),
    "green_mirrored_output.jpg": (bg_color, False),
    "flipped_green_mirrored_output.jpg": (bg_color, True),
}

# Specify which model to use, in this case our fine-tuned YOLOv11 model.
model = YOLO(model_path)

# Load the original image. cv2.imread does not raise on failure but returns None,
# so we fail early with a clear message instead of crashing on `.shape` below.
original_img = cv2.imread(image_path)
if original_img is None:
    raise FileNotFoundError(f"Could not read the image: {image_path}")

# Height and width of the image
h, w = original_img.shape[:2]

# Use the fine-tuned model to detect the symbols on the page at a resolution of 2000
# (while maintaining the aspect ratio as described in the thesis)
results = model.predict(source=image_path, imgsz=2000)

# Unpack the bounding boxes from the results, move the data from GPU to CPU and convert it to a numpy array
boxes = results[0].boxes.xyxy.cpu().numpy()

# Create an empty black mask of the same dimensions as the image
mask = np.zeros((h, w), dtype=np.uint8)

# For each bounding box, set the corresponding region in the mask to white so that we can later extract the symbols
for box in boxes:
    x1, y1, x2, y2 = map(int, box)
    mask[y1:y2, x1:x2] = 255

# Extract only the parts of the original image that fall inside the mask (symbols).
# This is identical for every variation, so it is computed once.
foreground = cv2.bitwise_and(original_img, original_img, mask=mask)

for output_path, (variant_bgr, flip_vertically) in variants.items():
    # Create a full background image filled with the colour of this variation
    colored_bg = np.full_like(original_img, variant_bgr)

    # Keep the background colour only in the non-symbol areas (inverted mask)
    background = cv2.bitwise_and(colored_bg, colored_bg, mask=255 - mask)

    # Combine the foreground (which should be our labels) with the new background in a new image
    new_img = cv2.add(foreground, background)

    # Mirror the image horizontally to differentiate it further from the original
    mirrored_img = cv2.flip(new_img, 1)

    # Optionally add a vertical flip to further test generalization
    output_img = cv2.flip(mirrored_img, 0) if flip_vertically else mirrored_img

    # cv2.imwrite reports failure through its return value, so check it before claiming success
    if not cv2.imwrite(output_path, output_img):
        raise OSError(f"Could not write the image: {output_path}")
    print(f"Saved the new image to: {output_path}")

# The following sections all do the same thing: each one runs the model on one of the image variations and prints the average confidence per class. Only the first of these sections is commented, but its comments apply to all three. Code repeated from the previous section is not commented again.

In [None]:
# Standalone setup: re-import what the evaluation cells below need and reload the model,
# so those cells can be run without having to run the image-generation code above again.
from collections import defaultdict
from ultralytics import YOLO

# Fine-tuned YOLOv11 weights, given relative to the current working directory
model = YOLO(
    "runs_YOLO11/train/weights/best.pt"
)

In [None]:
# Run the model on the white-background, mirrored image at a resolution of 2000
results_new = model.predict(source="whiteout_mirrored_output.jpg", imgsz=2000)
prediction = results_new[0]

# Show the image with every bounding box the model detected, along with its confidence score/label
prediction.show()

# Maps each class id to the list of confidence scores of its detections.
# A defaultdict(list) lets us append to a class the first time we see it.
class_confidences = defaultdict(list)

# Read the class ids and confidences of all detections in one go and pair them up per bounding box
detections = prediction.boxes
for raw_cls, raw_conf in zip(detections.cls.tolist(), detections.conf.tolist()):
    # The class id is stored as a float (e.g. 0.0 or 1.0), so it is converted to an integer key
    class_confidences[int(raw_cls)].append(float(raw_conf))

# Report the mean confidence of each class, rounded to 3 decimals
for cls_id, confs in class_confidences.items():
    mean_conf = sum(confs) / len(confs)
    print(f"Class: {cls_id}, Average Confidence: {mean_conf:.3f}")

In [None]:
# Run the model on the green-background, mirrored image at a resolution of 2000
results_new = model.predict(source="green_mirrored_output.jpg", imgsz=2000)
prediction = results_new[0]

# Show the image with the detected bounding boxes and their confidence scores/labels
prediction.show()

# Maps each class id to the list of confidence scores of its detections
class_confidences = defaultdict(list)

# Pair up the class id and confidence of every detected bounding box
detections = prediction.boxes
for raw_cls, raw_conf in zip(detections.cls.tolist(), detections.conf.tolist()):
    class_confidences[int(raw_cls)].append(float(raw_conf))

# Report the mean confidence of each class, rounded to 3 decimals
for cls_id, confs in class_confidences.items():
    mean_conf = sum(confs) / len(confs)
    print(f"Class: {cls_id}, Average Confidence: {mean_conf:.3f}")

In [None]:
# Run the model on the green-background image that was mirrored and also flipped vertically
results_new = model.predict(source="flipped_green_mirrored_output.jpg", imgsz=2000)
prediction = results_new[0]

# Show the image with the detected bounding boxes and their confidence scores/labels
prediction.show()

# Maps each class id to the list of confidence scores of its detections
class_confidences = defaultdict(list)

# Pair up the class id and confidence of every detected bounding box
detections = prediction.boxes
for raw_cls, raw_conf in zip(detections.cls.tolist(), detections.conf.tolist()):
    class_confidences[int(raw_cls)].append(float(raw_conf))

# Report the mean confidence of each class, rounded to 3 decimals
for cls_id, confs in class_confidences.items():
    mean_conf = sum(confs) / len(confs)
    print(f"Class: {cls_id}, Average Confidence: {mean_conf:.3f}")