In [1]:
def decode_chars(chars):
    """
    Decodes the chars list into a string by sorting based on y-coordinates (lines) first
    and then x-coordinates within each line.

    Args:
        chars (list): A list of characters with their x and y positions.
                      Each character is represented as a tuple (char, x, y).

    Returns:
        str: The decoded string with line breaks as per the image layout.
    """
    line_threshold = 5  
    spacing_threshold = 48*float(entry["scale"])
    # print("Tresh:", spacing_threshold)
    
    chars = sorted(chars, key=lambda c: c[2])  # Sort by y-coordinate initially

    lines = []
    current_line = []
    for i, char in enumerate(chars):
        if i == 0 or abs(char[2] - chars[i - 1][2]) <= line_threshold:
            current_line.append(char)
        else:
            lines.append(current_line)
            current_line = [char]
            
    lines.append(current_line)

    # Sort each line by x-coordinate 
    decoded_lines = []
    for line in lines:
        sorted_line = sorted(line, key=lambda c: c[1])
        line_str = ""
        for j in range(len(sorted_line)):
            if j > 0 and (sorted_line[j][1] - sorted_line[j - 1][1]) > spacing_threshold:
                #print(sorted_line[j][1] - sorted_line[j - 1][1])
                line_str += " "  # Add tab space if characters are not too close
            line_str += str(sorted_line[j][0])
        decoded_lines.append(line_str)

    return '\n'.join(decoded_lines)

In [None]:
import cv2
import matplotlib.pyplot as plt
from ultralytics import YOLO
import numpy as np
from NewPal import getCodeWidth
import json

# Paths
MODEL_PATH = r"/mnt/DADES/home/jgarcia/CODE/2) DIGIT LOCATION AND RECOGNITION/runs/detect/train12/weights/best.pt"
model = YOLO(MODEL_PATH)

# DIRECTORIES

RealImagesDir = "/mnt/DADES/home/jgarcia/CODE/6) STYLE TRANSFER/Generated Data SDXL Final Model 2"
RealImagesJsonDir = "/mnt/DADES/home/jgarcia/CODE/6) STYLE TRANSFER/Generated Data SDXL Final Model 2/generated_data_valid.json"


try:
    with open(RealImagesJsonDir, "r") as f: 
        Annotations = json.load(f)
        i = len(Annotations)
        print("JSON file loaded successfully.")
        print(f"Number of entries: {i}")

except FileNotFoundError:
    print("JSON file not found.")

import random
random_entries = random.sample(list(Annotations.items()), 50)

JSON file loaded successfully.
Number of entries: 3000


In [None]:
import random
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont

counter = {"Valid Case": 0, "Missplaced Case": 0, "Incorrect Case": 0, "SUrface Deffect Case": 0}

for entry in random_entries:

    entry = random_entries[0][1]
    # print(entry)
    
    file = entry["file_name"]
    drawing_region = entry['reverse']
    original_scale = entry['scale']
    original_code = entry["chars"]
    original_code = decode_chars(original_code).replace(" ", "").replace("\n", "")
    # print("Original code:", original_code)

    image_path = os.path.join(RealImagesDir, *file.split('\\'))

    # Load and convert the image
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Run model prediction
    results = model.predict(source=image_rgb, conf=0.5, imgsz=600)
    class_names = model.names

    annotated_image = image_rgb.copy()
    detected_text = []

    heights = []
    widths = []

    # Convert image to PIL for text drawing
    image_pil = Image.fromarray(annotated_image)
    draw = ImageDraw.Draw(image_pil)
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"  # Make sure this font exists!
    font = ImageFont.truetype(font_path, 20)

    region = None  # Fix: Initialize region before using it!

    for result in results:
        for box in result.boxes:
            cls = int(box.cls.item())
            if class_names[cls] == "Marking_Region":
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                region = [(x1, y1), (x2, y2)]
                draw.rectangle([x1, y1, x2, y2], outline=(0, 255, 0), width=2)
                draw.text((x1, y1 - 22), "Marking Region", font=font, fill=(0, 255, 0))
                break  # Optional: Stop after finding one marking region

    scores = []

    if region:  # Only proceed if region was found
        for result in results:
            for box in result.boxes:
                cls = int(box.cls.item())

                score = box.conf.item()
                scores.append(score)

                class_name = class_names[cls]

                if class_name == "Marking_Region":
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy[0])
                text_x, text_y = x1, max(y1 - 5, 10)

                width = x2 - x1
                height = y2 - y1
                heights.append(height)
                widths.append(width)

                draw.text((text_x, text_y - 22), class_name, font=font, fill=(255, 0, 0))

                char_width = getCodeWidth(class_name)
                draw.rectangle([x1, y1 - 2, x2 + char_width - 8, y2 + 2], outline=(255, 0, 0), width=2)

                detected_text.append([class_name, text_x, text_y])

        annotated_image = np.array(image_pil)
        detected_text.sort(key=lambda item: (item[2], item[1]))

        avg_height = np.mean(heights)
        avg_width = np.mean(widths)

        line_threshold = avg_height
        spacing_threshold = 50 / 32 * avg_width

        lines, current_line = [], []

        for i, char in enumerate(detected_text):
            if i == 0 or abs(char[2] - detected_text[i - 1][2]) <= line_threshold:
                current_line.append(char)
            else:
                lines.append(current_line)
                current_line = [char]
        if current_line:
            lines.append(current_line)

        decoded_lines = []
        for line in lines:
            sorted_line = sorted(line, key=lambda c: c[1])
            line_str = "".join(
                (f" {c[0]}" if j > 0 and (c[1] - sorted_line[j - 1][1]) > spacing_threshold else c[0])
                for j, c in enumerate(sorted_line)
            )
            decoded_lines.append(line_str)

        predicted_code = "\n".join(decoded_lines).replace(" ", "").replace("\n", "")

        print("Predicted code:", predicted_code)

        # Fix: Check if characters are within the region for the entire code
        all_inside = all(
            region[0][0] <= char[1] <= region[1][0] and
            region[0][1] <= char[2] <= region[1][1]
            for char in detected_text
        )

        if predicted_code == original_code:
            if all_inside:
                counter["Valid Case"] += 1
            else:
                counter["Missplaced Case"] += 1
        if predicted_code != original_code:
            if scores.mean() > 0.8:
                counter["Incorrect Case"] += 1
            else:
                counter["SUrface Deffect Case"] += 1
                
print("Final count:", counter) # Should be valid

Final count: {'Valid Case': 50, 'Missplaced Case': 0, 'Incorrect Case': 0, 'SUrface Deffect Case': 0}
