In [1]:
import cv2
import numpy as np
import os
import torch
import torchvision.transforms as transforms
from PIL import Image
import time
import math

from corner_detector_linear import CornerDetectorLinear
from ultralytics import YOLO

In [2]:
# Two CornerDetectorLinear checkpoints are evaluated side by side. Later cells
# call `.numpy()` directly on the model outputs, which only works for CPU
# tensors, so the weights are mapped to CPU explicitly. Without map_location a
# checkpoint that was saved from a GPU session fails to load on a CPU-only box.
resnet_model_ordered = CornerDetectorLinear()
resnet_model_ordered.load_state_dict(
    torch.load('models/corner_detector_test2.pth', map_location='cpu')
)
resnet_model_ordered.eval()  # inference mode (disables dropout / batch-norm updates)

resnet_model_top_to_bottom = CornerDetectorLinear()
resnet_model_top_to_bottom.load_state_dict(
    torch.load('models/corner_detector_v2_test1.pth', map_location='cpu')
)
resnet_model_top_to_bottom.eval()

# Preprocessing shared by both ResNet variants: resize to 224x224, convert the
# PIL image to a tensor, then normalise each channel with the mean/std below.
# NOTE(review): these look like the standard ImageNet statistics - confirm they
# match what the checkpoints were trained with.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])



In [3]:
# YOLO model built from a local weights file. Later cells call it on BGR images
# read with cv2.imread and read `result.keypoints.xyn` from its results.
yolo_model = YOLO('models/medium_best.pt')

In [4]:
def calculate_euclidean_error_ordered(original, predicted):
    """Mean Euclidean distance between index-aligned point pairs.

    The i-th point of `original` is compared with the i-th point of
    `predicted`. Pairing stops at the shorter sequence, but the summed
    distance is always divided by ``len(original)``.

    Args:
        original: sequence of (x, y) reference points.
        predicted: sequence of (x, y) points in the same order.

    Returns:
        Summed pair distance divided by the number of reference points.

    Raises:
        ZeroDivisionError: if `original` is empty.
    """
    pair_distances = (
        np.sqrt((ref_x - est_x) ** 2 + (ref_y - est_y) ** 2)
        for (ref_x, ref_y), (est_x, est_y) in zip(original, predicted)
    )
    return sum(pair_distances) / len(original)



In [5]:
def calculate_euclidean_error_closest(original, predicted):
    """Mean distance from each reference point to its nearest predicted point.

    Ordering of `predicted` does not matter, and several reference points may
    share the same nearest prediction.

    Args:
        original: sequence of (x, y) reference points.
        predicted: sequence of (x, y) candidate points.

    Returns:
        Sum of nearest-neighbour distances divided by ``len(original)``.
        The result is ``inf`` when `predicted` is empty.

    Raises:
        ZeroDivisionError: if `original` is empty.
    """
    total = 0
    for ref_x, ref_y in original:
        nearest = float('inf')
        for est_x, est_y in predicted:
            # min() keeps `nearest` unless the new distance is strictly smaller.
            nearest = min(nearest, np.sqrt((ref_x - est_x) ** 2 + (ref_y - est_y) ** 2))
        total += nearest
    return total / len(original)



In [6]:
# Folder with the evaluation images; later cells pair each `<name>.jpg` with a
# `<name>.txt` label file in the same folder.
test_images_folder = 'test_images'
# os.listdir returns entries in arbitrary order, so sort the names to make the
# per-image result lists reproducible across runs and machines.
images = sorted(f for f in os.listdir(test_images_folder) if f.endswith('.jpg'))

In [7]:
# Per-image mean corner errors, one entry per test image.
resnet_errors_ordered = []
resnet_errors_closest = []
yolo_errors = []

# Wall-clock inference times in seconds. Both ResNet variants append to the
# same list, so its mean is taken over two forward passes per image.
resnet_runtimes = []
yolo_runtimes = []

for image in images:
    image_path = os.path.join(test_images_folder, image)
    # Swap only the file extension: str.replace('.jpg', '.txt') would also
    # rewrite any other '.jpg' that happens to occur earlier in the path.
    label_path = os.path.splitext(image_path)[0] + '.txt'

    # Ground truth: one "x,y" pair per line.
    # NOTE(review): labels are compared directly against the model outputs and
    # YOLO's `xyn`, so they are presumably normalised coordinates - confirm.
    original_corners = []
    with open(label_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of failing on the unpack below.
                continue
            x_str, y_str = line.split(",")
            original_corners.append((float(x_str), float(y_str)))

    resnet_input_image = Image.open(image_path).convert('RGB')
    resnet_input_image = transform(resnet_input_image).unsqueeze(0)  # add batch dimension

    # The timed region covers the forward pass plus the conversion of the
    # output to an (N, 2) numpy array.
    with torch.no_grad():
        t1 = time.perf_counter()
        resnet_ordered_output = resnet_model_ordered(resnet_input_image)
        resnet_ordered_corners = resnet_ordered_output.squeeze().numpy().reshape(-1, 2)
        t2 = time.perf_counter()
    resnet_runtimes.append(t2 - t1)
    with torch.no_grad():
        t1 = time.perf_counter()
        resnet_top_to_bottom_output = resnet_model_top_to_bottom(resnet_input_image)
        resnet_top_to_bottom_corners = resnet_top_to_bottom_output.squeeze().numpy().reshape(-1, 2)
        t2 = time.perf_counter()
    resnet_runtimes.append(t2 - t1)

    # The "ordered" model is scored index-by-index; the "top to bottom" model
    # is scored against the nearest prediction for every label point.
    resnet_errors_ordered.append(calculate_euclidean_error_ordered(original_corners, resnet_ordered_corners))
    resnet_errors_closest.append(calculate_euclidean_error_closest(original_corners, resnet_top_to_bottom_corners))

    # Image decoding is kept outside the timed region; only the model call is timed.
    yolo_input_image = cv2.imread(image_path)
    t1 = time.perf_counter()
    yolo_results = yolo_model(yolo_input_image, verbose=False)
    t2 = time.perf_counter()
    yolo_runtimes.append(t2 - t1)

    # Only the first entry of `keypoints.xyn` is used for each result.
    yolo_corners = []
    for result in yolo_results:
        for kpt in result.keypoints.xyn[0]:
            yolo_corners.append((float(kpt[0]), float(kpt[1])))
    yolo_errors.append(calculate_euclidean_error_closest(original_corners, yolo_corners))

print(f"ResNet Average Euclidean Error (Ordered): {np.mean(resnet_errors_ordered)}")
print(f"ResNet Average Euclidean Error (Closest): {np.mean(resnet_errors_closest)}")
print(f"YOLO Average Euclidean Error: {np.mean(yolo_errors)}")
print(f"ResNet Average Inference Time: {np.mean(resnet_runtimes) * 1000} ms")
print(f"YOLO Average Inference Time: {np.mean(yolo_runtimes) * 1000} ms")

ResNet Average Euclidean Error (Ordered): 0.5003104209899902
ResNet Average Euclidean Error (Closest): 0.01740335300564766
YOLO Average Euclidean Error: 0.0353339342221957
ResNet Average Inference Time: 36.59228732135489 ms
YOLO Average Inference Time: 29.271585410372477 ms


In [8]:

def warp_board(image, corners_xy, out_size=640, use_resnet=False):
    """Perspective-warp `image` so four source points land on the output square's corners.

    Args:
        image: image array handed to ``cv2.warpPerspective``.
        corners_xy: four (x, y) source points; the i-th point is mapped to the
            i-th target corner.
        out_size: side length of the square output in pixels.
        use_resnet: selects the target corner order. When True the targets are
            (0, 0), (max, 0), (0, max), (max, max); otherwise they are
            (0, 0), (max, 0), (max, max), (0, max), where ``max = out_size - 1``.

    Returns:
        ``(warped, H)``: the ``out_size`` x ``out_size`` warped image and the
        3x3 perspective transform that maps source points to target points.
    """
    far = out_size - 1
    origin, x_far, y_far, both_far = [0, 0], [far, 0], [0, far], [far, far]

    # Only the order of the last two target corners differs between the modes.
    if use_resnet:
        target_order = [origin, x_far, y_far, both_far]
    else:
        target_order = [origin, x_far, both_far, y_far]

    src = np.array(corners_xy, dtype=np.float32)
    dst = np.array(target_order, dtype=np.float32)

    H = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(image, H, (out_size, out_size)), H

def wide_corners(corners, expansion=40, expansion_dynamic=0.2, dynamic_expansion=False):
    """Push every corner away from the centroid of all corners.

    The centroid is the mean of the points actually supplied, so the function
    works for any number of points rather than only for four.

    Args:
        corners: iterable of (x, y) points; the input is not modified.
        expansion: fixed outward shift, used when `dynamic_expansion` is False.
        expansion_dynamic: fraction of each corner's distance to the centroid
            used as its shift when `dynamic_expansion` is True.
        dynamic_expansion: choose the proportional shift over the fixed one.

    Returns:
        New list of ``[x, y]`` lists. Each axis offset is truncated towards
        zero with ``int()`` before being added to the coordinate.
    """
    corners_copy = [list(corner) for corner in corners]
    if not corners_copy:
        # Nothing to move; also avoids dividing by a zero point count below.
        return corners_copy

    count = len(corners_copy)
    center_x = sum(c[0] for c in corners_copy) / count
    center_y = sum(c[1] for c in corners_copy) / count

    for corner in corners_copy:
        dx = corner[0] - center_x
        dy = corner[1] - center_y
        step = math.hypot(dx, dy) * expansion_dynamic if dynamic_expansion else expansion

        # Move along the ray from the centroid through the corner.
        angle = math.atan2(dy, dx)
        corner[0] += int(step * math.cos(angle))
        corner[1] += int(step * math.sin(angle))

    return corners_copy

def warped_points_to_original(H, warped_points):
    """Map points from warped-image coordinates back through the inverse of `H`.

    Args:
        H: 3x3 perspective transform that maps original to warped coordinates.
        warped_points: iterable of (x, y) points in the warped image.

    Returns:
        List of ``(x, y)`` tuples of Python ints in original-image coordinates.
        Each coordinate is truncated towards zero after the perspective divide.
    """
    inverse = np.linalg.inv(H)

    def _unproject(pt):
        # Lift to a homogeneous column vector, apply H^-1, then divide by w.
        homogeneous = np.array([pt[0], pt[1], 1]).reshape(3, 1)
        mapped = np.dot(inverse, homogeneous)
        mapped /= mapped[2, 0]
        return (int(mapped[0, 0]), int(mapped[1, 0]))

    return [_unproject(pt) for pt in warped_points]

def calculate_average_corners(corners_list1, corners_list2):
    """Greedily pair each corner of list 1 with its nearest unused corner of list 2 and average them.

    Corners of `corners_list1` are processed in order. Each one takes the
    closest remaining corner from `corners_list2`, which is then removed from
    further matching.

    Args:
        corners_list1: sequence of (x, y) points that drives the output order.
        corners_list2: sequence of (x, y) points to match against.

    Returns:
        List of ``(x, y)`` int tuples, the truncated midpoints of each matched
        pair, in the order of `corners_list1`.

    Raises:
        ValueError: if the two lists differ in length.
    """
    if len(corners_list1) != len(corners_list2):
        raise ValueError("Both corner lists must have the same number of corners.")

    anchors = [tuple(pt) for pt in corners_list1]
    remaining = [tuple(pt) for pt in corners_list2]

    averaged_corners = []
    for anchor in anchors:
        partner, best = None, float('inf')
        for candidate in remaining:
            gap = math.hypot(anchor[0] - candidate[0], anchor[1] - candidate[1])
            if gap < best:
                partner, best = candidate, gap
        if partner is None:
            # No candidate beat the initial infinite distance.
            continue
        midpoint = (
            int((anchor[0] + partner[0]) / 2),
            int((anchor[1] + partner[1]) / 2),
        )
        averaged_corners.append(midpoint)
        remaining.remove(partner)  # each list-2 corner is used at most once
    return averaged_corners



In [9]:
# Compare plain YOLO corners against a two-pass refinement: detect, warp a
# slightly widened board crop, detect again on the crop, map the second
# detection back to the original image and average both estimates.
yolo_errors_on_original = []
yolo_errors_on_averaged = []
for image in images:
    image_path = os.path.join(test_images_folder, image)
    # Swap only the file extension: str.replace('.jpg', '.txt') would also
    # rewrite any other '.jpg' that happens to occur earlier in the path.
    label_path = os.path.splitext(image_path)[0] + '.txt'

    # Ground truth: one "x,y" pair per line, compared against normalised coordinates below.
    original_corners = []
    with open(label_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of failing on the unpack below.
                continue
            x_str, y_str = line.split(",")
            original_corners.append((float(x_str), float(y_str)))

    yolo_input_image = cv2.imread(image_path)
    yolo_results = yolo_model(yolo_input_image, verbose=False)
    yolo_corners = []
    for result in yolo_results:
        for kpt in result.keypoints.xyn[0]:
            yolo_corners.append((float(kpt[0]), float(kpt[1])))

    # Original-image size. It gets its own names so the warped-board size read
    # further down cannot overwrite it before the final normalisation.
    image_height, image_width = yolo_input_image.shape[:2]
    yolo_corners_scaled = [(x * image_width, y * image_height) for x, y in yolo_corners]

    yolo_errors_on_original.append(calculate_euclidean_error_closest(original_corners, yolo_corners))

    # `warped_board` / `H1` (tight crop) are not used further in this cell.
    warped_board, H1 = warp_board(yolo_input_image, yolo_corners_scaled, use_resnet=False)
    widen_corners = wide_corners(yolo_corners_scaled, expansion_dynamic=0.15, dynamic_expansion=True)
    warped_wide_board, H2 = warp_board(yolo_input_image, widen_corners, use_resnet=False)

    # Second pass on the widened crop; keypoints are scaled to crop pixels.
    warped_yolo_results = yolo_model(warped_wide_board, verbose=False)
    warped_height, warped_width = warped_wide_board.shape[:2]
    warped_yolo_corners = []
    for result in warped_yolo_results:
        for kpt in result.keypoints.xyn[0]:
            warped_yolo_corners.append((float(kpt[0]) * warped_width, float(kpt[1]) * warped_height))

    # Back to original-image pixels, then average with the first-pass corners.
    wide_corners_in_original = warped_points_to_original(H2, warped_yolo_corners)
    avg_corners = calculate_average_corners(yolo_corners_scaled, wide_corners_in_original)

    # The averaged corners are in ORIGINAL-image pixels, so they must be
    # normalised by the original image size, not by the warped crop size.
    avg_corners_downscaled = [(x / image_width, y / image_height) for x, y in avg_corners]

    yolo_errors_on_averaged.append(calculate_euclidean_error_closest(original_corners, avg_corners_downscaled))


print(f"YOLO Average Euclidean Error on Original Corners: {np.mean(yolo_errors_on_original)}")
print(f"YOLO Average Euclidean Error on Averaged Corners: {np.mean(yolo_errors_on_averaged)}")

YOLO Average Euclidean Error on Original Corners: 0.0353339342221957
YOLO Average Euclidean Error on Averaged Corners: 0.031243952537232698


In [10]:
# Same two-pass refinement as the YOLO cell, using the "top to bottom" ResNet:
# predict, warp a widened crop, predict again on the crop, map back, average.
resnet_errors_on_original = []
resnet_errors_on_averaged = []
for image in images:
    image_path = os.path.join(test_images_folder, image)
    # Swap only the file extension: str.replace('.jpg', '.txt') would also
    # rewrite any other '.jpg' that happens to occur earlier in the path.
    label_path = os.path.splitext(image_path)[0] + '.txt'

    # Ground truth: one "x,y" pair per line, compared against normalised coordinates below.
    original_corners = []
    with open(label_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of failing on the unpack below.
                continue
            x_str, y_str = line.split(",")
            original_corners.append((float(x_str), float(y_str)))

    cv_image = cv2.imread(image_path)
    width, height = cv_image.shape[1], cv_image.shape[0]

    resnet_input_image = Image.open(image_path).convert('RGB')
    resnet_input_image = transform(resnet_input_image).unsqueeze(0)  # add batch dimension
    with torch.no_grad():
        resnet_top_to_bottom_output = resnet_model_top_to_bottom(resnet_input_image)
        resnet_top_to_bottom_corners = resnet_top_to_bottom_output.squeeze().numpy().reshape(-1, 2)
    resnet_errors_on_original.append(calculate_euclidean_error_closest(original_corners, resnet_top_to_bottom_corners))

    # Model outputs are scaled by the image size to obtain pixel coordinates.
    resnet_corners_pixel = [(corner[0] * width, corner[1] * height) for corner in resnet_top_to_bottom_corners]

    widen_corners_res = wide_corners(resnet_corners_pixel, expansion_dynamic=0.15, dynamic_expansion=True)
    warped_board, H_res = warp_board(cv_image, widen_corners_res, use_resnet=True)

    # The crop is BGR (OpenCV); convert to RGB before the PIL-based transform.
    warped_pil = Image.fromarray(cv2.cvtColor(warped_board, cv2.COLOR_BGR2RGB))
    warped_input = transform(warped_pil).unsqueeze(0)

    with torch.no_grad():
        warped_output = resnet_model_top_to_bottom(warped_input)
        warped_corners_norm = warped_output.squeeze().numpy().reshape(-1, 2)

    # Second-pass predictions are scaled by the crop size, kept in separate
    # names so the original image size stays available for the final step.
    warped_w, warped_h = warped_board.shape[1], warped_board.shape[0]
    warped_corners_pixel = [(corner[0] * warped_w, corner[1] * warped_h) for corner in warped_corners_norm]

    refined_corners_in_original = warped_points_to_original(H_res, warped_corners_pixel)

    avg_corners = calculate_average_corners(resnet_corners_pixel, refined_corners_in_original)

    # Back to normalised coordinates using the ORIGINAL image size.
    avg_corners_downscaled = [(corner[0] / width, corner[1] / height) for corner in avg_corners]

    resnet_errors_on_averaged.append(calculate_euclidean_error_closest(original_corners, avg_corners_downscaled))

print(f"ResNet Average Euclidean Error on Original Corners: {np.mean(resnet_errors_on_original)}")
print(f"ResNet Average Euclidean Error on Averaged Corners: {np.mean(resnet_errors_on_averaged)}")

ResNet Average Euclidean Error on Original Corners: 0.01740335300564766
ResNet Average Euclidean Error on Averaged Corners: 0.019139241199919745
