# Прокопчук Роман, ШІ-2

Варіант 11
- Задача 1: Зображення для знаходження відстані: `KR2/picture/`. Зображення з відомою
відстанню: `picture_1.jpg`. Знаходження кутів: LoG. Знаходження ключових точок: SURF
- Задача 2: Натренувати класифікатор на класах: `plate`, `elephant`, `mouse`

# Задача 1

#### P.S.: Задачі рознесено по окремих файлах, оскільки алгоритм SURF недоступний у стандартних збірках сучасних версій OpenCV

In [3]:
import os

# Folder with the Task 1 photos and their metadata file, relative to the
# notebook's working directory.
_PICTURE_DIR_PARTS = (".", "task_1_images", "picture")
PATH_TO_PICTURE = os.path.join(*_PICTURE_DIR_PARTS)

In [4]:
import cv2

GROUND_TRUTH_IMAGE_NAME = "picture_1.jpg"


def _read_grayscale(image_name):
    """Read ``image_name`` from PATH_TO_PICTURE in grayscale mode.

    ``cv2.imread`` does not raise when a file is missing or cannot be
    decoded; it returns ``None``. That is checked here so the failure is
    reported with the offending path instead of surfacing later inside the
    feature detector.

    Raises:
        OSError: if OpenCV could not read the image.
    """
    image_path = os.path.join(PATH_TO_PICTURE, image_name)
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise OSError(f"Could not read image (missing or unreadable): {image_path}")
    return image


# Reference photo: the one whose distance is known in advance.
gt_image = _read_grayscale(GROUND_TRUTH_IMAGE_NAME)

# Photos whose distance has to be estimated, keyed by file name.
images_by_name = {
    image_name: _read_grayscale(image_name)
    for image_name in (
        "picture_2.jpg",
        "picture_3.jpg",
        "picture_4.jpg",
        "picture_5.jpg",
    )
}

In [5]:
PATH_METADATA = os.path.join(PATH_TO_PICTURE, "metadata.txt")

# Maps an image file name to the numeric value listed for it in the metadata.
gt_values = {}
# Three numbers from the "size" line; keeps this sentinel if no such line exists.
object_size = (-1, -1, -1)

# The context manager guarantees the file handle is closed, even if a
# malformed line makes the parsing below raise.
# NOTE(review): the file is read with the platform default encoding; pass
# encoding= explicitly once the encoding of metadata.txt is confirmed.
with open(PATH_METADATA, "r") as metadata_file:
    for line in metadata_file:
        if line.strip() == "":
            continue

        if line.startswith("picture"):
            # Expected shape: "<file name> - <number>".
            image_name, value = line.split(" - ")
            gt_values[image_name] = float(value)
        elif line.startswith("size"):
            # The second space-separated token holds the three measurements.
            measurements = line.split(" ")[1]
            w_str, l_str, h_str = measurements.split("х")  # Cyrillic "x" is used in file
            object_size = (float(w_str), float(l_str), float(h_str))

print("Ground truth values:", gt_values)
print("Object size:", object_size)

Ground truth values: {'picture_1.jpg': 130.0, 'picture_2.jpg': 94.0, 'picture_3.jpg': 187.0, 'picture_4.jpg': 228.0, 'picture_5.jpg': 114.0}
Object size: (39.5, 49.5, 1.7)


In [108]:
import cv2
import numpy as np

def estimate_distance(
    gt_image,
    target_image,
    gt_distance,
    hessian_threshold=500,
    lowe_ratio=0.75,
    max_pairs_per_point=10,
    min_distance_threshold=10
):
    """Estimate the distance for ``target_image`` from a reference image.

    SURF keypoints are detected in both images and matched with a brute-force
    L2 matcher. For pairs of matched keypoints, the pixel distance between the
    two points is measured in the reference image and in the target image.
    The result is::

        gt_distance * mean(reference pixel distances) / mean(target pixel distances)

    The physical object size is not needed here: a pixels-per-unit factor
    would divide both means by the same real-world length, which cancels out
    of the ratio.

    Args:
        gt_image: reference image passed to ``detectAndCompute``.
        target_image: image whose distance is estimated.
        gt_distance: known distance for ``gt_image``; the result is expressed
            in the same unit.
        hessian_threshold: threshold passed to ``SURF_create``.
        lowe_ratio: a match is kept only if its descriptor distance is below
            ``lowe_ratio`` times the distance of the second-best match.
        max_pairs_per_point: bounds how many following matches each match is
            paired with; match ``i`` is paired with matches ``i + 1`` up to
            ``i + max_pairs_per_point - 1``.
        min_distance_threshold: point pairs closer than this many pixels in
            either image are ignored.

    Returns:
        The estimated distance as a float.

    Raises:
        ValueError: if no descriptors are found in one of the images, or if
            no point pair survives the filtering (the estimate would be NaN).
    """
    surf = cv2.xfeatures2d.SURF_create(hessian_threshold)

    gt_keypoints, gt_descriptors = surf.detectAndCompute(gt_image, mask=None)
    target_keypoints, target_descriptors = surf.detectAndCompute(target_image, mask=None)

    # detectAndCompute yields None descriptors when no keypoints are found.
    if gt_descriptors is None or target_descriptors is None:
        raise ValueError("No SURF descriptors found in the reference or target image")

    matcher = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)
    knn_matches = matcher.knnMatch(
        gt_descriptors.astype(np.float32),
        target_descriptors.astype(np.float32),
        k=2,
    )

    good_matches = []
    for match_pair in knn_matches:
        # Fewer than two neighbours can come back when the target has too few
        # descriptors; the ratio test is undefined for such a query.
        if len(match_pair) < 2:
            continue
        best_match, second_match = match_pair
        if best_match.distance < lowe_ratio * second_match.distance:
            good_matches.append(best_match)

    gt_points = np.float32([gt_keypoints[m.queryIdx].pt for m in good_matches])
    target_points = np.float32([target_keypoints[m.trainIdx].pt for m in good_matches])

    gt_distances = []
    target_distances = []

    point_count = len(gt_points)
    for i in range(point_count):
        for j in range(i + 1, min(i + max_pairs_per_point, point_count)):
            distance_on_reference = np.linalg.norm(gt_points[i] - gt_points[j])
            distance_on_target = np.linalg.norm(target_points[i] - target_points[j])

            # Very short segments are dominated by keypoint localisation
            # noise, so they are left out of both samples.
            if distance_on_reference > min_distance_threshold and distance_on_target > min_distance_threshold:
                gt_distances.append(distance_on_reference)
                target_distances.append(distance_on_target)

    if not gt_distances:
        raise ValueError("No matched point pairs left to estimate the scale from")

    # Convert to Python floats so the ratio is computed in double precision
    # regardless of the dtype NumPy picks for the means.
    mean_gt_distance_pixels = float(np.mean(gt_distances))
    mean_target_distance_pixels = float(np.mean(target_distances))

    scale_ratio = mean_gt_distance_pixels / mean_target_distance_pixels

    return gt_distance * scale_ratio

In [119]:
# Run the estimator on every non-reference photo, print the absolute error
# against the value from the metadata, then print the mean error.
reference_distance = gt_values[GROUND_TRUTH_IMAGE_NAME]
sum_error = 0.0

for image_name, target_image in images_by_name.items():
    estimated_distance = estimate_distance(
        gt_image=gt_image,
        target_image=target_image,
        gt_distance=reference_distance,
        hessian_threshold=500,
        lowe_ratio=0.8,
        max_pairs_per_point=10,
        min_distance_threshold=15,
    )

    distance_error = abs(estimated_distance - gt_values[image_name])
    sum_error = sum_error + distance_error

    print(f"Estimated for image {image_name}: {estimated_distance:.3f}. Difference: {distance_error:.3f}")

avg_sum_error = sum_error / len(images_by_name)

print(f"Average distance error: {avg_sum_error:.3f}")

Estimated for image picture_2.jpg: 88.649. Difference: 5.351
Estimated for image picture_3.jpg: 192.607. Difference: 5.607
Estimated for image picture_4.jpg: 187.180. Difference: 40.820
Estimated for image picture_5.jpg: 111.646. Difference: 2.354
Average distance error: 13.533
