In [1]:
from ultralytics import YOLO
import torch, torchvision
import cv2 as cv
import numpy as np

In [2]:

# Load the weights directly
model = YOLO('./train/weights/best.pt')

results = model('./gateRender1_903.jpg',device='mps')



image 1/1 /Users/michalkozicki/yolo-trained/gateRender1_903.jpg: 640x640 1 gate, 19.6ms
Speed: 22.9ms preprocess, 19.6ms inference, 998.7ms postprocess per image at shape (1, 3, 640, 640)


In [3]:
import pathlib
import time
val_src = pathlib.Path('/Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val')
times = []
results = model(val_src)




image 1/186 /Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val/gateRender1_1.jpg: 640x640 1 gate, 19.3ms
image 2/186 /Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val/gateRender1_101.jpg: 640x640 1 gate, 11.2ms
image 3/186 /Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val/gateRender1_115.jpg: 640x640 1 gate, 14.3ms
image 4/186 /Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val/gateRender1_126.jpg: 640x640 1 gate, 14.4ms
image 5/186 /Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val/gateRender1_132.jpg: 640x640 1 gate, 16.3ms
image 6/186 /Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val/gateRender1_135.jpg: 640x640 1 gate, 20.2ms
image 7/186 /Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val/gateRender1_137.jpg: 640x640 1 gate, 21.0ms
image 8/186 /Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val/gateRender1_14.jpg: 640x640 1 gate, 13.3ms
image 9/186 /Users

In [4]:
reference_path = "./res/gate_ref.jpg"
ref_gray = cv.cvtColor(cv.imread(reference_path), cv.COLOR_BGR2GRAY)

In [5]:
import math
def rotationMatrixToEulerAngles(R):
    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])

    singular = sy < 1e-6

    if not singular:
        x = math.atan2(R[2, 1], R[2, 2])
        y = math.atan2(-R[2, 0], sy)
        z = math.atan2(R[1, 0], R[0, 0])
    else:
        x = math.atan2(-R[1, 2], R[1, 1])
        y = math.atan2(-R[2, 0], sy)
        z = 0

    return np.array([x, y, z])



In [6]:

cv.imwrite('./res/gate_Front.jpg', cv.imread('./res/Gate teksture_A2RL X DCL-2.png'), [int(cv.IMWRITE_JPEG_QUALITY), 95]) 
cv.imwrite('./res/gate_Back.jpg', cv.imread('./res/Gate teksture_A2RL X DCL-2_mirror.png'), [int(cv.IMWRITE_JPEG_QUALITY), 95]) 



True

In [7]:
# Load the texture images /Users/michalkozicki/yolo-trained/res/Gate teksture_A2RL X DCL-2.png

texture_front = cv.imread('./res/gate_Front.jpg', cv.IMREAD_GRAYSCALE)

# Initialize a feature detector (e.g., SIFT)
sift = cv.SIFT_create()

# Detect keypoints and compute descriptors for the front texture
keypoints_ref_texture, descriptors_ref_texture = sift.detectAndCompute(texture_front, None)


In [8]:
#LOADING REFERENCE FROM 3D model
key_points_3d = []
with open('./res/3d_points.txt', 'r') as f:
    for line in f:
        x, y, z = map(float, line.strip().split())
        key_points_3d.append([x, y, z])

key_points_3d = np.array(key_points_3d, dtype=np.float32)

In [9]:
#program runs for all results
angles=[]
for result in results:
    path = result.path
    if result.boxes.xyxy.shape[0] == 0:
        print(f"No bounding box found for image {path}")
        continue
    #IMAGE PREPROCESSING
    x_1, y_1, x_2, y_2 = map(int, result.boxes.xyxy[:,:4][0]) #getting xyxy values for bounding box
    image_path=path
    gray_image = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2GRAY) #converting to grayscale
    if gray_image is None:
        print(f"Error: Could not read image at {image_path}")
        continue
    
    cropped_image = gray_image[y_1:y_2, x_1:x_2] #cropping to bounding box
    
    
    #ANALYSIS OF IMAGE
    sift = cv.SIFT_create() #extracting keypoints from the image being processed
    keypoints_photo, descriptors_photo = sift.detectAndCompute(cropped_image, None)
    #assuming you have texture
    bf = cv.BFMatcher()
    
    if descriptors_photo is None:
        print("No descriptors found in the reference texture.")
        continue
    if descriptors_ref_texture.dtype != np.float32:
        descriptors_ref_texture = descriptors_ref_texture.astype(np.float32)    
    matches = bf.knnMatch(descriptors_ref_texture, descriptors_photo, k=2) #using the already determined keypoints and descriptors for reference photo
    
    good_matches = []
    for m, n in matches:
        if m.distance < 0.8 * n.distance:
            good_matches.append(m)
    object_points = np.float32([key_points_3d[m.queryIdx] for m in good_matches if m.queryIdx < len(key_points_3d)])
    image_points = np.float32([keypoints_photo[m.trainIdx].pt for m in good_matches if m.queryIdx < len(key_points_3d)])
    
    
    
    camera_matrix = np.array([[279.35,0,311.76],[0,371.99,240.00],[0,0,1]],np.float64)

    dist_coeffs = np.array([[-2.89405277e-01,  1.38811576e-01,  2.75327250e-04,  1.47092084e-03,-4.17448721e-02]]  ,np.float64)
    
    #CALCULATING ROTATION
    success, rotation_vector, translation_vector = cv.solvePnP(
    object_points,
    image_points,
    camera_matrix,
    dist_coeffs,
    flags=cv.SOLVEPNP_ITERATIVE
    )
    rotation_matrix, _ = cv.Rodrigues(rotation_vector)
    #TRANSLATING TO RPY
    euler_angles = rotationMatrixToEulerAngles(rotation_matrix)
    pitch, yaw, roll = np.degrees(euler_angles)
    angles.append((result, (yaw,pitch, roll)))

No descriptors found in the reference texture.
No bounding box found for image /Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val/gateRender1_23.jpg
No bounding box found for image /Users/michalkozicki/yolo-trained/drone-vision/datasets/images/val/gateRender1_444.jpg


In [10]:
i = 7
angles[i][0].show()
print(angles[i][1])

(-51.639560030623244, -122.4593814305164, 60.51167499576547)


In [23]:
width_rl = 270 #width of gate in cm
height_rl = 270 #height of gate in cm
f_x= camera_matrix[0][0]
f_y = camera_matrix[1][1]

for result in results: 
    if result.boxes.xyxy.numel() == 0:
        print("No bounding box found for image")
        continue
    _, y_1, _, y_2 = map(int, result.boxes.xyxy[:,:4][0])
    h = y_2-y_1
    dist = f_y*height_rl/h
    print(dist)
    #testComment

858.4384615384615
722.5705035971223
778.5837209302326
687.9267123287672
411.6282786885246
580.5624277456648
1014.5181818181818
913.0663636363637
727.8065217391304
577.2258620689655
687.9267123287672
502.1865
278.2196675900277
707.3049295774648
482.8716346153846
534.2409574468086
291.96889534883724
692.6710344827586
577.2258620689655
1079.9709677419355
647.9825806451613
929.975
339.3152027027027
317.8395569620253
462.84470046082953
551.8532967032967
254.27164556962026
669.582
913.0663636363637
No bounding box found for image
473.7608490566038
904.8405405405406
665.1476821192053
904.8405405405406
766.6969465648855
409.9481632653061
947.5216981132075
247.38251231527093
896.7616071428572
873.3678260869566
692.6710344827586
359.99032258064517
406.62874493927126
984.679411764706
683.2469387755102
567.4423728813559
743.98
507.2590909090909
1068.4819148936172
377.5838345864662
229.30890410958904
384.81724137931036
984.679411764706
310.9513931888545
349.9557491289199
361.28525179856115
692.6710

In [14]:
result.boxes.xyxy[:,:4][0][3]

tensor(377.3774, device='mps:0')