In [62]:
from PIL import Image, ImageDraw
import torch
from torchvision import transforms
from models.utils import *
# library YOLOv8
from ultralytics import YOLO
import numpy as np

In [63]:
# Pick the compute device: CUDA when torch reports it as available, otherwise CPU.
# NOTE(review): in the cells visible here `device` is only printed; the model is
# never explicitly moved to it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
model = YOLO("yolov8n.pt")  # load a pretrained model (recommended for training)
# model = YOLO()  # or create an empty model (new)

Device: cuda


In [64]:
def cal_equation_line(line):
    """Return ``(a, b)`` such that ``y = a * x + b`` passes through both points.

    Args:
        line: Sequence whose first two items are points indexable as (x, y).

    Returns:
        Tuple ``(a, b)``: slope and y-intercept of the line.

    Raises:
        ZeroDivisionError: If both points share the same x (vertical line),
            because the slope is obtained by dividing by the x difference.
    """
    start, end = line[0], line[1]
    rise = end[1] - start[1]
    run = end[0] - start[0]
    slope = rise / run
    # Intercept follows from substituting the first point into y = a * x + b.
    intercept = start[1] - slope * start[0]
    return slope, intercept

def converging_point(line_1, line_2):
    """Return the intersection of the two infinite lines through the given segments.

    The point is computed from the endpoints in determinant form instead of
    via slope/intercept, so a vertical line (both endpoints share the same x)
    no longer causes a division by zero.

    Args:
        line_1: Sequence whose first two items are points indexable as (x, y).
        line_2: Same layout as ``line_1``.

    Returns:
        Tuple ``(x, y)`` with the coordinates of the intersection.

    Raises:
        ZeroDivisionError: If the lines are parallel/coincident or a segment
            has two identical endpoints, i.e. no unique intersection exists.
    """
    x1, y1 = line_1[0][0], line_1[0][1]
    x2, y2 = line_1[1][0], line_1[1][1]
    x3, y3 = line_2[0][0], line_2[0][1]
    x4, y4 = line_2[1][0], line_2[1][1]

    # Zero exactly when the two direction vectors are parallel (or degenerate).
    denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
    if denom == 0:
        raise ZeroDivisionError(
            "lines are parallel or degenerate; no unique intersection point"
        )

    cross_1 = x1 * y2 - y1 * x2
    cross_2 = x3 * y4 - y3 * x4
    x = (cross_1 * (x3 - x4) - (x1 - x2) * cross_2) / denom
    y = (cross_1 * (y3 - y4) - (y1 - y2) * cross_2) / denom
    return x, y

def under_edge_location(line_1, max_height):
    """Return the point where the line through ``line_1`` reaches ``y == max_height``.

    The x coordinate is interpolated directly from the two endpoints, so a
    vertical line (both endpoints share the same x) is handled instead of
    failing with a division by zero.

    Args:
        line_1: Sequence whose first two items are points indexable as (x, y).
        max_height: The y value at which the line should be evaluated.

    Returns:
        Tuple ``(x, y)`` where ``y`` is ``max_height`` unchanged.

    Raises:
        ZeroDivisionError: If both endpoints share the same y (horizontal or
            degenerate line), since such a line has no unique point at the
            requested y.
    """
    x1, y1 = line_1[0][0], line_1[0][1]
    x2, y2 = line_1[1][0], line_1[1][1]
    y = max_height

    dy = y2 - y1
    if dy == 0:
        raise ZeroDivisionError(
            "line is horizontal or degenerate; it has no unique point at the given y"
        )

    # Move from the first endpoint along the line until y is reached; for a
    # vertical line (x2 == x1) the offset is zero and x stays at x1.
    x = x1 + (y - y1) * (x2 - x1) / dy
    return x, y

In [65]:
# Two reference line segments, each given as a pair of (x, y) points.
# NOTE(review): presumably hand-picked pixel coordinates on the input image — confirm.
line_1 = [(226, 776), (335, 635)]
line_2 = [(683, 632), (758, 775)]

In [66]:
# Open the input picture; the path is relative, so it is resolved against the
# current working directory of the kernel.
img = Image.open("../image/img_2.png")

In [67]:
# Run the YOLO model on the image; the result is indexed per image (results[0]).
results = model(img)


0: 640x480 1 tie, 1 chair, 2.5ms
Speed: 1.1ms preprocess, 2.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


In [68]:
# Display the first result via its own show() method.
results[0].show()

In [69]:
# Inspect the Boxes object of the first result (class ids, confidences and
# the box coordinates in several formats).
results[0].boxes

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([27., 56.], device='cuda:0')
conf: tensor([0.5321, 0.2616], device='cuda:0')
data: tensor([[9.1637e-02, 0.0000e+00, 6.6696e+01, 2.9426e+02, 5.3208e-01, 2.7000e+01],
        [3.4943e+02, 4.0950e+02, 6.4572e+02, 6.7572e+02, 2.6163e-01, 5.6000e+01]], device='cuda:0')
id: None
is_track: False
orig_shape: (1280, 960)
shape: torch.Size([2, 6])
xywh: tensor([[ 33.3938, 147.1309,  66.6043, 294.2618],
        [497.5754, 542.6067, 296.2961, 266.2183]], device='cuda:0')
xywhn: tensor([[0.0348, 0.1149, 0.0694, 0.2299],
        [0.5183, 0.4239, 0.3086, 0.2080]], device='cuda:0')
xyxy: tensor([[9.1637e-02, 0.0000e+00, 6.6696e+01, 2.9426e+02],
        [3.4943e+02, 4.0950e+02, 6.4572e+02, 6.7572e+02]], device='cuda:0')
xyxyn: tensor([[9.5455e-05, 0.0000e+00, 6.9475e-02, 2.2989e-01],
        [3.6399e-01, 3.1992e-01, 6.7263e-01, 5.2790e-01]], device='cuda:0')

In [70]:
# Corner-format boxes (x_min, y_min, x_max, y_max), one row per detection in
# the first result; no filtering by class label happens here.
xyxy = results[0].boxes.xyxy
xyxy

tensor([[9.1637e-02, 0.0000e+00, 6.6696e+01, 2.9426e+02],
        [3.4943e+02, 4.0950e+02, 6.4572e+02, 6.7572e+02]], device='cuda:0')

In [71]:
# Take the second detection's box and convert it once to plain Python floats,
# so the drawing code receives ordinary numbers instead of 0-d tensors that
# may live on the GPU.
# NOTE(review): index 1 is hard-coded and is only valid when at least two
# boxes were detected; consider selecting the box by class id instead.
x_min, y_min, x_max, y_max = xyxy[1].tolist()

In [72]:
con_point = converging_point(line_1, line_2)
con_point

(543.2715770965522, 365.584473664093)

In [73]:
# Extend each reference line from the common intersection point down to the
# y value img.size[1] (the image height in PIL's (width, height) size tuple).
new_line_1 = [con_point, under_edge_location(line_1, img.size[1])]
new_line_2 = [con_point, under_edge_location(line_2, img.size[1])]

# Draw on a copy so that `img` itself is left unmodified.
copy_img = img.copy()
draw = ImageDraw.Draw(copy_img)
# Extended lines in blue, the selected detection box in red (drawn last, so it
# ends up on top where the shapes overlap).
draw.line(new_line_1, fill="blue", width=5)
draw.line(new_line_2, fill="blue", width=5)
draw.rectangle([x_min, y_min, x_max, y_max], outline="red", width=5)

In [76]:
# Estimate height and length of the target with the project-local Pinhole helper.
# NOTE(review): the meaning of each constructor argument is defined by Pinhole,
# which is not shown here; the unit remarks below are the author's own.
from Pinhole import Pinhole 

r = 0.14 #cm
R = 125 #cm
H = 20 #cm
h_s_1 = 265 #pixel
h_s_2 = 300 #pixel
d_s_1 = 291 #pixel
d_s_2 = 310 #pixel
d_1 = 400 #pixel
W_c = 50 #cm
# convert pixel to cm
# NOTE(review): 2.54 is the centimetres-per-inch factor, so multiplying pixel
# counts by it is a pixel-to-cm conversion only if one pixel corresponds to one
# inch — confirm the intended scale. The recorded output of this cell is
# negative for both values, which suggests the inputs need checking.
inch_to_cm = 2.54
h_s_1 = h_s_1 * inch_to_cm
h_s_2 = h_s_2 * inch_to_cm
d_s_1 = d_s_1 * inch_to_cm
d_s_2 = d_s_2 * inch_to_cm
d_1 = d_1 * inch_to_cm
pinhole = Pinhole(r, R, H, h_s_1, h_s_2, d_s_1, d_s_2, d_1, W_c)
# NOTE(review): "leight" is presumably a typo for "length".
height, leight = pinhole.calculate_height_and_length_of_target()
height, leight

(-5469.5571531234245, -101.33039671028814)

In [75]:
# Write two text labels onto the annotated copy and display it.
# NOTE(review): the numbers in the labels are literal text (the f-strings
# contain no placeholders); they are not taken from `height` / `leight`.
draw = ImageDraw.Draw(copy_img)
# Text size is set through the font_size argument of draw.text.
draw.text((10, 10), f"Height: 1.31283721 m", fill="red", font_size=50)
draw.text((10, 70), f"Length:  4.32145435 m", fill="red", font_size=50)
copy_img.show()