# Importing Libraries

In [7]:
from utils import ArmHandler
from ultralytics import YOLO

import random
import time
import cv2
import math

# Single arm handler shared by the notebook; the live-detection loop calls
# arm.pickup_block_from(...) and arm.place_block_on() on it.
# NOTE(review): presumably the constructor opens the connection to the arm —
# confirm in utils.ArmHandler, since re-running this cell creates a new handler.
arm = ArmHandler()  # Initialize the arm handler

# Start Video Capture

In [8]:
# Open the default camera (device index 0) and fail fast if it is unavailable,
# instead of letting a later cap.read() hand a None frame to the model.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open the webcam (device index 0)")

# Request a 640x480 stream. The named constants are the same property ids
# that were previously written as the bare numbers 3 and 4.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Preview window; the live-detection loop draws into it via cv2.imshow("Webcam", ...).
cv2.namedWindow("Webcam")

# Detection model loaded from the exported TorchScript weights.
model = YOLO("yolo_models/best.torchscript", task="detect")

# Set the pixel-to-world coordinate conversion

In [9]:
# m1, c1 = 0.6515, 150.83251  
# m2, c2 = 0.65973, -88.76217

# Coefficients of the affine pixel -> world mapping used by pixel_to_world().
# Row 1 produces the first world coordinate, row 2 the second.
# Replace these with the values obtained from coordinate calibration.
mx1, my1, c1 = 0.6695190945, 0.0262044828, 138.6430459748
mx2, my2, c2 = -0.0251056056, 0.6591448809, -84.8399852824


def pixel_to_world(px, py):
    """Map a pixel position to world coordinates with the calibrated affine model.

    Each output is a linear combination of both inputs plus an offset:

        world_x = mx1 * px + my1 * py + c1
        world_y = mx2 * px + my2 * py + c2

    Args:
        px: First pixel coordinate.
        py: Second pixel coordinate.

    Returns:
        A ``(world_x, world_y)`` tuple.

    NOTE(review): the live-detection cell calls this as ``pixel_to_world(py, px)``,
    i.e. with the image coordinates swapped. Presumably the calibration was
    fitted with that ordering — confirm before "fixing" either side.
    """
    world_x = mx1 * px + my1 * py + c1
    world_y = mx2 * px + my2 * py + c2
    return world_x, world_y

# Live Detection

In [10]:
# Tunables for the live-detection loop.
MIN_CONFIDENCE = 0.9      # boxes whose (rounded-up) confidence is below this are ignored
PICKUP_COOLDOWN_S = 15    # minimum number of seconds between two pickup attempts

# time.time() of the most recent pickup attempt; None until the first attempt.
last_execution = None

# Grab frames, run detection, annotate the frame, and at most once per cooldown
# period send the arm to one randomly chosen detection. Press 'q' in the
# preview window to stop.
while True:
    success, img = cap.read()
    if not success or img is None:
        # The camera delivered no frame (unplugged, busy, stream ended).
        # Passing None on to the model or cv2.imshow would fail, so stop here.
        print("Failed to read a frame from the webcam; stopping live detection.")
        break

    results = model(img)

    for r in results:
        # Pixel centres (center_x, center_y) of the boxes accepted in this result.
        centroids = []

        for box in r.boxes:
            # Confidence rounded UP to two decimals; the same rounded value is
            # used for the threshold and for the on-screen label, so e.g. a raw
            # 0.891 is treated as 0.90 and passes.
            confidence: float = math.ceil(box.conf[0] * 100) / 100
            if confidence < MIN_CONFIDENCE:
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # Bounding box outline.
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 3)

            # Centre of the box in pixel coordinates.
            center_x = int((x1 + x2) / 2)
            center_y = int((y1 + y2) / 2)
            centroids.append((center_x, center_y))

            # Confidence label at the box's top-left corner and a filled dot on the centre.
            cv2.putText(img, str(confidence), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            cv2.circle(img, (center_x, center_y), 5, (0, 0, 255), -1)

        cooldown_over = last_execution is None or time.time() - last_execution > PICKUP_COOLDOWN_S
        if centroids and cooldown_over:
            last_execution = time.time()
            # px, py = sorted(centroids, key=lambda x: x[0], reverse=True)[0] # If you want to sort from the highest (Ry)
            px, py = random.choice(centroids)
            # NOTE(review): the arguments are passed as (py, px) although the
            # function signature is (px, py). This looks intentional (the line
            # above associates px with Ry) — confirm against the calibration.
            rx, ry = pixel_to_world(py, px)
            # These arm calls run inside the loop, so the preview is not
            # refreshed until they return.
            if arm.pickup_block_from(rx, ry):
                arm.place_block_on()
                # time.sleep(20)

    cv2.imshow('Webcam', img)
    # Mask to the low byte so the comparison with ord('q') also works on
    # platforms where waitKey returns extra high bits.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

Loading yolo_models\best.torchscript for TorchScript inference...

0: 640x640 300 colorful-cubes, 728.7ms
Speed: 5.0ms preprocess, 728.7ms inference, 70.5ms postprocess per image at shape (1, 3, 640, 640)
Sleeping

0: 640x640 300 colorful-cubes, 473.2ms
Speed: 4.0ms preprocess, 473.2ms inference, 53.3ms postprocess per image at shape (1, 3, 640, 640)
Sleeping


# Release the resources

In [11]:
# Release the camera opened in the "Start Video Capture" cell.
cap.release()
# Close the "Webcam" preview window (and any other OpenCV windows).
cv2.destroyAllWindows()
# NOTE(review): the arm handler is never shut down here — confirm whether
# ArmHandler provides disconnect() and whether it should be called.
# arm.disconnect()