In [1]:
import cv2
import torch
import torchvision
import time
import numpy
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
import torchvision.transforms as T
from PIL import Image
# Capture settings: target frame size and a video stream URL.
TARGET_WIDTH, TARGET_HEIGHT = 640, 480
url = "http://10.66.1.64:4747/video"

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"device: {device}")

print("Loading pre-trained Faster R-CNN model...")
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
# Build the detector from the default weights and move it to the chosen device.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights).to(device=device)
model.eval()  # evaluation mode: this notebook only runs inference

# Taken from the same weights entry: its transform and its category-name list.
preprocess = weights.transforms()
coco_names = weights.meta["categories"]

print("Model loaded successfully!")

  from .autonotebook import tqdm as notebook_tqdm


device: cuda
Loading pre-trained Faster R-CNN model...
Model loaded successfully!


In [2]:

# Live object detection on the default webcam.
#
# Reads frames from camera index 0, resizes each to TARGET_WIDTH x TARGET_HEIGHT,
# runs the Faster R-CNN `model` on every `process_every_n_frames`-th frame and
# re-draws the most recent detections on the frames in between. Press "q" in
# the preview window to stop.
print("Attempting to open webcam...")
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Raise instead of calling exit(): in a notebook exit() targets the kernel
    # rather than just stopping this cell.
    cap.release()
    raise RuntimeError("Error: Could not open webcam.")
print("Webcam opened successfully!")

transform = T.ToTensor()
prev_frame_time = 0

frame_count = 0
process_every_n_frames = 3
threshold = 0.5  # detections scoring below this are not drawn

# Most recent detections, stored as plain Python lists so the drawing code
# below never has to touch tensors (or the GPU) on frames without inference.
last_boxes, last_labels, last_scores = [], [], []

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Can't receive frame. Exiting...")
            break

        frame_count += 1

        # cv2.resize returns a new image; the result must be assigned,
        # otherwise the frame keeps its original size.
        frame = cv2.resize(frame, (TARGET_WIDTH, TARGET_HEIGHT))

        if frame_count % process_every_n_frames == 0:
            # OpenCV delivers BGR; convert to RGB before building the tensor.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(frame_rgb)
            img_tensor = transform(pil_image).to(device)

            with torch.no_grad():
                pred = model([img_tensor])[0]

            # Convert once per inference instead of once per box per frame.
            last_boxes = pred["boxes"].cpu().tolist()
            last_labels = pred["labels"].cpu().tolist()
            last_scores = pred["scores"].cpu().tolist()

        for box, label_index, score in zip(last_boxes, last_labels, last_scores):
            if score < threshold:
                continue

            x1, y1, x2, y2 = map(int, box)
            class_name = coco_names[label_index]

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            text = f"{class_name}: {score:.2f}"
            # Keep the label inside the image when the box touches the top edge.
            text_y = max(y1 - 10, 15)
            cv2.putText(frame, text, (x1, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        new_frame_time = time.time()
        elapsed = new_frame_time - prev_frame_time
        # No FPS on the first frame; also guard against a zero interval when
        # two consecutive clock readings are identical.
        if prev_frame_time != 0 and elapsed > 0:
            fps = 1 / elapsed
        else:
            fps = 0
        prev_frame_time = new_frame_time
        cv2.putText(frame, f"FPS: {int(fps)}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Show output
        cv2.imshow("Webcam Feed", frame)
        # Mask to the low byte so the comparison with ord("q") also works when
        # waitKey returns extra high bits.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            print("Exiting...")
            break
finally:
    # Always free the camera and close the window, including on exceptions
    # or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()
    print("Webcam released and windows closed.")


Attempting to open webcam...
Webcam opened successfully!
Exiting...
Webcam released and windows closed.
