In [None]:
import cv2
from ultralytics import YOLO

# Load the pre-trained YOLOv8 Nano checkpoint (the smallest / fastest variant).
model = YOLO("yolov8n.pt")

# Open the default webcam; pass a file path instead of 0 to read a video file.
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera handle and the preview window are released
# even if inference or display raises, or the kernel is interrupted. Without it
# the webcam stays locked until the kernel is restarted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (camera unavailable or end of video).
            break

        # Perform object detection on the current frame.
        results = model(frame)

        # One input image is passed, so the first result is the one to draw.
        # Fall back to the raw frame rather than referencing an unbound name
        # if nothing came back.
        annotated = results[0].plot() if results else frame

        # Display the annotated frame.
        cv2.imshow("YOLOv8 Real-Time Object Detection", annotated)

        # Press 'q' to exit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources on every exit path.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import cv2
import torch
import torchvision
from ultralytics import YOLO
from torchvision.transforms import functional as F

# Load YOLOv8 model (object detection) from the pre-trained nano checkpoint.
yolo_model = YOLO("yolov8n.pt")

# Load Mask R-CNN model (instance segmentation) with pre-trained weights.
# `weights="DEFAULT"` is the current torchvision API; the older
# `pretrained=True` flag is deprecated (torchvision >= 0.13) and emits a
# warning. NOTE(review): requires torchvision >= 0.13 -- confirm the
# environment, or keep `pretrained=True` on older installs.
maskrcnn_model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")
# Switch to inference mode so the model returns predictions.
maskrcnn_model.eval()

# Open the default webcam.
cap = cv2.VideoCapture(0)

# COCO category names indexed by the label ids that torchvision's COCO
# detection models emit. Those ids follow the original 91-slot COCO numbering,
# which has gaps; the 'N/A' placeholders keep every name at its real index.
# Without them, every label from id 13 ('stop sign') upward is shifted to the
# wrong name and ids above 80 fall outside the list.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
    'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A',
    'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A',
    'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
    'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'TV', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

# Generate one random BGR color per class name. A locally seeded generator
# keeps the colors identical across kernel restarts without touching the
# global `random` state.
import random
color_rng = random.Random(42)
colors = {
    label: (color_rng.randint(0, 255), color_rng.randint(0, 255), color_rng.randint(0, 255))
    for label in COCO_INSTANCE_CATEGORY_NAMES
}

# Minimum Mask R-CNN confidence for a detection to be drawn.
SCORE_THRESHOLD = 0.5

# try/finally guarantees the camera handle and the preview window are released
# even if inference or display raises, or the kernel is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (camera unavailable or end of video).
            break

        # Build the Mask R-CNN input from the untouched camera frame *before*
        # any drawing happens, so the second model never sees YOLO's overlay.
        # OpenCV delivers BGR; the torchvision model expects RGB.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img_tensor = F.to_tensor(rgb_frame).unsqueeze(0)

        # YOLOv8 object detection; all further drawing goes onto its
        # annotated image.
        yolo_results = yolo_model(frame)
        annotated = yolo_results[0].plot() if yolo_results else frame.copy()

        # Mask R-CNN inference (no gradients needed).
        with torch.no_grad():
            detections = maskrcnn_model(img_tensor)[0]

        # Pull the per-detection fields out once instead of re-indexing the
        # output dict for every detection.
        boxes = detections['boxes'].cpu().numpy().astype(int)
        scores = detections['scores'].cpu().tolist()
        label_ids = detections['labels'].cpu().tolist()

        for box, score, label_idx in zip(boxes, scores, label_ids):
            if score <= SCORE_THRESHOLD:
                continue
            # Ensure the label index exists in our COCO list.
            if label_idx >= len(COCO_INSTANCE_CATEGORY_NAMES):
                continue

            label = COCO_INSTANCE_CATEGORY_NAMES[label_idx]
            color = colors[label]

            # Plain Python ints for the OpenCV drawing calls.
            x1, y1, x2, y2 = box.tolist()
            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
            cv2.putText(annotated, f"{label} {score:.2f}", (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Display result.
        cv2.imshow("YOLOv8 + Mask R-CNN", annotated)

        # Press 'q' to exit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources on every exit path.
    cap.release()
    cv2.destroyAllWindows()
