In [2]:
import cv2

In [None]:
from ultralytics import YOLO
# Load the yolov8n checkpoint from the local yolo-Weights/ folder; the cell
# output below shows the file being downloaded to that path on this run.
model = YOLO("yolo-Weights/yolov8n.pt")

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolo-Weights\yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 7.16MB/s]


## 1: Connect to Camera

In [1]:
import cv2

# Open camera index 1 and request a 640x480 stream.
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    # The first read below will fail and end the loop; say why up front.
    print("Camera 1 could not be opened; no frames will be shown.")
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

try:
    # Show live frames until 'q' is pressed or the camera stops delivering.
    while True:
        success, img = cap.read()
        if not success:
            # A failed read returns img=None; passing that to imshow raises.
            break

        cv2.imshow('Webcam', img)

        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the device and close the window, even when the kernel is
    # interrupted, so the next cell can reopen the camera.
    cap.release()
    cv2.destroyAllWindows()

## 2: Load in YOLO

In [None]:
import cv2
import math
from ultralytics import YOLO
import time

# Open camera index 1 and request a 640x480 stream.
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    # The first read below will fail and end the loop; say why up front.
    print("Camera 1 could not be opened; no frames will be shown.")
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

model = YOLO("yolo-Weights/yolov8n.pt", verbose=False)

# Label text for each class index reported by the detector.
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"]

# Variables to control cooldown and tracking.
# NOTE: target_class, cooldown_time and last_action_time are defined here but
# not read anywhere in this cell.
target_class = "cell phone"  # Object class to detect
cooldown_time = 5  # Seconds to wait before triggering the action again
last_action_time = 0  # Track the last time an action was executed
threshold = 0.5  # Minimum confidence for a detection to be drawn

try:
    # Annotate and display frames until 'q' is pressed or a read fails.
    while True:
        success, img = cap.read()
        if not success:
            break

        results = model(img, stream=True, verbose=False)

        for r in results:
            for box in r.boxes:
                confidence = float(box.conf[0])
                # Skip weak detections before doing any geometry work.
                if confidence < threshold:
                    continue

                cls = int(box.cls[0])
                detected_class = classNames[cls]
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

                # Draw the bounding box and its label; clamp the label's
                # baseline so it stays visible for boxes touching the top edge.
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
                label_y = max(y1 - 10, 15)
                cv2.putText(img, f"{detected_class} {confidence:.2f}", (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        cv2.imshow('Webcam', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the device and close the window, even when the kernel is
    # interrupted, so the next cell can reopen the camera.
    cap.release()
    cv2.destroyAllWindows()


## 3: Put Dot in Centre

In [None]:
import cv2
import math
from ultralytics import YOLO
import time

# Open camera index 1 and request a 640x480 stream.
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    # The first read below will fail and end the loop; say why up front.
    print("Camera 1 could not be opened; no frames will be shown.")
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

model = YOLO("yolo-Weights/yolov8n.pt", verbose=False)

# Label text for each class index reported by the detector.
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"]

# Variables to control cooldown and tracking.
# NOTE: target_class, cooldown_time and last_action_time are defined here but
# not read anywhere in this cell.
target_class = "cell phone"  # Object class to detect
cooldown_time = 5  # Seconds to wait before triggering the action again
last_action_time = 0  # Track the last time an action was executed
threshold = 0.5  # Minimum confidence for a detection to be drawn

try:
    # Annotate and display frames until 'q' is pressed or a read fails.
    while True:
        success, img = cap.read()
        if not success:
            break

        results = model(img, stream=True, verbose=False)

        for r in results:
            for box in r.boxes:
                confidence = float(box.conf[0])
                # Skip weak detections before doing any geometry work.
                if confidence < threshold:
                    continue

                cls = int(box.cls[0])
                detected_class = classNames[cls]
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

                # Centre of the bounding box in pixel coordinates.
                x_centre = (x1 + x2) // 2
                y_centre = (y1 + y2) // 2

                # Draw the bounding box, a filled centre dot and the label;
                # clamp the label's baseline so it stays visible for boxes
                # touching the top edge.
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
                cv2.circle(img, (x_centre, y_centre), radius=5, color=(0, 0, 255), thickness=-1)
                label_y = max(y1 - 10, 15)
                cv2.putText(img, f"{detected_class} {confidence:.2f}", (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        cv2.imshow('Webcam', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the device and close the window, even when the kernel is
    # interrupted, so the next cell can reopen the camera.
    cap.release()
    cv2.destroyAllWindows()


## 4: Detect only Target Class
Also normalize the centre coordinates by the frame dimensions.

In [6]:
import cv2
import math
from ultralytics import YOLO
import time

# Open camera index 1 and request a 640x480 stream.
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    # The first read below will fail and end the loop; say why up front.
    print("Camera 1 could not be opened; no frames will be shown.")
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

model = YOLO("yolo-Weights/yolov8n.pt", verbose=False)

# Label text for each class index reported by the detector.
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"]

# Detection filter settings
target_class = "cell phone"  # Only this class is drawn and reported
threshold = 0.5  # Minimum confidence for a detection to be used

try:
    # Annotate and display frames until 'q' is pressed or a read fails.
    while True:
        success, img = cap.read()
        if not success:
            break

        # Frame size is constant for every box in this frame.
        img_height, img_width = img.shape[:2]

        # Detect objects
        results = model(img, stream=True, verbose=False)

        for r in results:
            for box in r.boxes:
                confidence = float(box.conf[0])
                cls = int(box.cls[0])
                detected_class = classNames[cls]

                # Keep only confident detections of the target class.
                if confidence < threshold or detected_class != target_class:
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # Centre of the bounding box in pixel coordinates.
                x_centre = (x1 + x2) // 2
                y_centre = (y1 + y2) // 2

                # Normalise each axis by its own frame dimension.
                # (Previously norm_y was computed from x_centre.)
                norm_x = x_centre / img_width
                norm_y = y_centre / img_height

                # Draw bounding box, label, centre dot and normalised coords.
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 2)
                cv2.putText(img, f"{detected_class} {confidence:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
                cv2.circle(img, (x_centre, y_centre), radius=5, color=(0, 0, 255), thickness=-1)
                coord_text = f"({norm_x:.2f}, {norm_y:.2f})"
                cv2.putText(img, coord_text,
                            (x_centre + 10, y_centre), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

                # Report position; the right half starts past the horizontal midpoint.
                side_of_image = norm_x > 0.5
                print(f"Detected object at center: ({norm_x}, {norm_y})")
                print(f"Object is on {'right' if side_of_image else 'left'} side of the image.")

        # Display the video feed
        cv2.imshow('YOLO Detection', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the device and close the window, even when the kernel is
    # interrupted, so the next cell can reopen the camera.
    cap.release()
    cv2.destroyAllWindows()


Detected object at center: (0.3890625, 299)
Object is on left side of the image.
Detected object at center: (0.3921875, 329)
Object is on left side of the image.
Detected object at center: (0.375, 388)
Object is on left side of the image.
Detected object at center: (0.3703125, 380)
Object is on left side of the image.
Detected object at center: (0.3625, 373)
Object is on left side of the image.
Detected object at center: (0.3578125, 360)
Object is on left side of the image.
Detected object at center: (0.3578125, 360)
Object is on left side of the image.
Detected object at center: (0.3609375, 360)
Object is on left side of the image.
Detected object at center: (0.3546875, 365)
Object is on left side of the image.
Detected object at center: (0.3546875, 365)
Object is on left side of the image.
Detected object at center: (0.3484375, 380)
Object is on left side of the image.
Detected object at center: (0.35, 382)
Object is on left side of the image.
Detected object at center: (0.35625, 386

In [4]:
import cv2
import math
from ultralytics import YOLO
import time  # For cooldown timer

# Open camera index 1 and request a 640x480 stream.
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    # The first read below will fail and end the loop; say why up front.
    print("Camera 1 could not be opened; no frames will be shown.")
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

model = YOLO("yolo-Weights/yolov8n.pt", verbose=False)

# Label text for each class index reported by the detector.
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"]

threshold = 0.5  # Minimum confidence for a detection to be drawn and logged

try:
    # Annotate, log and display frames until 'q' is pressed or a read fails.
    while True:
        success, img = cap.read()
        if not success:
            break

        results = model(img, stream=True, verbose=False)
        detected_objects = []  # (class, confidence, x_centre, y_centre) per kept box

        for r in results:
            for box in r.boxes:
                confidence = float(box.conf[0])
                # Skip weak detections before doing any geometry work.
                if confidence < threshold:
                    continue

                cls = int(box.cls[0])
                detected_class = classNames[cls]
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

                # Centre of the bounding box in pixel coordinates.
                x_centre = (x1 + x2) // 2
                y_centre = (y1 + y2) // 2

                # Draw the bounding box, a filled centre dot and the label;
                # clamp the label's baseline so it stays visible for boxes
                # touching the top edge.
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
                cv2.circle(img, (x_centre, y_centre), radius=5, color=(0, 0, 255), thickness=-1)
                label_y = max(y1 - 10, 15)
                cv2.putText(img, f"{detected_class} {confidence:.2f}", (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

                detected_objects.append((detected_class, confidence, x_centre, y_centre))

        # Log every kept detection for this frame, numbered from 1.
        for idx, (name, score, cx, cy) in enumerate(detected_objects, start=1):
            print(f"Object {idx}: {name} (Confidence: {score:.2f}) at ({cx}, {cy})")

        cv2.imshow('Webcam', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the device and close the window; the recorded run of this
    # cell ended in KeyboardInterrupt, which previously skipped the cleanup.
    cap.release()
    cv2.destroyAllWindows()


Object 1: vase (Confidence: 0.54) at (91, 167)
Object 1: vase (Confidence: 0.51) at (91, 167)
Object 1: vase (Confidence: 0.52) at (92, 167)
Object 1: vase (Confidence: 0.55) at (91, 166)
Object 1: vase (Confidence: 0.55) at (92, 165)
Object 1: vase (Confidence: 0.53) at (92, 167)
Object 1: vase (Confidence: 0.56) at (92, 166)
Object 1: vase (Confidence: 0.53) at (92, 167)
Object 1: vase (Confidence: 0.56) at (92, 167)
Object 1: vase (Confidence: 0.56) at (92, 165)
Object 1: vase (Confidence: 0.60) at (92, 166)
Object 1: vase (Confidence: 0.59) at (91, 167)
Object 1: vase (Confidence: 0.57) at (92, 166)
Object 1: vase (Confidence: 0.51) at (91, 167)
Object 1: vase (Confidence: 0.59) at (92, 166)
Object 1: vase (Confidence: 0.60) at (92, 165)
Object 1: vase (Confidence: 0.63) at (91, 166)
Object 1: vase (Confidence: 0.57) at (91, 167)
Object 1: vase (Confidence: 0.51) at (91, 168)
Object 1: vase (Confidence: 0.56) at (91, 168)
Object 1: vase (Confidence: 0.53) at (92, 165)
Object 1: vas

KeyboardInterrupt: 