<h1 span style="color:yellow; text-align:center;">Object Detection Using YOLOv8</span>

In [1]:
from ultralytics import YOLO

# <span style="color:red">YOLOv8 has several features:</span>
### 1. *Image Classification*
### 2. *Object Detection*
### 3. *Instance Segmentation*
### 4. *Pose Estimation*

## <span style="color:red">Let's run YOLOv8 for images</span>

### Downloading and loading models

In [2]:
image_classification = YOLO("object_detection/models/yolov8m-cls.pt")
object_detection = YOLO("object_detection/models/yolov8m.pt")
instance_segmentation = YOLO("object_detection/models/yolov8m-seg.pt")
pose_estimation = YOLO("object_detection/models/yolov8m-pose.pt")

### Applying models on images

In [3]:
result1 = image_classification("object_detection/images/image1.jpg")
result2 = object_detection("object_detection/images/image2.jpg")
result3 = instance_segmentation("object_detection/images/image3.jpg")
result4 = pose_estimation("object_detection/images/image4.jpg")


image 1/1 C:\Users\niloy\Data_Science\Deep Learning\object_detection\images\image1.jpg: 224x224 minibus 0.70, trolleybus 0.18, moving_van 0.04, streetcar 0.03, minivan 0.01, 35.9ms
Speed: 5.6ms preprocess, 35.9ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

image 1/1 C:\Users\niloy\Data_Science\Deep Learning\object_detection\images\image2.jpg: 448x640 1 cat, 1 bear, 191.0ms
Speed: 0.9ms preprocess, 191.0ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 C:\Users\niloy\Data_Science\Deep Learning\object_detection\images\image3.jpg: 480x640 1 bicycle, 1 truck, 1 dog, 261.5ms
Speed: 2.2ms preprocess, 261.5ms inference, 3.3ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 C:\Users\niloy\Data_Science\Deep Learning\object_detection\images\image4.jpg: 640x480 3 persons, 209.3ms
Speed: 2.1ms preprocess, 209.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 480)


### Showing results

In [4]:
result1[0].show()
result2[0].show()
result3[0].show()
result4[0].show()

## <span style="color:red">Object Detection In Video</span>

### Let's run Object Detection in a local video

In [6]:
import cv2
from ultralytics import YOLO

# Load the YOLOv8 model
model = YOLO("object_detection/models/yolov8m.pt")  # You can use 'yolov8s.pt', 'm', 'l', or 'x'

# Open video file or webcam
cap = cv2.VideoCapture("object_detection/videos/video1.mp4")  # Use 0 for webcam, or replace with 'video.mp4'

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLOv8 inference on the frame
    results = model(frame, stream=True)  # stream=True for real-time

    # Draw results on the frame
    for r in results:
        boxes = r.boxes
        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box
            conf = box.conf[0]
            cls = int(box.cls[0])
            label = model.names[cls]

            # Draw bounding box and label
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{label} {conf:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("YOLOv8 Detection", frame)
    
    # Exit on pressing 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 640x384 2 persons, 1 handbag, 172.4ms
Speed: 2.1ms preprocess, 172.4ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 persons, 1 backpack, 1 handbag, 173.3ms
Speed: 2.4ms preprocess, 173.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 persons, 1 backpack, 1 handbag, 164.4ms
Speed: 1.7ms preprocess, 164.4ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 3 persons, 1 handbag, 166.0ms
Speed: 1.3ms preprocess, 166.0ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 3 persons, 1 handbag, 165.8ms
Speed: 1.9ms preprocess, 165.8ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 3 persons, 1 handbag, 159.2ms
Speed: 1.6ms preprocess, 159.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 3 persons, 1 handbag, 161.9ms
Speed: 1.3ms preprocess, 161.9ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 3

### Let's run Object Detection in webcam

In [8]:
import cv2
from ultralytics import YOLO

# Load the YOLOv8 model
model = YOLO("object_detection/models/yolov8m.pt")  # You can use 'yolov8s.pt', 'm', 'l', or 'x'

# Open video file or webcam
cap = cv2.VideoCapture(0)  # Use 0 for webcam, or replace with 'video.mp4'

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLOv8 inference on the frame
    results = model(frame, stream=True)  # stream=True for real-time

    # Draw results on the frame
    for r in results:
        boxes = r.boxes
        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box
            conf = box.conf[0]
            cls = int(box.cls[0])
            label = model.names[cls]

            # Draw bounding box and label
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{label} {conf:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("YOLOv8 Detection", frame)
    
    # Exit on pressing 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 384x640 (no detections), 174.0ms
Speed: 1.8ms preprocess, 174.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 181.8ms
Speed: 1.7ms preprocess, 181.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 175.1ms
Speed: 1.4ms preprocess, 175.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 171.2ms
Speed: 1.3ms preprocess, 171.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 234.6ms
Speed: 1.5ms preprocess, 234.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 vase, 174.2ms
Speed: 1.4ms preprocess, 174.2ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 vase, 173.2ms
Speed: 1.6ms preprocess, 173.2ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 vase, 175.1ms
Speed: 1.2ms preprocess, 175.1ms inference, 0

KeyboardInterrupt: 