# Environment Setup
Install necessary packages (ultralytics, opencv-python, numpy) and import required libraries for working with YOLO models.

In [None]:
# Install required packages
!pip install ultralytics opencv-python numpy

# Import necessary libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO
import os

# For displaying images in the notebook
%matplotlib inline

# Working with Pre-trained Models
Load pre-trained YOLOv8 models of various sizes (nano, small, medium, large, and xlarge) using the Ultralytics library and explore their capabilities.

In [None]:
# Instantiate one pre-trained YOLOv8 model per published size.
# The nano variant is the one the remaining cells use.
yolov8_nano = YOLO('yolov8n.pt')    # nano (smallest)
yolov8_small = YOLO('yolov8s.pt')   # small
yolov8_medium = YOLO('yolov8m.pt')  # medium
yolov8_large = YOLO('yolov8l.pt')   # large
yolov8_xlarge = YOLO('yolov8x.pt')  # extra large

# Print each model under its size label, smallest first
print("Available YOLOv8 Models:")
for _size_label, _loaded_model in (
    ("Nano", yolov8_nano),
    ("Small", yolov8_small),
    ("Medium", yolov8_medium),
    ("Large", yolov8_large),
    ("Extra Large", yolov8_xlarge),
):
    print(f"{_size_label} Model: {_loaded_model}")

# Basic Object Detection
Perform object detection on sample images using the pre-trained YOLO model, visualize results with bounding boxes, and extract detailed information about detected objects.

In [None]:
# Download a sample image if needed
!wget -q https://ultralytics.com/images/bus.jpg -O sample_image.jpg

# Run the nano model on the downloaded sample image
results = yolov8_nano('sample_image.jpg')
first_result = results[0]

# Render the annotated image and show it (converted from BGR to RGB for matplotlib)
results_image = first_result.plot()
rgb_image = cv2.cvtColor(results_image, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(12, 8))
plt.imshow(rgb_image)
plt.axis('off')
plt.title('YOLOv8 Detection Results')
plt.show()

# Pull out the detected boxes
boxes = first_result.boxes

# Summarise every detection: class name, confidence and integer pixel corners
print(f"Number of detections: {len(boxes)}")
print("\nDetection details:")
for number, box in enumerate(boxes, start=1):
    class_name = first_result.names[int(box.cls.item())]
    confidence = box.conf.item()
    x1, y1, x2, y2 = (int(value) for value in box.xyxy[0].tolist())

    print(f"Detection {number}: Class: {class_name}, Confidence: {confidence:.2f}, Box: [{x1}, {y1}, {x2}, {y2}]")

# Advanced Concepts: IoU and NMS
Implement Intersection over Union (IoU) calculation and Non-Maximum Suppression (NMS) algorithms to understand how YOLO handles overlapping detections and improves accuracy.

In [None]:
# Intersection over Union (IoU) Calculation
def calculate_iou(box1, box2):
    """
    Calculate the Intersection over Union (IoU) of two axis-aligned boxes.

    Args:
        box1: Box in [x1, y1, x2, y2] format (top-left and bottom-right corners).
        box2: Box in the same [x1, y1, x2, y2] format.

    Returns:
        Intersection area divided by union area. Returns 0.0 when the boxes
        do not overlap or when the union has no area (e.g. both boxes are
        zero-sized), so the function never divides by zero.
    """
    # Width and height of the overlap rectangle; negative means the boxes are disjoint
    inter_width = min(box1[2], box2[2]) - max(box1[0], box2[0])
    inter_height = min(box1[3], box2[3]) - max(box1[1], box2[1])
    if inter_width < 0 or inter_height < 0:
        return 0.0  # No intersection

    inter_area = inter_width * inter_height

    # Areas of the individual boxes
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # Union counts the overlap only once
    union_area = box1_area + box2_area - inter_area
    if union_area <= 0:
        # Degenerate (zero-area) boxes: IoU is undefined, report no overlap
        return 0.0
    return inter_area / union_area

# Example usage: two 100x100 squares, the second shifted by 50 px on both axes
box1, box2 = [100, 100, 200, 200], [150, 150, 250, 250]
example_iou = calculate_iou(box1, box2)
print(f"IoU between box1 and box2: {example_iou:.4f}")

# Non-Maximum Suppression (NMS)
def non_max_suppression(boxes, scores, iou_threshold):
    """
    Apply greedy, class-agnostic Non-Maximum Suppression.

    The highest-scoring box is kept, every remaining box whose IoU with it
    exceeds ``iou_threshold`` is discarded, and the process repeats on what
    is left.

    Args:
        boxes: Sequence or array of bounding boxes, each [x1, y1, x2, y2].
        scores: Sequence or array of confidence scores, one per box.
        iou_threshold: Boxes with IoU strictly greater than this value
            relative to a kept box are treated as duplicates and dropped.

    Returns:
        List of plain ``int`` indices into ``boxes`` of the boxes to keep,
        ordered from highest to lowest score.
    """
    # If no boxes, return empty list
    if len(boxes) == 0:
        return []

    boxes = np.asarray(boxes, dtype=float)
    scores = np.asarray(scores)

    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)

    # Candidate indices, best score first
    order = np.argsort(scores)[::-1]

    picked_indices = []
    while order.size > 0:
        curr_idx = order[0]
        # Convert to a built-in int so callers get ordinary Python indices
        picked_indices.append(int(curr_idx))

        rest = order[1:]

        # IoU of the current box against all remaining candidates at once
        inter_w = np.minimum(x2[curr_idx], x2[rest]) - np.maximum(x1[curr_idx], x1[rest])
        inter_h = np.minimum(y2[curr_idx], y2[rest]) - np.maximum(y1[curr_idx], y1[rest])
        inter = np.clip(inter_w, 0, None) * np.clip(inter_h, 0, None)
        union = areas[curr_idx] + areas[rest] - inter
        # Zero-area unions would divide by zero; treat them as no overlap
        ious = np.divide(inter, union, out=np.zeros_like(inter), where=union > 0)

        # Keep only candidates that do not overlap the current box too much
        order = rest[ious <= iou_threshold]

    return picked_indices

# Example usage: three candidate boxes, listed with their scores in descending order
example_boxes = [[100, 100, 200, 200], [110, 110, 210, 210], [150, 150, 250, 250]]
example_scores = [0.9, 0.8, 0.7]
iou_threshold = 0.5

kept_indices = non_max_suppression(
    boxes=example_boxes,
    scores=example_scores,
    iou_threshold=iou_threshold,
)
print("Kept indices after NMS:", kept_indices)

# Processing Video with YOLO
Create functions to process video files frame by frame using YOLO for object detection, with options to save the output video containing detection annotations.

In [None]:
# Function to process a video with YOLO
def process_video(video_path, model, output_path, confidence_threshold=0.25, iou_threshold=0.5):
    """
    Process a video file frame by frame using YOLO for object detection.

    Each frame is run through ``model``, the detections are filtered again
    with this notebook's ``non_max_suppression``, the surviving boxes and
    labels are drawn on the frame, and the frame is appended to the output
    video.

    Args:
        video_path (str): Path to the input video file.
        model (YOLO): Pre-trained YOLO model.
        output_path (str): Path to save the output video with detections.
        confidence_threshold (float): Minimum confidence score for detections.
        iou_threshold (float): IoU threshold for Non-Maximum Suppression.

    Returns:
        None. If the input cannot be opened or the output writer cannot be
        created, an error message is printed and nothing is processed.
    """
    # Open the video file and bail out early instead of reporting a false success
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print(f"Error: could not open video file {video_path}")
        return

    out = None
    try:
        # Get video properties
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            # The capture did not report a usable frame rate; fall back to a common default
            fps = 30.0
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Create VideoWriter object to save the output video
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            print(f"Error: could not create output video {output_path}")
            return

        frame_idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Perform object detection on the frame
            results = model(frame, conf=confidence_threshold)

            # Extract bounding boxes, scores, and class IDs
            boxes = results[0].boxes.xyxy.cpu().numpy()
            scores = results[0].boxes.conf.cpu().numpy()
            class_ids = results[0].boxes.cls.cpu().numpy()

            # Apply Non-Maximum Suppression and draw only the surviving detections
            for idx in non_max_suppression(boxes, scores, iou_threshold):
                x1, y1, x2, y2 = map(int, boxes[idx])
                label = f"{results[0].names[int(class_ids[idx])]}: {scores[idx]:.2f}"

                # Draw bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Draw label
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Write the processed frame to the output video
            out.write(frame)

            frame_idx += 1
            if frame_idx % 100 == 0:
                print(f"Processed {frame_idx}/{total_frames} frames")
    finally:
        # Release resources even if detection or writing raised part-way through
        cap.release()
        if out is not None:
            out.release()

    print(f"Video processing complete. Output saved to {output_path}")

# Example usage: point both paths at real locations before running this cell
input_video_path = "input_video.mp4"    # video to read and annotate
output_video_path = "output_video.mp4"  # destination of the annotated copy
process_video(
    video_path=input_video_path,
    model=yolov8_nano,
    output_path=output_video_path,
)

# Real-time Object Detection System
Develop a complete real-time object detection system using webcam input, with threading for smooth performance and visualization of detections.

In [None]:
import cv2
import time
from threading import Thread

class RealTimeObjectDetection:
    """
    Real-time object detection loop built around a YOLO model.

    Frames are read from a webcam or video source, annotated by
    ``process_frame`` and shown in an OpenCV window until the source ends,
    the user presses 'q', or ``stop_detection`` is called.
    """

    def __init__(self, model, confidence_threshold=0.5, iou_threshold=0.5):
        """
        Initialize the real-time object detection system.
        Args:
            model: YOLO model object.
            confidence_threshold (float): Minimum confidence score for detections.
            iou_threshold (float): IoU threshold for Non-Max Suppression.
        """
        self.model = model
        self.confidence_threshold = confidence_threshold
        self.iou_threshold = iou_threshold
        # True only while the capture loop in start_detection is active
        self.running = False

    def process_frame(self, frame):
        """
        Process a single frame with YOLO and draw detections.
        Args:
            frame: Input video frame (drawn on in place).
        Returns:
            Processed frame with detections drawn.
        """
        results = self.model(frame, conf=self.confidence_threshold)
        boxes = results[0].boxes.xyxy.cpu().numpy()
        scores = results[0].boxes.conf.cpu().numpy()
        class_ids = results[0].boxes.cls.cpu().numpy()

        # Apply Non-Maximum Suppression and draw only the surviving detections
        for idx in non_max_suppression(boxes, scores, self.iou_threshold):
            x1, y1, x2, y2 = map(int, boxes[idx])
            label = f"{results[0].names[int(class_ids[idx])]}: {scores[idx]:.2f}"

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Draw label
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        return frame

    def start_detection(self, video_source=0):
        """
        Start real-time object detection using a webcam or video file.

        Blocks until the source runs out of frames, 'q' is pressed in the
        display window, or ``stop_detection`` is called from elsewhere.
        Args:
            video_source: Video source (default is webcam, use file path for video).
        """
        self.running = True
        cap = cv2.VideoCapture(video_source)

        try:
            if not cap.isOpened():
                print(f"Error: could not open video source {video_source}")
                return

            while self.running and cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Process the frame
                processed_frame = self.process_frame(frame)

                # Display the frame
                cv2.imshow("Real-Time Object Detection", processed_frame)

                # Exit on 'q' key press
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    self.running = False
        finally:
            # Always leave a consistent state: the flag reflects that the loop
            # has ended, and the camera and window are freed even on errors
            self.running = False
            cap.release()
            cv2.destroyAllWindows()

    def stop_detection(self):
        """
        Stop the real-time object detection.

        Clears the running flag; the capture loop checks it before reading
        the next frame.
        """
        self.running = False

# Example usage: build a detector around the nano model
real_time_detector = RealTimeObjectDetection(
    model=yolov8_nano,
    confidence_threshold=0.6,
    iou_threshold=0.4,
)

# To use the detector, run these commands in separate cells:
# 1. Create the thread: detection_thread = Thread(target=real_time_detector.start_detection, args=(0,))
# 2. Start the thread: detection_thread.start()
# 3. Stop detection after use: real_time_detector.stop_detection()

# Object Counting and Alerting
Extend the real-time detection system to count detected objects by class and implement an alerting mechanism for specific object classes of interest.

In [None]:
class RealTimeObjectDetectionWithAlerts(RealTimeObjectDetection):
    """
    Real-time detector that also counts objects per class and raises alerts.

    The capture loop is inherited from ``RealTimeObjectDetection``; this class
    overrides ``process_frame`` so that loop uses the alert-aware pipeline.
    """

    def __init__(self, model, confidence_threshold=0.5, iou_threshold=0.5, alert_classes=None):
        """
        Extend the real-time object detection system to include object counting and alerting.
        Args:
            model: YOLO model object.
            confidence_threshold (float): Minimum confidence score for detections.
            iou_threshold (float): IoU threshold for Non-Max Suppression.
            alert_classes (list): List of class IDs to trigger alerts for.
        """
        super().__init__(model, confidence_threshold, iou_threshold)
        self.alert_classes = alert_classes if alert_classes else []

    def process_frame(self, frame):
        """
        Process a frame through the counting and alerting pipeline.

        The inherited ``start_detection`` loop calls ``self.process_frame``;
        without this override it would run the base-class drawing only and
        the counts and alerts would never appear.
        Args:
            frame: Input video frame.
        Returns:
            Processed frame with detections, counts and any alert drawn.
        """
        return self.process_frame_with_alerts(frame)

    def process_frame_with_alerts(self, frame):
        """
        Process a single frame with YOLO, count objects, and trigger alerts.
        Args:
            frame: Input video frame (drawn on in place).
        Returns:
            Processed frame with detections drawn and alerts triggered.
        """
        results = self.model(frame, conf=self.confidence_threshold)
        boxes = results[0].boxes.xyxy.cpu().numpy()
        scores = results[0].boxes.conf.cpu().numpy()
        class_ids = results[0].boxes.cls.cpu().numpy()

        # Apply Non-Maximum Suppression
        kept_indices = non_max_suppression(boxes, scores, self.iou_threshold)

        # Count objects by class, check for alert classes, and draw each detection
        class_counts = {}
        alert_count = 0
        for idx in kept_indices:
            class_id = int(class_ids[idx])
            class_name = results[0].names[class_id]
            class_counts[class_name] = class_counts.get(class_name, 0) + 1

            if class_id in self.alert_classes:
                alert_count += 1

            x1, y1, x2, y2 = map(int, boxes[idx])
            label = f"{class_name}: {scores[idx]:.2f}"

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Draw label
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display object counts on the frame, one line per class from the top-left corner
        y_offset = 30
        for class_name, count in class_counts.items():
            text = f"{class_name}: {count}"
            cv2.putText(frame, text, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
            y_offset += 30

        # Trigger alert if needed: drawn near the bottom edge and echoed to stdout
        if alert_count > 0:
            alert_text = f"ALERT: {alert_count} objects of interest detected!"
            cv2.putText(frame, alert_text, (10, frame.shape[0] - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            print(alert_text)

        return frame

# Example usage: build an alerting detector around the nano model
alert_classes = [0, 1]  # Replace with class IDs of interest
real_time_detector_with_alerts = RealTimeObjectDetectionWithAlerts(
    model=yolov8_nano,
    confidence_threshold=0.6,
    iou_threshold=0.4,
    alert_classes=alert_classes,
)

# To use the detector with alerts, run these commands in separate cells:
# 1. Create the thread: detection_thread = Thread(target=real_time_detector_with_alerts.start_detection, args=(0,))
# 2. Start the thread: detection_thread.start()
# 3. Stop detection after use: real_time_detector_with_alerts.stop_detection()