In [1]:
# # Install required packages
# !pip install ultralytics opencv-python matplotlib numpy Pillow
# !pip install --upgrade ultralytics

In [2]:
# Import necessary libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import time
import threading
from collections import deque
from ultralytics import YOLO
import os
from IPython.display import HTML, display
import io
import base64

In [3]:
# YOLO Video Inference Class with FPS Tracking
class YOLOVideoInference:
    def __init__(self, model_name="yolo11x.pt", confidence=0.5):
        """
        Initialize YOLO model for video inference
        
        Args:
            model_name (str): YOLO model name (e.g., 'yolo11x.pt')
            confidence (float): Confidence threshold for detections
        """
        self.model = YOLO(model_name)
        self.confidence = confidence
        self.fps_history = deque(maxlen=30)  # Store last 30 FPS values
        self.frame_times = deque(maxlen=30)  # Store frame processing times
        
    def process_frame(self, frame):
        """
        Process a single frame for object detection
        
        Args:
            frame: Input frame from video
            
        Returns:
            processed_frame: Frame with bounding boxes and labels
            fps: Current FPS
        """
        start_time = time.time()
        
        # Run YOLO inference
        results = self.model(frame, conf=self.confidence)
        
        # Draw bounding boxes and labels
        annotated_frame = results[0].plot()
        
        # Calculate FPS
        end_time = time.time()
        frame_time = end_time - start_time
        fps = 1.0 / frame_time if frame_time > 0 else 0
        
        # Update FPS history
        self.fps_history.append(fps)
        self.frame_times.append(frame_time)
        
        # Add FPS text to frame
        cv2.putText(annotated_frame, f'FPS: {fps:.1f}', 
                   (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        
        return annotated_frame, fps
    
    def get_average_fps(self):
        """Get average FPS from recent frames"""
        if len(self.fps_history) > 0:
            return np.mean(self.fps_history)
        return 0
    
    def get_detection_stats(self, results):
        """Get detection statistics"""
        if results and len(results) > 0:
            boxes = results[0].boxes
            if boxes is not None:
                return len(boxes.data)
        return 0

In [4]:
# FPS Plotting Class for Real-time Visualization
class FPSPlotter:
    def __init__(self, max_points=100):
        """
        Initialize FPS plotter for real-time visualization
        
        Args:
            max_points (int): Maximum number of points to display on plot
        """
        self.max_points = max_points
        self.fps_data = deque(maxlen=max_points)
        self.time_data = deque(maxlen=max_points)
        self.start_time = time.time()
        
        # Setup matplotlib for real-time plotting
        plt.ion()
        self.fig, (self.ax1, self.ax2) = plt.subplots(2, 1, figsize=(12, 8))
        
        # FPS plot setup
        self.line1, = self.ax1.plot([], [], 'b-', linewidth=2, label='FPS')
        self.ax1.set_xlabel('Time (seconds)')
        self.ax1.set_ylabel('FPS')
        self.ax1.set_title('Real-time FPS Monitoring')
        self.ax1.grid(True)
        self.ax1.legend()
        
        # Detection count plot setup
        self.detection_data = deque(maxlen=max_points)
        self.line2, = self.ax2.plot([], [], 'r-', linewidth=2, label='Detections')
        self.ax2.set_xlabel('Time (seconds)')
        self.ax2.set_ylabel('Number of Detections')
        self.ax2.set_title('Real-time Detection Count')
        self.ax2.grid(True)
        self.ax2.legend()
        
        plt.tight_layout()
        
    def update_plot(self, fps, detection_count=0):
        """
        Update the FPS and detection count plots
        
        Args:
            fps (float): Current FPS value
            detection_count (int): Number of detections in current frame
        """
        current_time = time.time() - self.start_time
        
        # Add new data points
        self.fps_data.append(fps)
        self.time_data.append(current_time)
        self.detection_data.append(detection_count)
        
        # Update FPS plot
        self.line1.set_data(list(self.time_data), list(self.fps_data))
        self.ax1.relim()
        self.ax1.autoscale_view()
        
        # Update detection count plot
        self.line2.set_data(list(self.time_data), list(self.detection_data))
        self.ax2.relim()
        self.ax2.autoscale_view()
        
        # Refresh the plot
        self.fig.canvas.draw()
        self.fig.canvas.flush_events()
        
    def save_plot(self, filename='fps_analysis.png'):
        """Save the current plot to file"""
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        print(f"Plot saved as {filename}")
        
    def close(self):
        """Close the plot"""
        plt.ioff()
        plt.close(self.fig)

In [5]:
# Main Video Processing Function
def process_video_with_yolo(input_path, output_path=None, model_name="yolo11x.pt", 
                           confidence=0.5, show_plots=True, save_video=True):
    """
    Process video with YOLO object detection and FPS monitoring
    
    Args:
        input_path (str): Path to input video file
        output_path (str): Path to save output video (optional)
        model_name (str): YOLO model to use
        confidence (float): Detection confidence threshold
        show_plots (bool): Whether to show real-time FPS plots
        save_video (bool): Whether to save processed video
    
    Returns:
        dict: Processing statistics
    """
    
    # Initialize YOLO inference
    yolo_inference = YOLOVideoInference(model_name, confidence)
    
    # Initialize FPS plotter if requested
    fps_plotter = None
    if show_plots:
        fps_plotter = FPSPlotter()
    
    # Open video capture
    cap = cv2.VideoCapture(input_path)
    
    if not cap.isOpened():
        raise ValueError(f"Could not open video file: {input_path}")
    
    # Get video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    print(f"Video Properties:")
    print(f"  Resolution: {width}x{height}")
    print(f"  Original FPS: {fps:.1f}")
    print(f"  Total Frames: {total_frames}")
    print(f"  Duration: {total_frames/fps:.1f} seconds")
    
    # Setup video writer if saving output
    out = None
    if save_video and output_path:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    
    # Processing statistics
    stats = {
        'total_frames': 0,
        'processed_frames': 0,
        'total_detections': 0,
        'processing_time': 0,
        'avg_fps': 0,
        'avg_detections_per_frame': 0
    }
    
    frame_count = 0
    total_detections = 0
    start_time = time.time()
    
    print("\\nProcessing video... Press 'q' to quit early")
    
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            
            frame_count += 1
            
            # Process frame with YOLO
            processed_frame, current_fps = yolo_inference.process_frame(frame)
            
            # Get detection count for current frame
            results = yolo_inference.model(frame, conf=confidence)
            detection_count = yolo_inference.get_detection_stats(results)
            total_detections += detection_count
            
            # Update FPS plot if enabled
            if fps_plotter:
                fps_plotter.update_plot(current_fps, detection_count)
            
            # Write frame to output video
            if out is not None:
                out.write(processed_frame)
            
            # Display progress
            if frame_count % 30 == 0:  # Update every 30 frames
                progress = (frame_count / total_frames) * 100
                avg_fps = yolo_inference.get_average_fps()
                print(f"Progress: {progress:.1f}% | Frame: {frame_count}/{total_frames} | "
                      f"Avg FPS: {avg_fps:.1f} | Detections: {detection_count}")
            
            # Optional: Display frame (uncomment for real-time viewing)
            # cv2.imshow('YOLO Detection', processed_frame)
            # if cv2.waitKey(1) & 0xFF == ord('q'):
            #     break
    
    except KeyboardInterrupt:
        print("\\nProcessing interrupted by user")
    
    finally:
        # Clean up
        processing_time = time.time() - start_time
        
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
        
        # Calculate final statistics
        stats.update({
            'total_frames': total_frames,
            'processed_frames': frame_count,
            'total_detections': total_detections,
            'processing_time': processing_time,
            'avg_fps': yolo_inference.get_average_fps(),
            'avg_detections_per_frame': total_detections / frame_count if frame_count > 0 else 0
        })
        
        # Print final statistics
        print(f"\\n=== Processing Complete ===")
        print(f"Processed: {frame_count}/{total_frames} frames")
        print(f"Processing time: {processing_time:.1f} seconds")
        print(f"Average FPS: {stats['avg_fps']:.1f}")
        print(f"Total detections: {total_detections}")
        print(f"Average detections per frame: {stats['avg_detections_per_frame']:.1f}")
        
        if save_video and output_path:
            print(f"Output video saved: {output_path}")
        
        # Save FPS plot if enabled
        if fps_plotter:
            fps_plotter.save_plot('fps_analysis.png')
            fps_plotter.close()
    
    return stats

In [6]:
# Example Usage - Process Video with YOLO
def run_yolo_inference_example():
    """
    Example function to demonstrate YOLO video inference
    """
    
    # Example video paths (modify these paths according to your needs)
    input_video = r"C:\Users\tungp\Downloads\observing\train\samples\Lifering_0\drone_video.mp4"  # Path to your input video
    output_video = "output_video.mp4"  # Path to save processed video
    
    # Check if input video exists
    if not os.path.exists(input_video):
        print(f"Creating sample configuration...")
        print(f"Please place your video file at: {input_video}")
        print(f"Or modify the 'input_video' variable in this cell")
        print(f"\\nAvailable YOLO models:")
        print(f"  - yolo11n.pt (fastest, least accurate)")
        print(f"  - yolo11s.pt (fast)")
        print(f"  - yolo11m.pt (medium)")
        print(f"  - yolo11l.pt (large)")
        print(f"  - yolo11x.pt (extra large, most accurate)")
        print(f"\\nFor webcam input, use: input_video = 0")
        return
    
    # Process video with YOLO
    try:
        print("Starting YOLO inference on video...")
        
        # Run inference with FPS plotting
        stats = process_video_with_yolo(
            input_path=input_video,
            output_path=output_video,
            model_name="yolo11x.pt",  # Using YOLO11x (equivalent to YOLOv8x)
            confidence=0.5,
            show_plots=True,
            save_video=True
        )
        
        print(f"\\nProcessing completed successfully!")
        return stats
        
    except Exception as e:
        print(f"Error during processing: {str(e)}")
        return None

# Uncomment the line below to run the example
# stats = run_yolo_inference_example()

In [7]:
# Webcam Real-time Inference (Optional)
def run_webcam_inference():
    """
    Run YOLO inference on webcam feed with real-time FPS display
    """
    try:
        # Initialize YOLO inference
        yolo_inference = YOLOVideoInference("yolo11x.pt", confidence=0.5)
        fps_plotter = FPSPlotter()
        
        # Open webcam
        cap = cv2.VideoCapture(0)  # 0 for default webcam
        
        if not cap.isOpened():
            print("Error: Could not open webcam")
            return
        
        print("Starting webcam inference... Press 'q' to quit")
        
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            
            # Process frame
            processed_frame, current_fps = yolo_inference.process_frame(frame)
            
            # Get detection count
            results = yolo_inference.model(frame, conf=0.5)
            detection_count = yolo_inference.get_detection_stats(results)
            
            # Update plots
            fps_plotter.update_plot(current_fps, detection_count)
            
            # Display frame
            cv2.imshow('YOLO Webcam Detection', processed_frame)
            
            # Break on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        
        # Clean up
        cap.release()
        cv2.destroyAllWindows()
        fps_plotter.close()
        
        print("Webcam inference stopped")
        
    except Exception as e:
        print(f"Error during webcam inference: {str(e)}")

# Uncomment to run webcam inference
# run_webcam_inference()

In [8]:
input_video = r"C:\Users\tungp\Downloads\observing\train\samples\Person1_0\drone_video.mp4"
output_video = "output_video.mp4"

# Chạy inference
stats = process_video_with_yolo(
    input_path=input_video,
    output_path=output_video,
    model_name="yolo11x.pt",
    confidence=0.5,
    show_plots=True,
    save_video=True
)

Video Properties:
  Resolution: 1024x576
  Original FPS: 25.0
  Total Frames: 5609
  Duration: 224.4 seconds
\nProcessing video... Press 'q' to quit early

0: 384x640 (no detections), 783.2ms
Speed: 8.2ms preprocess, 783.2ms inference, 8.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 624.8ms
Speed: 4.5ms preprocess, 624.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 630.8ms
Speed: 2.2ms preprocess, 630.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 578.3ms
Speed: 2.5ms preprocess, 578.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 626.2ms
Speed: 1.8ms preprocess, 626.2ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 1050.1ms
Speed: 2.2ms preprocess, 1050.1ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 633.

# YOLO Video Object Detection với FPS Monitoring

Notebook này cung cấp một giải pháp hoàn chỉnh để chạy YOLO object detection trên video với tính năng:

## Tính năng chính:
- ✅ **YOLO11x Object Detection**: Sử dụng model YOLO11x (tương đương YOLOv8x) cho độ chính xác cao
- ✅ **Real-time FPS Tracking**: Theo dõi FPS trong thời gian thực 
- ✅ **FPS Plotting**: Vẽ biểu đồ FPS và số lượng detection theo thời gian
- ✅ **Video Output**: Lưu video đã xử lý với bounding boxes
- ✅ **Statistics**: Thống kê chi tiết về quá trình xử lý
- ✅ **Webcam Support**: Hỗ trợ inference trực tiếp từ webcam

## Cách sử dụng:

### 1. Xử lý Video:
```python
# Đặt đường dẫn video của bạn
input_video = "path/to/your/video.mp4"
output_video = "output_video.mp4"

# Chạy inference
stats = process_video_with_yolo(
    input_path=input_video,
    output_path=output_video,
    model_name="yolo11x.pt",
    confidence=0.5,
    show_plots=True,
    save_video=True
)
```

### 2. Webcam Real-time:
```python
run_webcam_inference()
```

## Models có sẵn:
- `yolo11n.pt`: Nhanh nhất, độ chính xác thấp
- `yolo11s.pt`: Nhanh  
- `yolo11m.pt`: Trung bình
- `yolo11l.pt`: Lớn
- `yolo11x.pt`: Lớn nhất, độ chính xác cao nhất (khuyến nghị)

## Output:
- Video đã xử lý với bounding boxes
- Biểu đồ FPS real-time 
- File ảnh `fps_analysis.png` chứa biểu đồ phân tích
- Thống kê chi tiết về performance