In [1]:
!pip install torch torchvision torchaudio
!pip install ultralytics
!pip install deep_sort_realtime
!pip install opencv-python

Collecting ultralytics
  Downloading ultralytics-8.2.87-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.6-py3-none-any.whl.metadata (9.1 kB)
Downloading ultralytics-8.2.87-py3-none-any.whl (872 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m872.1/872.1 kB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.6-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.2.87 ultralytics-thop-2.0.6
Collecting deep_sort_realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Downloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m69.2 MB/s[0m eta [36m0:00:00[0m
[?25hInsta

In [None]:
from ultralytics import YOLO
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import os

# Load the default YOLOv8 model (no need to specify a model file path)
model = YOLO("yolov8n.pt")

# Create DeepSort object for tracking
deepsort = DeepSort(max_age=100, n_init=3, nms_max_overlap=1.0, max_iou_distance=0.7)

# Specify the input video folder
input_folder = "input_videos"

# Specify the output video folder
output_folder = "output_videos"

# Create the output folder if it doesn't exist
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Get a list of all video files in the input folder
video_files = [os.path.join(input_folder, file) for file in os.listdir(input_folder) if file.endswith(".mp4")]

# Process each video file
for VIDEO_FILE in video_files:
    # Open the video file
    cap = cv2.VideoCapture(VIDEO_FILE)

    # Get video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    # Create output video file name
    output_file_name = os.path.basename(VIDEO_FILE).split(".")[0] + "_output.mp4"
    output_file_path = os.path.join(output_folder, output_file_name)
    out = cv2.VideoWriter(output_file_path, fourcc, fps, (width, height))

    # Initialize a dictionary to store the track IDs
    track_ids = {}
    next_id = 1  # The next ID to assign to a new person

    # Process video frames
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Run detection with YOLOv8
        results = model(frame)  # Perform inference

        # Extract bounding boxes, confidences, and class IDs
        detections = []
        for box in results[0].boxes:  # Iterate through detected objects
            if box.cls.cpu().numpy() == 0:  # Filter for person class (ID 0)
                bbox = box.xyxy[0].cpu().numpy()  # Bounding box coordinates in xyxy format
                confidence = box.conf.cpu().numpy()  # Confidence score
                if confidence > 0.5:  # Apply confidence threshold
                    detections.append((bbox, confidence, 0))  # Class ID is 0 for person

        # Update tracker
        tracks = deepsort.update_tracks(detections, frame=frame)

        # Draw bounding boxes and IDs on the output frame
        for track in tracks:
            if track.is_confirmed() and track.time_since_update <= 1:
                bbox = track.to_tlbr()  # Bounding box coordinates
                track_id = track.track_id  # Track ID

                # Check if the track ID is new
                if track_id not in track_ids:
                    track_ids[track_id] = next_id
                    next_id += 1

                # Draw bounding box with custom color and thickness
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 3)
                # Draw track ID with custom font and color
                cv2.putText(frame, f"ID {track_ids[track_id]}", (int(bbox[0]), int(bbox[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)

        # Write output frame to file
        out.write(frame)

    # Release video capture and writer
    cap.release()
    out.release()

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 176MB/s]


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
0: 480x640 2 persons, 1 chair, 1 dining table, 1 book, 12.6ms
Speed: 7.4ms preprocess, 12.6ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 chair, 1 dining table, 9.6ms
Speed: 7.7ms preprocess, 9.6ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 chair, 1 dining table, 1 book, 11.3ms
Speed: 2.5ms preprocess, 11.3ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 chair, 1 dining table, 1 book, 9.1ms
Speed: 4.4ms preprocess, 9.1ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 chair, 1 dining table, 1 book, 9.0ms
Speed: 4.0ms preprocess, 9.0ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 1 chair, 1 dining table, 10.0ms
Speed: 2.4ms preprocess, 10.0ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640