## DeepSORT

In [6]:
import os

# Remember the kernel's current working directory; TARGET_VIDEO_PATH is built from it.
HOME = os.getcwd()
print(HOME)

/kaggle/working


In [4]:
!pip install deep_sort_realtime
!pip install supervision
!pip install ultralytics

from IPython import display
display.clear_output()

from deep_sort_realtime.deepsort_tracker import DeepSort
import numpy as np
import supervision as sv

In [28]:
# NOTE: this cell reads `model`, which is created by the `YOLO('yolov8s.pt')`
# cell further down the notebook -- that cell has to be run before this one.
CLASS_NAMES_DICT = model.model.names  # class id -> class name

# SELECTED_CLASS_NAMES = ['car', 'truck', 'bus', 'motorcycle']
SELECTED_CLASS_NAMES = ['person']

# Invert the id -> name mapping once, instead of rebuilding it for every
# selected class name.
CLASS_IDS_BY_NAME = {name: class_id for class_id, name in CLASS_NAMES_DICT.items()}

# Ids of the classes to keep; raises KeyError if a selected name is not a model class.
SELECTED_CLASS_IDS = [CLASS_IDS_BY_NAME[class_name] for class_name in SELECTED_CLASS_NAMES]

In [26]:
from ultralytics import YOLO

# Load the detector from the 'yolov8s.pt' weights; the recorded cell output
# shows the file being downloaded when it is not present locally.
model = YOLO('yolov8s.pt')

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:00<00:00, 45.2MB/s]


In [21]:
# Input clip, and the location (under HOME) where the annotated video is written
SOURCE_VIDEO_PATH = '/kaggle/input/people-walking-resolution/people-walking.mp4'
TARGET_VIDEO_PATH = f"{HOME}/result_DeepSORT.mp4"

# DeepSORT tracker with explicitly chosen parameters
deep_sort_tracker = DeepSort(
    # Cosine distance threshold used when matching appearance features
    max_cosine_distance=0.2,
    # Detections required before a track counts as confirmed
    n_init=3,
    # Frames a lost track is kept around before it is removed
    max_age=30,
)

In [22]:
# Endpoints of the horizontal line (y=540, x from 0 to 1920) used for the line zone
LINE_START = sv.Point(0, 540)
LINE_END = sv.Point(1920, 540)  # NOTE(review): the previous note said the video is 3840 px wide, yet the line ends at x=1920 -- confirm the actual frame width

In [23]:
# Line zone that tracked objects are checked against for crossings
line_zone = sv.LineZone(end=LINE_END, start=LINE_START)

# Renders the line zone and its counts onto a frame
line_zone_annotator = sv.LineZoneAnnotator(
    text_scale=2,
    text_thickness=4,
    thickness=4,
)

# Renders the recent movement trace of each tracked object
trace_annotator = sv.TraceAnnotator(
    trace_length=50,
    thickness=4,
)

# Renders a text label next to each tracked object
label_annotator = sv.LabelAnnotator(
    text_color=sv.Color.BLACK,
    text_scale=1.5,
    text_thickness=2,
)

# Renders a bounding box around each tracked object
box_annotator = sv.BoxAnnotator(thickness=4)

In [19]:
def callback(frame: np.ndarray, index: int) -> np.ndarray:
    """Detect, track and annotate the selected classes on a single video frame.

    The frame is run through the YOLO `model`; detections whose class id is in
    `SELECTED_CLASS_IDS` are handed to `deep_sort_tracker`, and every confirmed
    track is drawn (trace, box, label) together with the line-zone counter.

    Args:
        frame: Image to process.
        index: Frame index; accepted for the `sv.process_video` callback
            signature but not used here.

    Returns:
        The annotated image. Annotations are drawn on a copy of `frame`.

    Note:
        Each call updates the notebook-level `deep_sort_tracker` and
        `line_zone` objects. Presumably both should be re-created before
        processing another video -- TODO confirm.
    """
    # Run the YOLO model and wrap its first result as Supervision detections
    results = model(frame, verbose=False)[0]
    detections = sv.Detections.from_ultralytics(results)

    # Keep only the classes listed in SELECTED_CLASS_IDS
    detections = detections[np.isin(detections.class_id, SELECTED_CLASS_IDS)]

    # Convert xyxy boxes into the ([left, top, width, height], confidence,
    # class_name) tuples that are passed to the tracker
    ds_detections = []
    for (left, top, right, bottom), conf, class_id in zip(
        detections.xyxy, detections.confidence, detections.class_id
    ):
        class_name = CLASS_NAMES_DICT.get(class_id, "Unknown")
        ds_detections.append(
            ([left, top, right - left, bottom - top], float(conf), class_name)
        )

    # Update the DeepSORT tracker with the new detections
    tracks = deep_sort_tracker.update_tracks(ds_detections, frame=frame)

    # Name -> id lookup, built once per frame rather than once per track
    class_ids_by_name = {name: class_id for class_id, name in CLASS_NAMES_DICT.items()}

    # Per-track details collected for visualization
    tracked_bboxes = []
    tracked_confidences = []
    tracked_class_ids = []
    tracked_ids = []

    for track in tracks:
        if not track.is_confirmed():  # Ignore unconfirmed tracks
            continue

        # `det_conf` may be absent or None (presumably when the track was not
        # matched to a detection on this frame -- confirm against
        # deep_sort_realtime). Fall back to 1.0 in both cases so a missing
        # value never reaches the float32 confidence array.
        conf = getattr(track, 'det_conf', None)
        if conf is None:
            conf = 1.0

        class_name = track.get_det_class() or "Unknown"

        tracked_bboxes.append(track.to_ltrb())
        tracked_confidences.append(conf)
        # Class names the model does not know are mapped to class id 0
        tracked_class_ids.append(class_ids_by_name.get(class_name, 0))
        tracked_ids.append(track.track_id)

    # Build the Supervision Detections for the tracked objects. reshape(-1, 4)
    # turns an empty list into a (0, 4) array, so the no-track case needs no
    # separate branch.
    tracked_detections = sv.Detections(
        xyxy=np.array(tracked_bboxes, dtype=np.float32).reshape(-1, 4),
        confidence=np.array(tracked_confidences, dtype=np.float32),
        class_id=np.array(tracked_class_ids, dtype=np.int32),
        tracker_id=np.array(tracked_ids, dtype=np.int32),
    )

    # Labels made of the tracker id and the class name
    labels = [
        f"#{tracker_id} {CLASS_NAMES_DICT.get(class_id, 'Unknown')}"
        for tracker_id, class_id in zip(tracked_ids, tracked_class_ids)
    ]

    # Draw on a copy so the input frame is left as received
    annotated_frame = frame.copy()

    # Movement traces, then bounding boxes, then labels
    annotated_frame = trace_annotator.annotate(scene=annotated_frame, detections=tracked_detections)
    annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=tracked_detections)
    annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=tracked_detections, labels=labels)

    # Trigger the line zone with the tracked objects
    line_zone.trigger(tracked_detections)

    # Annotate the line zone with its counts and return the final frame
    return line_zone_annotator.annotate(annotated_frame, line_counter=line_zone)

In [29]:
# Feed the source video through `callback` and write the result to TARGET_VIDEO_PATH
sv.process_video(callback=callback, target_path=TARGET_VIDEO_PATH, source_path=SOURCE_VIDEO_PATH)