# DeepSORT

## Installing Required Libraries

In [1]:
!pip install git+https://github.com/openai/CLIP.git

Defaulting to user installation because normal site-packages is not writeable
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to c:\users\suxin\appdata\local\temp\pip-req-build-gkrvpepk
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git 'C:\Users\suxin\AppData\Local\Temp\pip-req-build-gkrvpepk'


## Helper Function

In [2]:
from IPython.display import Video, display

import os
import requests
from IPython.display import Video, display

def show_mp4(
    url: str,
    local_folder: str = "../videos",
    filename: str    = None,
    width: int       = 640,
    autoplay: bool   = True,
    loop: bool       = True,
    muted: bool      = True,
    timeout: float   = 10.0,
):
    """
    Download an MP4 into a local folder (only once) and embed it in the notebook.

    The file is first streamed to ``<local_path>.part`` and only renamed to its
    final name after the download completed, so an interrupted download never
    leaves a truncated file behind that later runs would treat as a valid cache.

    Args:
        url (str): Full URL to the .mp4 file.
        local_folder (str): Folder the download is saved to (created if missing).
        filename (str, optional): Name to save the file as; if None, it is taken
            from the last path component of the URL (query string stripped).
        width (int): Display width in pixels.
        autoplay (bool): Add the ``autoplay`` attribute to the video tag.
        loop (bool): Add the ``loop`` attribute to the video tag.
        muted (bool): Add the ``muted`` attribute to the video tag.
        timeout (float): Timeout in seconds passed to ``requests.get``.

    Note:
        The selected attributes are passed as ``html_attributes`` and therefore
        replace whatever default IPython uses for that argument.

    Raises:
        ValueError: if the file name does not end in ``.mp4`` or the response
            Content-Type does not contain ``video/mp4``.
        requests.HTTPError: if the download fails (non-2xx status).
        Any other error raised while downloading or writing is propagated
        unchanged after the partial file has been removed.
    """
    # Infer filename from the URL path when the caller did not provide one.
    if filename is None:
        filename = os.path.basename(url.split("?", 1)[0])
    if not filename.lower().endswith(".mp4"):
        raise ValueError(f"Expected an .mp4 file, got '{filename}'")

    # Ensure the target folder exists.
    os.makedirs(local_folder, exist_ok=True)
    local_path = os.path.join(local_folder, filename)

    if os.path.isfile(local_path):
        print(f"✓ Using cached file at {local_path}")
    else:
        partial_path = local_path + ".part"
        try:
            # The context manager guarantees the streamed connection is
            # released, including when one of the checks below raises.
            with requests.get(url, stream=True, timeout=timeout) as resp:
                resp.raise_for_status()
                content_type = resp.headers.get("Content-Type", "")
                if "video/mp4" not in content_type.lower():
                    raise ValueError(f"URL did not return MP4 (Content-Type={content_type})")

                # Stream to a temporary name so a failure cannot poison the cache.
                with open(partial_path, "wb") as f:
                    for chunk in resp.iter_content(chunk_size=1024 * 1024):
                        if chunk:
                            f.write(chunk)

            # Promote the finished download to its final name in one step.
            os.replace(partial_path, local_path)
        except BaseException:
            # Also covers KeyboardInterrupt: never leave a half-written file.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

    # Build video tag attributes.
    attrs = []
    if autoplay:
        attrs.append("autoplay")
    if loop:
        attrs.append("loop")
    if muted:
        attrs.append("muted")
    html_attrs = " ".join(attrs)

    # Display the local file embedded in the notebook output.
    display(Video(local_path, embed=True, width=width, html_attributes=html_attrs))

## DeepSORT Tracking Function

In [3]:
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import cv2
import os
import random

def _id_label_box(x1, y1, text_w, text_h, frame_width, frame_height):
    """Return (left, top, right, bottom) of the ID label background, kept inside the frame.

    The preferred position is directly above the bounding-box corner (x1, y1);
    when that would fall outside the frame the box is shifted back in.
    """
    box_w = text_w + 5
    left = min(max(x1, 0), max(frame_width - box_w - 1, 0))
    top = min(max(y1 - text_h - 10, 0), max(frame_height - text_h - 1, 0))
    return left, top, left + box_w, top + text_h


def deepsort(path, output='output.mp4', target_classes=None):
    """
    Detect objects with YOLO, track them with DeepSort and save an annotated video.

    Every frame of the input is run through the ``yolo11x.pt`` model, detections
    of the requested classes are passed to a DeepSort tracker (CLIP ViT-B/16
    embedder), and each confirmed track is drawn with a bounding box and an
    ``ID:<track_id>`` label. The result is written to ``output_videos/<output>``.

    Args:
        path: Video source passed to ``cv2.VideoCapture``.
        output: File name of the annotated video inside ``output_videos``.
        target_classes: Class IDs to keep. Defaults to ``[0, 2, 7]``
            (COCO person, car, truck).

    Raises:
        IOError: if the input video cannot be opened.
    """
    # Fail fast instead of silently writing an empty output file.
    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {path}")

    # Set default target classes (person, car, truck) if none provided
    if target_classes is None:
        target_classes = [0, 2, 7]  # COCO class IDs: 0=person, 2=car, 7=truck

    output_path = f"output_videos/{output}"
    out = None
    frame_count = 0
    try:
        # Get video properties
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep fractional frame rates (e.g. 29.97) and fall back to a sane
        # value when the container reports none.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # Initialize YOLO11 model
        model = YOLO('yolo11x.pt')  # Choose your model

        # Initialize DeepSort tracker
        tracker = DeepSort(
            max_age=10,
            n_init=2,
            embedder='clip_ViT-B/16',
            half=True,
            embedder_gpu=True
        )

        # Create output directory if not exists, then the video writer
        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        # Cache of track ID -> BGR color
        color_palette = {}

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Run YOLO11 detection
            results = model(frame, verbose=False)[0]

            # Convert detections to DeepSort format: ([left, top, w, h], conf, class)
            detections = []
            for box in results.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                conf = float(box.conf[0])
                cls_id = int(box.cls[0])

                # Filter by target classes
                if cls_id in target_classes:
                    detections.append(([x1, y1, x2 - x1, y2 - y1], conf, cls_id))

            # Update tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            # Draw tracking results
            for track in tracks:
                if not track.is_confirmed():
                    continue

                track_id = track.track_id
                x1, y1, x2, y2 = map(int, track.to_ltrb())

                # Derive the color from the ID itself so that the same ID gets
                # the same color on every run.
                if track_id not in color_palette:
                    rng = random.Random(str(track_id))
                    color_palette[track_id] = (
                        rng.randint(50, 200),
                        rng.randint(50, 200),
                        rng.randint(50, 200)
                    )
                color = color_palette[track_id]

                # Draw bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 4)

                # Measure the ID text
                text = f"ID:{track_id}"
                text_scale = 1.5
                text_thickness = 4
                text_w, text_h = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX,
                                                 text_scale, text_thickness)[0]

                # White background above the box, shifted into the frame when
                # the box touches the top or right border.
                bg_x1, bg_y1, bg_x2, bg_y2 = _id_label_box(
                    x1, y1, text_w, text_h, frame_width, frame_height
                )
                cv2.rectangle(frame,
                              (bg_x1, bg_y1),
                              (bg_x2, bg_y2),
                              (255, 255, 255), -1)

                # Display ID with same color as bounding box; the baseline sits
                # 5 px above the bottom of the background.
                cv2.putText(frame, text, (bg_x1, bg_y2 - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, text_scale, color,
                            text_thickness)

            # Write frame to video file
            out.write(frame)

            # Print progress
            frame_count += 1
            if frame_count % 10 == 0:
                print(f"Processed {frame_count} frames")

    except KeyboardInterrupt:
        print("Interrupted by user")
    finally:
        # Release resources, including when model or tracker setup failed
        cap.release()
        if out is not None:
            out.release()
            print(f"Video saved to: {output_path}")
        print(f"Total frames processed: {frame_count}")


## Tracking People

In [4]:
# Fetch the sample clip (cached after the first run) and preview it.
show_mp4(
    url="https://storage.googleapis.com/labellerr-cdn/%200%20Sample-videos/people.mp4",
    filename="people.mp4",
    width=800,
)

In [5]:
# Restrict tracking to people (class 0).
deepsort(
    "../videos/people.mp4",
    output="people.mp4",
    target_classes=[0],
)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt to 'yolo11x.pt'...


100%|██████████| 109M/109M [00:06<00:00, 17.0MB/s] 


Processed 10 frames
Processed 20 frames
Processed 30 frames
Processed 40 frames
Processed 50 frames
Processed 60 frames
Processed 70 frames
Processed 80 frames
Processed 90 frames
Processed 100 frames
Processed 110 frames
Processed 120 frames
Processed 130 frames
Processed 140 frames
Processed 150 frames
Processed 160 frames
Processed 170 frames
Processed 180 frames
Processed 190 frames
Processed 200 frames
Processed 210 frames
Processed 220 frames
Processed 230 frames
Processed 240 frames
Processed 250 frames
Processed 260 frames
Processed 270 frames
Processed 280 frames
Processed 290 frames
Processed 300 frames
Processed 310 frames
Processed 320 frames
Processed 330 frames
Video saved to: output_videos/people.mp4
Total frames processed: 338


## Tracking Plane

In [None]:
# Fetch the sample clip (cached after the first run) and preview it.
show_mp4(
    url="https://storage.googleapis.com/labellerr-cdn/%200%20Sample-videos/plane.mp4",
    filename="plane.mp4",
    width=800,
)

In [11]:
# Restrict tracking to planes (class 4).
deepsort(
    "../videos/plane.mp4",
    output="plane.mp4",
    target_classes=[4],
)

Processed 10 frames
Processed 20 frames
Processed 30 frames
Processed 40 frames
Processed 50 frames
Processed 60 frames
Processed 70 frames
Processed 80 frames
Processed 90 frames
Processed 100 frames
Processed 110 frames
Processed 120 frames
Processed 130 frames
Processed 140 frames
Processed 150 frames
Processed 160 frames
Processed 170 frames
Processed 180 frames
Processed 190 frames
Processed 200 frames
Video saved to: output_videos/plane.mp4
Total frames processed: 208


## Tracking Car

In [None]:
# Fetch the sample clip (cached after the first run) and preview it.
show_mp4(
    url="https://storage.googleapis.com/labellerr-cdn/%200%20Sample-videos/car.mp4",
    filename="car.mp4",
    width=800,
)

In [24]:
# Restrict tracking to cars (class 2).
deepsort(
    "../videos/car.mp4",
    output="car.mp4",
    target_classes=[2],
)

Processed 10 frames
Processed 20 frames
Processed 30 frames
Processed 40 frames
Processed 50 frames
Processed 60 frames
Processed 70 frames
Processed 80 frames
Processed 90 frames
Processed 100 frames
Processed 110 frames
Processed 120 frames
Processed 130 frames
Processed 140 frames
Processed 150 frames
Processed 160 frames
Processed 170 frames
Processed 180 frames
Processed 190 frames
Processed 200 frames
Processed 210 frames
Processed 220 frames
Processed 230 frames
Processed 240 frames
Video saved to: output_videos/car.mp4
Total frames processed: 246


---