In [2]:
!pip install ultralytics deep-sort-realtime opencv-python numpy
!pip install git+https://github.com/openai/CLIP.git

Collecting ultralytics
  Downloading ultralytics-8.3.156-py3-none-any.whl.metadata (37 kB)
Collecting deep-sort-realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting numpy
  Downloading numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting matplotlib>=3.3.0 (from ultralytics)
  Downloading matplotlib-3.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting pillow>=7.1.2 (from ultralytics)
  Downloading pillow-11.2.1-cp39-cp39-manylinux_2_28_x86_64.whl.metadata (8.9 kB)
Collecting scipy>=1.4.1 (from ultralytics)
  Downloading scipy-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Downloading torch-2.7.1-cp39-cp39-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collect

In [3]:
import torch

# Sanity check: show which torch build the kernel imports, and the type of the
# version object (it is torch's own version class rather than a plain str).
torch_version = torch.__version__
print(torch_version)
print(type(torch_version))


2.7.1+cu126
<class 'torch.torch_version.TorchVersion'>


In [4]:
import numpy

# Sanity check: show which NumPy version the kernel actually imports
# (2.0.2 when this notebook was last executed).
numpy_version = numpy.__version__
print(numpy_version)


2.0.2


In [9]:
import cv2

# Open the input clip once, just to report its frame size before tracking.
cap = cv2.VideoCapture(
    '/home/lschwarz/Code/reu2025/deepSORT2/trimmedFrance.mp4'  # Replace with your video path
)

if not cap.isOpened():
    print("Failed to open video.")
else:
    # OpenCV reports the properties as floats; cast to whole pixels.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(f"Resolution: {width} x {height}")

# Always hand the capture handle back, whether or not it opened.
cap.release()


Resolution: 1920 x 1080


In [10]:
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import cv2
import os
import numpy as np
import random
import clip

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/home/lschwarz/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [22]:
def deepsort1(path, output='output.mp4', target_classes=None, weights='yolov5l.pt'):
    """Detect objects with YOLOv5, track them with DeepSORT, and save an annotated video.

    Every frame of the input video is run through a YOLOv5 model loaded via
    ``torch.hub``. Detections whose class is in ``target_classes`` are passed to a
    DeepSort tracker, and each confirmed track is drawn (box + ``ID:<n>`` label)
    onto the frame, which is then written to ``output_videos/<output>``.

    Args:
        path: Path of the input video file.
        output: File name of the annotated video inside ``output_videos/``.
        target_classes: Iterable of class IDs to keep. Defaults to ``[0, 2, 7]``
            (COCO person, car, truck).
        weights: YOLOv5 weights file handed to ``torch.hub.load``.

    Returns:
        None. Progress and the output location are printed. If the input video
        cannot be opened, "Failed to open video." is printed and nothing is written.
    """
    # Local import so this cell does not rely on an earlier cell having imported torch.
    import torch

    # Load the YOLOv5 detector through torch.hub.
    model = torch.hub.load('ultralytics/yolov5', 'custom', path=weights)

    # Open the input and bail out early instead of producing an empty output file.
    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        cap.release()
        print("Failed to open video.")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes the
    # playback speed of the saved video. Fall back when the container reports
    # no usable rate (0 or NaN both fail the `> 0` test).
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0

    # Create output directory if it does not exist yet.
    os.makedirs("output_videos", exist_ok=True)
    output_path = f"output_videos/{output}"

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

    tracker = DeepSort(
        max_age=20,
        n_init=2,
        embedder='clip_ViT-B/16',
        half=True,
        embedder_gpu=True
    )

    # One colour per track ID, created lazily the first time the ID is drawn.
    color_palette = {}

    # Default target classes (COCO class IDs: 0=person, 2=car, 7=truck).
    if target_classes is None:
        target_classes = [0, 2, 7]
    wanted_classes = set(target_classes)

    # Label appearance (loop-invariant).
    font = cv2.FONT_HERSHEY_SIMPLEX
    text_scale = 1.5
    text_thickness = 4
    box_thickness = 4
    pad = 5

    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes frames as BGR, while the YOLOv5 hub model treats
            # NumPy inputs as RGB, so convert for detection only. The tracker
            # and the drawing code keep using the original BGR frame.
            results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # torch.hub YOLOv5 results expose one (N, 6) tensor per image:
            # x1, y1, x2, y2, confidence, class.
            boxes = results.xyxy[0].cpu().numpy()

            # Convert detections to the ([left, top, width, height], conf, class)
            # tuples handed to the tracker.
            detections = []
            for bx1, by1, bx2, by2, conf, cls_id in boxes:
                cls_id = int(cls_id)
                if cls_id not in wanted_classes:
                    continue
                left, top, right, bottom = int(bx1), int(by1), int(bx2), int(by2)
                detections.append(
                    ([left, top, right - left, bottom - top], float(conf), cls_id)
                )

            tracks = tracker.update_tracks(detections, frame=frame)

            for track in tracks:
                if not track.is_confirmed():
                    continue

                track_id = track.track_id
                x1, y1, x2, y2 = map(int, track.to_ltrb())

                if track_id not in color_palette:
                    color_palette[track_id] = (
                        random.randint(50, 200),
                        random.randint(50, 200),
                        random.randint(50, 200)
                    )
                color = color_palette[track_id]

                cv2.rectangle(frame, (x1, y1), (x2, y2), color, box_thickness)

                # ID label on a white background.
                text = f"ID:{track_id}"
                (text_w, text_h), baseline = cv2.getTextSize(
                    text, font, text_scale, text_thickness
                )
                label_w = text_w + 2 * pad
                label_h = text_h + baseline + 2 * pad

                # Keep the label fully inside the frame: clamp it horizontally,
                # put it above the box when there is room, otherwise just
                # inside the box's top edge.
                label_x = min(max(x1, 0), max(frame_width - label_w, 0))
                label_y = y1 - label_h if y1 >= label_h else max(y1, 0)
                label_y = min(label_y, max(frame_height - label_h, 0))

                cv2.rectangle(
                    frame,
                    (label_x, label_y),
                    (label_x + label_w, label_y + label_h),
                    (255, 255, 255),
                    -1,
                )
                # putText's origin is the bottom-left corner of the text.
                cv2.putText(
                    frame, text, (label_x + pad, label_y + pad + text_h),
                    font, text_scale, color, text_thickness
                )

            out.write(frame)

            frame_count += 1
            if frame_count % 50 == 0:
                print(f"Processed {frame_count} frames")

    except KeyboardInterrupt:
        # Allow interrupting a long run from the notebook; the frames written
        # so far are still flushed by the `finally` block.
        print("Interrupted by user")
    finally:
        cap.release()
        out.release()
        print(f"Video saved to: {output_path}")
        print(f"Total frames processed: {frame_count}")

In [27]:
def deepsort(path, output='output.mp4', target_classes=None, save=False, show=True,
             weights='yolov5x6.pt'):
    """Detect objects with YOLOv5, track them with DeepSORT, and display/save the result.

    Every frame of the input video is run through a YOLOv5 model loaded via
    ``torch.hub``. Detections whose class is in ``target_classes`` are passed to a
    DeepSort tracker, and each confirmed track is drawn (box + ``ID:<n>`` label)
    onto the frame. The annotated frame is shown in an OpenCV window and/or
    written to ``output_videos/<output>``.

    Args:
        path: Path of the input video file.
        output: File name of the annotated video inside ``output_videos/``.
            Only used when ``save`` is true.
        target_classes: Iterable of class IDs to keep. Defaults to ``[0, 2, 7]``.
        save: Write the annotated frames to ``output_videos/<output>``. Off by
            default, which keeps the display-only behaviour; the output file is
            no longer created when nothing is going to be written to it.
        show: Display frames in a window titled "DeepSORT Output"; press ``q``
            in that window to stop. Disable on machines without a display.
        weights: YOLOv5 weights file handed to ``torch.hub.load``.

    Returns:
        None. Progress is printed. If the input video cannot be opened,
        "Failed to open video." is printed and nothing else happens.
    """
    import torch

    # Load the YOLOv5 detector through torch.hub.
    model = torch.hub.load('ultralytics/yolov5', 'custom', path=weights)

    # Open the input and bail out early if it cannot be read.
    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        cap.release()
        print("Failed to open video.")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Only create the writer (and the file on disk) when frames will be saved.
    out = None
    output_path = f"output_videos/{output}"
    if save:
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes
        # the playback speed. Fall back when no usable rate is reported
        # (0 or NaN both fail the `> 0` test).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        os.makedirs("output_videos", exist_ok=True)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

    tracker = DeepSort(
        max_age=20,
        n_init=2,
        embedder='clip_ViT-B/16',
        half=True,
        embedder_gpu=True
    )

    # One colour per track ID, created lazily the first time the ID is drawn.
    color_palette = {}

    # Default target classes if none provided.
    if target_classes is None:
        target_classes = [0, 2, 7]
    wanted_classes = set(target_classes)

    # Label appearance (loop-invariant).
    font = cv2.FONT_HERSHEY_SIMPLEX
    text_scale = 1.5
    text_thickness = 4
    box_thickness = 4
    pad = 5

    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes frames as BGR, while the YOLOv5 hub model treats
            # NumPy inputs as RGB, so convert for detection only. The tracker
            # and the drawing code keep using the original BGR frame.
            results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            boxes = results.xyxy[0].cpu().numpy()  # shape: (N, 6) -> x1, y1, x2, y2, conf, cls

            # Convert detections to the ([left, top, width, height], conf, class)
            # tuples handed to the tracker.
            detections = []
            for bx1, by1, bx2, by2, conf, cls_id in boxes:
                cls_id = int(cls_id)
                if cls_id in wanted_classes:
                    bbox = [int(bx1), int(by1), int(bx2 - bx1), int(by2 - by1)]
                    detections.append((bbox, float(conf), cls_id))

            tracks = tracker.update_tracks(detections, frame=frame)

            for track in tracks:
                if not track.is_confirmed():
                    continue

                track_id = track.track_id
                x1, y1, x2, y2 = map(int, track.to_ltrb())

                if track_id not in color_palette:
                    color_palette[track_id] = (
                        random.randint(50, 200),
                        random.randint(50, 200),
                        random.randint(50, 200)
                    )
                color = color_palette[track_id]

                cv2.rectangle(frame, (x1, y1), (x2, y2), color, box_thickness)

                # ID label on a white background.
                text = f"ID:{track_id}"
                (text_w, text_h), baseline = cv2.getTextSize(
                    text, font, text_scale, text_thickness
                )
                label_w = text_w + 2 * pad
                label_h = text_h + baseline + 2 * pad

                # Keep the label fully inside the frame: clamp it horizontally,
                # put it above the box when there is room, otherwise just
                # inside the box's top edge.
                label_x = min(max(x1, 0), max(frame_width - label_w, 0))
                label_y = y1 - label_h if y1 >= label_h else max(y1, 0)
                label_y = min(label_y, max(frame_height - label_h, 0))

                cv2.rectangle(
                    frame,
                    (label_x, label_y),
                    (label_x + label_w, label_y + label_h),
                    (255, 255, 255),
                    -1,
                )
                # putText's origin is the bottom-left corner of the text.
                cv2.putText(
                    frame, text, (label_x + pad, label_y + pad + text_h),
                    font, text_scale, color, text_thickness
                )

            if out is not None:
                out.write(frame)

            if show:
                cv2.imshow("DeepSORT Output", frame)
                # waitKey also services the window's event loop; 'q' stops playback.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            frame_count += 1
            if frame_count % 50 == 0:
                print(f"Processed {frame_count} frames")

    except KeyboardInterrupt:
        # Allow interrupting a long run from the notebook; resources are still
        # released by the `finally` block.
        print("Interrupted by user")
    finally:
        if show:
            cv2.destroyAllWindows()
        cap.release()
        if out is not None:
            out.release()
            print(f"Video saved to: {output_path}")
        print(f"Total frames processed: {frame_count}")


In [28]:
import warnings

# Silence FutureWarning noise so the progress output stays readable.
warnings.filterwarnings("ignore", category=FutureWarning)

# Run the tracker on the trimmed clip.
# Adjust input video path and target classes as needed.
deepsort(
    '/home/lschwarz/Code/reu2025/deepSORT2/trimmedFrance.mp4',
    output='output8.mp4',
    target_classes=[0, 1, 2, 3, 5, 6, 7],
)

Using cache found in /home/lschwarz/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2025-6-17 Python-3.9.23 torch-2.7.1+cu126 CUDA:0 (NVIDIA GeForce GTX TITAN X, 12199MiB)

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x6.pt to yolov5x6.pt...
100%|████████████████████████████████████████████████████████████████| 270M/270M [00:05<00:00, 49.8MB/s]

Fusing layers... 
YOLOv5x6 summary: 574 layers, 140730220 parameters, 0 gradients, 209.6 GFLOPs
Adding AutoShape... 


Processed 50 frames
Processed 100 frames
Processed 150 frames
Processed 200 frames
Interrupted by user
Total frames processed: 223
