In [1]:
!pip install ultralytics deep-sort-realtime opencv-python numpy
!pip install git+https://github.com/openai/CLIP.git

Collecting ultralytics
  Using cached ultralytics-8.3.155-py3-none-any.whl.metadata (37 kB)
Collecting deep-sort-realtime
  Using cached deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Collecting opencv-python
  Using cached opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl.metadata (20 kB)
Collecting matplotlib>=3.3.0 (from ultralytics)
  Using cached matplotlib-3.9.4-cp39-cp39-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting pyyaml>=5.3.1 (from ultralytics)
  Using cached PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl.metadata (2.1 kB)
Collecting scipy>=1.4.1 (from ultralytics)
  Using cached scipy-1.13.1-cp39-cp39-macosx_12_0_arm64.whl.metadata (60 kB)
Collecting tqdm>=4.64.0 (from ultralytics)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting py-cpuinfo (from ultralytics)
  Using cached py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting pandas>=1.1.4 (from ultralytics)
  Using cached pandas-2.3.0-cp39-cp39-macosx_11_0_arm64.whl.m

In [2]:
import torch

# Environment sanity check: show the installed PyTorch version and the type of the version object.
print(torch.__version__, type(torch.__version__), sep="\n")


2.0.1
<class 'torch.torch_version.TorchVersion'>


In [1]:
import numpy

# Environment sanity check: the recorded run of this notebook printed 1.24.4 here.
print(f"{numpy.__version__}")


1.24.4


In [9]:
import cv2

# Open the clip once just to confirm it is readable and to report its frame size.
video_path = '/Users/laraschwarz/Code/reu2025/deepSORT2/trimmedFrance.mp4'  # Replace with your video path
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Failed to open video.")
else:
    # OpenCV reports capture properties as floats; cast to whole pixels.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(f"Resolution: {width} x {height}")

# Release the handle whether or not the open succeeded.
cap.release()


Resolution: 1920 x 1080


In [2]:
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import cv2
import os
import numpy as np
import random
import clip

In [23]:
def _draw_track_label(frame, text, x1, y1, color, frame_width, frame_height,
                      text_scale=1.5, text_thickness=4, pad=5):
    """Draw `text` on a white plate anchored to a track's top-left corner.

    The plate is placed directly above (x1, y1). If there is no room above
    (box touching the top of the frame) it is placed just inside the box
    instead, and it is shifted left/up as needed so the whole label stays
    inside the frame. `frame` is modified in place.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    (text_w, text_h), baseline = cv2.getTextSize(text, font, text_scale, text_thickness)

    # Plate covers the glyphs, the descender area below the baseline, and padding.
    plate_w = text_w + 2 * pad
    plate_h = text_h + baseline + 2 * pad

    # Clamp horizontally so labels near the left/right edge remain visible.
    left = min(max(x1, 0), max(frame_width - plate_w, 0))

    # Prefer the space above the box; fall back to inside the box at the top edge.
    top = y1 - plate_h
    if top < 0:
        top = max(y1, 0)
    top = min(top, max(frame_height - plate_h, 0))

    cv2.rectangle(frame, (left, top), (left + plate_w, top + plate_h),
                  (255, 255, 255), -1)  # Filled white background
    # putText takes the bottom-left corner of the text (its baseline origin).
    cv2.putText(frame, text, (left + pad, top + pad + text_h),
                font, text_scale, color, text_thickness)


def deepsort(path, output='output.mp4', target_classes=None, model_weights='yolov5l.pt'):
    """Detect objects with YOLO, track them with DeepSort, and write an annotated video.

    Every frame of the input video is run through the detector; detections whose
    class ID is in `target_classes` are passed to the tracker as
    ``([left, top, width, height], confidence, class_id)`` tuples. Confirmed
    tracks are drawn as a coloured box with an ``ID:<track_id>`` label, and the
    frame is written to ``output_videos/<output>``.

    Args:
        path: Path of the input video.
        output: File name of the annotated video inside ``output_videos/``.
        target_classes: Iterable of class IDs to keep. Defaults to
            ``[0, 2, 7]`` (COCO: 0=person, 2=car, 7=truck).
        model_weights: Weights file handed to ``YOLO(...)``.

    Returns:
        None. Progress and a final summary are printed.

    Raises:
        OSError: If the input video or the output writer cannot be opened.
    """
    # Set default target classes (person, car, truck) if none provided
    if target_classes is None:
        target_classes = [0, 2, 7]  # COCO class IDs: 0=person, 2=car, 7=truck
    wanted_classes = set(target_classes)

    # Load the detector and tracker first so a failure here leaves no open video handles.
    model = YOLO(model_weights)
    tracker = DeepSort(
        max_age=20,
        n_init=2,
        embedder='clip_ViT-B/16',
        half=False,
        embedder_gpu=False
    )

    # Create output directory if not exists
    os.makedirs("output_videos", exist_ok=True)
    output_path = os.path.join("output_videos", output)

    # Initialize video capture and fail loudly instead of silently writing an empty video.
    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        cap.release()
        raise OSError(f"Could not open video: {path}")

    # Get video properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the fractional frame rate (e.g. 29.97); truncating it makes the output drift.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:  # Also catches NaN; some containers report no frame rate
        fps = 30.0   # Arbitrary fallback so the writer can still be created

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
    if not out.isOpened():
        cap.release()
        out.release()
        raise OSError(f"Could not open video writer for: {output_path}")

    # One colour per track ID, assigned the first time the ID is seen
    color_palette = {}

    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run YOLO detection on the current frame
            results = model(frame, verbose=False)[0]

            # Convert detections to DeepSort format: ([left, top, w, h], conf, class)
            detections = []
            for box in results.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                conf = float(box.conf[0])
                cls_id = int(box.cls[0])

                # Filter by target classes
                if cls_id in wanted_classes:
                    detections.append(([x1, y1, x2 - x1, y2 - y1], conf, cls_id))

            # Update tracker (the frame is passed so the embedder can crop appearance features)
            tracks = tracker.update_tracks(detections, frame=frame)

            # Draw tracking results
            for track in tracks:
                if not track.is_confirmed():
                    continue

                track_id = track.track_id
                x1, y1, x2, y2 = map(int, track.to_ltrb())

                # Mid-range random colour so it is visible on both dark and light backgrounds
                if track_id not in color_palette:
                    color_palette[track_id] = (
                        random.randint(50, 200),
                        random.randint(50, 200),
                        random.randint(50, 200)
                    )
                color = color_palette[track_id]

                # Bounding box, 4 px thick
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 4)

                # ID label on a white plate, in the same colour as the box
                _draw_track_label(frame, f"ID:{track_id}", x1, y1, color,
                                  frame_width, frame_height)

            # Write frame to video file
            out.write(frame)

            # Print progress
            frame_count += 1
            if frame_count % 50 == 0:
                print(f"Processed {frame_count} frames")

    except KeyboardInterrupt:
        # Allow stopping a long run from the notebook; the frames written so far are kept.
        print("Interrupted by user")
    finally:
        # Release resources
        cap.release()
        out.release()
        print(f"Video saved to: {output_path}")
        print(f"Total frames processed: {frame_count}")

In [25]:
# Run the tracker on the trimmed clip; target_classes are the detector class IDs to keep.
deepsort(
    '/Users/laraschwarz/Code/reu2025/deepSORT2/trimmedFrance.mp4',  # Adjust input video path and target classes as needed
    output='output7.mp4',
    target_classes=[0, 1, 2, 3, 5, 6, 7],
)

PRO TIP 💡 Replace 'model=yolov5l.pt' with new 'model=yolov5lu.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov5lu.pt to 'yolov5lu.pt'...


100%|██████████| 102M/102M [00:06<00:00, 16.2MB/s] 


Processed 50 frames
Processed 100 frames
Processed 150 frames
Processed 200 frames
Processed 250 frames
Processed 300 frames
Processed 350 frames
Processed 400 frames
Processed 450 frames
Processed 500 frames
Processed 550 frames
Processed 600 frames
Video saved to: output_videos/output7.mp4
Total frames processed: 615
