# Object Tracking Using YOLO11 and DeepSort

In [1]:
!unzip '/content/deep_sort.zip'

Archive:  /content/deep_sort.zip
   creating: deep_sort/
  inflating: deep_sort/README.md     
  inflating: deep_sort/__init__.py   
   creating: deep_sort/__pycache__/
  inflating: deep_sort/__pycache__/__init__.cpython-310.pyc  
  inflating: deep_sort/__pycache__/__init__.cpython-39.pyc  
  inflating: deep_sort/__pycache__/deep_sort.cpython-310.pyc  
  inflating: deep_sort/__pycache__/deep_sort.cpython-39.pyc  
   creating: deep_sort/configs/
  inflating: deep_sort/configs/deep_sort.yaml  
   creating: deep_sort/deep/
  inflating: deep_sort/deep/__init__.py  
   creating: deep_sort/deep/__pycache__/
  inflating: deep_sort/deep/__pycache__/__init__.cpython-310.pyc  
  inflating: deep_sort/deep/__pycache__/__init__.cpython-39.pyc  
  inflating: deep_sort/deep/__pycache__/feature_extractor.cpython-310.pyc  
  inflating: deep_sort/deep/__pycache__/feature_extractor.cpython-39.pyc  
  inflating: deep_sort/deep/__pycache__/model.cpython-310.pyc  
  inflating: deep_sort/deep/__pycache__/mod

In [3]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.28-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.11-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.28-py3-none-any.whl (881 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m881.2/881.2 kB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.11-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.28 ultralytics-thop-2.0.11


In [4]:
import ultralytics

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


'8.3.28'

In [8]:
import cv2

In [12]:
import time

In [13]:
from ultralytics import YOLO

In [14]:
import numpy as np

In [47]:
from deep_sort.utils.parser import get_config
from deep_sort.deep_sort import DeepSort
from deep_sort.sort.tracker import Tracker

# Checkpoint file handed to DeepSort as its `model_path`.
deep_sort_weights = 'deep_sort/deep/checkpoint/ckpt.t7'

# Module-level tracker instance used by the frame-processing loop below.
tracker = DeepSort(
    max_age=10,
    model_path=deep_sort_weights,
)

In [76]:
# Open the input video and create a writer with the same frame rate and size.
video_path = '/content/Frisbee_Highlights_short.mp4'
video_out_path = '/content/out.mp4'

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail early with a clear message instead of crashing later on a None frame.
    raise IOError(f"Could not open video: {video_path}")

# Take the frame size from the stream properties instead of decoding a frame:
# a cap.read() here would consume the first frame, which would then never be
# tracked or written to the output video.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Lower-case 'mp4v' is the FourCC tag used for MPEG-4 video in .mp4 files.
cap_out = cv2.VideoWriter(video_out_path,
                          cv2.VideoWriter_fourcc(*'mp4v'),
                          cap.get(cv2.CAP_PROP_FPS),
                          (frame_width, frame_height))


In [None]:
# Detect objects with YOLO11 on every frame, track them with DeepSort, and
# write the annotated frames to the output video.
model = YOLO("yolo11x.pt")

CONF_THRESHOLD = 0.7       # minimum detection confidence passed to the model
BOX_COLORS = (             # BGR colours, selected by track_id % 3
    (0, 0, 255),           # red
    (255, 0, 0),           # blue
    (0, 255, 0),           # green
)
TEXT_COLOR = (0, 0, 255)   # red (OpenCV uses BGR channel order)


def _class_of_best_overlap(track_box, det_boxes, det_class_ids):
    """Return the class id of the detection that overlaps ``track_box`` most.

    Args:
        track_box: ``(x1, y1, x2, y2)`` corner coordinates of one track.
        det_boxes: array of shape (N, 4) with detections as (x1, y1, x2, y2).
        det_class_ids: sequence of N class ids, parallel to ``det_boxes``.

    Returns:
        The class id of the detection with the highest IoU, or ``None`` when
        there are no detections or none of them overlaps the track box.
    """
    if len(det_boxes) == 0:
        return None

    x1, y1, x2, y2 = track_box
    inter_w = np.clip(np.minimum(x2, det_boxes[:, 2]) - np.maximum(x1, det_boxes[:, 0]), 0, None)
    inter_h = np.clip(np.minimum(y2, det_boxes[:, 3]) - np.maximum(y1, det_boxes[:, 1]), 0, None)
    inter = inter_w * inter_h

    track_area = max(x2 - x1, 0) * max(y2 - y1, 0)
    det_area = (det_boxes[:, 2] - det_boxes[:, 0]) * (det_boxes[:, 3] - det_boxes[:, 1])
    # Guard the denominator; a zero union implies a zero intersection as well.
    iou = inter / np.maximum(track_area + det_area - inter, 1e-9)

    best = int(np.argmax(iou))
    return det_class_ids[best] if iou[best] > 0 else None


# track_id -> class name of the detection most recently matched to that track.
# Remembering it keeps the label stable on frames where the track has no
# overlapping detection.
track_labels = {}

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was returned (end of the video or a read failure).
            break

        # One input frame yields one result object.
        result = model(frame, conf=CONF_THRESHOLD)[0]
        boxes = result.boxes  # bounding-box outputs for this frame

        conf = boxes.conf.detach().cpu().numpy()                       # confidence scores
        bboxes_xywh = np.array(boxes.xywh.cpu().numpy(), dtype=float)  # boxes in xywh format, (N, 4)
        det_tlbr = boxes.xyxy.cpu().numpy()                            # boxes as corner coordinates, (N, 4)
        det_class_ids = [int(c) for c in boxes.cls.tolist()]

        # Give the detected bounding boxes to the tracker.
        tracker.update(bboxes_xywh, conf, frame)

        for track in tracker.tracker.tracks:
            # Skip tentative tracks and tracks that have not been matched to a
            # detection recently, so stale predicted boxes are not drawn.
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            track_id = track.track_id
            # Bounding box in (x1, y1, x2, y2) format.
            x1, y1, x2, y2 = track.to_tlbr()

            # Label each track with the class of its own best-overlapping
            # detection rather than the class of the frame's last detection.
            class_id = _class_of_best_overlap((x1, y1, x2, y2), det_tlbr, det_class_ids)
            if class_id is not None:
                track_labels[track_id] = model.names[class_id]
            class_name = track_labels.get(track_id, "unknown")

            color = BOX_COLORS[track_id % 3]

            # Draw the rectangle on the frame.
            cv2.rectangle(frame,
                          (int(x1), int(y1)),
                          (int(x2), int(y2)),
                          color, 2)

            # Put the "<class>-<track id>" label just above the box.
            cv2.putText(frame,
                        f"{class_name}-{track_id}",
                        (int(x1) + 10, int(y1) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 1,
                        TEXT_COLOR, 2, cv2.LINE_AA)

        cap_out.write(frame)  # save the annotated frame
finally:
    # Always release the reader and writer so the output file is finalised
    # even if processing a frame raises.
    cap.release()
    cap_out.release()
    cv2.destroyAllWindows()



0: 384x640 7 persons, 64.2ms
Speed: 1.4ms preprocess, 64.2ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 63.2ms
Speed: 1.6ms preprocess, 63.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 34.9ms
Speed: 1.8ms preprocess, 34.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 35.0ms
Speed: 1.6ms preprocess, 35.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 34.9ms
Speed: 3.3ms preprocess, 34.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 32.6ms
Speed: 3.5ms preprocess, 32.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 35.3ms
Speed: 1.9ms preprocess, 35.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 34.6ms
Speed: 1.8ms preprocess, 34.6ms inference, 1.5ms postprocess per image at shape (