In [1]:
%pip install ultralytics opencv-python filterpy pillow
%pip install pyqt5


Collecting ultralyticsNote: you may need to restart the kernel to use updated packages.

  Downloading ultralytics-8.3.139-py3-none-any.whl.metadata (37 kB)
Collecting filterpy
  Downloading filterpy-1.4.5.zip (177 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting torch>=1.8.0 (from ultralytics)
  Downloading torch-2.7.0-cp312-cp312-win_amd64.whl.metadata (29 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Downloading torchvision-0.22.0-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting sympy>=1.13.3 (from torch>=1.8.0->ultralytics)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading ultralytics-8.3.139-py3-none-any.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------- ----------------------------- 0.3/1.0 MB ? eta -:--:--

In [None]:
import cv2
import threading
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from tkinter import *
from PIL import Image, ImageTk

# Load YOLOv8 model
model = YOLO('yolov8n.pt')
tracker = DeepSort(max_age=30)

# GUI setup
class ObjectDetectionApp:
    def __init__(self, root):
        self.root = root
        self.root.title("YOLOv8 + DeepSORT Object Tracker")

        # GUI Components
        self.video_label = Label(root)
        self.video_label.pack()

        self.start_button = Button(root, text="Start Tracking", command=self.start_tracking)
        self.start_button.pack(pady=10)

        self.stop_button = Button(root, text="Stop", command=self.stop_tracking, state=DISABLED)
        self.stop_button.pack()

        self.cap = None
        self.running = False

    def start_tracking(self):
        self.running = True
        self.start_button.config(state=DISABLED)
        self.stop_button.config(state=NORMAL)
        self.cap = cv2.VideoCapture(0)  # webcam
        threading.Thread(target=self.update_frame).start()

    def stop_tracking(self):
        self.running = False
        self.start_button.config(state=NORMAL)
        self.stop_button.config(state=DISABLED)
        if self.cap:
            self.cap.release()

    def update_frame(self):
        while self.running:
            ret, frame = self.cap.read()
            if not ret:
                break

            resized_frame = cv2.resize(frame, (640, 480))
            results = model(resized_frame)[0]
            detections = []

            for result in results.boxes:
                x1, y1, x2, y2 = map(int, result.xyxy[0].tolist())
                conf = float(result.conf[0])
                cls = int(result.cls[0])
                detections.append(([x1, y1, x2 - x1, y2 - y1], conf, cls))

            tracks = tracker.update_tracks(detections, frame=resized_frame)

            for track in tracks:
                if not track.is_confirmed():
                    continue
                track_id = track.track_id
                ltrb = track.to_ltrb()
                x1, y1, x2, y2 = map(int, ltrb)
                cv2.rectangle(resized_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(resized_frame, f'ID: {track_id}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            # Convert frame to display in Tkinter
            image = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(image)
            imgtk = ImageTk.PhotoImage(image=img)

            self.video_label.imgtk = imgtk
            self.video_label.configure(image=imgtk)

        if self.cap:
            self.cap.release()

# Run the GUI
if __name__ == "__main__":
    root = Tk()
    app = ObjectDetectionApp(root)
    root.mainloop()



0: 480x640 2 persons, 1 dog, 213.2ms
Speed: 5.3ms preprocess, 213.2ms inference, 5.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 persons, 175.2ms
Speed: 4.9ms preprocess, 175.2ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)
