In [1]:
import cv2
import numpy as np
import torch
from deep_sort_realtime.deepsort_tracker import DeepSort




In [2]:
# Build the YOLOv4 network through OpenCV's DNN module from the Darknet
# config file and its matching pre-trained weights.
cfg_path = "weights/yolov4.cfg"
weights_path = "weights/yolov4.weights"
net = cv2.dnn.readNetFromDarknet(cfg_path, weights_path)

In [3]:
# Class labels, one per line; the list position is the class id that the
# network's per-class scores are indexed by.
with open("weights/coco.names", "r") as f:
    class_names = f.read().strip().splitlines()

# Names of the network's output layers, which are passed to net.forward().
# getUnconnectedOutLayers() yields 1-based indices into layer_names (hence
# the "- 1"). Depending on the OpenCV version the indices come back either
# as a flat array or as an N x 1 array, so flatten and cast to int to make
# the lookup work with both shapes.
layer_names = net.getLayerNames()
unconnected_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
output_layers = [layer_names[int(i) - 1] for i in unconnected_ids]

# Initialize DeepSORT from pip
tracker = DeepSort(max_age=30, n_init=3, nms_max_overlap=1.0)


In [4]:
import time

# Tunable parameters for this run.
RUN_SECONDS = 10              # how long to capture from the webcam
CONF_THRESHOLD = 0.5          # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.3           # overlap threshold for non-maximum suppression
YOLO_INPUT_SIZE = (416, 416)  # spatial size the frame is resized to for YOLO


def _extract_detections(layer_outputs, frame_width, frame_height, conf_threshold):
    """Turn raw YOLO layer outputs into pixel-space boxes, scores and class ids.

    Each detection row is read as: ``row[:4]`` = (center_x, center_y, w, h)
    relative to the frame size, ``row[5:]`` = per-class scores. Index 4 is
    not used here.

    Parameters
    ----------
    layer_outputs : iterable of 2-D arrays
        Result of ``net.forward(output_layers)``.
    frame_width, frame_height : int
        Size of the frame, used to scale the relative box to pixels.
    conf_threshold : float
        Rows whose best class score is not strictly above this are dropped.

    Returns
    -------
    (boxes, confidences, class_ids)
        ``boxes`` is a list of ``[left, top, w, h]`` in pixels (plain ints),
        ``confidences`` a list of floats, ``class_ids`` a list of ints.
    """
    # Loop-invariant scale factor, hoisted out of the per-row loop.
    scale = np.array([frame_width, frame_height, frame_width, frame_height])

    boxes, confidences, class_ids = [], [], []
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence <= conf_threshold:
                continue

            center_x, center_y, w, h = (detection[:4] * scale).astype("int")
            left = int(center_x - w / 2)
            top = int(center_y - h / 2)

            # Plain Python numbers keep cv2.dnn.NMSBoxes and later list
            # indexing independent of numpy scalar types.
            boxes.append([left, top, int(w), int(h)])
            confidences.append(confidence)
            class_ids.append(class_id)
    return boxes, confidences, class_ids


# (track_id, class_name) -> {"first_seen": str, "last_seen": str}
tracking_data = {}

# Open webcam (device index 0)
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): in a notebook exit() shuts down the
    # kernel rather than just stopping this cell.
    raise RuntimeError("Error: Could not open webcam")

# File the per-object summary is written to after the run
output_file = "tracking_log.txt"

# monotonic() is unaffected by wall-clock adjustments during the run.
start_time = time.monotonic()
try:
    while time.monotonic() - start_time < RUN_SECONDS:
        ret, frame = cap.read()
        if not ret:
            break

        # Wall-clock timestamp (second resolution) recorded in the log.
        current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        height, width = frame.shape[:2]

        # Prepare frame for YOLO: scale pixels to [0, 1], resize, BGR -> RGB.
        blob = cv2.dnn.blobFromImage(
            frame, 1 / 255.0, YOLO_INPUT_SIZE, swapRB=True, crop=False
        )
        net.setInput(blob)

        # Forward pass
        layer_outputs = net.forward(output_layers)

        # Extract detections above the confidence threshold
        boxes, confidences, class_ids = _extract_detections(
            layer_outputs, width, height, CONF_THRESHOLD
        )

        # Apply Non-Maximum Suppression (NMS) with stricter threshold (0.3)
        indices = cv2.dnn.NMSBoxes(
            boxes, confidences, score_threshold=CONF_THRESHOLD, nms_threshold=NMS_THRESHOLD
        )

        # DeepSort.update_tracks expects each detection as
        # ([left, top, w, h], confidence, detection_class) -- width/height,
        # not the bottom-right corner.
        detections_to_track = []
        if len(indices) > 0:
            # asarray + flatten copes with ndarray, N x 1 and sequence returns.
            for i in np.asarray(indices).flatten():
                i = int(i)
                left, top, w, h = boxes[i]
                detections_to_track.append(
                    ([left, top, w, h], confidences[i], class_ids[i])
                )

        # DeepSORT tracking
        tracks = tracker.update_tracks(detections_to_track, frame=frame)

        # Update tracking dictionary and annotate the frame
        for track in tracks:
            if not track.is_confirmed():
                continue
            track_id = track.track_id
            class_name = (
                class_names[track.det_class] if track.det_class is not None else "Unknown"
            )
            obj_key = (track_id, class_name)  # Unique key for each object-class combination

            # First sighting stores both timestamps; later sightings only
            # move "last_seen" forward.
            entry = tracking_data.setdefault(
                obj_key, {"first_seen": current_time, "last_seen": current_time}
            )
            entry["last_seen"] = current_time

            # Draw bounding box and label
            x1, y1, x2, y2 = map(int, track.to_tlbr())
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"ID: {track_id} {class_name}", (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        # Display frame; pressing "q" ends the run early
        cv2.imshow("YOLOv4 + DeepSORT Tracking", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the webcam and close windows, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()

# Write unique tracking data to file
with open(output_file, "w") as f:
    f.write("ID,Class Name,Time of Appearance,Time Last Seen\n")
    for (track_id, class_name), times in tracking_data.items():
        f.write(f"{track_id},{class_name},{times['first_seen']},{times['last_seen']}\n")

print(f"Tracking finished. Results saved in {output_file}")

Tracking finished. Results saved in tracking_log.txt
