In [19]:
import cv2
from ultralytics import YOLO
from collections import defaultdict

model = YOLO('yolov8l.pt')

In [20]:
cap = cv2.VideoCapture("C:/Users/admin/Desktop/yolov8/argoverse.mp4")
fps = cap.get(cv2.CAP_PROP_FPS)
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
FNUMS = cap.get(cv2.CAP_PROP_FRAME_COUNT)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
videoWriter = cv2.VideoWriter("C:/Users/admin/Desktop/yolov8/test.mp4", fourcc, fps, size)

In [21]:
def box_label(image, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
    p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) 
    cv2.rectangle(image, p1, p2, color, thickness=1, lineType=cv2.LINE_AA) 
    if label: 
        w, h = cv2.getTextSize(label, 0, fontScale=2 / 3, thickness=1)[0] 
        outside = p1[1] - h >= 3 
        p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 
        cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA)
        cv2.putText(image, 
                    label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                    0,
                    2 / 3,
                    txt_color,
                    thickness=1,
                    lineType=cv2.LINE_AA)

In [22]:
track_history = defaultdict(lambda: [])
vehicle_in = 0
vehicle_out = 0

while cap.isOpened():  
    success, frame = cap.read() 
    
    if success: 
        results = model.track(frame,conf=0.3, persist=True) 
        track_ids = results[0].boxes.id.int().cpu().tolist() 
        for track_id, box in zip(track_ids, results[0].boxes.data): 
            if box[-1] == 2:
                box_label(frame, box, '#'+str(track_id)+' car', (255, 20, 212)) 
                x1, y1, x2, y2 = box[:4] 
                x = (x1+x2)/2 
                y = (y1+y2)/2 
                track = track_history[track_id] 
                track.append((float(x), float(y)))
                if len(track) > 1: 
                    _, h = track[-2]
                    if h < size[1]-400 and y >= size[1]-400: 
                        vehicle_out +=1
                        if h > size[1]-400 and y <= size[1]-400: 
                            vehicle_in +=1
        cv2.imshow("YOLOv8 Tracking", frame)
        videoWriter.write(frame)
    else:
        break 
cap.release()
videoWriter.release()
cv2.destroyAllWindows()


0: 416x640 8 cars, 1 truck, 88.4ms
Speed: 7.0ms preprocess, 88.4ms inference, 4.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 8 cars, 1 truck, 20.5ms
Speed: 3.0ms preprocess, 20.5ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 8 cars, 1 truck, 21.0ms
Speed: 2.5ms preprocess, 21.0ms inference, 1.5ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 7 cars, 1 truck, 20.6ms
Speed: 2.0ms preprocess, 20.6ms inference, 4.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 8 cars, 1 truck, 21.0ms
Speed: 2.0ms preprocess, 21.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 7 cars, 1 truck, 20.1ms
Speed: 2.0ms preprocess, 20.1ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 7 cars, 1 truck, 21.0ms
Speed: 2.0ms preprocess, 21.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 7 cars, 1 truck, 21.0ms
Speed: 2.0ms preprocess, 21.0ms i


0: 416x640 10 cars, 26.5ms
Speed: 2.0ms preprocess, 26.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 10 cars, 1 fire hydrant, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 10 cars, 1 fire hydrant, 25.9ms
Speed: 2.1ms preprocess, 25.9ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 11 cars, 1 fire hydrant, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 3.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 11 cars, 1 fire hydrant, 26.5ms
Speed: 2.0ms preprocess, 26.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 11 cars, 1 fire hydrant, 26.9ms
Speed: 2.1ms preprocess, 26.9ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 11 cars, 1 fire hydrant, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 10 cars, 1 fire h


0: 416x640 9 cars, 26.5ms
Speed: 1.0ms preprocess, 26.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 9 cars, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 9 cars, 26.0ms
Speed: 1.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 9 cars, 27.0ms
Speed: 1.0ms preprocess, 27.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 9 cars, 26.1ms
Speed: 2.0ms preprocess, 26.1ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 9 cars, 26.1ms
Speed: 2.0ms preprocess, 26.1ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 10 cars, 25.5ms
Speed: 3.0ms preprocess, 25.5ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 10 cars, 26.5ms
Speed: 1.0ms preprocess, 26.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 41


0: 416x640 6 cars, 2 trucks, 26.5ms
Speed: 2.0ms preprocess, 26.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 6 cars, 2 trucks, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 6 cars, 2 trucks, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 6 cars, 2 trucks, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 6 cars, 2 trucks, 25.5ms
Speed: 3.0ms preprocess, 25.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 6 cars, 2 trucks, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 7 cars, 1 truck, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 7 cars, 2 trucks, 27.0ms
Speed: 1.7ms preprocess, 2

Speed: 1.0ms preprocess, 27.0ms inference, 1.5ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 persons, 7 cars, 1 traffic light, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 persons, 7 cars, 1 traffic light, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 persons, 7 cars, 1 traffic light, 25.5ms
Speed: 2.0ms preprocess, 25.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 persons, 7 cars, 1 traffic light, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 3 persons, 7 cars, 1 traffic light, 1 fire hydrant, 26.1ms
Speed: 2.0ms preprocess, 26.1ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 persons, 7 cars, 1 traffic light, 1 fire hydrant, 27.1ms
Speed: 1.0ms preprocess, 27.1ms inference, 1.0ms postpro


0: 416x640 5 persons, 6 cars, 2 traffic lights, 27.0ms
Speed: 1.0ms preprocess, 27.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 6 cars, 2 traffic lights, 26.5ms
Speed: 2.0ms preprocess, 26.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 6 cars, 2 traffic lights, 27.0ms
Speed: 2.0ms preprocess, 27.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 6 cars, 3 traffic lights, 27.0ms
Speed: 2.0ms preprocess, 27.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 6 cars, 3 traffic lights, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 6 cars, 3 traffic lights, 26.3ms
Speed: 1.0ms preprocess, 26.3ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 6 cars, 3 traffic lights, 27.0ms
Speed: 2.0ms preprocess, 2

Speed: 2.0ms preprocess, 26.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 6 cars, 2 traffic lights, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 6 cars, 2 traffic lights, 27.0ms
Speed: 1.0ms preprocess, 27.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 6 cars, 2 traffic lights, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 6 cars, 2 traffic lights, 26.5ms
Speed: 1.0ms preprocess, 26.5ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 persons, 6 cars, 2 traffic lights, 27.0ms
Speed: 2.0ms preprocess, 27.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 persons, 6 cars, 3 traffic lights, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3,


0: 416x640 4 persons, 5 cars, 3 traffic lights, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 5 cars, 3 traffic lights, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 4 cars, 3 traffic lights, 27.0ms
Speed: 2.0ms preprocess, 27.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 4 cars, 3 traffic lights, 25.5ms
Speed: 2.0ms preprocess, 25.5ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 4 cars, 3 traffic lights, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 4 cars, 3 traffic lights, 27.0ms
Speed: 2.0ms preprocess, 27.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 5 persons, 4 cars, 3 traffic lights, 26.1ms
Speed: 2.0ms preprocess, 2


0: 416x640 5 persons, 5 cars, 4 traffic lights, 26.1ms
Speed: 2.0ms preprocess, 26.1ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 5 persons, 5 cars, 4 traffic lights, 26.6ms
Speed: 2.5ms preprocess, 26.6ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 5 persons, 5 cars, 4 traffic lights, 27.0ms
Speed: 2.0ms preprocess, 27.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 5 cars, 4 traffic lights, 27.1ms
Speed: 2.0ms preprocess, 27.1ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 5 cars, 4 traffic lights, 27.0ms
Speed: 2.0ms preprocess, 27.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 5 cars, 4 traffic lights, 26.0ms
Speed: 2.0ms preprocess, 26.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 4 persons, 5 cars, 4 traffic lights, 26.0ms
Speed: 2.0ms preprocess, 2