In [1]:
import cv2
import pandas as pd
from ultralytics import YOLO
from tracker import *

In [2]:
video_dir = 'highway.mp4' #Or can integrate with real-time

In [3]:
class_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
street_list = ['bicycle','car','motorcycle','bus','truck']

In [4]:
#Get tracker
tracker = Tracker()
count = 0
#Get model
model = YOLO('yolov9c.pt')

In [6]:
#Empty dictionary that will hold ids based on direction
down = {}
up = {}
#Empty dictionary to hold information about vehicle class
counter_down = {'car':[],
                'bicycle':[],
                'truck':[],
                'bus':[],
                'motorcycle':[]}
counter_up = {  'car':[],
                'bicycle':[],
                'truck':[],
                'bus':[],
                'motorcycle':[]}
cap = cv2.VideoCapture(video_dir)
#as long as the quir key is not presssed, run the program
while True:
    ret,frame = cap.read()
    if not ret:
        break
    count += 1
    frame = cv2.resize(frame,(1020,500))
    #We have a pre-built model that will predict the objects from frame
    results = model.predict(frame)
    a = results[0].boxes.data
    a = a.detach().cpu().numpy()
    #Stored information regarding result in a dataframe called px
    px = pd.DataFrame(a).astype('float')
    list = []
    for idx, row in px.iterrows():
        x1 = int(row[0])
        y1 = int(row[1])
        x2 = int(row[2])
        y2 = int(row[3])
        d = int(row[5])
        c = class_list[d]
        if c in street_list:
            #Only append list when vehicle in street_list is detected
            list.append([x1,y1,x2,y2,c])
    #Tracker returns object boundary box and id
    bbox_id,c = tracker.update(list)
    for bbox in bbox_id:
        x3,y3,x4,y4,id = bbox
        cx = int(x3+x4)//2
        cy = int(y3+y4)//2
        #Bring bounding box only after touching the line
        y_red = 200
        y_blue = 279
        offset = 2
        #So, draw the box only under the condition when it closes to or just leaves the line
        if y_red<(cy+offset) and y_red>(cy-offset):
            down[id] = cy
        if id in down:
            if y_blue<(cy+offset) and y_blue>(cy-offset):
                #Puts circle in the center of vehicle
                cv2.circle(frame,(cx,cy),4,(0,0,255),-1)
                counter_down[c].append(id)
        if y_blue<(cy+offset) and y_blue>(cy-offset):
            up[id] = cy
        if id in up:
            if y_red<(cy+offset) and y_red>(cy-offset):
                #Puts circle in the center of vehicle
                cv2.circle(frame,(cx,cy),4,(0,0,255),-1)
                counter_up[c].append(id)
    cv2.line(frame,(0,y_red),(1020,y_red),(0,0,255),3)
    cv2.line(frame,(0,y_blue),(1020,y_blue),(255,0,0),3)
    cv2.putText(frame,('Going Down'),(10,30),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Bicycle- '+str(len(counter_down['bicycle']))),(10,60),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Car- '+str(len(counter_down['car']))),(10,90),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Bus- '+str(len(counter_down['bus']))),(10,120),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Truck- '+str(len(counter_down['truck']))),(10,150),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Motorcycle- '+str(len(counter_down['motorcycle']))),(10,180),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Going Up'),(750,30),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Bicycle- '+str(len(counter_up['bicycle']))),(750,60),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Car- '+str(len(counter_up['car']))),(750,90),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Bus- '+str(len(counter_up['bus']))),(750,120),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Truck- '+str(len(counter_up['truck']))),(750,150),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)
    cv2.putText(frame,('Motorcycle- '+str(len(counter_up['motorcycle']))),(750,180),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,255),2,cv2.LINE_AA)

    #To see the output
    cv2.imshow("frames",frame)
    #Escape button to exit
    if cv2.waitKey(1)&0xFF==27:
        break
cap.release()
cv2.destroyAllWindows()


0: 320x640 11 cars, 3 trucks, 1872.9ms
Speed: 6.0ms preprocess, 1872.9ms inference, 5.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 10 cars, 4 trucks, 1740.0ms
Speed: 5.0ms preprocess, 1740.0ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 9 cars, 4 trucks, 1783.0ms
Speed: 4.0ms preprocess, 1783.0ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 9 cars, 4 trucks, 1675.0ms
Speed: 4.0ms preprocess, 1675.0ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 9 cars, 4 trucks, 1822.0ms
Speed: 5.0ms preprocess, 1822.0ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 9 cars, 3 trucks, 1693.0ms
Speed: 6.0ms preprocess, 1693.0ms inference, 4.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 8 cars, 4 trucks, 1668.0ms
Speed: 5.0ms preprocess, 1668.0ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 8 cars, 4 trucks, 16