In [1]:
import math

import cv2
import cvzone ## use to display the detections. 
import numpy as np
from ultralytics import YOLO

from sort import *

In [3]:
# Video source: a pre-recorded clip read from disk (not a live webcam).
cap = cv2.VideoCapture("videos/people.mp4")
if not cap.isOpened():
    # Fail here with a clear message instead of crashing later on a None frame.
    raise FileNotFoundError("Could not open video source: videos/people.mp4")

# Detection model (weights path had a stray trailing space; removed).
model = YOLO("yolo_weights/yolov8l.pt")

# Label list indexed by the integer class id reported for each detected box.
class_names = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat',
    'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
    'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush',
]

# Region-of-interest mask. It is bitwise-ANDed with every frame before
# detection, so only the area left visible by mask.png is searched.
mask = cv2.imread("mask.png")
if mask is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("Could not read mask image: mask.png")

# Tracker object: assigns a persistent ID to each detection across frames.
# NOTE(review): parameter meanings are defined by sort.py - confirm there.
tracker = Sort(max_age=20, min_hits=3, iou_threshold=0.3)

# Counting lines, each given as [x1, y1, x2, y2] in frame pixel coordinates.
# Both are horizontal segments (y1 == y2).
limitsup = [103, 161, 296, 161]
limitsdown = [527, 489, 735, 489]

# Tracker IDs that have already been counted at each line. The count shown
# on screen is the length of the corresponding list.
total_countup = []
total_countdown = []
def _center_in_band(cx, cy, limits, margin=10):
    """Return True when the point (cx, cy) lies inside a counting line's band.

    limits is [x1, y1, x2, y2]. The band spans strictly between x1 and x2
    horizontally and strictly between y1 - margin and y2 + margin vertically.
    """
    x_start, y_start, x_end, y_end = limits
    return x_start < cx < x_end and y_start - margin < cy < y_end + margin


def _draw_limit_line(frame, limits, color):
    """Draw the counting line described by limits ([x1, y1, x2, y2]) on frame."""
    cv2.line(frame, (limits[0], limits[1]), (limits[2], limits[3]), color, 5)


try:
    # The overlay graphic is the same file for every frame, so read it once
    # instead of hitting the disk on each iteration.
    imgGraphics = cv2.imread("graphics.png", cv2.IMREAD_UNCHANGED)
    if imgGraphics is None:
        raise FileNotFoundError("Could not read overlay image: graphics.png")

    while True:
        success, img = cap.read()
        if not success:
            # End of the video (or a read failure): there is no frame to
            # process, so stop instead of passing None to OpenCV.
            break

        # Keep only the region of interest for detection.
        imgRegion = cv2.bitwise_and(img, mask)

        # Draw the counter graphic on the frame that will be displayed.
        img = cvzone.overlayPNG(img, imgGraphics, (730, 260))

        # Run the detector on the masked frame.
        results = model(imgRegion, stream=True)

        # Rows of [x1, y1, x2, y2, conf] handed to the tracker for this frame.
        detections = np.empty((0, 5))

        for r in results:
            for box in r.boxes:
                # Box corners, converted to plain ints.
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                # Confidence score rounded up to two decimals.
                conf = math.ceil((box.conf[0] * 100)) / 100

                current_class = class_names[int(box.cls[0])]

                # Only people with sufficient confidence are tracked.
                if current_class == "person" and conf > 0.3:
                    current_detection = np.array([x1, y1, x2, y2, conf])
                    detections = np.vstack((detections, current_detection))

        results_tracker = tracker.update(detections)

        # Counting lines in their idle colour (red in BGR).
        _draw_limit_line(img, limitsup, (0, 0, 255))
        _draw_limit_line(img, limitsdown, (0, 0, 255))

        # A distinct loop name keeps `results` from being shadowed.
        for track in results_tracker:
            x1, y1, x2, y2, ID = track
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            ID = int(ID)
            w, h = x2 - x1, y2 - y1
            cvzone.cornerRect(img, (x1, y1, w, h), l=8, t=2, colorR=(255, 0, 0))
            # max() keeps the label on screen when the box touches the top edge.
            cvzone.putTextRect(img, f"{ID}", (max(0, x1), max(40, y1)), scale=1, thickness=1, offset=2)

            # Centre of the tracked box; this point is tested against the lines.
            cx, cy = x1 + w // 2, y1 + h // 2
            cv2.circle(img, (cx, cy), 5, (0, 0, 255), cv2.FILLED)

            # A track can sit inside a band for several frames, so each ID is
            # recorded only the first time it is seen there.
            if _center_in_band(cx, cy, limitsup) and ID not in total_countup:
                total_countup.append(ID)
                # Flash the line green for the frame in which a count happens.
                _draw_limit_line(img, limitsup, (0, 255, 0))

            if _center_in_band(cx, cy, limitsdown) and ID not in total_countdown:
                total_countdown.append(ID)
                _draw_limit_line(img, limitsdown, (0, 255, 0))

        # Write both running totals onto the overlay graphic.
        cv2.putText(img, str(len(total_countup)), (929, 345), cv2.FONT_HERSHEY_PLAIN, 5, (139, 195, 75), 7)
        cv2.putText(img, str(len(total_countdown)), (1192, 345), cv2.FONT_HERSHEY_PLAIN, 5, (50, 50, 230), 7)
        cv2.imshow("Image", img)

        # Press 'q' to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and close windows, even if a frame failed.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 2 persons, 41.9ms
Speed: 0.0ms preprocess, 41.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 46.9ms
Speed: 1.0ms preprocess, 46.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 47.9ms
Speed: 2.0ms preprocess, 47.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 46.9ms
Speed: 1.0ms preprocess, 46.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 42.9ms
Speed: 1.0ms preprocess, 42.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 43.9ms
Speed: 0.0ms preprocess, 43.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 42.9ms
Speed: 1.0ms preprocess, 42.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 43.9ms
Speed: 1.0ms preprocess, 43.9ms inference, 3.0ms postprocess per image at shape (