In [9]:
from ultralytics import YOLO
import cv2
import numpy as np
from tqdm import tqdm
from ultralytics.utils.plotting import Annotator

In [10]:
model = YOLO('yolov8n.pt')
classes = model.names
classes

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

In [11]:
roi_list = []

In [12]:
def selectROI(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        temp = [x,y]
        roi_list.append(temp)
        print(temp)

In [13]:
cv2.namedWindow('ROI')
cv2.setMouseCallback('ROI', selectROI)
cap = cv2.VideoCapture('Nr_ABVMCRI_Gate_FIX_1.mp4')
roi_list = []
while len(roi_list) < 2:
    ret,frame=cap.read()
    cv2.putText(frame,'SELECT ROI',(100,100),cv2.FONT_HERSHEY_SIMPLEX, 1,(0,0,255),4)
    if not ret:
        break
    cv2.imshow("ROI",frame)
    if cv2.waitKey(0)&0xFF==27:
        break
cap.release()
cv2.destroyAllWindows()

[705, 150]
[644, 396]
[605, 537]
[553, 781]
[528, 1037]
[1914, 1040]
[1619, 759]
[1446, 611]
[1305, 495]
[1202, 400]
[1086, 336]
[1015, 252]
[945, 191]
[864, 181]
[901, 233]
[975, 311]
[1059, 394]
[1197, 521]
[1334, 624]
[1458, 748]
[1655, 950]
[928, 910]
[780, 316]
[758, 203]
[754, 178]
[703, 174]


In [17]:
cv2.namedWindow('Main')
cap = cv2.VideoCapture('Nr_ABVMCRI_Gate_FIX_1.mp4')

avg_list = []
count = 0
while True:
    ret,frame=cap.read()
    if not ret:
        break
    count += 1
    if count % 3 != 0:
        continue
    # rois = []
    max = [0,0]
    min = [10000,10000]
    for i in range(len(roi_list)-1):
        if roi_list[i][0] > max[0]:
            max[0] = roi_list[i][0]
        if roi_list[i][1] > max[1]:
            max[1] = roi_list[i][1]
        if roi_list[i][0] < min[0]:
            min[0] = roi_list[i][0]
        if roi_list[i][1] < min[1]:
            min[1] = roi_list[i][1]
    frame_cropped = frame[min[1]:max[1],min[0]:max[0]]
    roi_corners = np.array([roi_list],dtype=np.int32)
    mask = np.zeros(frame.shape,dtype=np.uint8)
    mask.fill(255)
    channel_count = frame.shape[2]
    ignore_mask_color = (255,)*channel_count
    cv2.fillPoly(mask,roi_corners,0)
    mask_cropped = mask[min[1]:max[1],min[0]:max[0]]
    roi = cv2.bitwise_or(frame_cropped,mask_cropped)

    #roi = frame[roi_list[0][1]:roi_list[1][1],roi_list[0][0]:roi_list[1][0]]
    results = model.predict(roi)
    for r in results:
        boxes = r.boxes
        counter = 0
        for box in boxes:
            counter += 1
            name = classes[box.cls.numpy()[0]]
            conf = str(round(box.conf.numpy()[0],2))
            text = name+conf
            bbox = box.xyxy[0].numpy()
            cv2.rectangle(frame,(int(bbox[0])+min[0],int(bbox[1])+min[1]),(int(bbox[2])+min[0],int(bbox[3])+min[1]),(0,255,0),2)
            cv2.putText(frame,text,(int(bbox[0])+min[0],int(bbox[1])+min[1]-5),cv2.FONT_HERSHEY_SIMPLEX, 0.4,(0,0,255),2)
    stats = str(counter)
    cv2.putText(frame,stats,(min[0],min[1]),cv2.FONT_HERSHEY_SIMPLEX, 1,(0,0,0),4)
    if counter >= 5:
        cv2.putText(frame,'!!CONGESTION MORE THAN '+str(counter)+' Objects',(min[0]+20,min[1]+20),cv2.FONT_HERSHEY_SIMPLEX, 1,(0,0,0),4)
    cv2.polylines(frame,roi_corners,True,(255,0,0),2)
    cv2.putText(frame,'Objects in the Regions of Interest',(100,100),cv2.FONT_HERSHEY_SIMPLEX, 1,(0,0,255),4)
    cv2.imshow('Main',frame)
    if cv2.waitKey(1)&0xFF==27:
         break
cap.release()
cv2.destroyAllWindows()


0: 416x640 4 persons, 1 bus, 107.0ms
Speed: 2.0ms preprocess, 107.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 5 persons, 1 bus, 105.0ms
Speed: 2.0ms preprocess, 105.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 7 persons, 1 bus, 92.0ms
Speed: 2.0ms preprocess, 92.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 8 persons, 1 bus, 98.0ms
Speed: 2.0ms preprocess, 98.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 7 persons, 1 bus, 101.0ms
Speed: 2.0ms preprocess, 101.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 7 persons, 1 bus, 102.0ms
Speed: 3.0ms preprocess, 102.0ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 10 persons, 1 bus, 99.0ms
Speed: 3.0ms preprocess, 99.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 10 persons, 1 bus, 100.0ms
Speed: 3.0ms p

KeyboardInterrupt: 