In [None]:
import torch
import torchvision
import cv2
import numpy as np
from sort.sort import *
from deep_sort_realtime.deepsort_tracker import DeepSort

In [None]:
# Pick the compute device once: CUDA when it is usable, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Faster R-CNN (ResNet-50 + FPN) with pretrained weights, moved to the chosen
# device and put into evaluation mode.
# NOTE(review): `pretrained=True` is deprecated in torchvision >= 0.13 in
# favour of `weights=...`; kept as-is so older installs keep working.
mdl = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
mdl = mdl.to(device)
mdl.eval()

# Tracker used by the video loop to turn per-frame detections into tracks.
object_tracker = DeepSort(
    max_iou_distance=1,
    max_age=200,
    nms_max_overlap=5,
)



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [None]:
# Class names for the detector's integer labels: a label id `i` returned by
# the model is looked up as COCO_INSTANCE_CATEGORY_NAMES[i] (see detect_cars),
# so the position of every entry matters and must not be changed.
# NOTE(review): the 'N/A' entries look like placeholders for label ids the
# model never emits — confirm against the torchvision weights' category list.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

In [None]:
def detect_cars(img, model, threshold=0.5):
    """Detect cars in an OpenCV frame, draw them on it, and return them.

    Args:
        img: Colour image in OpenCV's BGR channel order (e.g. a frame read
            from ``cv2.VideoCapture``); assumed to be an H x W x 3 uint8
            array. It is annotated IN PLACE with a green rectangle and a
            "Car" label for every detection that is returned.
        model: Detection model called as ``model([tensor])`` and expected to
            return a list whose first item is a dict with ``'boxes'``,
            ``'labels'`` and ``'scores'`` tensors.
        threshold: Only detections with a score strictly greater than this
            value are kept.

    Returns:
        A list of ``([x1, y1, x2, y2], score)`` tuples (corner coordinates in
        pixels, Python floats), in the order the model produced them, one per
        detection labelled 'car' that passed the threshold.
    """
    # OpenCV delivers BGR while torchvision detection models expect RGB.
    # cvtColor also returns a fresh contiguous array, so `img` itself is only
    # touched by the drawing calls below.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Send the input to whatever device the model lives on rather than
    # depending on a notebook-level global being in sync with it.
    model_device = next(model.parameters()).device
    pic = torchvision.transforms.ToTensor()(rgb).to(model_device)

    with torch.no_grad():
        res = model([pic])

    prediction = res[0]
    boxes = prediction['boxes'].cpu().tolist()
    scores = prediction['scores'].cpu().tolist()
    labels = prediction['labels'].cpu().tolist()

    dts = []
    for box, score, label in zip(boxes, scores, labels):
        # Keep confident 'car' detections only.
        if COCO_INSTANCE_CATEGORY_NAMES[label] != 'car' or not score > threshold:
            continue

        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, "Car", (x1 - 10, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        dts.append((box, score))

    return dts

In [None]:
def update_track(tracks, c, IDs):
    """Register confirmed tracks that have not been seen before.

    Args:
        tracks: Iterable of track objects exposing ``is_confirmed()`` and a
            ``track_id`` attribute.
        c: Running count of distinct confirmed tracks so far.
        IDs: List of track ids already counted. It is extended IN PLACE with
            every newly seen id.

    Returns:
        ``(IDs, c)`` — the same list object that was passed in and the count
        increased by one for each id appended during this call.
    """
    for t in tracks:
        # Unconfirmed tracks are skipped and do not contribute to the count.
        if not t.is_confirmed():
            continue

        t_id = t.track_id
        if t_id not in IDs:
            c += 1
            IDs.append(t_id)

    return IDs, c

# Count distinct cars in a video: detect cars in every frame, hand them to the
# tracker, and count each confirmed track id the first time it appears.
VIDEO_PATH = './tst.mp4'
SCORE_THRESHOLD = 0.99  # minimum detector score for a car to reach the tracker

cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    # Fail loudly instead of finishing silently with a count of 0.
    cap.release()
    raise RuntimeError(f'Could not open video: {VIDEO_PATH}')

IDs = []
c = 0

# NOTE(review): `object_tracker` is created in an earlier cell; presumably it
# keeps its tracks between runs, so re-create it before re-running this cell.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        dts = detect_cars(frame, mdl, SCORE_THRESHOLD)

        # detect_cars returns corner boxes ([x1, y1, x2, y2], score), whereas
        # DeepSort.update_tracks expects ([left, top, width, height],
        # confidence, class) — convert before tracking.
        raw_detections = [
            ([x1, y1, x2 - x1, y2 - y1], score, 'car')
            for (x1, y1, x2, y2), score in dts
        ]

        tracks = object_tracker.update_tracks(raw_detections, frame=frame)
        IDs, c = update_track(tracks, c, IDs)

        cv2.putText(frame, f'c: {int(c)}', (20, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)
        cv2.imshow('result', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()