In [107]:
!pip install ultralytics



In [108]:
from ultralytics import YOLO
import cv2
from google.colab.patches import cv2_imshow

In [109]:
# Load the YOLOv8 weights obtained after transfer learning.
# (The stock 'yolov8n.pt' checkpoint is the alternative the author left commented out.)
model_name = "/content/best_100_epochs_fb.pt"
model = YOLO(model_name)

In [110]:
# Class-ID -> class-name lookup; a name's position in the tuple is its class ID.
vehicles = dict(enumerate(("auto", "bike", "bus", "car", "truck")))

In [111]:
# load test video
video_path = './dashcam_india_clip3.mp4'
cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise when the file is missing or cannot be decoded; it
# only reports isOpened() == False. Fail here with a clear message instead of
# later, when a read returns no frame.
if not cap.isOpened():
  raise OSError(f"Could not open video file: {video_path}")

In [112]:
# read first frame (its shape is used further down to size the output video)
ret, frame = cap.read()
# ret is False when no frame could be grabbed; stop here with a clear message
# rather than failing later on frame.shape.
if not ret:
  raise RuntimeError("Could not read the first frame from the input video")

In [113]:
# set output video format with encodings
# Lower-case 'mp4v' is the tag used by the MP4 container; with upper-case 'MP4V'
# some OpenCV/FFmpeg builds print a warning and fall back to 'mp4v' anyway.
format_fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# get frame rate of input video
# Keep the fractional value (e.g. 29.97): int() would truncate it to 29, which
# skews both the output video timing and the per-frame time interval.
frame_rate = cap.get(cv2.CAP_PROP_FPS)
if frame_rate <= 0:
  # A non-positive FPS would otherwise surface later as a ZeroDivisionError.
  raise ValueError(f"Input video reports an invalid frame rate: {frame_rate}")
print(frame_rate)

# get frame size of input video as (width, height); frame.shape is (rows, cols, ...)
frame_size = (frame.shape[1], frame.shape[0])
print(frame_size)

30
(852, 480)


In [114]:
# Open the writer for the annotated result video, using the codec, frame rate
# and frame size set up earlier in the notebook.
video_result_path = './dashcam_india_clip3_result.mp4'
writer_args = (video_result_path, format_fourcc, frame_rate, frame_size)
cap_result = cv2.VideoWriter(*writer_args)

In [115]:
# seconds elapsed between two consecutive frames (reciprocal of the frame rate)
time_interval = 1.0 / frame_rate

In [116]:
# define a set of colours
import random

# Draw the palette from a dedicated, seeded generator so every run produces the
# same 10 colours, without touching the global `random` state.
COLOUR_SEED = 42
_colour_rng = random.Random(COLOUR_SEED)
colours = [
  (_colour_rng.randint(0, 255), _colour_rng.randint(0, 255), _colour_rng.randint(0, 255))
  for _ in range(10)
]

In [117]:
# Per-track memory: track_id -> bounding-box details from the last frame in which
# that track was seen. Starts empty.
previous_boxes = dict()

In [118]:
# Main loop: track vehicles in every frame, estimate a per-track "speed" from the
# frame-to-frame change in bounding-box area, annotate the frame and append it to
# the result video.

# drawing constants (loop-invariant)
line_width = 3
font_width = 2

ret = True
# read frames; the extra None check keeps an empty frame away from the model if
# this cell is re-run after the video has already been exhausted
while ret and frame is not None:
  # detect and track objects; persist=True tells the tracker that this frame
  # continues the sequence from the previous call
  results = model.track(frame, persist=True)

  # plot results
  result = results[0]
  boxes = result.boxes
  # Each row is [x1, y1, x2, y2, track_id, score, class_id] only when the tracker
  # has assigned IDs. Without IDs (boxes.id is None) the track_id column is
  # missing and the 7-value unpack below would raise ValueError, so such frames
  # are written to the output unannotated.
  tracked_rows = boxes.data.tolist() if boxes.id is not None else []
  for r in tracked_rows:
    # get coordinates, track_id, class_id
    print("Object details: ", r)

    x1, y1, x2, y2, track_id, score, class_id = r
    x1 = int(x1)
    x2 = int(x2)
    y1 = int(y1)
    y2 = int(y2)
    track_id = int(track_id)
    class_id = int(class_id)
    # fall back to a generic label instead of raising KeyError if the model
    # reports a class ID that is not in the vehicles mapping
    class_name = vehicles.get(class_id, f"class_{class_id}")
    # colours are reused cyclically once track IDs exceed the palette size
    colour = colours[track_id % len(colours)]

    # calculate bounding box center and area
    bbox_left = min(x1, x2)
    bbox_top = min(y1, y2)
    bbox_width = abs(x2 - x1)
    bbox_height = abs(y2 - y1)

    x_c = bbox_left + (bbox_width / 2)
    y_c = bbox_top + (bbox_height / 2)
    bbox_area = bbox_width * bbox_height

    # get or initialize previous box information for the track_id; a track seen
    # for the first time is compared with itself, giving a speed of 0
    prev_box = previous_boxes.get(track_id, {"center": (x_c, y_c), "area": bbox_area})

    # estimate speed using size change (pixel area gained or lost per second)
    area_change = bbox_area - prev_box["area"]
    speed = area_change / time_interval

    # log object and speed information
    print("Track ID: ", track_id)
    print("Class: ", class_name)
    print("Speed: ", speed, "pixel sq. / second")

    # display speed in terms of motion units (MU) per second [1 MU = 10000 pxs/s]
    speed_display = speed/10000

    # Update previous box information
    previous_boxes[track_id] = {"center": (x_c, y_c), "area": bbox_area}

    # plot bounding box with speed information (drawn in place on the frame)
    cv2.rectangle(frame, (x1, y1), (x2, y2), colour, line_width)
    cv2.putText(frame, f"{class_name} {speed_display:.2f} MU", (x1+10, y1+25), cv2.FONT_HERSHEY_SIMPLEX, 0.6, colour, font_width)

  #add result frame to result video
  cap_result.write(frame)

  #next frame
  ret, frame = cap.read()


0: 384x640 1 bus, 193.3ms
Speed: 6.2ms preprocess, 193.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Object details:  [455.96539306640625, 178.03988647460938, 558.3260498046875, 288.6855773925781, 1.0, 0.8553217053413391, 2.0]
Track ID:  1
Class:  bus
Speed:  0.0 pixel sq. / second

0: 384x640 1 bus, 141.1ms
Speed: 5.3ms preprocess, 141.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Object details:  [457.80267333984375, 175.22792053222656, 559.2281494140625, 288.7326965332031, 1.0, 0.833881139755249, 2.0]
Track ID:  1
Class:  bus
Speed:  5880.0 pixel sq. / second

0: 384x640 1 bus, 1 truck, 140.1ms
Speed: 5.7ms preprocess, 140.1ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)
Object details:  [462.61956787109375, 173.74472045898438, 560.9232788085938, 288.5044860839844, 1.0, 0.7305957078933716, 2.0]
Track ID:  1
Class:  bus
Speed:  -7680.0 pixel sq. / second
Object details:  [596.7944946289062, 186.89732360839844, 654.202

In [119]:
# Close the input capture first, then the output writer.
for video_handle in (cap, cap_result):
  video_handle.release()