1. Data Preprocessing using OpenCV
(Convert a video into frames)


In [37]:
import cv2

# Load the video file
video_path = "Source/videoplayback.mp4"
cap = cv2.VideoCapture(video_path)

# Fail loudly when the video cannot be opened; otherwise the loop below would
# exit immediately and the cell would report zero frames as if that were fine.
if not cap.isOpened():
    raise RuntimeError(f"Could not open video file: {video_path}")

# Every decoded frame is kept in memory in `frames` for the later cells.
frame_count = 0
frames = []
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # cap.read() reported failure: stop reading.
            break
        frames.append(frame)
        frame_count += 1
finally:
    # Release the capture even if the loop is interrupted or raises.
    cap.release()

print(f"Extracted {frame_count} frames from the video.")


KeyboardInterrupt: 

2. Object Detection using YOLO (Detect vehicles in each frame)

In [31]:
from ultralytics import YOLO

# Load YOLOv8 model
model = YOLO("yolov8n.pt")

# Take the progress total from the list that is actually iterated, so the
# "x/total" message cannot drift from a separately maintained counter.
total_frames = len(frames)

# Detect objects in each frame
detected_frames = []
for idx, frame in enumerate(frames):
    # YOLO inference
    results = model(frame, verbose=False)

    # Annotate frame with detected objects
    annotated_frame = results[0].plot()
    detected_frames.append(annotated_frame)

    print(f"Processed frame {idx + 1}/{total_frames}")

# NOTE: after the loop, `results` still holds the output for the last frame only.
print("Object detection completed.")


Processed frame 1/716
Processed frame 2/716
Processed frame 3/716
Processed frame 4/716
Processed frame 5/716
Processed frame 6/716
Processed frame 7/716
Processed frame 8/716
Processed frame 9/716
Processed frame 10/716
Processed frame 11/716
Processed frame 12/716
Processed frame 13/716
Processed frame 14/716
Processed frame 15/716
Processed frame 16/716
Processed frame 17/716
Processed frame 18/716
Processed frame 19/716
Processed frame 20/716
Processed frame 21/716
Processed frame 22/716
Processed frame 23/716
Processed frame 24/716
Processed frame 25/716
Processed frame 26/716
Processed frame 27/716
Processed frame 28/716
Processed frame 29/716
Processed frame 30/716
Processed frame 31/716
Processed frame 32/716
Processed frame 33/716
Processed frame 34/716
Processed frame 35/716
Processed frame 36/716
Processed frame 37/716
Processed frame 38/716
Processed frame 39/716
Processed frame 40/716
Processed frame 41/716
Processed frame 42/716
Processed frame 43/716
Processed frame 44/7

3. Extracting Bounding Box Widths

In [32]:
# Print the pixel width of every detected box.
# `results` is not created in this cell; it must already exist in the kernel.
for detection in results:
    for det_box in detection.boxes:
        # Coordinates arrive in (x_min, y_min, x_max, y_max) order.
        left, _top, right, _bottom = det_box.xyxy[0].tolist()
        # Width is the horizontal distance between the two x coordinates.
        print(f"Bounding Box Width: {right - left} pixels")


Bounding Box Width: 74.4959716796875 pixels
Bounding Box Width: 67.7984619140625 pixels
Bounding Box Width: 114.96168518066406 pixels
Bounding Box Width: 77.26904296875 pixels
Bounding Box Width: 143.91998291015625 pixels
Bounding Box Width: 53.7501220703125 pixels
Bounding Box Width: 556.792236328125 pixels
Bounding Box Width: 45.3662109375 pixels
Bounding Box Width: 48.8248291015625 pixels
Bounding Box Width: 43.9747314453125 pixels
Bounding Box Width: 44.6851806640625 pixels
Bounding Box Width: 43.746826171875 pixels


4. Calculating the Scale Factor

In [33]:
"""bbox_width is the width of a bounding box in pixels; real_world_width is the width of the object in the real world."""
import numpy as np
def calculate_scale(bbox_width, real_world_width=2.0):
    """Return the scale factor obtained by dividing a real-world width by a pixel width.

    Args:
        bbox_width: Width of a bounding box in pixels. Must be greater than zero.
        real_world_width: Width of the same object in the real world
            (defaults to 2.0). The result is expressed in this unit per pixel.

    Returns:
        real_world_width / bbox_width.

    Raises:
        ValueError: If bbox_width is zero or negative, which would otherwise
            produce a division error, an infinite scale or a negative scale.
    """
    if bbox_width <= 0:
        raise ValueError(f"bbox_width must be positive, got {bbox_width}")
    return real_world_width / bbox_width
# Sample of bounding-box widths in pixels (values are hard-coded here).
bbox_widths = [
    782.13, 196.79, 140.25, 123.45, 63.00,
    32.49, 999.59, 394.61, 84.87,
]

# The median of the sample is the width handed to calculate_scale, which is
# called with its default real_world_width.
bbox_width = np.median(bbox_widths)
scale = calculate_scale(bbox_width)
print(f"Scale factor: {scale} meters per pixel")


Scale factor: 0.0142602495543672 meters per pixel


5. Calibration (Convert pixel distances to meters)

In [34]:
def estimate_road_width(bboxes, scale):
    """Estimate a width from the horizontal spread of a set of bounding boxes.

    The estimate is the distance between the smallest and the largest x
    coordinate found among all boxes, multiplied by ``scale``.

    Args:
        bboxes: Iterable of boxes indexed as [x_min, y_min, x_max, y_max];
            only positions 0 and 2 (the x coordinates) are read.
        scale: Conversion factor applied to the pixel distance
            (meters per pixel in this notebook).

    Returns:
        The horizontal extent covered by the boxes, multiplied by ``scale``.

    Raises:
        ValueError: If ``bboxes`` contains no boxes.
    """
    # Materialise once so one-shot iterators are not consumed halfway through.
    boxes = list(bboxes)
    if not boxes:
        raise ValueError("estimate_road_width needs at least one bounding box")

    # Both x coordinates of every box take part in the min/max search.
    x_positions = [x for bbox in boxes for x in (bbox[0], bbox[2])]
    road_width_pixels = max(x_positions) - min(x_positions)
    return road_width_pixels * scale

# Read only the first frame of the video. The capture is released as soon as
# the read finishes, even if reading raises.
cap = cv2.VideoCapture(video_path)
try:
    ret, frame = cap.read()
finally:
    cap.release()

# Without this check a failed read would be handed to the model unchecked.
if not ret:
    raise RuntimeError(f"Could not read a frame from video: {video_path}")

results = model(frame)

# Collect every detected box as [x_min, y_min, x_max, y_max] with the
# coordinates truncated to integers.
bboxes = [
    [int(coord) for coord in box.xyxy[0].tolist()]
    for result in results
    for box in result.boxes
]

# `scale` (meters per pixel) is the value computed in the scale-factor cell.
# Reusing it avoids a hand-copied literal that would go stale when the
# widths used for the scale change.

# Estimate road width
road_width = estimate_road_width(bboxes, scale)

# Print results
print(f"Bounding Boxes: {bboxes}")
print(f"Estimated Road Width: {road_width} meters")


Bounding Boxes: [[1251, 46, 1384, 169], [1060, 404, 1372, 797], [1251, 48, 1386, 170], [1060, 402, 1372, 797]]
Estimated Road Width: 4.648841354723707 meters


6. Visualization

In [35]:
# Overlay the estimated road width on each annotated frame and preview it.
# The text is drawn onto the arrays stored in `detected_frames`, so the
# overlay is still present when those frames are used afterwards.
try:
    for frame in detected_frames:
        # Add road width text
        cv2.putText(frame, f"Road Width: {road_width:.2f} meters", (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the frame; pressing 'q' stops the preview early.
        cv2.imshow("Frame", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Close the preview window even if the loop is interrupted or raises.
    cv2.destroyAllWindows()


7. Saving Results

In [36]:
# Save video
output_path = "output_video.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")

# Guard against an empty list so the failure is explicit rather than a bare
# IndexError on detected_frames[0].
if not detected_frames:
    raise ValueError("No annotated frames to save; run the detection cell first.")
height, width = detected_frames[0].shape[:2]

# Use the source video's frame rate so the output plays at the original
# speed; fall back to 30 when the rate is unavailable (0 or NaN).
source = cv2.VideoCapture(video_path)
try:
    fps = source.get(cv2.CAP_PROP_FPS)
finally:
    source.release()
if not fps > 0:
    fps = 30

out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
# Check the writer before reporting success: without this, a writer that
# failed to open would go unnoticed and the message below would still print.
if not out.isOpened():
    raise RuntimeError(f"Could not open video writer for: {output_path}")

try:
    for frame in detected_frames:
        out.write(frame)
finally:
    # Release the writer even if a write fails.
    out.release()
print("Processed video saved.")

# Store the road-width estimate next to the video as plain text.
with open("road_width_results.txt", "w", encoding="utf-8") as f:
    f.write(f"Estimated Road Width: {road_width:.2f} meters\n")
print("Results saved.")


Processed video saved.
Results saved.
