In [1]:
# Disable warnings in the notebook to maintain clean output cells
import warnings
warnings.filterwarnings('ignore')

# Import necessary libraries
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import yaml
from PIL import Image
from ultralytics import YOLO
from IPython.display import Video

In [3]:
post_training_files_path = './runs/detect/train_50_epochs_yolov11'
# Construct the path to the best model weights file using os.path.join
best_model_path = os.path.join(post_training_files_path, 'weights/best.pt')

# Load the best model weights into the YOLO model
best_model = YOLO(best_model_path)

In [None]:
import cv2
from ultralytics import YOLO

# Load the fine-tuned YOLO model
model = best_model#YOLO(fine_tuned_model)

# Open the sample video
video_path = './Vehicle_Detection_Image_Dataset/sample_video.mp4'
cap = cv2.VideoCapture(video_path)

# Check if the video capture is opened successfully
if not cap.isOpened():
    print("Error: Unable to open the video.")
    exit()

# Get video properties
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Initialize video writer to save the output
output_path = 'sample_video_inference.avi'
video_writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'), fps, (width, height))

# Perform inference on the video
while True:
    ret, frame = cap.read()
    if not ret:
        print("End of video or unable to read frame.")
        break

    # Perform inference on the current frame
    results = model.predict(source=frame, imgsz=640, conf=0.5)

    # Annotate the frame with detection results
    annotated_frame = results[0].plot(line_width=2)

    # Write the annotated frame to the output video
    video_writer.write(annotated_frame)

    # Display the annotated frame (optional)
    cv2.imshow("Video Inference", annotated_frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
video_writer.release()
cv2.destroyAllWindows()

print(f"Inference completed. Output saved to {output_path}")


0: 384x640 1 Vehicle, 41.6ms
Speed: 1.9ms preprocess, 41.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Vehicle, 30.1ms
Speed: 0.9ms preprocess, 30.1ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Vehicle, 36.2ms
Speed: 1.2ms preprocess, 36.2ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Vehicles, 35.7ms
Speed: 1.1ms preprocess, 35.7ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Vehicle, 28.5ms
Speed: 0.9ms preprocess, 28.5ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Vehicles, 25.7ms
Speed: 1.1ms preprocess, 25.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Vehicles, 37.9ms
Speed: 1.1ms preprocess, 37.9ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Vehicles, 27.6ms
Speed: 1.1ms preprocess, 27.6ms inference, 0.2ms postprocess per image at sha

Object tracking

In [21]:
import cv2
from ultralytics import YOLO
import argparse
import supervision as sv


def parse_arguments() -> argparse.Namespace:
    
    parser = argparse.ArgumentParser(description=" Vehicle speed estimation")
    parser.add_argument(
        '--source video path',
    required=True,
    type=str,
    help='Path to the input video file'
    )
    return parser.parse_args()

if __name__ == "__main__":
    #args = parse_arguments()

    video_info = sv.VideoInfo.from_video_path("./Vehicle_Detection_Image_Dataset/sample_video.mp4")
    # Load the fine-tuned YOLO model
    model = best_model#YOLO(fine_tuned_model)
    
    byte_track = sv.ByteTrack(frame_rate=video_info.fps)
    #thickness = sv.calculate_dynamic_line_thickness(
    #    resolution_wh=video_info.resolution_wh)
    #text_scale = sv.calculate_dynamic_text_scale(resolution_wh=video_info.resolution_wh)
    frame_generator = sv.get_video_frames_generator("./Vehicle_Detection_Image_Dataset/sample_video.mp4")
    bounding_box_annotator = sv.BoundingBoxAnnotator()#text_scale=text_scale)#,thickness=thickness)
    label_annotator = sv.LabelAnnotator()#text_scale=text_scale)
    for frame in frame_generator:
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)
        detections = byte_track.update_with_detections(detections=detections)

        labels =[
            f"ID: {tracker_id}" for tracker_id in detections.tracker_id
        ]
        annotated_frame = frame.copy()
        annotated_frame = bounding_box_annotator.annotate(scene=annotated_frame, detections=detections)
        annotated_frame = label_annotator.annotate(scene=annotated_frame,detections=detections,labels=labels)#text_scale=text_scale, detections=detections)
        cv2.imshow("Vehicle Detection", annotated_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
# Open the sample video
video_path = './Vehicle_Detection_Image_Dataset/sample_video.mp4'
cap = cv2.VideoCapture(video_path)




0: 384x640 5 Vehicles, 38.8ms
Speed: 1.7ms preprocess, 38.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 31.1ms
Speed: 1.1ms preprocess, 31.1ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 Vehicles, 27.4ms
Speed: 1.0ms preprocess, 27.4ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 27.8ms
Speed: 1.2ms preprocess, 27.8ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 Vehicles, 25.0ms
Speed: 1.0ms preprocess, 25.0ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 26.5ms
Speed: 1.1ms preprocess, 26.5ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 27.9ms
Speed: 0.9ms preprocess, 27.9ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 29.1ms
Speed: 1.3ms preprocess, 29.1ms inference, 0.2ms postprocess per image at

Applying perspective transformation

In [9]:
import cv2
from ultralytics import YOLO
import argparse
import supervision as sv
from collections import defaultdict, deque
#SOURCE = np.array([[708,523],[1212,523],[2092,1079],[-192,1079]])

SOURCE = np.array([[472,354],[808,354],[1394,719],[-128,719]])
TARGET_WIDTH = 25
TARGET_HEIGHT = 110

TARGET = np.array(
    [
    [0, 0],
    [TARGET_WIDTH-1, 0],
    [TARGET_WIDTH-1, TARGET_HEIGHT-1],
    [0, TARGET_HEIGHT-1],
    ]
    )



class ViewTransformer:
    def __init__(self, source: np.ndarray, target: np.ndarray):
        source = source.astype(np.float32)
        target = target.astype(np.float32)
        self.m= cv2.getPerspectiveTransform(source, target)

    def transform_points(self, points: np.ndarray) -> np.ndarray:
        if points is None or len(points) == 0:
            print("Warning: No points to transform")
            return np.empty((0, 2))  # or return points directly

        reshape_points = points.reshape(-1, 1, 2).astype(np.float32)
        transformed_points = cv2.perspectiveTransform(reshape_points, self.m)
    
        if transformed_points is None:
            print("Error: perspectiveTransform returned None")
            return np.empty((0, 2))

        return transformed_points.reshape(-1, 2)
    
    '''
    def transform_points(self, points: np.ndarray) -> np.ndarray:
        reshape_points = points.reshape(-1,1,2).astype(np.float32)
        transformed_points = cv2.perspectiveTransform(reshape_points, self.m)
        transformed_points = transformed_points.reshape(-1,2)
        return transformed_points
    '''

def parse_arguments() -> argparse.Namespace:
    
    parser = argparse.ArgumentParser(description=" Vehicle speed estimation")
    parser.add_argument(
        '--source video path',
    required=True,
    type=str,
    help='Path to the input video file'
    )
    return parser.parse_args()

if __name__ == "__main__":
    #args = parse_arguments()

    video_info = sv.VideoInfo.from_video_path("./Vehicle_Detection_Image_Dataset/sample_video.mp4")
    # Load the fine-tuned YOLO model
    model = best_model#YOLO(fine_tuned_model)
    
    byte_track = sv.ByteTrack(frame_rate=video_info.fps)
    #thickness = sv.calculate_dynamic_line_thickness(
    #    resolution_wh=video_info.resolution_wh)
    #text_scale = sv.calculate_dynamic_text_scale(resolution_wh=video_info.resolution_wh)
    frame_generator = sv.get_video_frames_generator("./Vehicle_Detection_Image_Dataset/sample_video.mp4")
    
    polygon_zone = sv.PolygonZone(polygon=SOURCE)
    ViewTransformer = ViewTransformer(source=SOURCE, target=TARGET)
    bounding_box_annotator = sv.BoundingBoxAnnotator()#text_scale=text_scale)#,thickness=thickness)
    label_annotator = sv.LabelAnnotator()#text_scale=text_scale)
    coordinates = defaultdict(lambda: deque(maxlen=video_info.fps))

    for frame in frame_generator:
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)
        detections = detections[polygon_zone.trigger(detections=detections)]
        detections = byte_track.update_with_detections(detections=detections)

        points= detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER) # get the bottom center of the bounding box
        points = ViewTransformer.transform_points(points=points).astype(int)

        labels =[]
        ## Speed Estimation
        for tracker_id, [_,y] in zip(detections.tracker_id, points):
            coordinates[tracker_id].append(y)
            if len(coordinates[tracker_id]) < video_info.fps/2:
                labels.append(f"ID: {tracker_id} Speed: Calculating")
            else:
                coordinate_start = coordinates[tracker_id][0]
                coordinate_end = coordinates[tracker_id][-1]
                distance = coordinate_end - coordinate_start
                time = len(coordinates[tracker_id]) / video_info.fps
                speed = abs(distance / time) # * 3.6
                labels.append(f"ID: {tracker_id} Speed: {speed:.2f} km/h")

        
        #labels =[
        #    f"x:{x}, y:{y}" for x,y in points
        #]
        annotated_frame = frame.copy()
        annotated_frame = bounding_box_annotator.annotate(scene=annotated_frame, detections=detections)
        annotated_frame = sv.draw_polygon(annotated_frame, polygon=SOURCE, color=sv.Color.RED)
        annotated_frame = label_annotator.annotate(scene=annotated_frame,detections=detections,labels=labels)#text_scale=text_scale, detections=detections)
        cv2.imshow("Vehicle Detection", annotated_frame)
        # get video resolution
        video_width = annotated_frame.shape[1]
        video_height = annotated_frame.shape[0]
        print(f"Video Resolution: {video_width}x{video_height}")
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
# Open the sample video
video_path = './Vehicle_Detection_Image_Dataset/sample_video.mp4'
cap = cv2.VideoCapture(video_path)




0: 384x640 5 Vehicles, 37.5ms
Speed: 3.9ms preprocess, 37.5ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 30.5ms
Speed: 0.9ms preprocess, 30.5ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 4 Vehicles, 29.0ms
Speed: 0.8ms preprocess, 29.0ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)




Video Resolution: 1280x720

0: 384x640 5 Vehicles, 27.7ms
Speed: 0.9ms preprocess, 27.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 4 Vehicles, 25.2ms
Speed: 0.9ms preprocess, 25.2ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 21.6ms
Speed: 0.9ms preprocess, 21.6ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 26.5ms
Speed: 0.9ms preprocess, 26.5ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 23.9ms
Speed: 1.0ms preprocess, 23.9ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 27.9ms
Speed: 0.9ms preprocess, 27.9ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 25.7ms
Speed: 1.0ms

Capture Overspeeding vehicles

In [11]:
import cv2
from ultralytics import YOLO
import argparse
import supervision as sv
from collections import defaultdict, deque
import pandas as pd
#SOURCE = np.array([[708,523],[1212,523],[2092,1079],[-192,1079]])

SOURCE = np.array([[472,354],[808,354],[1394,719],[-128,719]])
TARGET_WIDTH = 25
TARGET_HEIGHT = 110

output_path = 'output/'
#df=pd.write_csv(output_path+'speeding_vehicles.csv',index=False)


TARGET = np.array(
    [
    [0, 0],
    [TARGET_WIDTH-1, 0],
    [TARGET_WIDTH-1, TARGET_HEIGHT-1],
    [0, TARGET_HEIGHT-1],
    ]
    )



class ViewTransformer:
    def __init__(self, source: np.ndarray, target: np.ndarray):
        source = source.astype(np.float32)
        target = target.astype(np.float32)
        self.m= cv2.getPerspectiveTransform(source, target)

    def transform_points(self, points: np.ndarray) -> np.ndarray:
        if points is None or len(points) == 0:
            print("Warning: No points to transform")
            return np.empty((0, 2))  # or return points directly

        reshape_points = points.reshape(-1, 1, 2).astype(np.float32)
        transformed_points = cv2.perspectiveTransform(reshape_points, self.m)
    
        if transformed_points is None:
            print("Error: perspectiveTransform returned None")
            return np.empty((0, 2))

        return transformed_points.reshape(-1, 2)
    
    '''
    def transform_points(self, points: np.ndarray) -> np.ndarray:
        reshape_points = points.reshape(-1,1,2).astype(np.float32)
        transformed_points = cv2.perspectiveTransform(reshape_points, self.m)
        transformed_points = transformed_points.reshape(-1,2)
        return transformed_points
    '''

def parse_arguments() -> argparse.Namespace:
    
    parser = argparse.ArgumentParser(description=" Vehicle speed estimation")
    parser.add_argument(
        '--source video path',
    required=True,
    type=str,
    help='Path to the input video file'
    )
    return parser.parse_args()

if __name__ == "__main__":
    #args = parse_arguments()

    video_info = sv.VideoInfo.from_video_path("./Vehicle_Detection_Image_Dataset/sample_video.mp4")
    # Load the fine-tuned YOLO model
    model = best_model#YOLO(fine_tuned_model)
    
    byte_track = sv.ByteTrack(frame_rate=video_info.fps)
    #thickness = sv.calculate_dynamic_line_thickness(
    #    resolution_wh=video_info.resolution_wh)
    #text_scale = sv.calculate_dynamic_text_scale(resolution_wh=video_info.resolution_wh)
    frame_generator = sv.get_video_frames_generator("./Vehicle_Detection_Image_Dataset/sample_video.mp4")
    
    polygon_zone = sv.PolygonZone(polygon=SOURCE)
    ViewTransformer = ViewTransformer(source=SOURCE, target=TARGET)
    bounding_box_annotator = sv.BoundingBoxAnnotator()#text_scale=text_scale)#,thickness=thickness)
    label_annotator = sv.LabelAnnotator()#text_scale=text_scale)
    coordinates = defaultdict(lambda: deque(maxlen=video_info.fps))

    for frame in frame_generator:
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)
        detections = detections[polygon_zone.trigger(detections=detections)]
        detections = byte_track.update_with_detections(detections=detections)

        points= detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER) # get the bottom center of the bounding box
        points = ViewTransformer.transform_points(points=points).astype(int)

        labels =[]
        ## Speed Estimation
        for tracker_id, [_,y] in zip(detections.tracker_id, points):
            #print(detections.tracker_id)
            #print(f"Tracker ID: {tracker_id}, Y Coordinate: {y}, Points: {points}")
            coordinates[tracker_id].append(y)
            #print(f"Coordinates: {coordinates[tracker_id]}")
            if len(coordinates[tracker_id]) < video_info.fps/2:
                labels.append(f"ID: {tracker_id} Speed: Calculating")
            else:
                coordinate_start = coordinates[tracker_id][0]
                coordinate_end = coordinates[tracker_id][-1]
                distance = coordinate_end - coordinate_start
                time = len(coordinates[tracker_id]) / video_info.fps
                speed = abs(distance / time) # * 3.6
                labels.append(f"ID: {tracker_id} Speed: {speed:.2f} km/h")
                
                
                if speed > 55:
                    for i,j in enumerate(detections.tracker_id):
                        if j == tracker_id:
                            #print(f"Vehicle ID: {tracker_id} is speeding!")
                            # get bounding box coordinates
                            x1, y1, x2, y2 = detections.xyxy[i]
                            frame2= frame.copy()
                            # draw bounding box on the frame
                            cv2.rectangle(frame2, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
                            # save the image with bounding box
                            cv2.imwrite(f"{output_path}/speeding_vehicles/speeding_vehicle_{tracker_id}.jpg", frame2)
                            #cv2.imwrite(f"speeding_vehicle_{tracker_id}.jpg", detections.xyxy[i])
                            break 
                    # Get the bounding box coordinates corresponding to the detected vehicle
                    
                

        
        #labels =[
        #    f"x:{x}, y:{y}" for x,y in points
        #]
        annotated_frame = frame.copy()
        annotated_frame = bounding_box_annotator.annotate(scene=annotated_frame, detections=detections)
        annotated_frame = sv.draw_polygon(annotated_frame, polygon=SOURCE, color=sv.Color.RED)
        annotated_frame = label_annotator.annotate(scene=annotated_frame,detections=detections,labels=labels)#text_scale=text_scale, detections=detections)
        cv2.imshow("Vehicle Detection", annotated_frame)
        # get video resolution
        video_width = annotated_frame.shape[1]
        video_height = annotated_frame.shape[0]
        print(f"Video Resolution: {video_width}x{video_height}")
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
# Open the sample video
video_path = './Vehicle_Detection_Image_Dataset/sample_video.mp4'
cap = cv2.VideoCapture(video_path)




0: 384x640 5 Vehicles, 39.1ms
Speed: 3.5ms preprocess, 39.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 30.5ms
Speed: 0.9ms preprocess, 30.5ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 4 Vehicles, 29.2ms
Speed: 1.0ms preprocess, 29.2ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)




Video Resolution: 1280x720

0: 384x640 5 Vehicles, 28.7ms
Speed: 0.8ms preprocess, 28.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 4 Vehicles, 28.5ms
Speed: 1.5ms preprocess, 28.5ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 26.2ms
Speed: 0.8ms preprocess, 26.2ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 26.0ms
Speed: 1.3ms preprocess, 26.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 28.0ms
Speed: 0.9ms preprocess, 28.0ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 23.7ms
Speed: 1.0ms preprocess, 23.7ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)
Video Resolution: 1280x720

0: 384x640 5 Vehicles, 27.6ms
Speed: 1.0ms

In [13]:
print(tracker_id)
print(detections)

60
Detections(xyxy=array([[     405.18,      371.08,      470.15,       431.9],
       [     486.33,      402.36,      548.12,      465.18]], dtype=float32), mask=None, confidence=array([    0.87896,     0.83535], dtype=float32), class_id=array([0, 0]), tracker_id=array([61, 60]), data={'class_name': array(['Vehicle', 'Vehicle'], dtype='<U7')}, metadata={})


Calculate vehicle density in both lanes

In [None]:
import cv2
#cv2.setUseOptimized(True)
from ultralytics import YOLO
import argparse
import supervision as sv
from collections import defaultdict, deque
import pandas as pd
import time
#SOURCE = np.array([[708,523],[1212,523],[2092,1079],[-192,1079]])

SOURCE = np.array([[472,354],[808,354],[1394,719],[-128,719]])
TARGET_WIDTH = 25
TARGET_HEIGHT = 110

LANE_THRESHOLD_X_PIXEL= (930 * 1280) /1920

output_path = 'output/'
#df=pd.write_csv(output_path+'speeding_vehicles.csv',index=False)


TARGET = np.array(
    [
    [0, 0],
    [TARGET_WIDTH-1, 0],
    [TARGET_WIDTH-1, TARGET_HEIGHT-1],
    [0, TARGET_HEIGHT-1],
    ]
    )



class ViewTransformer:
    def __init__(self, source: np.ndarray, target: np.ndarray):
        source = source.astype(np.float32)
        target = target.astype(np.float32)
        self.m= cv2.getPerspectiveTransform(source, target)

    def transform_points(self, points: np.ndarray) -> np.ndarray:
        if points is None or len(points) == 0:
            print("Warning: No points to transform")
            return np.empty((0, 2))  # or return points directly

        reshape_points = points.reshape(-1, 1, 2).astype(np.float32)
        transformed_points = cv2.perspectiveTransform(reshape_points, self.m)
    
        if transformed_points is None:
            print("Error: perspectiveTransform returned None")
            return np.empty((0, 2))

        return transformed_points.reshape(-1, 2)
    
    '''
    def transform_points(self, points: np.ndarray) -> np.ndarray:
        reshape_points = points.reshape(-1,1,2).astype(np.float32)
        transformed_points = cv2.perspectiveTransform(reshape_points, self.m)
        transformed_points = transformed_points.reshape(-1,2)
        return transformed_points
    '''

def parse_arguments() -> argparse.Namespace:
    
    parser = argparse.ArgumentParser(description=" Vehicle speed estimation")
    parser.add_argument(
        '--source video path',
    required=True,
    type=str,
    help='Path to the input video file'
    )
    return parser.parse_args()



if __name__ == "__main__":
    #args = parse_arguments()

    video_info = sv.VideoInfo.from_video_path("./Vehicle_Detection_Image_Dataset/sample_video.mp4")
    # Load the fine-tuned YOLO model
    model = best_model#YOLO(fine_tuned_model)
    
    byte_track = sv.ByteTrack(frame_rate=video_info.fps)
    #thickness = sv.calculate_dynamic_line_thickness(
    #    resolution_wh=video_info.resolution_wh)
    #text_scale = sv.calculate_dynamic_text_scale(resolution_wh=video_info.resolution_wh)
    frame_generator = sv.get_video_frames_generator("./Vehicle_Detection_Image_Dataset/sample_video.mp4")
    
    polygon_zone = sv.PolygonZone(polygon=SOURCE)
    ViewTransformer = ViewTransformer(source=SOURCE, target=TARGET)
    bounding_box_annotator = sv.BoundingBoxAnnotator()#text_scale=text_scale)#,thickness=thickness)
    label_annotator = sv.LabelAnnotator()#text_scale=text_scale)
    coordinates = defaultdict(lambda: deque(maxlen=video_info.fps))

    start_time = time.time()
    frame_count = 0
    fps= 0
    for frame in frame_generator:
        result = model(frame)[0]
        frame_count += 1
        detections = sv.Detections.from_ultralytics(result)
        detections = detections[polygon_zone.trigger(detections=detections)]
        detections = byte_track.update_with_detections(detections=detections)

        points= detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER) # get the bottom center of the bounding box
        points = ViewTransformer.transform_points(points=points).astype(int)

        labels =[]
        ## Speed Estimation
        for tracker_id, [_,y] in zip(detections.tracker_id, points):

            left_lane_count=0
            right_lane_count=0
            
            
            for i,j in enumerate(detections.tracker_id):
                x1, y1, x2, y2 = detections.xyxy[i]
                mid_point_x = int((x1 + x2) / 2)
                #print(f"Mid Point X: {mid_point_x}")
                if mid_point_x < LANE_THRESHOLD_X_PIXEL:
                    left_lane_count+=1
                else:
                    right_lane_count+=1
                #print(f"Left Lane Count: {left_lane_count}, Right Lane Count: {right_lane_count}")
            
            cv2.putText(frame, f"Left Lane Count: {left_lane_count}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Right Lane Count: {right_lane_count}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            
            #print(detections.tracker_id)
            #print(f"Tracker ID: {tracker_id}, Y Coordinate: {y}, Points: {points}")
            coordinates[tracker_id].append(y)
            #print(f"Coordinates: {coordinates[tracker_id][0]}")
            if len(coordinates[tracker_id]) < video_info.fps/2:
                labels.append(f"ID: {tracker_id} Speed: Calculating")
            else:
                coordinate_start = coordinates[tracker_id][0]
                coordinate_end = coordinates[tracker_id][-1]
                distance = coordinate_end - coordinate_start
                time_ = len(coordinates[tracker_id]) / video_info.fps
                speed = abs(distance / time_) # * 3.6
                labels.append(f"ID: {tracker_id} Speed: {speed:.2f} km/h")
                
                
                if speed > 55:
                    for i,j in enumerate(detections.tracker_id):
                        if j == tracker_id:
                            #print(f"Vehicle ID: {tracker_id} is speeding!")
                            # get bounding box coordinates
                            x1, y1, x2, y2 = detections.xyxy[i]
                            frame2= frame.copy()
                            # draw bounding box on the frame
                            cv2.rectangle(frame2, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
                            cv2.putText(frame2, f"Vehicle is overspeeding at Speed: {speed:.2f} km/h", (int(x1), int(y1)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                            # save the image with bounding box
                            cv2.polylines(frame2, [SOURCE], isClosed=True, color=(0, 0, 255), thickness=2)
                            cv2.imwrite(f"{output_path}/speeding_vehicles/speeding_vehicle_{tracker_id}.jpg", frame2)
                            
                            #cv2.imwrite(f"speeding_vehicle_{tracker_id}.jpg", detections.xyxy[i])
                            break 
                    # Get the bounding box coordinates corresponding to the detected vehicle
                    
                

        
        #labels =[
        #    f"x:{x}, y:{y}" for x,y in points
        #]
        annotated_frame = frame.copy()
        annotated_frame = bounding_box_annotator.annotate(scene=annotated_frame, detections=detections)
        annotated_frame = sv.draw_polygon(annotated_frame, polygon=SOURCE, color=sv.Color.RED)
        annotated_frame = label_annotator.annotate(scene=annotated_frame,detections=detections,labels=labels)#text_scale=text_scale, detections=detections)
        
        end_time = time.time()
        time_elapsed = end_time - start_time
    
        if time_elapsed >= 1: # Update and display FPS every second
            fps = frame_count / time_elapsed
            #print(f"Processing FPS: {fps:.2f}")
    
            frame_count = 0
            start_time = time.time()
        cv2.putText(annotated_frame, f"FPS: {fps:.2f}", (800, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        
        cv2.imshow("Vehicle Detection", annotated_frame)
        # get video resolution
        video_width = annotated_frame.shape[1]
        video_height = annotated_frame.shape[0]
        #print(f"Video Resolution: {video_width}x{video_height}")
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
# Open the sample video
video_path = './Vehicle_Detection_Image_Dataset/sample_video.mp4'
cap = cv2.VideoCapture(video_path)




0: 384x640 5 Vehicles, 49.3ms
Speed: 2.6ms preprocess, 49.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 25.9ms
Speed: 0.8ms preprocess, 25.9ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 Vehicles, 21.5ms
Speed: 0.9ms preprocess, 21.5ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)





0: 384x640 5 Vehicles, 26.1ms
Speed: 0.8ms preprocess, 26.1ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 Vehicles, 28.4ms
Speed: 0.9ms preprocess, 28.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 24.1ms
Speed: 1.0ms preprocess, 24.1ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 24.4ms
Speed: 0.9ms preprocess, 24.4ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 25.8ms
Speed: 0.9ms preprocess, 25.8ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 23.7ms
Speed: 0.8ms preprocess, 23.7ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 21.9ms
Speed: 0.8ms preprocess, 21.9ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Vehicles, 24.0ms
Speed: 1.0ms preprocess, 24.0ms inference, 0.2ms postprocess per image at

: 

In [None]:
dir(points)
print(points)
x=int(time.time()

[[19 83]]


AttributeError: 'float' object has no attribute 'time'