## Import libraries

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
from ultralytics import YOLO

## Function to process frames of the video

In [2]:
def process_frame(frame, angle=None, noise_std=180):
    """Augment one video frame: grayscale, mirror, rotate, add Gaussian noise.

    Parameters
    ----------
    frame : numpy.ndarray
        Image to augment. Either a 3-channel BGR array (what
        ``cv2.VideoCapture.read`` yields) or an already single-channel
        2-D array.
    angle : float, optional
        Rotation in degrees about the image centre, passed to
        ``cv2.getRotationMatrix2D``. When omitted, an integer in
        ``[0, 360)`` is drawn with ``np.random.randint``.
    noise_std : float, optional
        Standard deviation of the zero-mean Gaussian noise, expressed in
        8-bit intensity levels. Defaults to 180.

    Returns
    -------
    numpy.ndarray
        Single-channel ``uint8`` image with the same height and width as
        the input.

    Raises
    ------
    ValueError
        If ``frame`` is ``None`` or contains no pixels.
    """
    if frame is None or frame.size == 0:
        raise ValueError("process_frame() needs a non-empty image array")

    # A 2-D array is already grayscale; cvtColor(BGR2GRAY) would reject it.
    gray = frame if frame.ndim == 2 else cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # flipCode=1 mirrors around the vertical axis (horizontal flip).
    flipped = cv2.flip(gray, 1)

    # Rotate about the centre, keeping the original canvas size.
    if angle is None:
        angle = np.random.randint(0, 360)
    rows, cols = flipped.shape[:2]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), float(angle), 1)
    rotated = cv2.warpAffine(flipped, M, (cols, rows))

    # Draw the noise as signed floats and clip only after adding it. Filling
    # a uint8 buffer instead would clamp every negative sample to 0, so the
    # "noise" could only ever brighten the image. Using NumPy's RNG for both
    # the angle and the noise also lets a single np.random.seed() call make
    # the augmentation reproducible.
    noise = np.random.normal(0.0, noise_std, rotated.shape)
    noisy_frame = np.clip(np.rint(rotated + noise), 0, 255).astype(np.uint8)

    return noisy_frame

## Process the video

In [3]:
# Decode the source video frame by frame, augment every frame with
# process_frame(), and store each result as a numbered JPEG.

# cv2.imwrite() does not create missing folders; it just returns False,
# so make sure the destination exists before the loop starts.
os.makedirs('processed_images', exist_ok=True)

video = cv2.VideoCapture('Intersection.mp4')
if not video.isOpened():
    raise FileNotFoundError("Could not open video file 'Intersection.mp4'")

frame_count = 0

try:
    while True:
        ret, frame = video.read()

        # read() reports ret == False once no further frame can be decoded.
        if not ret:
            break

        # Process the frame
        processed_frame = process_frame(frame)

        # Save the processed frame as an image file; a False return means
        # nothing was written, so surface it instead of continuing silently.
        output_path = f'processed_images/frame_{frame_count}.jpg'
        if not cv2.imwrite(output_path, processed_frame):
            raise IOError(f'Could not write {output_path}')

        frame_count += 1
finally:
    # Free the decoder even if processing or writing fails part-way.
    video.release()

## Object detection

In [4]:
def detect_objects(output_path, video_path='Intersection.mp4', model_name="yolov8x.pt"):
    """Run YOLO detection on every frame of a video and log the detections.

    One JSON object is written per detected object, each on its own line,
    with the keys ``FrameID`` (0-based frame index), ``ObjectClass``
    (class name), ``BoundingBox`` (``[x1, y1, x2, y2]`` taken from
    ``box.xyxy``) and ``Confidence``.

    Parameters
    ----------
    output_path : str
        File that receives the detection records. It is opened in write
        mode, so any existing content is replaced.
    video_path : str, optional
        Video to analyse. Defaults to ``'Intersection.mp4'``.
    model_name : str, optional
        Weights passed to ``YOLO(...)``. Defaults to ``"yolov8x.pt"``.

    Raises
    ------
    FileNotFoundError
        If the video cannot be opened.
    """
    # Open the video first so a wrong path fails before the model is loaded.
    video_capture = cv2.VideoCapture(video_path)
    if not video_capture.isOpened():
        video_capture.release()
        raise FileNotFoundError(f"Could not open video file {video_path!r}")

    try:
        # Load the YOLOv8 model.
        model = YOLO(model_name)

        # The context manager closes the file even if inference raises.
        with open(output_path, "w") as json_file:
            frame_number = 0
            while True:
                # Read the next frame; stop when none can be decoded.
                success, frame = video_capture.read()
                if not success:
                    break

                # A single image in -> a one-element list of results out.
                detection = model.predict(frame)[0]

                # Emit one record per detected object.
                for i in range(len(detection.boxes)):
                    box = detection.boxes[i]
                    # box.cls holds the class id as a float; cast it to
                    # look up the integer-keyed names mapping explicitly.
                    class_name = detection.names[int(box.cls[0].item())]
                    bounding_box = box.xyxy[0].tolist()
                    confidence = box.conf[0].item()

                    json_file.write(json.dumps({
                        "FrameID": frame_number,
                        "ObjectClass": class_name,
                        "BoundingBox": bounding_box,
                        "Confidence": confidence
                    }) + "\n")

                frame_number += 1
    finally:
        # Always hand the decoder back, on success or failure.
        video_capture.release()

In [5]:
# The path to the output file. detect_objects() writes one JSON object per
# line (one line per detected object), so the file holds a sequence of JSON
# records rather than a single JSON document.
# Note: this rebinds the notebook-level name `output_path`, which the
# frame-export cell above also assigns.
output_path = "Detected_information_of_objects.json"

# Run detection over the video and save the per-object records to output_path.
detect_objects(output_path)

Downloading https:\github.com\ultralytics\assets\releases\download\v0.0.0\yolov8x.pt to yolov8x.pt...
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 131M/131M [05:16<00:00, 433kB/s]

0: 384x640 2 persons, 18 cars, 2 motorcycles, 1 bus, 1 truck, 3 traffic lights, 851.0ms
Speed: 8.0ms preprocess, 851.0ms inference, 19.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 18 cars, 1 motorcycle, 1 bus, 1 truck, 3 traffic lights, 737.7ms
Speed: 3.0ms preprocess, 737.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 18 cars, 1 motorcycle, 1 bus, 1 truck, 3 traffic lights, 786.5ms
Speed: 2.2ms preprocess, 786.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 17 cars, 1 motorcycle, 1 bus, 1 truck, 3 traffic lights, 792.5ms
Speed: 2.0ms preprocess, 792.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 perso

In [10]:
# Live preview: run YOLOv8 on each frame and show the annotated result in
# an OpenCV window until the video ends or 'q' is pressed.
model = YOLO('yolov8n.pt')

# Open the video file
video_path = "Intersection.mp4"
cap = cv2.VideoCapture(video_path)

try:
    # Loop through the video frames
    while cap.isOpened():
        # Read a frame from the video
        success, frame = cap.read()

        # Stop once no further frame can be decoded.
        if not success:
            break

        # Run YOLOv8 inference on the frame
        results = model(frame)

        # Draw the detections onto a copy of the frame
        annotated_frame = results[0].plot()

        # Display the annotated frame
        cv2.imshow("YOLOv8 Inference", annotated_frame)

        # waitKey also services the GUI event loop; quit when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Runs on normal exit, on 'q', and on a kernel interrupt or error, so
    # the capture handle and the preview window are never left open.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 16 cars, 78.4ms
Speed: 3.0ms preprocess, 78.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 cars, 102.2ms
Speed: 3.0ms preprocess, 102.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 15 cars, 89.3ms
Speed: 2.0ms preprocess, 89.3ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 cars, 76.6ms
Speed: 2.0ms preprocess, 76.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 cars, 79.1ms
Speed: 2.0ms preprocess, 79.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 16 cars, 82.5ms
Speed: 3.5ms preprocess, 82.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 16 cars, 78.9ms
Speed: 3.0ms preprocess, 78.9ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 17 cars, 77.8ms
Speed: 2.5ms preprocess, 77.8ms inference, 1.5ms postprocess per image at s