In [73]:
import numpy as np
import cv2
import time

def main():
    # Define video file paths
    input_video_path = '/content/traffic_4.mp4'
    output_video_path = '/content/output traffic-cars.mp4'

    # Load YOLO
    labels, network, layers_names_output = load_yolo()

    # Open video file
    video = cv2.VideoCapture(input_video_path)
    if not video.isOpened():
        print("Error: Could not open video.")
        return

    # Prepare video writer
    writer = None
    h, w = None, None

    # Processing loop
    frame_count = 0
    total_time = 0
    while True:
        ret, frame = video.read()
        if not ret:
            break

        if w is None or h is None:
            h, w = frame.shape[:2]

        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        network.setInput(blob)
        start_time = time.time()
        output_from_network = network.forward(layers_names_output)
        end_time = time.time()

        frame_count += 1
        total_time += end_time - start_time
        print(f'Frame {frame_count} took {end_time - start_time:.5f} seconds')

        bounding_boxes, confidences, classIDs = process_detections(output_from_network, frame.shape[1], frame.shape[0])

        results = cv2.dnn.NMSBoxes(bounding_boxes, confidences, 0.5, 0.3)

        if len(results) > 0:
            for i in results.flatten():
                x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
                box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]
                color = colours[classIDs[i]].tolist()

                cv2.rectangle(frame, (x_min, y_min), (x_min + box_width, y_min + box_height), color, 2)
                text = f'{labels[classIDs[i]]}: {confidences[i]:.4f}'
                cv2.putText(frame, text, (x_min, y_min - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        if writer is None:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(output_video_path, fourcc, 30, (frame.shape[1], frame.shape[0]), True)

        writer.write(frame)

    video.release()
    writer.release()

    print(f'\nTotal number of frames: {frame_count}')
    print(f'Total amount of time: {total_time:.5f} seconds')
    print(f'FPS: {frame_count / total_time:.1f}')

def load_yolo():
    labels_path = '/content/coco.names'
    config_path = '/content/yolov3-tiny.cfg'
    weights_path = '/content/yolov3-tiny.weights'

    with open(labels_path) as f:
        labels = [line.strip() for line in f]

    network = cv2.dnn.readNetFromDarknet(config_path, weights_path)
    layers_names_all = network.getLayerNames()
    out_layers = network.getUnconnectedOutLayers()

    if isinstance(out_layers, np.ndarray):
        layers_names_output = [layers_names_all[i - 1] for i in out_layers.flatten()]
    else:
        layers_names_output = [layers_names_all[i[0] - 1] for i in out_layers]

    global colours
    colours = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')

    return labels, network, layers_names_output

def process_detections(output_from_network, frame_width, frame_height):
    bounding_boxes = []
    confidences = []
    classIDs = []

    for result in output_from_network:
        for detected_objects in result:
            scores = detected_objects[5:]
            class_current = np.argmax(scores)
            confidence_current = scores[class_current]

            if confidence_current > 0.5:
                box_current = detected_objects[0:4] * np.array([frame_width, frame_height, frame_width, frame_height])
                x_center, y_center, box_width, box_height = box_current
                x_min = int(x_center - (box_width / 2))
                y_min = int(y_center - (box_height / 2))

                bounding_boxes.append([x_min, y_min, int(box_width), int(box_height)])
                confidences.append(float(confidence_current))
                classIDs.append(class_current)

    return bounding_boxes, confidences, classIDs

if __name__ == "__main__":
    main()


Frame 1 took 0.40904 seconds
Frame 2 took 0.09751 seconds
Frame 3 took 0.13250 seconds
Frame 4 took 0.12491 seconds
Frame 5 took 0.10900 seconds
Frame 6 took 0.10077 seconds
Frame 7 took 0.10190 seconds
Frame 8 took 0.09563 seconds
Frame 9 took 0.12914 seconds
Frame 10 took 0.09959 seconds
Frame 11 took 0.10277 seconds
Frame 12 took 0.12024 seconds
Frame 13 took 0.10394 seconds
Frame 14 took 0.10475 seconds
Frame 15 took 0.10357 seconds
Frame 16 took 0.11025 seconds
Frame 17 took 0.10882 seconds
Frame 18 took 0.10075 seconds
Frame 19 took 0.10642 seconds
Frame 20 took 0.12835 seconds
Frame 21 took 0.10915 seconds
Frame 22 took 0.09920 seconds
Frame 23 took 0.11644 seconds
Frame 24 took 0.09796 seconds
Frame 25 took 0.10200 seconds
Frame 26 took 0.10843 seconds
Frame 27 took 0.12954 seconds
Frame 28 took 0.09614 seconds
Frame 29 took 0.09947 seconds
Frame 30 took 0.10672 seconds
Frame 31 took 0.11533 seconds
Frame 32 took 0.09521 seconds
Frame 33 took 0.10446 seconds
Frame 34 took 0.094

In [67]:
import cv2
import numpy as np
from google.colab.patches import cv2_imshow

# Load YOLO
net = cv2.dnn.readNet('/content/yolov3-tiny.weights', '/content/yolov3-tiny.cfg')
layer_names = net.getLayerNames()
output_layers_indices = net.getUnconnectedOutLayers()

# Handle layer indices
output_layers = [layer_names[i - 1] for i in output_layers_indices.flatten()]

# Load video file
input_video_path = '/content/traffic_4.mp4'
output_video_path = '/content/output_video.mp4'
cap = cv2.VideoCapture(input_video_path)

# Get video properties
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Initialize VideoWriter
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# Check type of `out`
print(f'Type of out before processing: {type(out)}')

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Prepare the frame for YOLO
    height, width, channels = frame.shape
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Process YOLO outputs
    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            if detection.ndim == 2:  # Ensure detection is 2D
                for obj in detection:
                    if obj.size >= 85:  # YOLOv3 output size
                        scores = obj[5:]
                        class_id = np.argmax(scores)
                        confidence = scores[class_id]
                        if confidence > 0.5:
                            center_x = int(obj[0] * width)
                            center_y = int(obj[1] * height)
                            w = int(obj[2] * width)
                            h = int(obj[3] * height)
                            x = int(center_x - w / 2)
                            y = int(center_y - h / 2)
                            boxes.append([x, y, w, h])
                            confidences.append(float(confidence))
                            class_ids.append(class_id)

    # Apply non-max suppression to remove duplicate boxes
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    if len(indices) > 0:
        for i in indices.flatten():
            box = boxes[i]
            x, y, w, h = box
            label = str(class_ids[i])
            confidence = confidences[i]
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
            cv2.putText(frame, f'{label} {confidence:.2f}', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        out.write(frame)

# Check type of `out` before releasing
print(f'Type of out before releasing: {type(out)}')

# Release the capture and writer properly

Type of out before processing: <class 'cv2.VideoWriter'>
Type of out before releasing: <class 'numpy.ndarray'>


In [69]:
import cv2

# Initialize VideoCapture and VideoWriter
cap = cv2.VideoCapture('/content/traffic_4.mp4')
out = cv2.VideoWriter('/content/output_video.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 20.0, (640, 480))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Process the frame (e.g., write it to the output video)
    out.write(frame)

# Release the VideoCapture and VideoWriter objects
cap.release()
out.release()
cv2.destroyAllWindows()


In [70]:
output_cap = cv2.VideoCapture(output_video_path)
for _ in range(10):  # Display the first 10 frames
    ret, frame = output_cap.read()
    if not ret:
        break
    cv2_imshow(frame)

output_cap.release()
