In [1]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
input_path = '/content/drive/MyDrive/Driving_OD/driving_ai.mp4'
output_path = '/content/drive/MyDrive/Driving_OD/output_detected.mp4'

In [3]:
!pip install ultralytics opencv-python




In [5]:
import cv2
import numpy as np
from ultralytics import YOLO



video_path = '/content/drive/MyDrive/Driving_OD/driving_ai.mp4'
output_path = '/content/drive/MyDrive/Driving_OD/output_detected.mp4'

# Load YOLOv8 COCO model
yolo_model = YOLO('yolov8s.pt')

cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))

skip_labels = ['kite']  # Add any other unwanted labels here

while True:
    ret, frame = cap.read()
    if not ret:
        break
    # YOLO inference
    results = yolo_model.predict(frame, save=False)
    for r in results:
        boxes = r.boxes.xyxy
        labels = r.boxes.cls
        names = r.names
        for box, cls in zip(boxes, labels):
            label = names[int(cls)] if names else str(cls)
            if label in skip_labels:
                continue  # Skip drawing this label/class
            x1, y1, x2, y2 = [int(b) for b in box]
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.putText(frame, label, (x1, max(15, y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
    out.write(frame)

cap.release()
out.release()
print("Processed video saved to", output_path)





0: 384x640 2 persons, 1 car, 463.1ms
Speed: 5.7ms preprocess, 463.1ms inference, 6.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 car, 421.7ms
Speed: 4.5ms preprocess, 421.7ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 car, 423.7ms
Speed: 4.8ms preprocess, 423.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 car, 508.7ms
Speed: 4.4ms preprocess, 508.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 car, 654.5ms
Speed: 4.4ms preprocess, 654.5ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 cars, 656.0ms
Speed: 4.4ms preprocess, 656.0ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 cars, 661.4ms
Speed: 5.7ms preprocess, 661.4ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 car, 659.5ms
Speed: 5.7

In [7]:
import cv2
import numpy as np

# Paths to your videos
video_path_raw = '/content/drive/MyDrive/Driving_OD/driving_ai.mp4'
video_path_processed = '/content/drive/MyDrive/Driving_OD/output_detected.mp4'
output_path = '/content/drive/MyDrive/Driving_OD_merge.mp4'

# Open both videos
cap1 = cv2.VideoCapture(video_path_raw)
cap2 = cv2.VideoCapture(video_path_processed)

fps = int(cap1.get(cv2.CAP_PROP_FPS))
width1 = int(cap1.get(cv2.CAP_PROP_FRAME_WIDTH))
height1 = int(cap1.get(cv2.CAP_PROP_FRAME_HEIGHT))
width2 = int(cap2.get(cv2.CAP_PROP_FRAME_WIDTH))
height2 = int(cap2.get(cv2.CAP_PROP_FRAME_HEIGHT))
height = max(height1, height2)
output_width = width1 + width2

print(f"Raw video: {width1}x{height1}, Processed: {width2}x{height2}, Output: {output_width}x{height}")

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (output_width, height))

if not out.isOpened():
    print("Failed to open VideoWriter! Check codec and path.")
else:
    print("VideoWriter initialized correctly.")

font = cv2.FONT_HERSHEY_SIMPLEX
frame_count = 0

while cap1.isOpened() and cap2.isOpened():
    ret1, frame1 = cap1.read()
    ret2, frame2 = cap2.read()
    if not ret1 or not ret2:
        break

    # Resize frames to match heights
    if frame1.shape[0] != height:
        frame1 = cv2.resize(frame1, (width1, height))
    if frame2.shape[0] != height:
        frame2 = cv2.resize(frame2, (width2, height))

    # Add labels
    cv2.putText(frame1, 'Raw Footage', (30, 50), font, 1.5, (0, 255, 255), 3, cv2.LINE_AA)
    cv2.putText(frame2, 'Detected Objects using Yolo', (30, 50), font, 1.5, (0, 255, 0), 3, cv2.LINE_AA)

    # Combine and check resulting shape
    combined = np.hstack((frame1, frame2))
    if combined.shape[1] != output_width or combined.shape[0] != height:
        print(f"Frame size mismatch: got {combined.shape}, expected ({height}, {output_width})")
        combined = cv2.resize(combined, (output_width, height))

    out.write(combined)
    frame_count += 1
    if frame_count % 100 == 0:
        print(f'Processed {frame_count} frames...')

cap1.release()
cap2.release()
out.release()
cv2.destroyAllWindows()

print("Processing completed. Check your Drive folder for 'Driving_OD_merge.mp4'.")


Raw video: 848x478, Processed: 848x478, Output: 1696x478
VideoWriter initialized correctly.
Processed 100 frames...
Processed 200 frames...
Processed 300 frames...
Processed 400 frames...
Processed 500 frames...
Processed 600 frames...
Processed 700 frames...
Processed 800 frames...
Processed 900 frames...
Processed 1000 frames...
Processing completed. Check your Drive folder for 'Driving_OD_merge.mp4'.


In [10]:
import cv2
import numpy as np

video_path = '/content/drive/MyDrive/Driving_OD_merge.mp4'
output_path = '/content/drive/MyDrive/Driving_OD_merge_banner.mp4'

scroll_text = ("AI in action! Left: Pure driving reality. Right: YOLOv8 detects, tracks, analyzes every move. "
               "Welcome to the future of smart vision! -- Learn with Kanayi's AI Lab")
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1
font_color = (0, 255, 255)
thickness = 2
banner_height = 60

# Open video
cap = cv2.VideoCapture(video_path)
fps = int(cap.get(cv2.CAP_PROP_FPS))
output_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
output_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

(text_w, text_h), _ = cv2.getTextSize(scroll_text, font, font_scale, thickness)
scroll_distance = 2 * (output_width + text_w)
scroll_speed = max(1, int(scroll_distance / total_frames))  # Scroll exactly 2 times

print(f"Calculated scroll speed: {scroll_speed} px/frame")

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (output_width, output_height + banner_height))

scroll_x = output_width  # Initial scroll position

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Create bottom banner
    banner = np.zeros((banner_height, output_width, 3), dtype=np.uint8)
    cv2.putText(banner, scroll_text, (scroll_x, banner_height - 20), font, font_scale, font_color, thickness, cv2.LINE_AA)

    # Stack frame and banner
    combined_with_banner = np.vstack((frame, banner))

    # Update scroll position for the ticker
    scroll_x -= scroll_speed
    if scroll_x < -text_w:
        scroll_x = output_width

    out.write(combined_with_banner)

cap.release()
out.release()
cv2.destroyAllWindows()

print("Bannered video saved! Path is:", output_path)


Calculated scroll speed: 7 px/frame
Bannered video saved! Path is: /content/drive/MyDrive/Driving_OD_merge_banner.mp4


In [11]:
from google.colab import files

# Provide the path to the bannered video file in your Google Drive
files.download(output_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>